Files
michaelschiemer/deployment/ansible/playbooks/monitor-workflow-performance.yml
Michael Schiemer 72757954dc
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 33s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Successful in 32s
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Composer Security Audit (push) Has been skipped
🧊 Warm Docker Build Cache / Refresh Buildx Caches (push) Failing after 11s
📊 Monitor Workflow Performance / Monitor Workflow Performance (push) Failing after 20s
feat: optimize workflows with repository artifacts and add performance monitoring
- Use repository artifacts in test and build jobs (reduces 2-3 git clones per run)
- Add comprehensive workflow performance monitoring system
- Add monitoring playbook and Gitea workflow for automated metrics collection
- Add monitoring documentation and scripts

Optimizations:
- Repository artifact caching: the "changes" job uploads the repository as an artifact; the test/build jobs download it
- Reduces Gitea load by eliminating redundant git operations
- Faster job starts (artifact download is typically faster than git clone)

Monitoring:
- Script for local workflow metrics collection via Gitea API
- Ansible playbook for server-side system and Gitea metrics
- Automated Gitea workflow that runs every 6 hours
- Tracks workflow durations, system load, Gitea API response times, and more
2025-11-09 04:03:51 +01:00

193 lines
8.1 KiB
YAML

---
# Monitor Workflow Performance
# Collects comprehensive metrics about workflow execution, Gitea load, and system resources
#
# Play-level notes:
#   - Facts are gathered because tasks in this play read ansible_date_time.
#   - No privilege escalation is requested.
#   - stacks_base_path and gitea_domain are referenced but not defined in the
#     vars below; presumably they come from inventory/group vars (confirm).
- name: Monitor Workflow Performance
  hosts: production
  # Canonical YAML booleans (true/false) instead of the YAML 1.1 yes/no aliases.
  gather_facts: true
  become: false
  vars:
    # Directory on the target host that receives the JSON snapshots.
    monitoring_output_dir: "/home/deploy/monitoring/workflow-metrics"
    # Window, in hours, passed to `docker compose logs --since`.
    monitoring_lookback_hours: 24
    gitea_stack_path: "{{ stacks_base_path }}/gitea"
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
  tasks:
# Make sure the directory that receives the metrics snapshots exists
# (created with mode 0755 when missing).
- name: Create monitoring output directory
  ansible.builtin.file:
    state: directory
    path: "{{ monitoring_output_dir }}"
    mode: "0755"
# Register the 1-minute load average as a bare number in system_load.stdout.
#
# The value is read from the first field of /proc/loadavg instead of being
# cut out of `uptime`: the previous awk/tr pipeline kept the comma that
# separates the three averages (yielding e.g. "0.52,") and depended on how the
# host locale prints decimals. This assumes a Linux target, which the play
# already relies on elsewhere (it calls `free`).
- name: Get system load average
  ansible.builtin.command:
    argv:
      - cut
      - -d
      - " "
      - -f1
      - /proc/loadavg
  register: system_load
  # Read-only probe: never report a change.
  changed_when: false
# Count running containers: `docker ps` prints one container name per line and
# `wc -l` tallies them. The Go-template braces are emitted through Jinja string
# literals so Ansible's templating pass hands `{{.Names}}` to Docker untouched.
- name: Get Docker container count
  ansible.builtin.shell:
    cmd: |
      docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | wc -l
  changed_when: false
  register: docker_container_count
# Report "running" when any running container name contains "gitea-runner",
# otherwise "stopped". The result lands in gitea_runner_status.stdout.
- name: Get Gitea Runner status
  ansible.builtin.shell:
    cmd: |
      if docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | grep -q "gitea-runner"; then
        echo "running"
      else
        echo "stopped"
      fi
  changed_when: false
  register: gitea_runner_status
# Take a single `docker stats` sample of the container named "gitea" and
# register it as "CPU%,MemUsage,Mem%". If the command fails, the shell falls
# back to the literal "N/A,N/A,N/A"; the task itself is never marked failed.
- name: Get Gitea container resource usage
  ansible.builtin.shell:
    cmd: |
      docker stats gitea --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
  failed_when: false
  changed_when: false
  register: gitea_stats
# Take a single `docker stats` sample of the container named "traefik" and
# register it as "CPU%,MemUsage,Mem%". If the command fails, the shell falls
# back to the literal "N/A,N/A,N/A"; the task itself is never marked failed.
- name: Get Traefik container resource usage
  ansible.builtin.shell:
    cmd: |
      docker stats traefik --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
  failed_when: false
  changed_when: false
  register: traefik_stats
# Probe Gitea's /api/healthz endpoint over HTTPS. The registered result's
# `elapsed` field is what the play later converts into the API response time
# metric. The probe is best-effort: it never fails the play.
#
# NOTE(review): validate_certs is false, so the server certificate is not
# verified - confirm this is intentional for this host.
# NOTE(review): the uri module documents `elapsed` as a number of seconds
# (integer); confirm the millisecond value derived from it has the resolution
# you expect.
- name: Check Gitea API response time
  ansible.builtin.uri:
    method: GET
    url: "https://{{ gitea_domain }}/api/healthz"
    timeout: 10
    validate_certs: false
    status_code:
      - 200
  register: gitea_api_test
  failed_when: false
  changed_when: false
# Collect the last 50 Gitea log lines from the lookback window that mention
# workflows, actions or runners; registers them in gitea_workflow_logs.stdout.
#
# The matches are captured first and the placeholder text is printed only when
# the capture is empty. The previous `... | tail -50 || echo ...` form could
# never print the placeholder, because the pipeline's exit status is that of
# `tail`, which succeeds even on empty input.
# The stack path is shell-quoted so a path containing spaces cannot split the
# `cd` argument. Best-effort: the task is never marked failed.
- name: Get Gitea logs for workflow activity (last {{ monitoring_lookback_hours }} hours)
  ansible.builtin.shell: |
    cd {{ gitea_stack_path | quote }}
    matches="$(docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 |
      grep -iE "workflow|action|runner" |
      tail -50)"
    if [ -n "$matches" ]; then
      printf '%s\n' "$matches"
    else
      echo "No workflow activity found"
    fi
  register: gitea_workflow_logs
  changed_when: false
  failed_when: false
# Count how many Gitea log lines from the lookback window mention workflows,
# actions or runners. stderr is merged into stdout before filtering, so
# matching diagnostic lines are counted as well. The number is registered in
# workflow_log_count.stdout; the task is never marked failed.
- name: Count workflow-related log entries
  ansible.builtin.shell:
    cmd: |
      cd {{ gitea_stack_path }}
      docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 | \
        grep -iE "workflow|action|runner" | \
        wc -l
  failed_when: false
  changed_when: false
  register: workflow_log_count
# Register the human-readable size of the Gitea data directory
# (<gitea_stack_path>/data) in gitea_data_size.stdout, or "N/A" when it cannot
# be determined.
#
# The size is captured first and "N/A" substituted when the capture is empty.
# The previous `du ... | awk ... || echo "N/A"` form never reached the
# fallback: the pipeline's exit status is that of `awk`, which succeeds even
# when `du` fails, so a missing directory produced an empty string.
- name: Get disk usage for Gitea data
  ansible.builtin.shell: |
    size="$(du -sh {{ (gitea_stack_path ~ '/data') | quote }} 2>/dev/null | awk '{print $1}')"
    echo "${size:-N/A}"
  register: gitea_data_size
  changed_when: false
  failed_when: false
# Register the first Size value reported by `docker system df` in
# docker_disk_usage.stdout, or "N/A" when Docker returns nothing.
#
# The value is captured first and "N/A" substituted when the capture is empty.
# The previous `... | head -1 || echo "N/A"` form never reached the fallback,
# because the pipeline's exit status is that of `head`, which succeeds even
# when `docker system df` fails.
# NOTE(review): `head -1` keeps only the first row of the report (presumably
# the Images row), not a total across all rows - confirm that is the intended
# metric.
- name: Get Docker system disk usage
  ansible.builtin.shell: |
    size="$(docker system df --format "{{ '{{' }}.Size{{ '}}' }}" 2>/dev/null | head -1)"
    echo "${size:-N/A}"
  register: docker_disk_usage
  changed_when: false
  failed_when: false
# Register memory usage as "<used>/<total>" in memory_usage.stdout, taken from
# the third and second columns of the line of `free -h` that contains "Mem".
- name: Get memory usage
  ansible.builtin.shell:
    cmd: |
      free -h | awk '/Mem/ {print $3 "/" $2}'
  changed_when: false
  register: memory_usage
# Derive a CPU usage percentage as 100 minus the idle figure on top's
# "Cpu(s)" summary line, registered in cpu_usage.stdout. Best-effort: the task
# is never marked failed.
#
# NOTE(review): `top -bn1` runs a single batch iteration, so the value is one
# snapshot; the "1 minute average" wording in the task name may be inaccurate.
# NOTE(review): the sed pattern expects a dot as the decimal separator in
# top's output - confirm the target's locale.
- name: Get CPU usage (1 minute average)
  ansible.builtin.shell:
    cmd: |
      top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1}'
  failed_when: false
  changed_when: false
  register: cpu_usage
# Write one JSON snapshot of the collected metrics to
# <monitoring_output_dir>/workflow_metrics_<epoch>.json (mode 0644).
#
# Inputs are the results registered by the preceding tasks of this play
# (system_load, cpu_usage, memory_usage, docker_container_count,
# docker_disk_usage, gitea_data_size, gitea_runner_status, gitea_api_test,
# workflow_log_count, gitea_stats, traefik_stats) plus ansible_date_time from
# gathered facts.
#
# Fixes relative to the earlier template:
#   - api_response_time_ms: the old `(elapsed * 1000) | default('N/A') | int`
#     could never render "N/A" - a missing `elapsed` errors in the
#     multiplication before `default` runs, and `'N/A' | int` is 0. The
#     expression now tests `elapsed is defined` and renders "N/A" otherwise.
#   - cpu_usage_percent: `default('N/A')` only covers undefined values; the
#     second argument `true` makes an empty stdout render as "N/A" too.
#
# The "optimizations" section is static: these flags are hard-coded, not measured.
- name: Generate metrics JSON
  ansible.builtin.copy:
    dest: "{{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json"
    content: |
      {
        "timestamp": "{{ ansible_date_time.iso8601 }}",
        "system_metrics": {
          "load_average": "{{ system_load.stdout }}",
          "cpu_usage_percent": "{{ cpu_usage.stdout | default('N/A', true) }}",
          "memory_usage": "{{ memory_usage.stdout }}",
          "docker_containers": "{{ docker_container_count.stdout }}",
          "docker_disk_usage": "{{ docker_disk_usage.stdout }}",
          "gitea_data_size": "{{ gitea_data_size.stdout }}"
        },
        "gitea_metrics": {
          "runner_status": "{{ gitea_runner_status.stdout }}",
          "api_response_time_ms": "{{ ((gitea_api_test.elapsed * 1000) | int) if gitea_api_test.elapsed is defined else 'N/A' }}",
          "workflow_log_entries_last_{{ monitoring_lookback_hours }}h": {{ workflow_log_count.stdout | int }},
          "container_stats": {
            "cpu_percent": "{{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
            "memory_usage": "{{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
            "memory_percent": "{{ gitea_stats.stdout.split(',')[2] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
          }
        },
        "traefik_metrics": {
          "container_stats": {
            "cpu_percent": "{{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
            "memory_usage": "{{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
            "memory_percent": "{{ traefik_stats.stdout.split(',')[2] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
          }
        },
        "optimizations": {
          "repository_artifact_enabled": true,
          "helper_script_caching_enabled": true,
          "combined_deployment_playbook": true,
          "exponential_backoff_health_checks": true,
          "concurrency_groups": true
        }
      }
    mode: '0644'
# Print a human-readable summary of the collected metrics to the play output.
# Reads the results registered by the preceding tasks of this play; writes nothing.
#
# Fixes relative to the earlier message:
#   - API Response Time: the old `(elapsed * 1000) | default('N/A') | int`
#     could never show "N/A" - a missing `elapsed` errors in the multiplication
#     before `default` runs, and `'N/A' | int` is 0. The expression now tests
#     `elapsed is defined` and shows "N/A" otherwise.
#   - CPU Usage: `default('N/A', true)` also covers an empty stdout, which the
#     one-argument form does not.
#
# The "Optimizations Enabled" list is static text, not a measured state.
- name: Display monitoring summary
  ansible.builtin.debug:
    msg: |
      ================================================================================
      WORKFLOW PERFORMANCE MONITORING - SUMMARY
      ================================================================================
      System Metrics:
      - Load Average: {{ system_load.stdout }}
      - CPU Usage: {{ cpu_usage.stdout | default('N/A', true) }}%
      - Memory Usage: {{ memory_usage.stdout }}
      - Docker Containers: {{ docker_container_count.stdout }}
      - Docker Disk Usage: {{ docker_disk_usage.stdout }}
      - Gitea Data Size: {{ gitea_data_size.stdout }}
      Gitea Metrics:
      - Runner Status: {{ gitea_runner_status.stdout }}
      - API Response Time: {{ ((gitea_api_test.elapsed * 1000) | int) if gitea_api_test.elapsed is defined else 'N/A' }}ms
      - Workflow Log Entries (last {{ monitoring_lookback_hours }}h): {{ workflow_log_count.stdout }}
      - Container CPU: {{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
      - Container Memory: {{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
      Traefik Metrics:
      - Container CPU: {{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
      - Container Memory: {{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
      Optimizations Enabled:
      ✅ Repository Artifact Caching
      ✅ Helper Script Caching
      ✅ Combined Deployment Playbook
      ✅ Exponential Backoff Health Checks
      ✅ Concurrency Groups
      Metrics saved to: {{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json
      ================================================================================