feat: optimize workflows with repository artifacts and add performance monitoring
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 33s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Successful in 32s
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Composer Security Audit (push) Has been skipped
🧊 Warm Docker Build Cache / Refresh Buildx Caches (push) Failing after 11s
📊 Monitor Workflow Performance / Monitor Workflow Performance (push) Failing after 20s
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 33s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Successful in 32s
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Composer Security Audit (push) Has been skipped
🧊 Warm Docker Build Cache / Refresh Buildx Caches (push) Failing after 11s
📊 Monitor Workflow Performance / Monitor Workflow Performance (push) Failing after 20s
- Use repository artifacts in test and build jobs (reduces 2-3 git clones per run)
- Add comprehensive workflow performance monitoring system
- Add monitoring playbook and Gitea workflow for automated metrics collection
- Add monitoring documentation and scripts

Optimizations:
- Repository artifact caching: changes job uploads repo, test/build jobs download it
- Reduces Gitea load by eliminating redundant git operations
- Faster job starts (artifact download is typically faster than git clone)

Monitoring:
- Script for local workflow metrics collection via Gitea API
- Ansible playbook for server-side system and Gitea metrics
- Automated Gitea workflow that runs every 6 hours
- Tracks workflow durations, system load, Gitea API response times, and more
This commit is contained in:
192
deployment/ansible/playbooks/monitor-workflow-performance.yml
Normal file
192
deployment/ansible/playbooks/monitor-workflow-performance.yml
Normal file
@@ -0,0 +1,192 @@
|
||||
---
# Monitor Workflow Performance
# Collects comprehensive metrics about workflow execution, Gitea load, and system resources.
#
# The probes (command/shell/uri tasks) only read state and report
# `changed_when: false`. Their results are written to one JSON file per run
# under `monitoring_output_dir` and echoed in a summary debug message.
#
# Variables referenced here but not defined in this file (they must come from
# inventory / group vars): `stacks_base_path`, `gitea_domain`.
- name: Monitor Workflow Performance
  hosts: production
  gather_facts: true  # ansible_date_time is used for the file name and timestamp
  become: false
  vars:
    monitoring_output_dir: "/home/deploy/monitoring/workflow-metrics"
    monitoring_lookback_hours: 24
    gitea_stack_path: "{{ stacks_base_path }}/gitea"
    # Defined for parity with gitea_stack_path; no task in this play reads it.
    traefik_stack_path: "{{ stacks_base_path }}/traefik"

  tasks:
    - name: Create monitoring output directory
      ansible.builtin.file:
        path: "{{ monitoring_output_dir }}"
        state: directory
        mode: '0755'

    # Reads the first field of /proc/loadavg instead of parsing `uptime`,
    # whose comma-separated output left a trailing "," in the value.
    - name: Get system load average
      ansible.builtin.command:
        argv:
          - awk
          - "{print $1}"
          - /proc/loadavg
      register: system_load
      changed_when: false

    # `{{ '{{' }}` / `{{ '}}' }}` emit literal Go-template braces for docker's
    # --format so that Jinja does not try to evaluate them.
    - name: Get Docker container count
      ansible.builtin.shell: |
        docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | wc -l
      register: docker_container_count
      changed_when: false

    # Prints "running" when any running container name contains
    # "gitea-runner", otherwise "stopped".
    - name: Get Gitea Runner status
      ansible.builtin.shell: |
        if docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | grep -q "gitea-runner"; then
          echo "running"
        else
          echo "stopped"
        fi
      register: gitea_runner_status
      changed_when: false

    # Output is "CPU%,MemUsage,Mem%"; falls back to "N/A,N/A,N/A" when
    # `docker stats` exits non-zero.
    - name: Get Gitea container resource usage
      ansible.builtin.shell: |
        docker stats gitea --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
      register: gitea_stats
      changed_when: false
      failed_when: false

    - name: Get Traefik container resource usage
      ansible.builtin.shell: |
        docker stats traefik --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
      register: traefik_stats
      changed_when: false
      failed_when: false

    # Best-effort probe: failed_when: false keeps the play going when the
    # endpoint is unreachable or returns a non-200 status.
    - name: Check Gitea API response time
      ansible.builtin.uri:
        url: "https://{{ gitea_domain }}/api/healthz"
        method: GET
        status_code: [200]
        # NOTE(review): TLS verification is disabled for this request; enable it
        # if the Gitea certificate is trusted by the target host.
        validate_certs: false
        timeout: 10
      register: gitea_api_test
      changed_when: false
      failed_when: false

    # The registered result is not consumed by any later task in this play.
    # The fallback text is applied to the captured output because the exit
    # status of a `... | tail` pipeline is tail's, which succeeds on empty input.
    - name: Get Gitea logs for workflow activity (last {{ monitoring_lookback_hours }} hours)
      ansible.builtin.shell: |
        cd "{{ gitea_stack_path }}"
        logs="$(docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 | grep -iE "workflow|action|runner" | tail -50)"
        printf '%s\n' "${logs:-No workflow activity found}"
      register: gitea_workflow_logs
      changed_when: false
      failed_when: false

    - name: Count workflow-related log entries
      ansible.builtin.shell: |
        cd "{{ gitea_stack_path }}"
        docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 | \
          grep -iE "workflow|action|runner" | \
          wc -l
      register: workflow_log_count
      changed_when: false
      failed_when: false

    # Prints "N/A" when du produced no size (e.g. the directory is missing).
    # An `|| echo` after the pipeline would never run because awk exits 0.
    - name: Get disk usage for Gitea data
      ansible.builtin.shell: |
        size="$(du -sh "{{ gitea_stack_path }}/data" 2>/dev/null | awk '{print $1}')"
        echo "${size:-N/A}"
      register: gitea_data_size
      changed_when: false
      failed_when: false

    # `head -1` keeps only the first row that `docker system df` prints.
    # NOTE(review): presumably that is the images row, not a grand total — confirm.
    - name: Get Docker system disk usage
      ansible.builtin.shell: |
        size="$(docker system df --format "{{ '{{' }}.Size{{ '}}' }}" 2>/dev/null | head -1)"
        echo "${size:-N/A}"
      register: docker_disk_usage
      changed_when: false
      failed_when: false

    # Reported as "<used>/<total>" from the Mem row of `free -h`.
    - name: Get memory usage
      ansible.builtin.shell: |
        free -h | awk '/^Mem/ {print $3 "/" $2}'
      register: memory_usage
      changed_when: false

    # One batch iteration of top; usage is computed as 100 minus the idle
    # percentage extracted from the "Cpu(s)" line.
    - name: Get CPU usage (single top sample)
      ansible.builtin.shell: |
        top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1}'
      register: cpu_usage
      changed_when: false
      failed_when: false

    # `default('N/A', true)` also replaces empty strings, which is what a probe
    # that printed nothing leaves in stdout.
    # NOTE(review): uri's `elapsed` appears to be reported in whole seconds, so
    # api_response_time_ms would only have 1 s resolution — confirm.
    - name: Generate metrics JSON
      ansible.builtin.copy:
        dest: "{{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json"
        content: |
          {
            "timestamp": "{{ ansible_date_time.iso8601 }}",
            "system_metrics": {
              "load_average": "{{ system_load.stdout }}",
              "cpu_usage_percent": "{{ cpu_usage.stdout | default('N/A', true) }}",
              "memory_usage": "{{ memory_usage.stdout }}",
              "docker_containers": "{{ docker_container_count.stdout }}",
              "docker_disk_usage": "{{ docker_disk_usage.stdout | default('N/A', true) }}",
              "gitea_data_size": "{{ gitea_data_size.stdout | default('N/A', true) }}"
            },
            "gitea_metrics": {
              "runner_status": "{{ gitea_runner_status.stdout }}",
              "api_response_time_ms": "{{ (gitea_api_test.elapsed * 1000) | int if gitea_api_test.elapsed is defined else 'N/A' }}",
              "workflow_log_entries_last_{{ monitoring_lookback_hours }}h": {{ workflow_log_count.stdout | int }},
              "container_stats": {
                "cpu_percent": "{{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
                "memory_usage": "{{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
                "memory_percent": "{{ gitea_stats.stdout.split(',')[2] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
              }
            },
            "traefik_metrics": {
              "container_stats": {
                "cpu_percent": "{{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
                "memory_usage": "{{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
                "memory_percent": "{{ traefik_stats.stdout.split(',')[2] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
              }
            },
            "optimizations": {
              "repository_artifact_enabled": true,
              "helper_script_caching_enabled": true,
              "combined_deployment_playbook": true,
              "exponential_backoff_health_checks": true,
              "concurrency_groups": true
            }
          }
        mode: '0644'

    - name: Display monitoring summary
      ansible.builtin.debug:
        msg: |
          ================================================================================
          WORKFLOW PERFORMANCE MONITORING - SUMMARY
          ================================================================================

          System Metrics:
          - Load Average: {{ system_load.stdout }}
          - CPU Usage: {{ cpu_usage.stdout | default('N/A', true) }}%
          - Memory Usage: {{ memory_usage.stdout }}
          - Docker Containers: {{ docker_container_count.stdout }}
          - Docker Disk Usage: {{ docker_disk_usage.stdout | default('N/A', true) }}
          - Gitea Data Size: {{ gitea_data_size.stdout | default('N/A', true) }}

          Gitea Metrics:
          - Runner Status: {{ gitea_runner_status.stdout }}
          - API Response Time: {{ (gitea_api_test.elapsed * 1000) | int if gitea_api_test.elapsed is defined else 'N/A' }}ms
          - Workflow Log Entries (last {{ monitoring_lookback_hours }}h): {{ workflow_log_count.stdout }}
          - Container CPU: {{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
          - Container Memory: {{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}

          Traefik Metrics:
          - Container CPU: {{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
          - Container Memory: {{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}

          Optimizations Enabled:
          ✅ Repository Artifact Caching
          ✅ Helper Script Caching
          ✅ Combined Deployment Playbook
          ✅ Exponential Backoff Health Checks
          ✅ Concurrency Groups

          Metrics saved to: {{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json

          ================================================================================
|
||||
|
||||
Reference in New Issue
Block a user