---
# Monitor Workflow Performance
#
# Collects metrics about workflow execution, Gitea load, and system resources
# on the `production` hosts, writes them to a timestamped JSON file under
# `monitoring_output_dir`, and prints a human-readable summary.
#
# External variables (not defined in this file; presumably supplied by
# inventory/group_vars -- verify):
#   stacks_base_path  base directory containing the compose stacks
#   gitea_domain      public hostname of the Gitea instance
#
# All collection tasks are read-only (`changed_when: false`). Most are also
# best-effort (`failed_when: false`) so that one missing tool or container
# does not abort the whole report.
- name: Monitor Workflow Performance
  hosts: production
  gather_facts: true  # ansible_date_time is used for the file name/timestamp
  become: false

  vars:
    monitoring_output_dir: "/home/deploy/monitoring/workflow-metrics"
    monitoring_lookback_hours: 24
    gitea_stack_path: "{{ stacks_base_path }}/gitea"
    # NOTE(review): not referenced by any task in this play; kept so that
    # anything overriding or reading it keeps working.
    traefik_stack_path: "{{ stacks_base_path }}/traefik"

  tasks:
    - name: Create monitoring output directory
      ansible.builtin.file:
        path: "{{ monitoring_output_dir }}"
        state: directory
        mode: '0755'

    # `uptime` prints "load average: 0.15, 0.10, 0.05"; the first field is
    # therefore "0.15,". Only the trailing separator comma is stripped, so a
    # locale that uses a decimal comma is not mangled.
    - name: Get system load average
      ansible.builtin.shell: |
        uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,$//'
      register: system_load
      changed_when: false

    # The `{{ '{{' }}` / `{{ '}}' }}` sequences emit literal braces so that
    # Docker, not Jinja2, interprets the Go template.
    - name: Get Docker container count
      ansible.builtin.shell: |
        docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | wc -l
      register: docker_container_count
      changed_when: false

    - name: Get Gitea Runner status
      ansible.builtin.shell: |
        if docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | grep -q "gitea-runner"; then
          echo "running"
        else
          echo "stopped"
        fi
      register: gitea_runner_status
      changed_when: false

    # Output is "CPU%,MemUsage,Mem%", or the sentinel "N/A,N/A,N/A" when the
    # container cannot be inspected. Later templates test for that sentinel.
    - name: Get Gitea container resource usage
      ansible.builtin.shell: |
        docker stats gitea --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
      register: gitea_stats
      changed_when: false
      failed_when: false

    - name: Get Traefik container resource usage
      ansible.builtin.shell: |
        docker stats traefik --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
      register: traefik_stats
      changed_when: false
      failed_when: false

    # NOTE(review): certificate validation is disabled here. Confirm this is
    # intentional (e.g. self-signed certificate) before relying on the result.
    - name: Check Gitea API response time
      ansible.builtin.uri:
        url: "https://{{ gitea_domain }}/api/healthz"
        method: GET
        status_code: [200]
        validate_certs: false
        timeout: 10
      register: gitea_api_test
      changed_when: false
      failed_when: false

    # The pipeline output is captured first: a trailing `|| echo ...` after a
    # pipeline only sees the exit status of the last command (`tail`), which
    # is 0 even when nothing matched. The `cd` is chained with `&&` so that
    # docker compose never runs from the wrong directory.
    # NOTE(review): `gitea_workflow_logs` is registered but not consumed by
    # any task in this play.
    - name: Get Gitea logs for workflow activity (last {{ monitoring_lookback_hours }} hours)
      ansible.builtin.shell: |
        logs=$(cd "{{ gitea_stack_path }}" 2>/dev/null &&
          docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 |
          grep -iE "workflow|action|runner" |
          tail -50)
        echo "${logs:-No workflow activity found}"
      register: gitea_workflow_logs
      changed_when: false
      failed_when: false

    # `grep -c` prints the number of matching lines (0 when none match).
    # If the `cd` fails, stdout is empty and consumers coerce it with `| int`.
    - name: Count workflow-related log entries
      ansible.builtin.shell: |
        cd "{{ gitea_stack_path }}" 2>/dev/null &&
          docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 |
          grep -ciE "workflow|action|runner"
      register: workflow_log_count
      changed_when: false
      failed_when: false

    # Capture-then-default for the same reason as above: `awk` exits 0 even
    # when `du` fails, so a trailing `|| echo "N/A"` would never fire.
    - name: Get disk usage for Gitea data
      ansible.builtin.shell: |
        size=$(du -sh "{{ gitea_stack_path }}/data" 2>/dev/null | awk '{print $1}')
        echo "${size:-N/A}"
      register: gitea_data_size
      changed_when: false
      failed_when: false

    # Only the first row of `docker system df` is reported.
    - name: Get Docker system disk usage
      ansible.builtin.shell: |
        usage=$(docker system df --format "{{ '{{' }}.Size{{ '}}' }}" 2>/dev/null | head -1)
        echo "${usage:-N/A}"
      register: docker_disk_usage
      changed_when: false
      failed_when: false

    - name: Get memory usage
      ansible.builtin.shell: |
        free -h | grep Mem | awk '{print $3 "/" $2}'
      register: memory_usage
      changed_when: false

    # Derives "busy" CPU as 100 minus the idle percentage from one batch
    # iteration of `top`.
    # NOTE(review): this is a single snapshot, not a one-minute average as
    # the task name suggests.
    - name: Get CPU usage (1 minute average)
      ansible.builtin.shell: |
        top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1}'
      register: cpu_usage
      changed_when: false
      failed_when: false

    # Template notes:
    # - `default('N/A', true)` also replaces *empty* strings; plain `default`
    #   only covers undefined values, and `stdout` is defined-but-empty when
    #   a best-effort command produced nothing.
    # - The API timing is guarded with `is defined` before multiplying, so an
    #   absent `elapsed` yields "N/A" instead of a template error or 0.
    #   NOTE(review): `elapsed` appears to be reported in whole seconds by
    #   the uri module, so the millisecond value is coarse -- confirm.
    - name: Generate metrics JSON
      ansible.builtin.copy:
        dest: "{{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json"
        content: |
          {
            "timestamp": "{{ ansible_date_time.iso8601 }}",
            "system_metrics": {
              "load_average": "{{ system_load.stdout }}",
              "cpu_usage_percent": "{{ cpu_usage.stdout | default('N/A', true) }}",
              "memory_usage": "{{ memory_usage.stdout }}",
              "docker_containers": "{{ docker_container_count.stdout }}",
              "docker_disk_usage": "{{ docker_disk_usage.stdout | default('N/A', true) }}",
              "gitea_data_size": "{{ gitea_data_size.stdout | default('N/A', true) }}"
            },
            "gitea_metrics": {
              "runner_status": "{{ gitea_runner_status.stdout }}",
              "api_response_time_ms": "{{ (gitea_api_test.elapsed * 1000) | int if gitea_api_test.elapsed is defined else 'N/A' }}",
              "workflow_log_entries_last_{{ monitoring_lookback_hours }}h": {{ workflow_log_count.stdout | int }},
              "container_stats": {
                "cpu_percent": "{{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
                "memory_usage": "{{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
                "memory_percent": "{{ gitea_stats.stdout.split(',')[2] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
              }
            },
            "traefik_metrics": {
              "container_stats": {
                "cpu_percent": "{{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
                "memory_usage": "{{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
                "memory_percent": "{{ traefik_stats.stdout.split(',')[2] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
              }
            },
            "optimizations": {
              "repository_artifact_enabled": true,
              "helper_script_caching_enabled": true,
              "combined_deployment_playbook": true,
              "exponential_backoff_health_checks": true,
              "concurrency_groups": true
            }
          }
        mode: '0644'

    # Mirrors the JSON above in human-readable form. The file name is built
    # from the same gathered fact, so it matches the file written above.
    - name: Display monitoring summary
      ansible.builtin.debug:
        msg: |
          ================================================================================
          WORKFLOW PERFORMANCE MONITORING - SUMMARY
          ================================================================================

          System Metrics:
          - Load Average: {{ system_load.stdout }}
          - CPU Usage: {{ cpu_usage.stdout | default('N/A', true) }}%
          - Memory Usage: {{ memory_usage.stdout }}
          - Docker Containers: {{ docker_container_count.stdout }}
          - Docker Disk Usage: {{ docker_disk_usage.stdout | default('N/A', true) }}
          - Gitea Data Size: {{ gitea_data_size.stdout | default('N/A', true) }}

          Gitea Metrics:
          - Runner Status: {{ gitea_runner_status.stdout }}
          - API Response Time: {{ (((gitea_api_test.elapsed * 1000) | int | string) ~ 'ms') if gitea_api_test.elapsed is defined else 'N/A' }}
          - Workflow Log Entries (last {{ monitoring_lookback_hours }}h): {{ workflow_log_count.stdout | int }}
          - Container CPU: {{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
          - Container Memory: {{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}

          Traefik Metrics:
          - Container CPU: {{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
          - Container Memory: {{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}

          Optimizations Enabled:
          ✅ Repository Artifact Caching
          ✅ Helper Script Caching
          ✅ Combined Deployment Playbook
          ✅ Exponential Backoff Health Checks
          ✅ Concurrency Groups

          Metrics saved to: {{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json
          ================================================================================