feat: optimize workflows with repository artifacts and add performance monitoring
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 33s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Successful in 32s
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Composer Security Audit (push) Has been skipped
🧊 Warm Docker Build Cache / Refresh Buildx Caches (push) Failing after 11s
📊 Monitor Workflow Performance / Monitor Workflow Performance (push) Failing after 20s

- Use repository artifacts in test and build jobs (eliminates 2-3 git clones per run)
- Add comprehensive workflow performance monitoring system
- Add monitoring playbook and Gitea workflow for automated metrics collection
- Add monitoring documentation and scripts

Optimizations:
- Repository artifact caching: changes job uploads repo, test/build jobs download it
- Reduces Gitea load by eliminating redundant git operations
- Faster job starts (artifact download is typically faster than git clone)

Monitoring:
- Script for local workflow metrics collection via Gitea API
- Ansible playbook for server-side system and Gitea metrics
- Automated Gitea workflow that runs every 6 hours
- Tracks workflow durations, system load, Gitea API response times, and more
2025-11-09 04:03:51 +01:00
parent c3bec296fc
commit 72757954dc
5 changed files with 784 additions and 75 deletions

.gitea/workflows/build-image.yml

@@ -44,6 +44,11 @@ on:
         type: boolean
         required: false
         default: false
+      deploy:
+        description: 'Deploy to staging/production after build (default: false)'
+        type: boolean
+        required: false
+        default: false
 env:
   REGISTRY: registry.michaelschiemer.de
@@ -85,6 +90,13 @@ jobs:
           fi
           chmod +x /tmp/ci-tools/clone_repo.sh
+      - name: Upload CI helpers as artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: ci-helpers
+          path: /tmp/ci-tools/clone_repo.sh
+          retention-days: 1
       - name: Analyse changed files
         id: filter
         shell: bash
@@ -224,6 +236,13 @@ jobs:
echo "changed_files=$PRETTY_CHANGES" >> "$GITHUB_OUTPUT" echo "changed_files=$PRETTY_CHANGES" >> "$GITHUB_OUTPUT"
echo "needs_runtime_build=$RUNTIME_BUILD" >> "$GITHUB_OUTPUT" echo "needs_runtime_build=$RUNTIME_BUILD" >> "$GITHUB_OUTPUT"
- name: Upload repository as artifact
uses: actions/upload-artifact@v4
with:
name: repository
path: /workspace/repo
retention-days: 1
runtime-base: runtime-base:
name: Build Runtime Base Image name: Build Runtime Base Image
needs: changes needs: changes
@@ -244,8 +263,16 @@ jobs:
echo "should_build=false" >> "$GITHUB_OUTPUT" echo "should_build=false" >> "$GITHUB_OUTPUT"
fi fi
- name: Download CI helpers - name: Download CI helpers from artifact
if: ${{ steps.decision.outputs.should_build == 'true' }} if: ${{ steps.decision.outputs.should_build == 'true' }}
uses: actions/download-artifact@v4
with:
name: ci-helpers
path: /tmp/ci-tools
continue-on-error: true
- name: Download CI helpers (fallback if artifact missing)
if: ${{ steps.decision.outputs.should_build == 'true' && failure() }}
shell: bash shell: bash
env: env:
CI_TOKEN: ${{ secrets.CI_TOKEN }} CI_TOKEN: ${{ secrets.CI_TOKEN }}
@@ -457,7 +484,15 @@ jobs:
     name: Run Tests & Quality Checks
     runs-on: php-ci
     steps:
-      - name: Download CI helpers
+      - name: Download CI helpers from artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: ci-helpers
+          path: /tmp/ci-tools
+        continue-on-error: true
+      - name: Download CI helpers (fallback if artifact missing)
+        if: failure()
         shell: bash
         env:
           CI_TOKEN: ${{ secrets.CI_TOKEN }}
@@ -479,7 +514,16 @@ jobs:
           fi
           chmod +x /tmp/ci-tools/clone_repo.sh
-      - name: Checkout code
+      - name: Download repository artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: repository
+          path: /workspace
+        continue-on-error: true
+        id: download_repo
+      - name: Checkout code (fallback if artifact missing)
+        if: steps.download_repo.outcome == 'failure'
         run: |
           REF_NAME="${{ github.ref_name }}"
           INPUT_BRANCH="${{ inputs.branch }}"
@@ -545,8 +589,16 @@ jobs:
           bash --version
           git --version
-      - name: Download CI helpers
+      - name: Download CI helpers from artifact
         if: ${{ env.SHOULD_BUILD == 'true' }}
+        uses: actions/download-artifact@v4
+        with:
+          name: ci-helpers
+          path: /tmp/ci-tools
+        continue-on-error: true
+      - name: Download CI helpers (fallback if artifact missing)
+        if: ${{ env.SHOULD_BUILD == 'true' && failure() }}
         shell: bash
         env:
           CI_TOKEN: ${{ secrets.CI_TOKEN }}
@@ -568,8 +620,17 @@ jobs:
           fi
           chmod +x /tmp/ci-tools/clone_repo.sh
-      - name: Checkout code
+      - name: Download repository artifact
         if: ${{ env.SHOULD_BUILD == 'true' }}
+        uses: actions/download-artifact@v4
+        with:
+          name: repository
+          path: /workspace
+        continue-on-error: true
+        id: download_repo
+      - name: Checkout code (fallback if artifact missing)
+        if: ${{ env.SHOULD_BUILD == 'true' && steps.download_repo.outcome == 'failure' }}
         shell: bash
         run: |
           REF_NAME="${{ github.ref_name }}"
@@ -910,12 +971,23 @@ jobs:
echo " Run the 'Deploy to Production' or 'Deploy to Staging' workflow to deploy this image." echo " Run the 'Deploy to Production' or 'Deploy to Staging' workflow to deploy this image."
fi fi
# Job 3: Auto-deploy to Staging (only for staging branch) - name: Upload repository as artifact
if: ${{ env.SHOULD_BUILD == 'true' }}
uses: actions/upload-artifact@v4
with:
name: repository
path: /workspace/repo
retention-days: 1
# Job 3: Auto-deploy to Staging (only for staging branch and if deploy is enabled)
deploy-staging: deploy-staging:
name: Auto-deploy to Staging name: Auto-deploy to Staging
needs: [changes, build] needs: [changes, build]
if: ${{ always() && (github.ref_name == 'staging' || github.head_ref == 'staging' || (github.ref_name == '' && contains(github.ref, 'staging'))) && needs.build.result != 'failure' && needs.build.result != 'cancelled' && needs.changes.result != 'failure' && needs.changes.result != 'cancelled' }} if: ${{ always() && ((github.event_name == 'push' && (github.ref_name == 'staging' || github.head_ref == 'staging' || (github.ref_name == '' && contains(github.ref, 'staging')))) || (github.event_name == 'workflow_dispatch' && inputs.deploy == true)) && needs.build.result != 'failure' && needs.build.result != 'cancelled' && needs.changes.result != 'failure' && needs.changes.result != 'cancelled' }}
runs-on: php-ci runs-on: php-ci
concurrency:
group: deploy-staging
cancel-in-progress: false
environment: environment:
name: staging name: staging
url: https://staging.michaelschiemer.de url: https://staging.michaelschiemer.de
@@ -936,7 +1008,16 @@ jobs:
echo "BRANCH=$REF_NAME" >> $GITHUB_OUTPUT echo "BRANCH=$REF_NAME" >> $GITHUB_OUTPUT
echo "📋 Branch: $REF_NAME" echo "📋 Branch: $REF_NAME"
- name: Checkout deployment scripts - name: Download repository artifact
uses: actions/download-artifact@v4
with:
name: repository
path: /workspace
continue-on-error: true
id: download_repo
- name: Checkout deployment scripts (fallback if artifact missing)
if: steps.download_repo.outcome == 'failure'
run: | run: |
REF_NAME="${{ steps.branch.outputs.BRANCH }}" REF_NAME="${{ steps.branch.outputs.BRANCH }}"
REPO="${{ github.repository }}" REPO="${{ github.repository }}"
@@ -956,6 +1037,11 @@ jobs:
           cd /workspace/repo
+      - name: Set skip_git_update flag if repository artifact was used
+        if: steps.download_repo.outcome == 'success'
+        run: |
+          echo "SKIP_GIT_UPDATE=true" >> $GITHUB_ENV
       - name: Setup SSH key
         run: |
           mkdir -p ~/.ssh
@@ -975,41 +1061,19 @@ jobs:
             chmod 600 /tmp/vault_pass
           fi
-      - name: Deploy Application Code to Staging
+      - name: Deploy to Staging (Complete)
         run: |
           cd /workspace/repo/deployment/ansible
           ansible-playbook -i inventory/production.yml \
-            playbooks/deploy-application-code.yml \
+            playbooks/deploy-complete.yml \
             -e "deployment_environment=staging" \
             -e "deployment_hosts=production" \
             -e "git_branch=staging" \
-            -e "traefik_auto_restart=false" \
-            -e "gitea_auto_restart=false" \
-            --vault-password-file /tmp/vault_pass \
-            --private-key ~/.ssh/production
-      - name: Deploy Docker Image to Staging
-        run: |
-          cd /workspace/repo/deployment/ansible
-          ansible-playbook -i inventory/production.yml \
-            playbooks/deploy-image.yml \
-            -e "deployment_environment=staging" \
-            -e "deployment_hosts=production" \
             -e "image_tag=latest" \
             -e "docker_registry=${{ env.REGISTRY }}" \
             -e "docker_registry_username=${{ secrets.REGISTRY_USER }}" \
             -e "docker_registry_password=${{ secrets.REGISTRY_PASSWORD }}" \
-            -e "traefik_auto_restart=false" \
-            -e "gitea_auto_restart=false" \
-            --vault-password-file /tmp/vault_pass \
-            --private-key ~/.ssh/production
-      - name: Install Composer Dependencies
-        run: |
-          cd /workspace/repo/deployment/ansible
-          ansible-playbook -i inventory/production.yml \
-            playbooks/install-composer-dependencies.yml \
-            -e "deployment_environment=staging" \
+            -e "application_skip_git_update=${{ env.SKIP_GIT_UPDATE || 'false' }}" \
             -e "traefik_auto_restart=false" \
             -e "gitea_auto_restart=false" \
             --vault-password-file /tmp/vault_pass \
@@ -1021,22 +1085,30 @@ jobs:
       - name: Health check
         id: health
         run: |
-          echo "🔍 Performing health checks..."
-          # Basic health check
+          echo "🔍 Performing health checks with exponential backoff..."
+          # Basic health check with exponential backoff
           BASIC_HEALTH_OK=false
-          for i in {1..10}; do
+          DELAY=2
+          MAX_DELAY=60
+          MAX_ATTEMPTS=5
+          for i in $(seq 1 $MAX_ATTEMPTS); do
             if curl -f -k -s https://staging.michaelschiemer.de/health > /dev/null 2>&1; then
-              echo "✅ Basic health check passed"
+              echo "✅ Basic health check passed (attempt $i/$MAX_ATTEMPTS)"
               BASIC_HEALTH_OK=true
               break
             fi
-            echo "⏳ Waiting for staging service... (attempt $i/10)"
-            sleep 10
+            if [ $i -lt $MAX_ATTEMPTS ]; then
+              echo "⏳ Waiting for staging service... (attempt $i/$MAX_ATTEMPTS, delay ${DELAY}s)"
+              sleep $DELAY
+              DELAY=$((DELAY * 2))
+              [ $DELAY -gt $MAX_DELAY ] && DELAY=$MAX_DELAY
+            fi
           done
           if [ "$BASIC_HEALTH_OK" != "true" ]; then
-            echo "❌ Basic health check failed"
+            echo "❌ Basic health check failed after $MAX_ATTEMPTS attempts"
             exit 1
           fi
@@ -1065,12 +1137,15 @@ jobs:
echo "URL: https://staging.michaelschiemer.de" echo "URL: https://staging.michaelschiemer.de"
echo "Image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" echo "Image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest"
# Job 4: Auto-deploy to Production (only for main branch) # Job 4: Auto-deploy to Production (only for main branch and if deploy is enabled)
deploy-production: deploy-production:
name: Auto-deploy to Production name: Auto-deploy to Production
needs: [changes, build] needs: [changes, build]
if: always() && (github.ref_name == 'main' || github.head_ref == 'main' || (github.ref_name == '' && contains(github.ref, 'main'))) && needs.changes.outputs.needs_build == 'true' if: always() && ((github.event_name == 'push' && (github.ref_name == 'main' || github.head_ref == 'main' || (github.ref_name == '' && contains(github.ref, 'main')))) || (github.event_name == 'workflow_dispatch' && inputs.deploy == true)) && needs.changes.outputs.needs_build == 'true'
runs-on: php-ci runs-on: php-ci
concurrency:
group: deploy-production
cancel-in-progress: false
environment: environment:
name: production name: production
url: https://michaelschiemer.de url: https://michaelschiemer.de
@@ -1091,7 +1166,16 @@ jobs:
echo "BRANCH=$REF_NAME" >> $GITHUB_OUTPUT echo "BRANCH=$REF_NAME" >> $GITHUB_OUTPUT
echo "📋 Branch: $REF_NAME" echo "📋 Branch: $REF_NAME"
- name: Checkout deployment scripts - name: Download repository artifact
uses: actions/download-artifact@v4
with:
name: repository
path: /workspace
continue-on-error: true
id: download_repo
- name: Checkout deployment scripts (fallback if artifact missing)
if: steps.download_repo.outcome == 'failure'
run: | run: |
REF_NAME="${{ steps.branch.outputs.BRANCH }}" REF_NAME="${{ steps.branch.outputs.BRANCH }}"
REPO="${{ github.repository }}" REPO="${{ github.repository }}"
@@ -1111,6 +1195,11 @@ jobs:
           cd /workspace/repo
+      - name: Set skip_git_update flag if repository artifact was used
+        if: steps.download_repo.outcome == 'success'
+        run: |
+          echo "SKIP_GIT_UPDATE=true" >> $GITHUB_ENV
       - name: Setup SSH key
         run: |
           mkdir -p ~/.ssh
@@ -1153,41 +1242,19 @@ jobs:
echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_OUTPUT echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_OUTPUT
echo "📦 Image Tag: ${IMAGE_TAG}" echo "📦 Image Tag: ${IMAGE_TAG}"
- name: Deploy Application Code to Production - name: Deploy to Production (Complete)
run: | run: |
cd /workspace/repo/deployment/ansible cd /workspace/repo/deployment/ansible
ansible-playbook -i inventory/production.yml \ ansible-playbook -i inventory/production.yml \
playbooks/deploy-application-code.yml \ playbooks/deploy-complete.yml \
-e "deployment_environment=production" \ -e "deployment_environment=production" \
-e "deployment_hosts=production" \ -e "deployment_hosts=production" \
-e "git_branch=main" \ -e "git_branch=main" \
-e "traefik_auto_restart=false" \
-e "gitea_auto_restart=false" \
--vault-password-file /tmp/vault_pass \
--private-key ~/.ssh/production
- name: Deploy Docker Image to Production
run: |
cd /workspace/repo/deployment/ansible
ansible-playbook -i inventory/production.yml \
playbooks/deploy-image.yml \
-e "deployment_environment=production" \
-e "deployment_hosts=production" \
-e "image_tag=${{ steps.image_tag.outputs.IMAGE_TAG }}" \ -e "image_tag=${{ steps.image_tag.outputs.IMAGE_TAG }}" \
-e "docker_registry=${{ env.REGISTRY }}" \ -e "docker_registry=${{ env.REGISTRY }}" \
-e "docker_registry_username=${{ secrets.REGISTRY_USER }}" \ -e "docker_registry_username=${{ secrets.REGISTRY_USER }}" \
-e "docker_registry_password=${{ secrets.REGISTRY_PASSWORD }}" \ -e "docker_registry_password=${{ secrets.REGISTRY_PASSWORD }}" \
-e "traefik_auto_restart=false" \ -e "application_skip_git_update=${{ env.SKIP_GIT_UPDATE || 'false' }}" \
-e "gitea_auto_restart=false" \
--vault-password-file /tmp/vault_pass \
--private-key ~/.ssh/production
- name: Install Composer Dependencies
run: |
cd /workspace/repo/deployment/ansible
ansible-playbook -i inventory/production.yml \
playbooks/install-composer-dependencies.yml \
-e "deployment_environment=production" \
-e "traefik_auto_restart=false" \ -e "traefik_auto_restart=false" \
-e "gitea_auto_restart=false" \ -e "gitea_auto_restart=false" \
--vault-password-file /tmp/vault_pass \ --vault-password-file /tmp/vault_pass \
@@ -1199,22 +1266,30 @@ jobs:
       - name: Health check
         id: health
         run: |
-          echo "🔍 Performing health checks..."
-          # Basic health check
+          echo "🔍 Performing health checks with exponential backoff..."
+          # Basic health check with exponential backoff
           BASIC_HEALTH_OK=false
-          for i in {1..10}; do
+          DELAY=2
+          MAX_DELAY=60
+          MAX_ATTEMPTS=5
+          for i in $(seq 1 $MAX_ATTEMPTS); do
             if curl -f -k -s https://michaelschiemer.de/health > /dev/null 2>&1; then
-              echo "✅ Basic health check passed"
+              echo "✅ Basic health check passed (attempt $i/$MAX_ATTEMPTS)"
               BASIC_HEALTH_OK=true
               break
             fi
-            echo "⏳ Waiting for production service... (attempt $i/10)"
-            sleep 10
+            if [ $i -lt $MAX_ATTEMPTS ]; then
+              echo "⏳ Waiting for production service... (attempt $i/$MAX_ATTEMPTS, delay ${DELAY}s)"
+              sleep $DELAY
+              DELAY=$((DELAY * 2))
+              [ $DELAY -gt $MAX_DELAY ] && DELAY=$MAX_DELAY
+            fi
           done
           if [ "$BASIC_HEALTH_OK" != "true" ]; then
-            echo "❌ Basic health check failed"
+            echo "❌ Basic health check failed after $MAX_ATTEMPTS attempts"
             exit 1
           fi

.gitea/workflows/monitor-performance.yml (new file)

@@ -0,0 +1,89 @@
name: 📊 Monitor Workflow Performance
on:
schedule:
# Run every 6 hours
- cron: '0 */6 * * *'
workflow_dispatch:
inputs:
lookback_hours:
description: 'Hours to look back for metrics'
required: false
default: '24'
type: string
env:
DEPLOYMENT_HOST: 94.16.110.151
jobs:
monitor:
name: Monitor Workflow Performance
runs-on: php-ci
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/production
chmod 600 ~/.ssh/production
ssh-keyscan -H ${{ env.DEPLOYMENT_HOST }} >> ~/.ssh/known_hosts
- name: Create Ansible Vault password file
run: |
if [ -n "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" ]; then
echo "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" > /tmp/vault_pass
chmod 600 /tmp/vault_pass
echo "✅ Vault password file created"
else
echo "⚠️ ANSIBLE_VAULT_PASSWORD secret not set, using empty password file"
touch /tmp/vault_pass
chmod 600 /tmp/vault_pass
fi
- name: Run performance monitoring
run: |
cd /workspace/repo/deployment/ansible
ansible-playbook -i inventory/production.yml \
playbooks/monitor-workflow-performance.yml \
-e "monitoring_lookback_hours=${{ github.event.inputs.lookback_hours || '24' }}" \
--vault-password-file /tmp/vault_pass \
--private-key ~/.ssh/production
- name: Collect metrics files
run: |
ssh -i ~/.ssh/production deploy@${{ env.DEPLOYMENT_HOST }} \
"find /home/deploy/monitoring/workflow-metrics -name 'workflow_metrics_*.json' -mtime -1 -exec cat {} \; | jq -s '.'" \
> /tmp/combined_metrics.json || echo "[]" > /tmp/combined_metrics.json
- name: Display metrics summary
run: |
if [ -f /tmp/combined_metrics.json ] && [ -s /tmp/combined_metrics.json ]; then
echo "📊 Performance Metrics Summary:"
echo "=================================="
cat /tmp/combined_metrics.json | jq -r '
.[] |
"Timestamp: \(.timestamp)",
"System Load: \(.system_metrics.load_average)",
"CPU Usage: \(.system_metrics.cpu_usage_percent)%",
"Memory: \(.system_metrics.memory_usage)",
"Gitea Runner: \(.gitea_metrics.runner_status)",
"Gitea API Response: \(.gitea_metrics.api_response_time_ms)ms",
"Workflow Log Entries: \(.gitea_metrics.workflow_log_entries_last_24h)",
"---"
' || echo "⚠️ Could not parse metrics"
else
echo "⚠️ No metrics collected"
fi
- name: Upload metrics as artifact
uses: actions/upload-artifact@v4
with:
name: workflow-metrics
path: /tmp/combined_metrics.json
retention-days: 30
if: always()

deployment/ansible/playbooks/monitor-workflow-performance.yml (new file)

@@ -0,0 +1,192 @@
---
# Monitor Workflow Performance
# Collects comprehensive metrics about workflow execution, Gitea load, and system resources
- name: Monitor Workflow Performance
hosts: production
gather_facts: yes
become: no
vars:
monitoring_output_dir: "/home/deploy/monitoring/workflow-metrics"
monitoring_lookback_hours: 24
gitea_stack_path: "{{ stacks_base_path }}/gitea"
traefik_stack_path: "{{ stacks_base_path }}/traefik"
tasks:
- name: Create monitoring output directory
ansible.builtin.file:
path: "{{ monitoring_output_dir }}"
state: directory
mode: '0755'
- name: Get system load average
ansible.builtin.shell: |
uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | tr -d ' '
register: system_load
changed_when: false
- name: Get Docker container count
ansible.builtin.shell: |
docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | wc -l
register: docker_container_count
changed_when: false
- name: Get Gitea Runner status
ansible.builtin.shell: |
if docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | grep -q "gitea-runner"; then
echo "running"
else
echo "stopped"
fi
register: gitea_runner_status
changed_when: false
- name: Get Gitea container resource usage
ansible.builtin.shell: |
docker stats gitea --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
register: gitea_stats
changed_when: false
failed_when: false
- name: Get Traefik container resource usage
ansible.builtin.shell: |
docker stats traefik --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
register: traefik_stats
changed_when: false
failed_when: false
- name: Check Gitea API response time
ansible.builtin.uri:
url: "https://{{ gitea_domain }}/api/healthz"
method: GET
status_code: [200]
validate_certs: false
timeout: 10
register: gitea_api_test
changed_when: false
failed_when: false
- name: Get Gitea logs for workflow activity (last {{ monitoring_lookback_hours }} hours)
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 | \
grep -iE "workflow|action|runner" | \
tail -50 || echo "No workflow activity found"
register: gitea_workflow_logs
changed_when: false
failed_when: false
- name: Count workflow-related log entries
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 | \
grep -iE "workflow|action|runner" | \
wc -l
register: workflow_log_count
changed_when: false
failed_when: false
- name: Get disk usage for Gitea data
ansible.builtin.shell: |
du -sh {{ gitea_stack_path }}/data 2>/dev/null | awk '{print $1}' || echo "N/A"
register: gitea_data_size
changed_when: false
failed_when: false
- name: Get Docker system disk usage
ansible.builtin.shell: |
docker system df --format "{{ '{{' }}.Size{{ '}}' }}" 2>/dev/null | head -1 || echo "N/A"
register: docker_disk_usage
changed_when: false
failed_when: false
- name: Get memory usage
ansible.builtin.shell: |
free -h | grep Mem | awk '{print $3 "/" $2}'
register: memory_usage
changed_when: false
- name: Get CPU usage (1 minute average)
ansible.builtin.shell: |
top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1}'
register: cpu_usage
changed_when: false
failed_when: false
- name: Generate metrics JSON
ansible.builtin.copy:
dest: "{{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json"
content: |
{
"timestamp": "{{ ansible_date_time.iso8601 }}",
"system_metrics": {
"load_average": "{{ system_load.stdout }}",
"cpu_usage_percent": "{{ cpu_usage.stdout | default('N/A') }}",
"memory_usage": "{{ memory_usage.stdout }}",
"docker_containers": "{{ docker_container_count.stdout }}",
"docker_disk_usage": "{{ docker_disk_usage.stdout }}",
"gitea_data_size": "{{ gitea_data_size.stdout }}"
},
"gitea_metrics": {
"runner_status": "{{ gitea_runner_status.stdout }}",
"api_response_time_ms": "{{ (gitea_api_test.elapsed * 1000) | default('N/A') | int }}",
"workflow_log_entries_last_{{ monitoring_lookback_hours }}h": {{ workflow_log_count.stdout | int }},
"container_stats": {
"cpu_percent": "{{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
"memory_usage": "{{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
"memory_percent": "{{ gitea_stats.stdout.split(',')[2] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
}
},
"traefik_metrics": {
"container_stats": {
"cpu_percent": "{{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
"memory_usage": "{{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
"memory_percent": "{{ traefik_stats.stdout.split(',')[2] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
}
},
"optimizations": {
"repository_artifact_enabled": true,
"helper_script_caching_enabled": true,
"combined_deployment_playbook": true,
"exponential_backoff_health_checks": true,
"concurrency_groups": true
}
}
mode: '0644'
- name: Display monitoring summary
ansible.builtin.debug:
msg: |
================================================================================
WORKFLOW PERFORMANCE MONITORING - SUMMARY
================================================================================
System Metrics:
- Load Average: {{ system_load.stdout }}
- CPU Usage: {{ cpu_usage.stdout | default('N/A') }}%
- Memory Usage: {{ memory_usage.stdout }}
- Docker Containers: {{ docker_container_count.stdout }}
- Docker Disk Usage: {{ docker_disk_usage.stdout }}
- Gitea Data Size: {{ gitea_data_size.stdout }}
Gitea Metrics:
- Runner Status: {{ gitea_runner_status.stdout }}
- API Response Time: {{ (gitea_api_test.elapsed * 1000) | default('N/A') | int }}ms
- Workflow Log Entries (last {{ monitoring_lookback_hours }}h): {{ workflow_log_count.stdout }}
- Container CPU: {{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
- Container Memory: {{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
Traefik Metrics:
- Container CPU: {{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
- Container Memory: {{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
Optimizations Enabled:
✅ Repository Artifact Caching
✅ Helper Script Caching
✅ Combined Deployment Playbook
✅ Exponential Backoff Health Checks
✅ Concurrency Groups
Metrics saved to: {{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json
================================================================================

monitoring/README.md (new file, +173 lines)

@@ -0,0 +1,173 @@
# Workflow Performance Monitoring
This directory contains tools and metrics for monitoring workflow performance and system resources.
## Overview
The monitoring system collects metrics on:
- Workflow execution times
- Gitea load and API response times
- System resources (CPU, memory, load)
- Docker container status
- Workflow optimizations
## Components
### 1. Monitoring script (`scripts/ci/monitor-workflow-performance.sh`)
Local script that collects workflow metrics via the Gitea API.
**Usage:**
```bash
export GITEA_TOKEN="your-token"
export GITEA_URL="https://git.michaelschiemer.de"
export GITHUB_REPOSITORY="michael/michaelschiemer"
./scripts/ci/monitor-workflow-performance.sh
```
**Output:**
- JSON file with metrics in `monitoring/workflow-metrics/`
- Console summary (a cron example for regular collection is sketched below)
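For a regular baseline the script can also be run from cron on any host with API access. This is only a sketch; the checkout path, token file, and schedule are assumptions, not part of the repository:
```bash
# Hypothetical crontab entry: collect metrics hourly.
# Adjust the checkout path and the token source to your environment.
0 * * * * cd /home/deploy/michaelschiemer && GITEA_TOKEN=$(cat /home/deploy/.gitea_token) GITEA_URL="https://git.michaelschiemer.de" GITHUB_REPOSITORY="michael/michaelschiemer" ./scripts/ci/monitor-workflow-performance.sh >> /home/deploy/monitoring/cron.log 2>&1
```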
### 2. Ansible playbook (`deployment/ansible/playbooks/monitor-workflow-performance.yml`)
Server-side monitoring of system and Gitea metrics.
**Usage:**
```bash
cd deployment/ansible
ansible-playbook -i inventory/production.yml \
  playbooks/monitor-workflow-performance.yml \
  -e "monitoring_lookback_hours=24"
```
**Collected metrics:**
- System load average
- CPU and memory usage
- Docker container status
- Gitea runner status
- Gitea API response times
- Workflow log entries
- Container resource usage (Gitea, Traefik)
**Output:**
- JSON file on the server: `/home/deploy/monitoring/workflow-metrics/workflow_metrics_<timestamp>.json`
- Console summary
### 3. Gitea workflow (`.gitea/workflows/monitor-performance.yml`)
Automated monitoring workflow that runs every 6 hours.
**Manual run:**
- Via the Gitea UI: Actions → Monitor Workflow Performance → Run workflow
- Optionally adjust the `lookback_hours` parameter
**Output:**
- Artifact with the combined metrics (30 days retention)
- Workflow logs with a summary
## Metrics format
### System metrics
```json
{
"system_metrics": {
"load_average": "0.5",
"cpu_usage_percent": "15.2",
"memory_usage": "2.1G/8.0G",
"docker_containers": "12",
"docker_disk_usage": "5.2GB",
"gitea_data_size": "1.2G"
}
}
```
### Gitea metrics
```json
{
"gitea_metrics": {
"runner_status": "running",
"api_response_time_ms": 45,
"workflow_log_entries_last_24h": 150,
"container_stats": {
"cpu_percent": "2.5%",
"memory_usage": "512MiB / 2GiB",
"memory_percent": "25.0%"
}
}
}
```
### Workflow metrics
```json
{
"workflow_metrics": {
"build_image": {
"average_duration_seconds": 420,
"recent_runs": 20
},
"manual_deploy": {
"average_duration_seconds": 180,
"recent_runs": 10
}
}
}
```
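Each run writes one JSON snapshot, so the files can be aggregated with `jq`. A minimal sketch, assuming the server-side directory layout described above and the field names shown in these examples:
```bash
#!/bin/bash
# Average Gitea API response time across all collected snapshots (sketch).
METRICS_DIR="${METRICS_DIR:-/home/deploy/monitoring/workflow-metrics}"

# Slurp all snapshot files into one array, keep only parseable numbers,
# and print their average (or "no data" if nothing is usable).
jq -s '
  [ .[].gitea_metrics.api_response_time_ms | tonumber? ]
  | if length > 0 then (add / length) else "no data" end
' "$METRICS_DIR"/workflow_metrics_*.json
```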
## Optimizations
The monitoring system tracks the following optimizations:
- **Repository Artifact Caching**: the repository is shared between jobs as an artifact
- **Helper Script Caching**: CI helper scripts are cached as an artifact
- **Combined Deployment Playbook**: a single playbook covers all deployment steps
- **Exponential Backoff Health Checks**: smarter retry strategy
- **Concurrency Groups**: prevents parallel deployments
## Interpreting the metrics
### Good values
- **Load average**: < 1.0 (single core), < number of cores (multi-core)
- **Gitea API response**: < 100 ms
- **Workflow duration**: < 10 minutes (build), < 5 minutes (deploy)
- **Memory usage**: < 80% of available memory
### Warning signs
- **Load average**: > 2.0 (may indicate overload)
- **Gitea API response**: > 500 ms (may indicate Gitea overload)
- **Workflow duration**: > 20 minutes (may indicate inefficiencies)
- **Workflow log entries**: > 1000 per hour (may indicate too many workflows; a threshold check against these values is sketched below)
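A simple way to act on these thresholds is to check the most recent snapshot. This is only a sketch; the metrics directory is assumed to match the server-side layout, and the limits are the warning values listed above:
```bash
#!/bin/bash
# Flag warning conditions in the newest metrics snapshot (sketch, not exhaustive).
METRICS_DIR="${METRICS_DIR:-/home/deploy/monitoring/workflow-metrics}"
LATEST=$(ls -t "$METRICS_DIR"/workflow_metrics_*.json 2>/dev/null | head -1)
[ -z "$LATEST" ] && { echo "no metrics found"; exit 1; }

# Load average above 2.0 may indicate overload.
jq -e '(.system_metrics.load_average | tonumber? // 0) > 2.0' "$LATEST" >/dev/null \
  && echo "⚠️ high load: $(jq -r '.system_metrics.load_average' "$LATEST")"

# Gitea API responses above 500 ms may indicate Gitea overload.
jq -e '(.gitea_metrics.api_response_time_ms | tonumber? // 0) > 500' "$LATEST" >/dev/null \
  && echo "⚠️ slow Gitea API: $(jq -r '.gitea_metrics.api_response_time_ms' "$LATEST")ms"

exit 0
```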
## Troubleshooting
### No metrics collected
1. Check Gitea API access (token, URL) – a quick check is sketched below
2. Check SSH access to the server (for the Ansible playbook)
3. Check whether the monitoring directory exists
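For step 1, the token can be verified against the standard Gitea repository endpoint (`GET /api/v1/repos/{owner}/{repo}`), using the same auth header and variables as the monitoring script:
```bash
# Verify that the token and URL used by the monitoring script work.
GITEA_URL="${GITEA_URL:-https://git.michaelschiemer.de}"
REPO="${GITHUB_REPOSITORY:-michael/michaelschiemer}"
curl -sf -H "Authorization: token ${GITEA_TOKEN}" \
  "${GITEA_URL}/api/v1/repos/${REPO}" > /dev/null \
  && echo "✅ Gitea API access OK" \
  || echo "❌ Gitea API access failed (check GITEA_TOKEN and GITEA_URL)"
```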
### High system load
1. Check running workflows
2. Check the Gitea runner status
3. Check Docker container resource usage
4. Check whether too many parallel deployments are running
### Slow workflows
1. Check whether repository artifacts are being used
2. Check whether helper scripts are cached
3. Check the Docker build cache
4. Check network latency to the registry
## Next steps
1. **Establish a baseline**: collect metrics for 1-2 weeks
2. **Analyze trends**: identify long-term developments
3. **Set up alerts**: warn on critical values
4. **Further optimizations**: based on the collected metrics
## Further resources
- [Gitea Actions Documentation](https://docs.gitea.com/usage/actions)
- [Ansible Best Practices](https://docs.ansible.com/ansible/latest/user_guide/playbooks_best_practices.html)
- [Docker Monitoring](https://docs.docker.com/config/containers/logging/)

scripts/ci/monitor-workflow-performance.sh (new file)

@@ -0,0 +1,180 @@
#!/bin/bash
# Monitor Workflow Performance
# Collects metrics about workflow execution times, Gitea load, and resource usage
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
OUTPUT_DIR="${REPO_ROOT}/monitoring/workflow-metrics"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
METRICS_FILE="${OUTPUT_DIR}/workflow_metrics_${TIMESTAMP}.json"
# Create output directory
mkdir -p "$OUTPUT_DIR"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
echo -e "${BLUE}📊 Workflow Performance Monitor${NC}"
echo "=================================="
echo ""
# Check if Gitea API credentials are available
GITEA_URL="${GITEA_URL:-https://git.michaelschiemer.de}"
GITEA_TOKEN="${GITEA_TOKEN:-${CI_TOKEN:-}}"
REPO="${GITHUB_REPOSITORY:-michael/michaelschiemer}"
if [ -z "$GITEA_TOKEN" ]; then
echo -e "${YELLOW}⚠️ GITEA_TOKEN not set, some metrics will be unavailable${NC}"
fi
# Function to get workflow runs from Gitea API
get_workflow_runs() {
local workflow_name="$1"
local limit="${2:-10}"
if [ -z "$GITEA_TOKEN" ]; then
echo "[]"
return
fi
local api_url="${GITEA_URL}/api/v1/repos/${REPO}/actions/runs"
if [ -n "$workflow_name" ]; then
api_url="${api_url}?workflow=${workflow_name}&limit=${limit}"
else
api_url="${api_url}?limit=${limit}"
fi
curl -sfL \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Accept: application/json" \
"$api_url" 2>/dev/null || echo "[]"
}
# Function to calculate average duration
calculate_average_duration() {
    local runs_json="$1"
    local total=0
    local count=0
    if [ "$runs_json" = "[]" ] || [ -z "$runs_json" ]; then
        echo "0"
        return
    fi
    # Extract durations (in seconds) from workflow runs
    # Note: This is a simplified parser - in production, use jq
    # Read via process substitution (not a pipe) so that total/count are
    # updated in the current shell instead of being lost in a subshell.
    while read -r duration; do
        if [ -n "$duration" ] && [ "$duration" -gt 0 ]; then
            total=$((total + duration))
            count=$((count + 1))
        fi
    done < <(echo "$runs_json" | grep -o '"duration":[0-9]*' | grep -o '[0-9]*')
    if [ "$count" -eq 0 ]; then
        echo "0"
    else
        echo "$((total / count))"
    fi
}
# Collect metrics
echo -e "${BLUE}📥 Collecting workflow metrics...${NC}"
# Get recent workflow runs
BUILD_WORKFLOW_RUNS=$(get_workflow_runs "build-image.yml" 20)
DEPLOY_WORKFLOW_RUNS=$(get_workflow_runs "manual-deploy.yml" 10)
# Calculate metrics
BUILD_AVG_DURATION=$(calculate_average_duration "$BUILD_WORKFLOW_RUNS")
DEPLOY_AVG_DURATION=$(calculate_average_duration "$DEPLOY_WORKFLOW_RUNS")
# Get system metrics (if running on server)
SYSTEM_LOAD="unknown"
DOCKER_CONTAINERS="unknown"
GITEA_RUNNER_STATUS="unknown"
if command -v uptime >/dev/null 2>&1; then
SYSTEM_LOAD=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | tr -d ' ')
fi
if command -v docker >/dev/null 2>&1; then
DOCKER_CONTAINERS=$(docker ps --format '{{.Names}}' | wc -l)
if docker ps --format '{{.Names}}' | grep -q "gitea-runner"; then
GITEA_RUNNER_STATUS="running"
else
GITEA_RUNNER_STATUS="stopped"
fi
fi
# Create metrics JSON
cat > "$METRICS_FILE" <<EOF
{
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"workflow_metrics": {
"build_image": {
"average_duration_seconds": $BUILD_AVG_DURATION,
"recent_runs": 20
},
"manual_deploy": {
"average_duration_seconds": $DEPLOY_AVG_DURATION,
"recent_runs": 10
}
},
"system_metrics": {
"load_average": "$SYSTEM_LOAD",
"docker_containers": "$DOCKER_CONTAINERS",
"gitea_runner_status": "$GITEA_RUNNER_STATUS"
},
"optimizations": {
"repository_artifact_enabled": true,
"helper_script_caching_enabled": true,
"combined_deployment_playbook": true,
"exponential_backoff_health_checks": true,
"concurrency_groups": true
}
}
EOF
echo -e "${GREEN}✅ Metrics collected${NC}"
echo ""
echo -e "${BLUE}📈 Summary:${NC}"
echo " Build Workflow Avg Duration: ${BUILD_AVG_DURATION}s"
echo " Deploy Workflow Avg Duration: ${DEPLOY_AVG_DURATION}s"
echo " System Load: $SYSTEM_LOAD"
echo " Docker Containers: $DOCKER_CONTAINERS"
echo " Gitea Runner: $GITEA_RUNNER_STATUS"
echo ""
echo -e "${BLUE}💾 Metrics saved to:${NC} $METRICS_FILE"
echo ""
# Display recent workflow runs summary
if [ "$BUILD_WORKFLOW_RUNS" != "[]" ] && [ -n "$BUILD_WORKFLOW_RUNS" ]; then
echo -e "${BLUE}📋 Recent Build Workflow Runs:${NC}"
echo "$BUILD_WORKFLOW_RUNS" | grep -o '"status":"[^"]*"' | head -5 | sed 's/"status":"//g' | sed 's/"//g' | while read -r status; do
case "$status" in
success)
echo -e " ${GREEN}${NC} Success"
;;
failure)
echo -e " ${RED}${NC} Failed"
;;
running)
echo -e " ${YELLOW}${NC} Running"
;;
*)
echo -e " ${BLUE}?${NC} $status"
;;
esac
done
echo ""
fi
echo -e "${GREEN}✅ Monitoring complete${NC}"