feat: optimize workflows with repository artifacts and add performance monitoring
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 33s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Successful in 32s
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Composer Security Audit (push) Has been skipped
🧊 Warm Docker Build Cache / Refresh Buildx Caches (push) Failing after 11s
📊 Monitor Workflow Performance / Monitor Workflow Performance (push) Failing after 20s
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 33s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Successful in 32s
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Composer Security Audit (push) Has been skipped
🧊 Warm Docker Build Cache / Refresh Buildx Caches (push) Failing after 11s
📊 Monitor Workflow Performance / Monitor Workflow Performance (push) Failing after 20s
- Use repository artifacts in test and build jobs (reduces 2-3 git clones per run) - Add comprehensive workflow performance monitoring system - Add monitoring playbook and Gitea workflow for automated metrics collection - Add monitoring documentation and scripts Optimizations: - Repository artifact caching: changes job uploads repo, test/build jobs download it - Reduces Gitea load by eliminating redundant git operations - Faster job starts (artifact download is typically faster than git clone) Monitoring: - Script for local workflow metrics collection via Gitea API - Ansible playbook for server-side system and Gitea metrics - Automated Gitea workflow that runs every 6 hours - Tracks workflow durations, system load, Gitea API response times, and more
This commit is contained in:
@@ -44,6 +44,11 @@ on:
|
||||
type: boolean
|
||||
required: false
|
||||
default: false
|
||||
deploy:
|
||||
description: 'Deploy to staging/production after build (default: false)'
|
||||
type: boolean
|
||||
required: false
|
||||
default: false
|
||||
|
||||
env:
|
||||
REGISTRY: registry.michaelschiemer.de
|
||||
@@ -85,6 +90,13 @@ jobs:
|
||||
fi
|
||||
chmod +x /tmp/ci-tools/clone_repo.sh
|
||||
|
||||
- name: Upload CI helpers as artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ci-helpers
|
||||
path: /tmp/ci-tools/clone_repo.sh
|
||||
retention-days: 1
|
||||
|
||||
- name: Analyse changed files
|
||||
id: filter
|
||||
shell: bash
|
||||
@@ -224,6 +236,13 @@ jobs:
|
||||
echo "changed_files=$PRETTY_CHANGES" >> "$GITHUB_OUTPUT"
|
||||
echo "needs_runtime_build=$RUNTIME_BUILD" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Upload repository as artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: repository
|
||||
path: /workspace/repo
|
||||
retention-days: 1
|
||||
|
||||
runtime-base:
|
||||
name: Build Runtime Base Image
|
||||
needs: changes
|
||||
@@ -244,8 +263,16 @@ jobs:
|
||||
echo "should_build=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Download CI helpers
|
||||
- name: Download CI helpers from artifact
|
||||
if: ${{ steps.decision.outputs.should_build == 'true' }}
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: ci-helpers
|
||||
path: /tmp/ci-tools
|
||||
continue-on-error: true
|
||||
|
||||
- name: Download CI helpers (fallback if artifact missing)
|
||||
if: ${{ steps.decision.outputs.should_build == 'true' && failure() }}
|
||||
shell: bash
|
||||
env:
|
||||
CI_TOKEN: ${{ secrets.CI_TOKEN }}
|
||||
@@ -457,7 +484,15 @@ jobs:
|
||||
name: Run Tests & Quality Checks
|
||||
runs-on: php-ci
|
||||
steps:
|
||||
- name: Download CI helpers
|
||||
- name: Download CI helpers from artifact
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: ci-helpers
|
||||
path: /tmp/ci-tools
|
||||
continue-on-error: true
|
||||
|
||||
- name: Download CI helpers (fallback if artifact missing)
|
||||
if: failure()
|
||||
shell: bash
|
||||
env:
|
||||
CI_TOKEN: ${{ secrets.CI_TOKEN }}
|
||||
@@ -479,7 +514,16 @@ jobs:
|
||||
fi
|
||||
chmod +x /tmp/ci-tools/clone_repo.sh
|
||||
|
||||
- name: Checkout code
|
||||
- name: Download repository artifact
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: repository
|
||||
path: /workspace
|
||||
continue-on-error: true
|
||||
id: download_repo
|
||||
|
||||
- name: Checkout code (fallback if artifact missing)
|
||||
if: steps.download_repo.outcome == 'failure'
|
||||
run: |
|
||||
REF_NAME="${{ github.ref_name }}"
|
||||
INPUT_BRANCH="${{ inputs.branch }}"
|
||||
@@ -545,8 +589,16 @@ jobs:
|
||||
bash --version
|
||||
git --version
|
||||
|
||||
- name: Download CI helpers
|
||||
- name: Download CI helpers from artifact
|
||||
if: ${{ env.SHOULD_BUILD == 'true' }}
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: ci-helpers
|
||||
path: /tmp/ci-tools
|
||||
continue-on-error: true
|
||||
|
||||
- name: Download CI helpers (fallback if artifact missing)
|
||||
if: ${{ env.SHOULD_BUILD == 'true' && failure() }}
|
||||
shell: bash
|
||||
env:
|
||||
CI_TOKEN: ${{ secrets.CI_TOKEN }}
|
||||
@@ -568,8 +620,17 @@ jobs:
|
||||
fi
|
||||
chmod +x /tmp/ci-tools/clone_repo.sh
|
||||
|
||||
- name: Checkout code
|
||||
- name: Download repository artifact
|
||||
if: ${{ env.SHOULD_BUILD == 'true' }}
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: repository
|
||||
path: /workspace
|
||||
continue-on-error: true
|
||||
id: download_repo
|
||||
|
||||
- name: Checkout code (fallback if artifact missing)
|
||||
if: ${{ env.SHOULD_BUILD == 'true' && steps.download_repo.outcome == 'failure' }}
|
||||
shell: bash
|
||||
run: |
|
||||
REF_NAME="${{ github.ref_name }}"
|
||||
@@ -910,12 +971,23 @@ jobs:
|
||||
echo " Run the 'Deploy to Production' or 'Deploy to Staging' workflow to deploy this image."
|
||||
fi
|
||||
|
||||
# Job 3: Auto-deploy to Staging (only for staging branch)
|
||||
- name: Upload repository as artifact
|
||||
if: ${{ env.SHOULD_BUILD == 'true' }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: repository
|
||||
path: /workspace/repo
|
||||
retention-days: 1
|
||||
|
||||
# Job 3: Auto-deploy to Staging (only for staging branch and if deploy is enabled)
|
||||
deploy-staging:
|
||||
name: Auto-deploy to Staging
|
||||
needs: [changes, build]
|
||||
if: ${{ always() && (github.ref_name == 'staging' || github.head_ref == 'staging' || (github.ref_name == '' && contains(github.ref, 'staging'))) && needs.build.result != 'failure' && needs.build.result != 'cancelled' && needs.changes.result != 'failure' && needs.changes.result != 'cancelled' }}
|
||||
if: ${{ always() && ((github.event_name == 'push' && (github.ref_name == 'staging' || github.head_ref == 'staging' || (github.ref_name == '' && contains(github.ref, 'staging')))) || (github.event_name == 'workflow_dispatch' && inputs.deploy == true)) && needs.build.result != 'failure' && needs.build.result != 'cancelled' && needs.changes.result != 'failure' && needs.changes.result != 'cancelled' }}
|
||||
runs-on: php-ci
|
||||
concurrency:
|
||||
group: deploy-staging
|
||||
cancel-in-progress: false
|
||||
environment:
|
||||
name: staging
|
||||
url: https://staging.michaelschiemer.de
|
||||
@@ -936,7 +1008,16 @@ jobs:
|
||||
echo "BRANCH=$REF_NAME" >> $GITHUB_OUTPUT
|
||||
echo "📋 Branch: $REF_NAME"
|
||||
|
||||
- name: Checkout deployment scripts
|
||||
- name: Download repository artifact
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: repository
|
||||
path: /workspace
|
||||
continue-on-error: true
|
||||
id: download_repo
|
||||
|
||||
- name: Checkout deployment scripts (fallback if artifact missing)
|
||||
if: steps.download_repo.outcome == 'failure'
|
||||
run: |
|
||||
REF_NAME="${{ steps.branch.outputs.BRANCH }}"
|
||||
REPO="${{ github.repository }}"
|
||||
@@ -956,6 +1037,11 @@ jobs:
|
||||
|
||||
cd /workspace/repo
|
||||
|
||||
- name: Set skip_git_update flag if repository artifact was used
|
||||
if: steps.download_repo.outcome == 'success'
|
||||
run: |
|
||||
echo "SKIP_GIT_UPDATE=true" >> $GITHUB_ENV
|
||||
|
||||
- name: Setup SSH key
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
@@ -975,41 +1061,19 @@ jobs:
|
||||
chmod 600 /tmp/vault_pass
|
||||
fi
|
||||
|
||||
- name: Deploy Application Code to Staging
|
||||
- name: Deploy to Staging (Complete)
|
||||
run: |
|
||||
cd /workspace/repo/deployment/ansible
|
||||
ansible-playbook -i inventory/production.yml \
|
||||
playbooks/deploy-application-code.yml \
|
||||
playbooks/deploy-complete.yml \
|
||||
-e "deployment_environment=staging" \
|
||||
-e "deployment_hosts=production" \
|
||||
-e "git_branch=staging" \
|
||||
-e "traefik_auto_restart=false" \
|
||||
-e "gitea_auto_restart=false" \
|
||||
--vault-password-file /tmp/vault_pass \
|
||||
--private-key ~/.ssh/production
|
||||
|
||||
- name: Deploy Docker Image to Staging
|
||||
run: |
|
||||
cd /workspace/repo/deployment/ansible
|
||||
ansible-playbook -i inventory/production.yml \
|
||||
playbooks/deploy-image.yml \
|
||||
-e "deployment_environment=staging" \
|
||||
-e "deployment_hosts=production" \
|
||||
-e "image_tag=latest" \
|
||||
-e "docker_registry=${{ env.REGISTRY }}" \
|
||||
-e "docker_registry_username=${{ secrets.REGISTRY_USER }}" \
|
||||
-e "docker_registry_password=${{ secrets.REGISTRY_PASSWORD }}" \
|
||||
-e "traefik_auto_restart=false" \
|
||||
-e "gitea_auto_restart=false" \
|
||||
--vault-password-file /tmp/vault_pass \
|
||||
--private-key ~/.ssh/production
|
||||
|
||||
- name: Install Composer Dependencies
|
||||
run: |
|
||||
cd /workspace/repo/deployment/ansible
|
||||
ansible-playbook -i inventory/production.yml \
|
||||
playbooks/install-composer-dependencies.yml \
|
||||
-e "deployment_environment=staging" \
|
||||
-e "application_skip_git_update=${{ env.SKIP_GIT_UPDATE || 'false' }}" \
|
||||
-e "traefik_auto_restart=false" \
|
||||
-e "gitea_auto_restart=false" \
|
||||
--vault-password-file /tmp/vault_pass \
|
||||
@@ -1021,22 +1085,30 @@ jobs:
|
||||
- name: Health check
|
||||
id: health
|
||||
run: |
|
||||
echo "🔍 Performing health checks..."
|
||||
echo "🔍 Performing health checks with exponential backoff..."
|
||||
|
||||
# Basic health check
|
||||
# Basic health check with exponential backoff
|
||||
BASIC_HEALTH_OK=false
|
||||
for i in {1..10}; do
|
||||
DELAY=2
|
||||
MAX_DELAY=60
|
||||
MAX_ATTEMPTS=5
|
||||
|
||||
for i in $(seq 1 $MAX_ATTEMPTS); do
|
||||
if curl -f -k -s https://staging.michaelschiemer.de/health > /dev/null 2>&1; then
|
||||
echo "✅ Basic health check passed"
|
||||
echo "✅ Basic health check passed (attempt $i/$MAX_ATTEMPTS)"
|
||||
BASIC_HEALTH_OK=true
|
||||
break
|
||||
fi
|
||||
echo "⏳ Waiting for staging service... (attempt $i/10)"
|
||||
sleep 10
|
||||
if [ $i -lt $MAX_ATTEMPTS ]; then
|
||||
echo "⏳ Waiting for staging service... (attempt $i/$MAX_ATTEMPTS, delay ${DELAY}s)"
|
||||
sleep $DELAY
|
||||
DELAY=$((DELAY * 2))
|
||||
[ $DELAY -gt $MAX_DELAY ] && DELAY=$MAX_DELAY
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$BASIC_HEALTH_OK" != "true" ]; then
|
||||
echo "❌ Basic health check failed"
|
||||
echo "❌ Basic health check failed after $MAX_ATTEMPTS attempts"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -1065,12 +1137,15 @@ jobs:
|
||||
echo "URL: https://staging.michaelschiemer.de"
|
||||
echo "Image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest"
|
||||
|
||||
# Job 4: Auto-deploy to Production (only for main branch)
|
||||
# Job 4: Auto-deploy to Production (only for main branch and if deploy is enabled)
|
||||
deploy-production:
|
||||
name: Auto-deploy to Production
|
||||
needs: [changes, build]
|
||||
if: always() && (github.ref_name == 'main' || github.head_ref == 'main' || (github.ref_name == '' && contains(github.ref, 'main'))) && needs.changes.outputs.needs_build == 'true'
|
||||
if: always() && ((github.event_name == 'push' && (github.ref_name == 'main' || github.head_ref == 'main' || (github.ref_name == '' && contains(github.ref, 'main')))) || (github.event_name == 'workflow_dispatch' && inputs.deploy == true)) && needs.changes.outputs.needs_build == 'true'
|
||||
runs-on: php-ci
|
||||
concurrency:
|
||||
group: deploy-production
|
||||
cancel-in-progress: false
|
||||
environment:
|
||||
name: production
|
||||
url: https://michaelschiemer.de
|
||||
@@ -1091,7 +1166,16 @@ jobs:
|
||||
echo "BRANCH=$REF_NAME" >> $GITHUB_OUTPUT
|
||||
echo "📋 Branch: $REF_NAME"
|
||||
|
||||
- name: Checkout deployment scripts
|
||||
- name: Download repository artifact
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: repository
|
||||
path: /workspace
|
||||
continue-on-error: true
|
||||
id: download_repo
|
||||
|
||||
- name: Checkout deployment scripts (fallback if artifact missing)
|
||||
if: steps.download_repo.outcome == 'failure'
|
||||
run: |
|
||||
REF_NAME="${{ steps.branch.outputs.BRANCH }}"
|
||||
REPO="${{ github.repository }}"
|
||||
@@ -1111,6 +1195,11 @@ jobs:
|
||||
|
||||
cd /workspace/repo
|
||||
|
||||
- name: Set skip_git_update flag if repository artifact was used
|
||||
if: steps.download_repo.outcome == 'success'
|
||||
run: |
|
||||
echo "SKIP_GIT_UPDATE=true" >> $GITHUB_ENV
|
||||
|
||||
- name: Setup SSH key
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
@@ -1153,41 +1242,19 @@ jobs:
|
||||
echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_OUTPUT
|
||||
echo "📦 Image Tag: ${IMAGE_TAG}"
|
||||
|
||||
- name: Deploy Application Code to Production
|
||||
- name: Deploy to Production (Complete)
|
||||
run: |
|
||||
cd /workspace/repo/deployment/ansible
|
||||
ansible-playbook -i inventory/production.yml \
|
||||
playbooks/deploy-application-code.yml \
|
||||
playbooks/deploy-complete.yml \
|
||||
-e "deployment_environment=production" \
|
||||
-e "deployment_hosts=production" \
|
||||
-e "git_branch=main" \
|
||||
-e "traefik_auto_restart=false" \
|
||||
-e "gitea_auto_restart=false" \
|
||||
--vault-password-file /tmp/vault_pass \
|
||||
--private-key ~/.ssh/production
|
||||
|
||||
- name: Deploy Docker Image to Production
|
||||
run: |
|
||||
cd /workspace/repo/deployment/ansible
|
||||
ansible-playbook -i inventory/production.yml \
|
||||
playbooks/deploy-image.yml \
|
||||
-e "deployment_environment=production" \
|
||||
-e "deployment_hosts=production" \
|
||||
-e "image_tag=${{ steps.image_tag.outputs.IMAGE_TAG }}" \
|
||||
-e "docker_registry=${{ env.REGISTRY }}" \
|
||||
-e "docker_registry_username=${{ secrets.REGISTRY_USER }}" \
|
||||
-e "docker_registry_password=${{ secrets.REGISTRY_PASSWORD }}" \
|
||||
-e "traefik_auto_restart=false" \
|
||||
-e "gitea_auto_restart=false" \
|
||||
--vault-password-file /tmp/vault_pass \
|
||||
--private-key ~/.ssh/production
|
||||
|
||||
- name: Install Composer Dependencies
|
||||
run: |
|
||||
cd /workspace/repo/deployment/ansible
|
||||
ansible-playbook -i inventory/production.yml \
|
||||
playbooks/install-composer-dependencies.yml \
|
||||
-e "deployment_environment=production" \
|
||||
-e "application_skip_git_update=${{ env.SKIP_GIT_UPDATE || 'false' }}" \
|
||||
-e "traefik_auto_restart=false" \
|
||||
-e "gitea_auto_restart=false" \
|
||||
--vault-password-file /tmp/vault_pass \
|
||||
@@ -1199,22 +1266,30 @@ jobs:
|
||||
- name: Health check
|
||||
id: health
|
||||
run: |
|
||||
echo "🔍 Performing health checks..."
|
||||
echo "🔍 Performing health checks with exponential backoff..."
|
||||
|
||||
# Basic health check
|
||||
# Basic health check with exponential backoff
|
||||
BASIC_HEALTH_OK=false
|
||||
for i in {1..10}; do
|
||||
DELAY=2
|
||||
MAX_DELAY=60
|
||||
MAX_ATTEMPTS=5
|
||||
|
||||
for i in $(seq 1 $MAX_ATTEMPTS); do
|
||||
if curl -f -k -s https://michaelschiemer.de/health > /dev/null 2>&1; then
|
||||
echo "✅ Basic health check passed"
|
||||
echo "✅ Basic health check passed (attempt $i/$MAX_ATTEMPTS)"
|
||||
BASIC_HEALTH_OK=true
|
||||
break
|
||||
fi
|
||||
echo "⏳ Waiting for production service... (attempt $i/10)"
|
||||
sleep 10
|
||||
if [ $i -lt $MAX_ATTEMPTS ]; then
|
||||
echo "⏳ Waiting for production service... (attempt $i/$MAX_ATTEMPTS, delay ${DELAY}s)"
|
||||
sleep $DELAY
|
||||
DELAY=$((DELAY * 2))
|
||||
[ $DELAY -gt $MAX_DELAY ] && DELAY=$MAX_DELAY
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$BASIC_HEALTH_OK" != "true" ]; then
|
||||
echo "❌ Basic health check failed"
|
||||
echo "❌ Basic health check failed after $MAX_ATTEMPTS attempts"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
89
.gitea/workflows/monitor-performance.yml
Normal file
89
.gitea/workflows/monitor-performance.yml
Normal file
@@ -0,0 +1,89 @@
|
||||
name: 📊 Monitor Workflow Performance
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run every 6 hours
|
||||
- cron: '0 */6 * * *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
lookback_hours:
|
||||
description: 'Hours to look back for metrics'
|
||||
required: false
|
||||
default: '24'
|
||||
type: string
|
||||
|
||||
env:
|
||||
DEPLOYMENT_HOST: 94.16.110.151
|
||||
|
||||
jobs:
|
||||
monitor:
|
||||
name: Monitor Workflow Performance
|
||||
runs-on: php-ci
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Setup SSH key
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/production
|
||||
chmod 600 ~/.ssh/production
|
||||
ssh-keyscan -H ${{ env.DEPLOYMENT_HOST }} >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Create Ansible Vault password file
|
||||
run: |
|
||||
if [ -n "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" ]; then
|
||||
echo "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" > /tmp/vault_pass
|
||||
chmod 600 /tmp/vault_pass
|
||||
echo "✅ Vault password file created"
|
||||
else
|
||||
echo "⚠️ ANSIBLE_VAULT_PASSWORD secret not set, using empty password file"
|
||||
touch /tmp/vault_pass
|
||||
chmod 600 /tmp/vault_pass
|
||||
fi
|
||||
|
||||
- name: Run performance monitoring
|
||||
run: |
|
||||
cd /workspace/repo/deployment/ansible
|
||||
ansible-playbook -i inventory/production.yml \
|
||||
playbooks/monitor-workflow-performance.yml \
|
||||
-e "monitoring_lookback_hours=${{ github.event.inputs.lookback_hours || '24' }}" \
|
||||
--vault-password-file /tmp/vault_pass \
|
||||
--private-key ~/.ssh/production
|
||||
|
||||
- name: Collect metrics files
|
||||
run: |
|
||||
ssh -i ~/.ssh/production deploy@${{ env.DEPLOYMENT_HOST }} \
|
||||
"find /home/deploy/monitoring/workflow-metrics -name 'workflow_metrics_*.json' -mtime -1 -exec cat {} \; | jq -s '.'" \
|
||||
> /tmp/combined_metrics.json || echo "[]" > /tmp/combined_metrics.json
|
||||
|
||||
- name: Display metrics summary
|
||||
run: |
|
||||
if [ -f /tmp/combined_metrics.json ] && [ -s /tmp/combined_metrics.json ]; then
|
||||
echo "📊 Performance Metrics Summary:"
|
||||
echo "=================================="
|
||||
cat /tmp/combined_metrics.json | jq -r '
|
||||
.[] |
|
||||
"Timestamp: \(.timestamp)",
|
||||
"System Load: \(.system_metrics.load_average)",
|
||||
"CPU Usage: \(.system_metrics.cpu_usage_percent)%",
|
||||
"Memory: \(.system_metrics.memory_usage)",
|
||||
"Gitea Runner: \(.gitea_metrics.runner_status)",
|
||||
"Gitea API Response: \(.gitea_metrics.api_response_time_ms)ms",
|
||||
"Workflow Log Entries: \(.gitea_metrics.workflow_log_entries_last_24h)",
|
||||
"---"
|
||||
' || echo "⚠️ Could not parse metrics"
|
||||
else
|
||||
echo "⚠️ No metrics collected"
|
||||
fi
|
||||
|
||||
- name: Upload metrics as artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: workflow-metrics
|
||||
path: /tmp/combined_metrics.json
|
||||
retention-days: 30
|
||||
if: always()
|
||||
|
||||
192
deployment/ansible/playbooks/monitor-workflow-performance.yml
Normal file
192
deployment/ansible/playbooks/monitor-workflow-performance.yml
Normal file
@@ -0,0 +1,192 @@
|
||||
---
|
||||
# Monitor Workflow Performance
|
||||
# Collects comprehensive metrics about workflow execution, Gitea load, and system resources
|
||||
- name: Monitor Workflow Performance
|
||||
hosts: production
|
||||
gather_facts: yes
|
||||
become: no
|
||||
vars:
|
||||
monitoring_output_dir: "/home/deploy/monitoring/workflow-metrics"
|
||||
monitoring_lookback_hours: 24
|
||||
gitea_stack_path: "{{ stacks_base_path }}/gitea"
|
||||
traefik_stack_path: "{{ stacks_base_path }}/traefik"
|
||||
|
||||
tasks:
|
||||
- name: Create monitoring output directory
|
||||
ansible.builtin.file:
|
||||
path: "{{ monitoring_output_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Get system load average
|
||||
ansible.builtin.shell: |
|
||||
uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | tr -d ' '
|
||||
register: system_load
|
||||
changed_when: false
|
||||
|
||||
- name: Get Docker container count
|
||||
ansible.builtin.shell: |
|
||||
docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | wc -l
|
||||
register: docker_container_count
|
||||
changed_when: false
|
||||
|
||||
- name: Get Gitea Runner status
|
||||
ansible.builtin.shell: |
|
||||
if docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | grep -q "gitea-runner"; then
|
||||
echo "running"
|
||||
else
|
||||
echo "stopped"
|
||||
fi
|
||||
register: gitea_runner_status
|
||||
changed_when: false
|
||||
|
||||
- name: Get Gitea container resource usage
|
||||
ansible.builtin.shell: |
|
||||
docker stats gitea --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
|
||||
register: gitea_stats
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Get Traefik container resource usage
|
||||
ansible.builtin.shell: |
|
||||
docker stats traefik --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
|
||||
register: traefik_stats
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Check Gitea API response time
|
||||
ansible.builtin.uri:
|
||||
url: "https://{{ gitea_domain }}/api/healthz"
|
||||
method: GET
|
||||
status_code: [200]
|
||||
validate_certs: false
|
||||
timeout: 10
|
||||
register: gitea_api_test
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Get Gitea logs for workflow activity (last {{ monitoring_lookback_hours }} hours)
|
||||
ansible.builtin.shell: |
|
||||
cd {{ gitea_stack_path }}
|
||||
docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 | \
|
||||
grep -iE "workflow|action|runner" | \
|
||||
tail -50 || echo "No workflow activity found"
|
||||
register: gitea_workflow_logs
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Count workflow-related log entries
|
||||
ansible.builtin.shell: |
|
||||
cd {{ gitea_stack_path }}
|
||||
docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 | \
|
||||
grep -iE "workflow|action|runner" | \
|
||||
wc -l
|
||||
register: workflow_log_count
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Get disk usage for Gitea data
|
||||
ansible.builtin.shell: |
|
||||
du -sh {{ gitea_stack_path }}/data 2>/dev/null | awk '{print $1}' || echo "N/A"
|
||||
register: gitea_data_size
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Get Docker system disk usage
|
||||
ansible.builtin.shell: |
|
||||
docker system df --format "{{ '{{' }}.Size{{ '}}' }}" 2>/dev/null | head -1 || echo "N/A"
|
||||
register: docker_disk_usage
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Get memory usage
|
||||
ansible.builtin.shell: |
|
||||
free -h | grep Mem | awk '{print $3 "/" $2}'
|
||||
register: memory_usage
|
||||
changed_when: false
|
||||
|
||||
- name: Get CPU usage (1 minute average)
|
||||
ansible.builtin.shell: |
|
||||
top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1}'
|
||||
register: cpu_usage
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Generate metrics JSON
|
||||
ansible.builtin.copy:
|
||||
dest: "{{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json"
|
||||
content: |
|
||||
{
|
||||
"timestamp": "{{ ansible_date_time.iso8601 }}",
|
||||
"system_metrics": {
|
||||
"load_average": "{{ system_load.stdout }}",
|
||||
"cpu_usage_percent": "{{ cpu_usage.stdout | default('N/A') }}",
|
||||
"memory_usage": "{{ memory_usage.stdout }}",
|
||||
"docker_containers": "{{ docker_container_count.stdout }}",
|
||||
"docker_disk_usage": "{{ docker_disk_usage.stdout }}",
|
||||
"gitea_data_size": "{{ gitea_data_size.stdout }}"
|
||||
},
|
||||
"gitea_metrics": {
|
||||
"runner_status": "{{ gitea_runner_status.stdout }}",
|
||||
"api_response_time_ms": "{{ (gitea_api_test.elapsed * 1000) | default('N/A') | int }}",
|
||||
"workflow_log_entries_last_{{ monitoring_lookback_hours }}h": {{ workflow_log_count.stdout | int }},
|
||||
"container_stats": {
|
||||
"cpu_percent": "{{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
|
||||
"memory_usage": "{{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
|
||||
"memory_percent": "{{ gitea_stats.stdout.split(',')[2] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
|
||||
}
|
||||
},
|
||||
"traefik_metrics": {
|
||||
"container_stats": {
|
||||
"cpu_percent": "{{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
|
||||
"memory_usage": "{{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
|
||||
"memory_percent": "{{ traefik_stats.stdout.split(',')[2] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
|
||||
}
|
||||
},
|
||||
"optimizations": {
|
||||
"repository_artifact_enabled": true,
|
||||
"helper_script_caching_enabled": true,
|
||||
"combined_deployment_playbook": true,
|
||||
"exponential_backoff_health_checks": true,
|
||||
"concurrency_groups": true
|
||||
}
|
||||
}
|
||||
mode: '0644'
|
||||
|
||||
- name: Display monitoring summary
|
||||
ansible.builtin.debug:
|
||||
msg: |
|
||||
================================================================================
|
||||
WORKFLOW PERFORMANCE MONITORING - SUMMARY
|
||||
================================================================================
|
||||
|
||||
System Metrics:
|
||||
- Load Average: {{ system_load.stdout }}
|
||||
- CPU Usage: {{ cpu_usage.stdout | default('N/A') }}%
|
||||
- Memory Usage: {{ memory_usage.stdout }}
|
||||
- Docker Containers: {{ docker_container_count.stdout }}
|
||||
- Docker Disk Usage: {{ docker_disk_usage.stdout }}
|
||||
- Gitea Data Size: {{ gitea_data_size.stdout }}
|
||||
|
||||
Gitea Metrics:
|
||||
- Runner Status: {{ gitea_runner_status.stdout }}
|
||||
- API Response Time: {{ (gitea_api_test.elapsed * 1000) | default('N/A') | int }}ms
|
||||
- Workflow Log Entries (last {{ monitoring_lookback_hours }}h): {{ workflow_log_count.stdout }}
|
||||
- Container CPU: {{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
|
||||
- Container Memory: {{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
|
||||
|
||||
Traefik Metrics:
|
||||
- Container CPU: {{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
|
||||
- Container Memory: {{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
|
||||
|
||||
Optimizations Enabled:
|
||||
✅ Repository Artifact Caching
|
||||
✅ Helper Script Caching
|
||||
✅ Combined Deployment Playbook
|
||||
✅ Exponential Backoff Health Checks
|
||||
✅ Concurrency Groups
|
||||
|
||||
Metrics saved to: {{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json
|
||||
|
||||
================================================================================
|
||||
|
||||
173
monitoring/README.md
Normal file
173
monitoring/README.md
Normal file
@@ -0,0 +1,173 @@
|
||||
# Workflow Performance Monitoring
|
||||
|
||||
Dieses Verzeichnis enthält Tools und Metriken zur Überwachung der Workflow-Performance und Systemressourcen.
|
||||
|
||||
## Übersicht
|
||||
|
||||
Das Monitoring-System sammelt Metriken über:
|
||||
- Workflow-Ausführungszeiten
|
||||
- Gitea-Last und API-Antwortzeiten
|
||||
- Systemressourcen (CPU, Memory, Load)
|
||||
- Docker-Container-Status
|
||||
- Workflow-Optimierungen
|
||||
|
||||
## Komponenten
|
||||
|
||||
### 1. Monitoring-Script (`scripts/ci/monitor-workflow-performance.sh`)
|
||||
|
||||
Lokales Script zur Sammlung von Workflow-Metriken über die Gitea API.
|
||||
|
||||
**Verwendung:**
|
||||
```bash
|
||||
export GITEA_TOKEN="your-token"
|
||||
export GITEA_URL="https://git.michaelschiemer.de"
|
||||
export GITHUB_REPOSITORY="michael/michaelschiemer"
|
||||
|
||||
./scripts/ci/monitor-workflow-performance.sh
|
||||
```
|
||||
|
||||
**Ausgabe:**
|
||||
- JSON-Datei mit Metriken in `monitoring/workflow-metrics/`
|
||||
- Konsolen-Zusammenfassung
|
||||
|
||||
### 2. Ansible Playbook (`deployment/ansible/playbooks/monitor-workflow-performance.yml`)
|
||||
|
||||
Server-seitiges Monitoring.
|
||||
|
||||
**Verwendung:**
|
||||
```bash
|
||||
cd deployment/ansible
|
||||
ansible-playbook -i inventory/production.yml \
|
||||
playbooks/monitor-workflow-performance.yml \
|
||||
-e "monitoring_lookback_hours=24"
|
||||
```
|
||||
|
||||
**Gesammelte Metriken:**
|
||||
- System Load Average
|
||||
- CPU- und Memory-Nutzung
|
||||
- Docker-Container-Status
|
||||
- Gitea Runner-Status
|
||||
- Gitea API-Antwortzeiten
|
||||
- Workflow-Log-Einträge
|
||||
- Container-Ressourcennutzung (Gitea, Traefik)
|
||||
|
||||
**Ausgabe:**
|
||||
- JSON-Datei auf dem Server: `/home/deploy/monitoring/workflow-metrics/workflow_metrics_<timestamp>.json`
|
||||
- Konsolen-Zusammenfassung
|
||||
|
||||
### 3. Gitea Workflow (`.gitea/workflows/monitor-performance.yml`)
|
||||
|
||||
Automatisches Monitoring-Workflow, der alle 6 Stunden läuft.
|
||||
|
||||
**Manuelle Ausführung:**
|
||||
- Über Gitea UI: Actions → Monitor Workflow Performance → Run workflow
|
||||
- Optional: `lookback_hours` Parameter anpassen
|
||||
|
||||
**Ausgabe:**
|
||||
- Artifact mit kombinierten Metriken (30 Tage Retention)
|
||||
- Workflow-Logs mit Zusammenfassung
|
||||
|
||||
## Metriken-Format
|
||||
|
||||
### System-Metriken
|
||||
```json
|
||||
{
|
||||
"system_metrics": {
|
||||
"load_average": "0.5",
|
||||
"cpu_usage_percent": "15.2",
|
||||
"memory_usage": "2.1G/8.0G",
|
||||
"docker_containers": "12",
|
||||
"docker_disk_usage": "5.2GB",
|
||||
"gitea_data_size": "1.2G"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Gitea-Metriken
|
||||
```json
|
||||
{
|
||||
"gitea_metrics": {
|
||||
"runner_status": "running",
|
||||
"api_response_time_ms": 45,
|
||||
"workflow_log_entries_last_24h": 150,
|
||||
"container_stats": {
|
||||
"cpu_percent": "2.5%",
|
||||
"memory_usage": "512MiB / 2GiB",
|
||||
"memory_percent": "25.0%"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Workflow-Metriken
|
||||
```json
|
||||
{
|
||||
"workflow_metrics": {
|
||||
"build_image": {
|
||||
"average_duration_seconds": 420,
|
||||
"recent_runs": 20
|
||||
},
|
||||
"manual_deploy": {
|
||||
"average_duration_seconds": 180,
|
||||
"recent_runs": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Optimierungen
|
||||
|
||||
Das Monitoring-System trackt folgende Optimierungen:
|
||||
|
||||
- ✅ **Repository Artifact Caching**: Repository wird als Artifact zwischen Jobs geteilt
|
||||
- ✅ **Helper Script Caching**: CI-Helper-Scripts werden als Artifact gecacht
|
||||
- ✅ **Combined Deployment Playbook**: Einzelnes Playbook für alle Deployment-Schritte
|
||||
- ✅ **Exponential Backoff Health Checks**: Intelligente Retry-Strategie
|
||||
- ✅ **Concurrency Groups**: Verhindert parallele Deployments
|
||||
|
||||
## Interpretation der Metriken
|
||||
|
||||
### Gute Werte
|
||||
- **Load Average**: < 1.0 (für Single-Core), < Anzahl Cores (für Multi-Core)
|
||||
- **Gitea API Response**: < 100ms
|
||||
- **Workflow Duration**: < 10 Minuten (Build), < 5 Minuten (Deploy)
|
||||
- **Memory Usage**: < 80% des verfügbaren Speichers
|
||||
|
||||
### Warnzeichen
|
||||
- **Load Average**: > 2.0 (kann auf Überlastung hinweisen)
|
||||
- **Gitea API Response**: > 500ms (kann auf Gitea-Überlastung hinweisen)
|
||||
- **Workflow Duration**: > 20 Minuten (kann auf Ineffizienzen hinweisen)
|
||||
- **Workflow Log Entries**: > 1000 pro Stunde (kann auf zu viele Workflows hinweisen)
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Keine Metriken gesammelt
|
||||
1. Prüfe Gitea API-Zugriff (Token, URL)
|
||||
2. Prüfe SSH-Zugriff auf Server (für Ansible Playbook)
|
||||
3. Prüfe ob Monitoring-Verzeichnis existiert
|
||||
|
||||
### Hohe System-Last
|
||||
1. Prüfe laufende Workflows
|
||||
2. Prüfe Gitea Runner-Status
|
||||
3. Prüfe Docker-Container-Ressourcennutzung
|
||||
4. Prüfe ob zu viele parallele Deployments laufen
|
||||
|
||||
### Langsame Workflows
|
||||
1. Prüfe ob Repository-Artifacts verwendet werden
|
||||
2. Prüfe ob Helper-Scripts gecacht werden
|
||||
3. Prüfe Docker Build Cache
|
||||
4. Prüfe Netzwerk-Latenz zu Registry
|
||||
|
||||
## Nächste Schritte
|
||||
|
||||
1. **Baseline etablieren**: Sammle Metriken über 1-2 Wochen
|
||||
2. **Trends analysieren**: Identifiziere langfristige Trends
|
||||
3. **Alerts einrichten**: Warnungen bei kritischen Werten
|
||||
4. **Weitere Optimierungen**: Basierend auf Metriken
|
||||
|
||||
## Weitere Ressourcen
|
||||
|
||||
- [Gitea Actions Documentation](https://docs.gitea.com/usage/actions)
|
||||
- [Ansible Best Practices](https://docs.ansible.com/ansible/latest/user_guide/playbooks_best_practices.html)
|
||||
- [Docker Monitoring](https://docs.docker.com/config/containers/logging/)
|
||||
|
||||
---

## Neue Datei: `scripts/ci/monitor-workflow-performance.sh` (180 Zeilen, ausführbar)
#!/bin/bash
|
||||
# Monitor Workflow Performance
|
||||
# Collects metrics about workflow execution times, Gitea load, and resource usage
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
OUTPUT_DIR="${REPO_ROOT}/monitoring/workflow-metrics"
|
||||
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||
METRICS_FILE="${OUTPUT_DIR}/workflow_metrics_${TIMESTAMP}.json"
|
||||
|
||||
# Create output directory
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
echo -e "${BLUE}📊 Workflow Performance Monitor${NC}"
|
||||
echo "=================================="
|
||||
echo ""
|
||||
|
||||
# Check if Gitea API credentials are available
|
||||
GITEA_URL="${GITEA_URL:-https://git.michaelschiemer.de}"
|
||||
GITEA_TOKEN="${GITEA_TOKEN:-${CI_TOKEN:-}}"
|
||||
REPO="${GITHUB_REPOSITORY:-michael/michaelschiemer}"
|
||||
|
||||
if [ -z "$GITEA_TOKEN" ]; then
|
||||
echo -e "${YELLOW}⚠️ GITEA_TOKEN not set, some metrics will be unavailable${NC}"
|
||||
fi
|
||||
|
||||
# Function to get workflow runs from Gitea API
|
||||
get_workflow_runs() {
|
||||
local workflow_name="$1"
|
||||
local limit="${2:-10}"
|
||||
|
||||
if [ -z "$GITEA_TOKEN" ]; then
|
||||
echo "[]"
|
||||
return
|
||||
fi
|
||||
|
||||
local api_url="${GITEA_URL}/api/v1/repos/${REPO}/actions/runs"
|
||||
if [ -n "$workflow_name" ]; then
|
||||
api_url="${api_url}?workflow=${workflow_name}&limit=${limit}"
|
||||
else
|
||||
api_url="${api_url}?limit=${limit}"
|
||||
fi
|
||||
|
||||
curl -sfL \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Accept: application/json" \
|
||||
"$api_url" 2>/dev/null || echo "[]"
|
||||
}
|
||||
|
||||
# Function to calculate average duration
|
||||
calculate_average_duration() {
|
||||
local runs_json="$1"
|
||||
local total=0
|
||||
local count=0
|
||||
|
||||
if [ "$runs_json" = "[]" ] || [ -z "$runs_json" ]; then
|
||||
echo "0"
|
||||
return
|
||||
fi
|
||||
|
||||
# Extract durations (in seconds) from workflow runs
|
||||
# Note: This is a simplified parser - in production, use jq
|
||||
echo "$runs_json" | grep -o '"duration":[0-9]*' | grep -o '[0-9]*' | while read -r duration; do
|
||||
if [ -n "$duration" ] && [ "$duration" -gt 0 ]; then
|
||||
total=$((total + duration))
|
||||
count=$((count + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$count" -eq 0 ]; then
|
||||
echo "0"
|
||||
else
|
||||
echo "$((total / count))"
|
||||
fi
|
||||
}
|
||||
|
||||
# Collect metrics
|
||||
echo -e "${BLUE}📥 Collecting workflow metrics...${NC}"
|
||||
|
||||
# Get recent workflow runs
|
||||
BUILD_WORKFLOW_RUNS=$(get_workflow_runs "build-image.yml" 20)
|
||||
DEPLOY_WORKFLOW_RUNS=$(get_workflow_runs "manual-deploy.yml" 10)
|
||||
|
||||
# Calculate metrics
|
||||
BUILD_AVG_DURATION=$(calculate_average_duration "$BUILD_WORKFLOW_RUNS")
|
||||
DEPLOY_AVG_DURATION=$(calculate_average_duration "$DEPLOY_WORKFLOW_RUNS")
|
||||
|
||||
# Get system metrics (if running on server)
|
||||
SYSTEM_LOAD="unknown"
|
||||
DOCKER_CONTAINERS="unknown"
|
||||
GITEA_RUNNER_STATUS="unknown"
|
||||
|
||||
if command -v uptime >/dev/null 2>&1; then
|
||||
SYSTEM_LOAD=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | tr -d ' ')
|
||||
fi
|
||||
|
||||
if command -v docker >/dev/null 2>&1; then
|
||||
DOCKER_CONTAINERS=$(docker ps --format '{{.Names}}' | wc -l)
|
||||
|
||||
if docker ps --format '{{.Names}}' | grep -q "gitea-runner"; then
|
||||
GITEA_RUNNER_STATUS="running"
|
||||
else
|
||||
GITEA_RUNNER_STATUS="stopped"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Create metrics JSON
|
||||
cat > "$METRICS_FILE" <<EOF
|
||||
{
|
||||
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
|
||||
"workflow_metrics": {
|
||||
"build_image": {
|
||||
"average_duration_seconds": $BUILD_AVG_DURATION,
|
||||
"recent_runs": 20
|
||||
},
|
||||
"manual_deploy": {
|
||||
"average_duration_seconds": $DEPLOY_AVG_DURATION,
|
||||
"recent_runs": 10
|
||||
}
|
||||
},
|
||||
"system_metrics": {
|
||||
"load_average": "$SYSTEM_LOAD",
|
||||
"docker_containers": "$DOCKER_CONTAINERS",
|
||||
"gitea_runner_status": "$GITEA_RUNNER_STATUS"
|
||||
},
|
||||
"optimizations": {
|
||||
"repository_artifact_enabled": true,
|
||||
"helper_script_caching_enabled": true,
|
||||
"combined_deployment_playbook": true,
|
||||
"exponential_backoff_health_checks": true,
|
||||
"concurrency_groups": true
|
||||
}
|
||||
}
|
||||
EOF
|
||||
|
||||
echo -e "${GREEN}✅ Metrics collected${NC}"
|
||||
echo ""
|
||||
echo -e "${BLUE}📈 Summary:${NC}"
|
||||
echo " Build Workflow Avg Duration: ${BUILD_AVG_DURATION}s"
|
||||
echo " Deploy Workflow Avg Duration: ${DEPLOY_AVG_DURATION}s"
|
||||
echo " System Load: $SYSTEM_LOAD"
|
||||
echo " Docker Containers: $DOCKER_CONTAINERS"
|
||||
echo " Gitea Runner: $GITEA_RUNNER_STATUS"
|
||||
echo ""
|
||||
echo -e "${BLUE}💾 Metrics saved to:${NC} $METRICS_FILE"
|
||||
echo ""
|
||||
|
||||
# Display recent workflow runs summary
|
||||
if [ "$BUILD_WORKFLOW_RUNS" != "[]" ] && [ -n "$BUILD_WORKFLOW_RUNS" ]; then
|
||||
echo -e "${BLUE}📋 Recent Build Workflow Runs:${NC}"
|
||||
echo "$BUILD_WORKFLOW_RUNS" | grep -o '"status":"[^"]*"' | head -5 | sed 's/"status":"//g' | sed 's/"//g' | while read -r status; do
|
||||
case "$status" in
|
||||
success)
|
||||
echo -e " ${GREEN}✓${NC} Success"
|
||||
;;
|
||||
failure)
|
||||
echo -e " ${RED}✗${NC} Failed"
|
||||
;;
|
||||
running)
|
||||
echo -e " ${YELLOW}⟳${NC} Running"
|
||||
;;
|
||||
*)
|
||||
echo -e " ${BLUE}?${NC} $status"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
echo ""
|
||||
fi
|
||||
|
||||
echo -e "${GREEN}✅ Monitoring complete${NC}"
|
||||
|
||||
Reference in New Issue
Block a user