feat: optimize workflows with repository artifacts and add performance monitoring
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 33s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Successful in 32s
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Composer Security Audit (push) Has been skipped
🧊 Warm Docker Build Cache / Refresh Buildx Caches (push) Failing after 11s
📊 Monitor Workflow Performance / Monitor Workflow Performance (push) Failing after 20s

- Use repository artifacts in test and build jobs (eliminates 2-3 git clones per run)
- Add comprehensive workflow performance monitoring system
- Add monitoring playbook and Gitea workflow for automated metrics collection
- Add monitoring documentation and scripts

Optimizations:
- Repository artifact caching: changes job uploads repo, test/build jobs download it
- Reduces Gitea load by eliminating redundant git operations
- Faster job starts (artifact download is typically faster than git clone)

Monitoring:
- Script for local workflow metrics collection via Gitea API
- Ansible playbook for server-side system and Gitea metrics
- Automated Gitea workflow that runs every 6 hours
- Tracks workflow durations, system load, Gitea API response times, and more
2025-11-09 04:03:51 +01:00
parent c3bec296fc
commit 72757954dc
5 changed files with 784 additions and 75 deletions

.gitea/workflows/build-image.yml

@@ -44,6 +44,11 @@ on:
         type: boolean
         required: false
         default: false
+      deploy:
+        description: 'Deploy to staging/production after build (default: false)'
+        type: boolean
+        required: false
+        default: false
 env:
   REGISTRY: registry.michaelschiemer.de
@@ -85,6 +90,13 @@ jobs:
           fi
           chmod +x /tmp/ci-tools/clone_repo.sh
+      - name: Upload CI helpers as artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: ci-helpers
+          path: /tmp/ci-tools/clone_repo.sh
+          retention-days: 1
       - name: Analyse changed files
         id: filter
         shell: bash
@@ -224,6 +236,13 @@ jobs:
echo "changed_files=$PRETTY_CHANGES" >> "$GITHUB_OUTPUT" echo "changed_files=$PRETTY_CHANGES" >> "$GITHUB_OUTPUT"
echo "needs_runtime_build=$RUNTIME_BUILD" >> "$GITHUB_OUTPUT" echo "needs_runtime_build=$RUNTIME_BUILD" >> "$GITHUB_OUTPUT"
- name: Upload repository as artifact
uses: actions/upload-artifact@v4
with:
name: repository
path: /workspace/repo
retention-days: 1
runtime-base: runtime-base:
name: Build Runtime Base Image name: Build Runtime Base Image
needs: changes needs: changes
@@ -244,8 +263,16 @@ jobs:
echo "should_build=false" >> "$GITHUB_OUTPUT" echo "should_build=false" >> "$GITHUB_OUTPUT"
fi fi
- name: Download CI helpers - name: Download CI helpers from artifact
if: ${{ steps.decision.outputs.should_build == 'true' }} if: ${{ steps.decision.outputs.should_build == 'true' }}
uses: actions/download-artifact@v4
with:
name: ci-helpers
path: /tmp/ci-tools
continue-on-error: true
- name: Download CI helpers (fallback if artifact missing)
if: ${{ steps.decision.outputs.should_build == 'true' && failure() }}
shell: bash shell: bash
env: env:
CI_TOKEN: ${{ secrets.CI_TOKEN }} CI_TOKEN: ${{ secrets.CI_TOKEN }}
@@ -457,7 +484,15 @@ jobs:
     name: Run Tests & Quality Checks
     runs-on: php-ci
     steps:
-      - name: Download CI helpers
+      - name: Download CI helpers from artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: ci-helpers
+          path: /tmp/ci-tools
+        continue-on-error: true
+      - name: Download CI helpers (fallback if artifact missing)
+        if: failure()
         shell: bash
         env:
           CI_TOKEN: ${{ secrets.CI_TOKEN }}
@@ -479,7 +514,16 @@ jobs:
           fi
           chmod +x /tmp/ci-tools/clone_repo.sh
-      - name: Checkout code
+      - name: Download repository artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: repository
+          path: /workspace
+        continue-on-error: true
+        id: download_repo
+      - name: Checkout code (fallback if artifact missing)
+        if: steps.download_repo.outcome == 'failure'
         run: |
           REF_NAME="${{ github.ref_name }}"
           INPUT_BRANCH="${{ inputs.branch }}"
@@ -545,8 +589,16 @@ jobs:
           bash --version
           git --version
-      - name: Download CI helpers
+      - name: Download CI helpers from artifact
         if: ${{ env.SHOULD_BUILD == 'true' }}
+        uses: actions/download-artifact@v4
+        with:
+          name: ci-helpers
+          path: /tmp/ci-tools
+        continue-on-error: true
+      - name: Download CI helpers (fallback if artifact missing)
+        if: ${{ env.SHOULD_BUILD == 'true' && failure() }}
         shell: bash
         env:
           CI_TOKEN: ${{ secrets.CI_TOKEN }}
@@ -568,8 +620,17 @@ jobs:
           fi
           chmod +x /tmp/ci-tools/clone_repo.sh
-      - name: Checkout code
+      - name: Download repository artifact
         if: ${{ env.SHOULD_BUILD == 'true' }}
+        uses: actions/download-artifact@v4
+        with:
+          name: repository
+          path: /workspace
+        continue-on-error: true
+        id: download_repo
+      - name: Checkout code (fallback if artifact missing)
+        if: ${{ env.SHOULD_BUILD == 'true' && steps.download_repo.outcome == 'failure' }}
         shell: bash
         run: |
           REF_NAME="${{ github.ref_name }}"
@@ -910,12 +971,23 @@ jobs:
echo " Run the 'Deploy to Production' or 'Deploy to Staging' workflow to deploy this image." echo " Run the 'Deploy to Production' or 'Deploy to Staging' workflow to deploy this image."
fi fi
# Job 3: Auto-deploy to Staging (only for staging branch) - name: Upload repository as artifact
if: ${{ env.SHOULD_BUILD == 'true' }}
uses: actions/upload-artifact@v4
with:
name: repository
path: /workspace/repo
retention-days: 1
# Job 3: Auto-deploy to Staging (only for staging branch and if deploy is enabled)
deploy-staging: deploy-staging:
name: Auto-deploy to Staging name: Auto-deploy to Staging
needs: [changes, build] needs: [changes, build]
if: ${{ always() && (github.ref_name == 'staging' || github.head_ref == 'staging' || (github.ref_name == '' && contains(github.ref, 'staging'))) && needs.build.result != 'failure' && needs.build.result != 'cancelled' && needs.changes.result != 'failure' && needs.changes.result != 'cancelled' }} if: ${{ always() && ((github.event_name == 'push' && (github.ref_name == 'staging' || github.head_ref == 'staging' || (github.ref_name == '' && contains(github.ref, 'staging')))) || (github.event_name == 'workflow_dispatch' && inputs.deploy == true)) && needs.build.result != 'failure' && needs.build.result != 'cancelled' && needs.changes.result != 'failure' && needs.changes.result != 'cancelled' }}
runs-on: php-ci runs-on: php-ci
concurrency:
group: deploy-staging
cancel-in-progress: false
environment: environment:
name: staging name: staging
url: https://staging.michaelschiemer.de url: https://staging.michaelschiemer.de
@@ -936,7 +1008,16 @@ jobs:
echo "BRANCH=$REF_NAME" >> $GITHUB_OUTPUT echo "BRANCH=$REF_NAME" >> $GITHUB_OUTPUT
echo "📋 Branch: $REF_NAME" echo "📋 Branch: $REF_NAME"
- name: Checkout deployment scripts - name: Download repository artifact
uses: actions/download-artifact@v4
with:
name: repository
path: /workspace
continue-on-error: true
id: download_repo
- name: Checkout deployment scripts (fallback if artifact missing)
if: steps.download_repo.outcome == 'failure'
run: | run: |
REF_NAME="${{ steps.branch.outputs.BRANCH }}" REF_NAME="${{ steps.branch.outputs.BRANCH }}"
REPO="${{ github.repository }}" REPO="${{ github.repository }}"
@@ -956,6 +1037,11 @@ jobs:
           cd /workspace/repo
+      - name: Set skip_git_update flag if repository artifact was used
+        if: steps.download_repo.outcome == 'success'
+        run: |
+          echo "SKIP_GIT_UPDATE=true" >> $GITHUB_ENV
       - name: Setup SSH key
         run: |
           mkdir -p ~/.ssh
@@ -975,41 +1061,19 @@ jobs:
             chmod 600 /tmp/vault_pass
           fi
-      - name: Deploy Application Code to Staging
+      - name: Deploy to Staging (Complete)
         run: |
           cd /workspace/repo/deployment/ansible
           ansible-playbook -i inventory/production.yml \
-            playbooks/deploy-application-code.yml \
+            playbooks/deploy-complete.yml \
             -e "deployment_environment=staging" \
             -e "deployment_hosts=production" \
             -e "git_branch=staging" \
-            -e "traefik_auto_restart=false" \
-            -e "gitea_auto_restart=false" \
-            --vault-password-file /tmp/vault_pass \
-            --private-key ~/.ssh/production
-      - name: Deploy Docker Image to Staging
-        run: |
-          cd /workspace/repo/deployment/ansible
-          ansible-playbook -i inventory/production.yml \
-            playbooks/deploy-image.yml \
-            -e "deployment_environment=staging" \
-            -e "deployment_hosts=production" \
             -e "image_tag=latest" \
             -e "docker_registry=${{ env.REGISTRY }}" \
             -e "docker_registry_username=${{ secrets.REGISTRY_USER }}" \
             -e "docker_registry_password=${{ secrets.REGISTRY_PASSWORD }}" \
-            -e "traefik_auto_restart=false" \
-            -e "gitea_auto_restart=false" \
-            --vault-password-file /tmp/vault_pass \
-            --private-key ~/.ssh/production
-      - name: Install Composer Dependencies
-        run: |
-          cd /workspace/repo/deployment/ansible
-          ansible-playbook -i inventory/production.yml \
-            playbooks/install-composer-dependencies.yml \
-            -e "deployment_environment=staging" \
+            -e "application_skip_git_update=${{ env.SKIP_GIT_UPDATE || 'false' }}" \
             -e "traefik_auto_restart=false" \
             -e "gitea_auto_restart=false" \
             --vault-password-file /tmp/vault_pass \
@@ -1021,22 +1085,30 @@ jobs:
       - name: Health check
         id: health
         run: |
-          echo "🔍 Performing health checks..."
-          # Basic health check
+          echo "🔍 Performing health checks with exponential backoff..."
+          # Basic health check with exponential backoff
           BASIC_HEALTH_OK=false
-          for i in {1..10}; do
+          DELAY=2
+          MAX_DELAY=60
+          MAX_ATTEMPTS=5
+          for i in $(seq 1 $MAX_ATTEMPTS); do
             if curl -f -k -s https://staging.michaelschiemer.de/health > /dev/null 2>&1; then
-              echo "✅ Basic health check passed"
+              echo "✅ Basic health check passed (attempt $i/$MAX_ATTEMPTS)"
               BASIC_HEALTH_OK=true
               break
             fi
-            echo "⏳ Waiting for staging service... (attempt $i/10)"
-            sleep 10
+            if [ $i -lt $MAX_ATTEMPTS ]; then
+              echo "⏳ Waiting for staging service... (attempt $i/$MAX_ATTEMPTS, delay ${DELAY}s)"
+              sleep $DELAY
+              DELAY=$((DELAY * 2))
+              [ $DELAY -gt $MAX_DELAY ] && DELAY=$MAX_DELAY
+            fi
           done
           if [ "$BASIC_HEALTH_OK" != "true" ]; then
-            echo "❌ Basic health check failed"
+            echo "❌ Basic health check failed after $MAX_ATTEMPTS attempts"
             exit 1
           fi
@@ -1065,12 +1137,15 @@ jobs:
echo "URL: https://staging.michaelschiemer.de" echo "URL: https://staging.michaelschiemer.de"
echo "Image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" echo "Image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest"
# Job 4: Auto-deploy to Production (only for main branch) # Job 4: Auto-deploy to Production (only for main branch and if deploy is enabled)
deploy-production: deploy-production:
name: Auto-deploy to Production name: Auto-deploy to Production
needs: [changes, build] needs: [changes, build]
if: always() && (github.ref_name == 'main' || github.head_ref == 'main' || (github.ref_name == '' && contains(github.ref, 'main'))) && needs.changes.outputs.needs_build == 'true' if: always() && ((github.event_name == 'push' && (github.ref_name == 'main' || github.head_ref == 'main' || (github.ref_name == '' && contains(github.ref, 'main')))) || (github.event_name == 'workflow_dispatch' && inputs.deploy == true)) && needs.changes.outputs.needs_build == 'true'
runs-on: php-ci runs-on: php-ci
concurrency:
group: deploy-production
cancel-in-progress: false
environment: environment:
name: production name: production
url: https://michaelschiemer.de url: https://michaelschiemer.de
@@ -1091,7 +1166,16 @@ jobs:
echo "BRANCH=$REF_NAME" >> $GITHUB_OUTPUT echo "BRANCH=$REF_NAME" >> $GITHUB_OUTPUT
echo "📋 Branch: $REF_NAME" echo "📋 Branch: $REF_NAME"
- name: Checkout deployment scripts - name: Download repository artifact
uses: actions/download-artifact@v4
with:
name: repository
path: /workspace
continue-on-error: true
id: download_repo
- name: Checkout deployment scripts (fallback if artifact missing)
if: steps.download_repo.outcome == 'failure'
run: | run: |
REF_NAME="${{ steps.branch.outputs.BRANCH }}" REF_NAME="${{ steps.branch.outputs.BRANCH }}"
REPO="${{ github.repository }}" REPO="${{ github.repository }}"
@@ -1111,6 +1195,11 @@ jobs:
           cd /workspace/repo
+      - name: Set skip_git_update flag if repository artifact was used
+        if: steps.download_repo.outcome == 'success'
+        run: |
+          echo "SKIP_GIT_UPDATE=true" >> $GITHUB_ENV
       - name: Setup SSH key
         run: |
           mkdir -p ~/.ssh
@@ -1153,41 +1242,19 @@ jobs:
echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_OUTPUT echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_OUTPUT
echo "📦 Image Tag: ${IMAGE_TAG}" echo "📦 Image Tag: ${IMAGE_TAG}"
- name: Deploy Application Code to Production - name: Deploy to Production (Complete)
run: | run: |
cd /workspace/repo/deployment/ansible cd /workspace/repo/deployment/ansible
ansible-playbook -i inventory/production.yml \ ansible-playbook -i inventory/production.yml \
playbooks/deploy-application-code.yml \ playbooks/deploy-complete.yml \
-e "deployment_environment=production" \ -e "deployment_environment=production" \
-e "deployment_hosts=production" \ -e "deployment_hosts=production" \
-e "git_branch=main" \ -e "git_branch=main" \
-e "traefik_auto_restart=false" \
-e "gitea_auto_restart=false" \
--vault-password-file /tmp/vault_pass \
--private-key ~/.ssh/production
- name: Deploy Docker Image to Production
run: |
cd /workspace/repo/deployment/ansible
ansible-playbook -i inventory/production.yml \
playbooks/deploy-image.yml \
-e "deployment_environment=production" \
-e "deployment_hosts=production" \
-e "image_tag=${{ steps.image_tag.outputs.IMAGE_TAG }}" \ -e "image_tag=${{ steps.image_tag.outputs.IMAGE_TAG }}" \
-e "docker_registry=${{ env.REGISTRY }}" \ -e "docker_registry=${{ env.REGISTRY }}" \
-e "docker_registry_username=${{ secrets.REGISTRY_USER }}" \ -e "docker_registry_username=${{ secrets.REGISTRY_USER }}" \
-e "docker_registry_password=${{ secrets.REGISTRY_PASSWORD }}" \ -e "docker_registry_password=${{ secrets.REGISTRY_PASSWORD }}" \
-e "traefik_auto_restart=false" \ -e "application_skip_git_update=${{ env.SKIP_GIT_UPDATE || 'false' }}" \
-e "gitea_auto_restart=false" \
--vault-password-file /tmp/vault_pass \
--private-key ~/.ssh/production
- name: Install Composer Dependencies
run: |
cd /workspace/repo/deployment/ansible
ansible-playbook -i inventory/production.yml \
playbooks/install-composer-dependencies.yml \
-e "deployment_environment=production" \
-e "traefik_auto_restart=false" \ -e "traefik_auto_restart=false" \
-e "gitea_auto_restart=false" \ -e "gitea_auto_restart=false" \
--vault-password-file /tmp/vault_pass \ --vault-password-file /tmp/vault_pass \
@@ -1199,22 +1266,30 @@ jobs:
       - name: Health check
         id: health
         run: |
-          echo "🔍 Performing health checks..."
-          # Basic health check
+          echo "🔍 Performing health checks with exponential backoff..."
+          # Basic health check with exponential backoff
           BASIC_HEALTH_OK=false
-          for i in {1..10}; do
+          DELAY=2
+          MAX_DELAY=60
+          MAX_ATTEMPTS=5
+          for i in $(seq 1 $MAX_ATTEMPTS); do
             if curl -f -k -s https://michaelschiemer.de/health > /dev/null 2>&1; then
-              echo "✅ Basic health check passed"
+              echo "✅ Basic health check passed (attempt $i/$MAX_ATTEMPTS)"
               BASIC_HEALTH_OK=true
               break
             fi
-            echo "⏳ Waiting for production service... (attempt $i/10)"
-            sleep 10
+            if [ $i -lt $MAX_ATTEMPTS ]; then
+              echo "⏳ Waiting for production service... (attempt $i/$MAX_ATTEMPTS, delay ${DELAY}s)"
+              sleep $DELAY
+              DELAY=$((DELAY * 2))
+              [ $DELAY -gt $MAX_DELAY ] && DELAY=$MAX_DELAY
+            fi
           done
           if [ "$BASIC_HEALTH_OK" != "true" ]; then
-            echo "❌ Basic health check failed"
+            echo "❌ Basic health check failed after $MAX_ATTEMPTS attempts"
             exit 1
           fi

.gitea/workflows/monitor-performance.yml (new file)

@@ -0,0 +1,89 @@
name: 📊 Monitor Workflow Performance
on:
schedule:
# Run every 6 hours
- cron: '0 */6 * * *'
workflow_dispatch:
inputs:
lookback_hours:
description: 'Hours to look back for metrics'
required: false
default: '24'
type: string
env:
DEPLOYMENT_HOST: 94.16.110.151
jobs:
monitor:
name: Monitor Workflow Performance
runs-on: php-ci
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Setup SSH key
run: |
mkdir -p ~/.ssh
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/production
chmod 600 ~/.ssh/production
ssh-keyscan -H ${{ env.DEPLOYMENT_HOST }} >> ~/.ssh/known_hosts
- name: Create Ansible Vault password file
run: |
if [ -n "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" ]; then
echo "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" > /tmp/vault_pass
chmod 600 /tmp/vault_pass
echo "✅ Vault password file created"
else
echo "⚠️ ANSIBLE_VAULT_PASSWORD secret not set, using empty password file"
touch /tmp/vault_pass
chmod 600 /tmp/vault_pass
fi
- name: Run performance monitoring
run: |
cd /workspace/repo/deployment/ansible
ansible-playbook -i inventory/production.yml \
playbooks/monitor-workflow-performance.yml \
-e "monitoring_lookback_hours=${{ github.event.inputs.lookback_hours || '24' }}" \
--vault-password-file /tmp/vault_pass \
--private-key ~/.ssh/production
- name: Collect metrics files
run: |
ssh -i ~/.ssh/production deploy@${{ env.DEPLOYMENT_HOST }} \
"find /home/deploy/monitoring/workflow-metrics -name 'workflow_metrics_*.json' -mtime -1 -exec cat {} \; | jq -s '.'" \
> /tmp/combined_metrics.json || echo "[]" > /tmp/combined_metrics.json
- name: Display metrics summary
run: |
if [ -f /tmp/combined_metrics.json ] && [ -s /tmp/combined_metrics.json ]; then
echo "📊 Performance Metrics Summary:"
echo "=================================="
cat /tmp/combined_metrics.json | jq -r '
.[] |
"Timestamp: \(.timestamp)",
"System Load: \(.system_metrics.load_average)",
"CPU Usage: \(.system_metrics.cpu_usage_percent)%",
"Memory: \(.system_metrics.memory_usage)",
"Gitea Runner: \(.gitea_metrics.runner_status)",
"Gitea API Response: \(.gitea_metrics.api_response_time_ms)ms",
"Workflow Log Entries: \(.gitea_metrics.workflow_log_entries_last_24h)",
"---"
' || echo "⚠️ Could not parse metrics"
else
echo "⚠️ No metrics collected"
fi
- name: Upload metrics as artifact
uses: actions/upload-artifact@v4
with:
name: workflow-metrics
path: /tmp/combined_metrics.json
retention-days: 30
if: always()

deployment/ansible/playbooks/monitor-workflow-performance.yml (new file)

@@ -0,0 +1,192 @@
---
# Monitor Workflow Performance
# Collects comprehensive metrics about workflow execution, Gitea load, and system resources
- name: Monitor Workflow Performance
hosts: production
gather_facts: yes
become: no
vars:
monitoring_output_dir: "/home/deploy/monitoring/workflow-metrics"
monitoring_lookback_hours: 24
gitea_stack_path: "{{ stacks_base_path }}/gitea"
traefik_stack_path: "{{ stacks_base_path }}/traefik"
tasks:
- name: Create monitoring output directory
ansible.builtin.file:
path: "{{ monitoring_output_dir }}"
state: directory
mode: '0755'
- name: Get system load average
ansible.builtin.shell: |
uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | tr -d ' '
register: system_load
changed_when: false
- name: Get Docker container count
ansible.builtin.shell: |
docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | wc -l
register: docker_container_count
changed_when: false
- name: Get Gitea Runner status
ansible.builtin.shell: |
if docker ps --format '{{ '{{' }}.Names{{ '}}' }}' | grep -q "gitea-runner"; then
echo "running"
else
echo "stopped"
fi
register: gitea_runner_status
changed_when: false
- name: Get Gitea container resource usage
ansible.builtin.shell: |
docker stats gitea --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
register: gitea_stats
changed_when: false
failed_when: false
- name: Get Traefik container resource usage
ansible.builtin.shell: |
docker stats traefik --no-stream --format "{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.MemPerc{{ '}}' }}" 2>/dev/null || echo "N/A,N/A,N/A"
register: traefik_stats
changed_when: false
failed_when: false
- name: Check Gitea API response time
ansible.builtin.uri:
url: "https://{{ gitea_domain }}/api/healthz"
method: GET
status_code: [200]
validate_certs: false
timeout: 10
register: gitea_api_test
changed_when: false
failed_when: false
- name: Get Gitea logs for workflow activity (last {{ monitoring_lookback_hours }} hours)
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 | \
grep -iE "workflow|action|runner" | \
tail -50 || echo "No workflow activity found"
register: gitea_workflow_logs
changed_when: false
failed_when: false
- name: Count workflow-related log entries
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
docker compose logs gitea --since "{{ monitoring_lookback_hours }}h" 2>&1 | \
grep -iE "workflow|action|runner" | \
wc -l
register: workflow_log_count
changed_when: false
failed_when: false
- name: Get disk usage for Gitea data
ansible.builtin.shell: |
du -sh {{ gitea_stack_path }}/data 2>/dev/null | awk '{print $1}' || echo "N/A"
register: gitea_data_size
changed_when: false
failed_when: false
- name: Get Docker system disk usage
ansible.builtin.shell: |
docker system df --format "{{ '{{' }}.Size{{ '}}' }}" 2>/dev/null | head -1 || echo "N/A"
register: docker_disk_usage
changed_when: false
failed_when: false
- name: Get memory usage
ansible.builtin.shell: |
free -h | grep Mem | awk '{print $3 "/" $2}'
register: memory_usage
changed_when: false
- name: Get CPU usage (1 minute average)
ansible.builtin.shell: |
top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" | awk '{print 100 - $1}'
register: cpu_usage
changed_when: false
failed_when: false
- name: Generate metrics JSON
ansible.builtin.copy:
dest: "{{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json"
content: |
{
"timestamp": "{{ ansible_date_time.iso8601 }}",
"system_metrics": {
"load_average": "{{ system_load.stdout }}",
"cpu_usage_percent": "{{ cpu_usage.stdout | default('N/A') }}",
"memory_usage": "{{ memory_usage.stdout }}",
"docker_containers": "{{ docker_container_count.stdout }}",
"docker_disk_usage": "{{ docker_disk_usage.stdout }}",
"gitea_data_size": "{{ gitea_data_size.stdout }}"
},
"gitea_metrics": {
"runner_status": "{{ gitea_runner_status.stdout }}",
"api_response_time_ms": "{{ (gitea_api_test.elapsed * 1000) | default('N/A') | int }}",
"workflow_log_entries_last_{{ monitoring_lookback_hours }}h": {{ workflow_log_count.stdout | int }},
"container_stats": {
"cpu_percent": "{{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
"memory_usage": "{{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
"memory_percent": "{{ gitea_stats.stdout.split(',')[2] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
}
},
"traefik_metrics": {
"container_stats": {
"cpu_percent": "{{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
"memory_usage": "{{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}",
"memory_percent": "{{ traefik_stats.stdout.split(',')[2] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}"
}
},
"optimizations": {
"repository_artifact_enabled": true,
"helper_script_caching_enabled": true,
"combined_deployment_playbook": true,
"exponential_backoff_health_checks": true,
"concurrency_groups": true
}
}
mode: '0644'
- name: Display monitoring summary
ansible.builtin.debug:
msg: |
================================================================================
WORKFLOW PERFORMANCE MONITORING - SUMMARY
================================================================================
System Metrics:
- Load Average: {{ system_load.stdout }}
- CPU Usage: {{ cpu_usage.stdout | default('N/A') }}%
- Memory Usage: {{ memory_usage.stdout }}
- Docker Containers: {{ docker_container_count.stdout }}
- Docker Disk Usage: {{ docker_disk_usage.stdout }}
- Gitea Data Size: {{ gitea_data_size.stdout }}
Gitea Metrics:
- Runner Status: {{ gitea_runner_status.stdout }}
- API Response Time: {{ (gitea_api_test.elapsed * 1000) | default('N/A') | int }}ms
- Workflow Log Entries (last {{ monitoring_lookback_hours }}h): {{ workflow_log_count.stdout }}
- Container CPU: {{ gitea_stats.stdout.split(',')[0] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
- Container Memory: {{ gitea_stats.stdout.split(',')[1] if gitea_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
Traefik Metrics:
- Container CPU: {{ traefik_stats.stdout.split(',')[0] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
- Container Memory: {{ traefik_stats.stdout.split(',')[1] if traefik_stats.stdout != 'N/A,N/A,N/A' else 'N/A' }}
Optimizations Enabled:
✅ Repository Artifact Caching
✅ Helper Script Caching
✅ Combined Deployment Playbook
✅ Exponential Backoff Health Checks
✅ Concurrency Groups
Metrics saved to: {{ monitoring_output_dir }}/workflow_metrics_{{ ansible_date_time.epoch }}.json
================================================================================

monitoring/README.md (new file, +173 lines)

@@ -0,0 +1,173 @@
# Workflow Performance Monitoring
This directory contains tools and metrics for monitoring workflow performance and system resources.
## Overview
The monitoring system collects metrics on:
- Workflow execution times
- Gitea load and API response times
- System resources (CPU, memory, load)
- Docker container status
- Workflow optimizations
## Components
### 1. Monitoring script (`scripts/ci/monitor-workflow-performance.sh`)
Local script that collects workflow metrics via the Gitea API.
**Usage:**
```bash
export GITEA_TOKEN="your-token"
export GITEA_URL="https://git.michaelschiemer.de"
export GITHUB_REPOSITORY="michael/michaelschiemer"
./scripts/ci/monitor-workflow-performance.sh
```
**Output:**
- JSON file with metrics in `monitoring/workflow-metrics/`
- Console summary (a cron example for regular collection is sketched below)
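For a regular baseline the script can also be run from cron on any host with API access. This is only a sketch; the checkout path, token file, and schedule are assumptions, not part of the repository:
```bash
# Hypothetical crontab entry: collect metrics hourly.
# Adjust the checkout path and the token source to your environment.
0 * * * * cd /home/deploy/michaelschiemer && GITEA_TOKEN=$(cat /home/deploy/.gitea_token) GITEA_URL="https://git.michaelschiemer.de" GITHUB_REPOSITORY="michael/michaelschiemer" ./scripts/ci/monitor-workflow-performance.sh >> /home/deploy/monitoring/cron.log 2>&1
```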
### 2. Ansible playbook (`deployment/ansible/playbooks/monitor-workflow-performance.yml`)
Server-side monitoring of system and Gitea metrics.
**Usage:**
```bash
cd deployment/ansible
ansible-playbook -i inventory/production.yml \
  playbooks/monitor-workflow-performance.yml \
  -e "monitoring_lookback_hours=24"
```
**Collected metrics:**
- System load average
- CPU and memory usage
- Docker container status
- Gitea runner status
- Gitea API response times
- Workflow log entries
- Container resource usage (Gitea, Traefik)
**Output:**
- JSON file on the server: `/home/deploy/monitoring/workflow-metrics/workflow_metrics_<timestamp>.json`
- Console summary
### 3. Gitea workflow (`.gitea/workflows/monitor-performance.yml`)
Automated monitoring workflow that runs every 6 hours.
**Manual run:**
- Via the Gitea UI: Actions → Monitor Workflow Performance → Run workflow
- Optionally adjust the `lookback_hours` parameter
**Output:**
- Artifact with the combined metrics (30 days retention)
- Workflow logs with a summary
## Metrics format
### System metrics
```json
{
"system_metrics": {
"load_average": "0.5",
"cpu_usage_percent": "15.2",
"memory_usage": "2.1G/8.0G",
"docker_containers": "12",
"docker_disk_usage": "5.2GB",
"gitea_data_size": "1.2G"
}
}
```
### Gitea metrics
```json
{
"gitea_metrics": {
"runner_status": "running",
"api_response_time_ms": 45,
"workflow_log_entries_last_24h": 150,
"container_stats": {
"cpu_percent": "2.5%",
"memory_usage": "512MiB / 2GiB",
"memory_percent": "25.0%"
}
}
}
```
### Workflow metrics
```json
{
"workflow_metrics": {
"build_image": {
"average_duration_seconds": 420,
"recent_runs": 20
},
"manual_deploy": {
"average_duration_seconds": 180,
"recent_runs": 10
}
}
}
```
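Each run writes one JSON snapshot, so the files can be aggregated with `jq`. A minimal sketch, assuming the server-side directory layout described above and the field names shown in these examples:
```bash
#!/bin/bash
# Average Gitea API response time across all collected snapshots (sketch).
METRICS_DIR="${METRICS_DIR:-/home/deploy/monitoring/workflow-metrics}"

# Slurp all snapshot files into one array, keep only parseable numbers,
# and print their average (or "no data" if nothing is usable).
jq -s '
  [ .[].gitea_metrics.api_response_time_ms | tonumber? ]
  | if length > 0 then (add / length) else "no data" end
' "$METRICS_DIR"/workflow_metrics_*.json
```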
## Optimizations
The monitoring system tracks the following optimizations:
- **Repository Artifact Caching**: the repository is shared between jobs as an artifact
- **Helper Script Caching**: CI helper scripts are cached as an artifact
- **Combined Deployment Playbook**: a single playbook covers all deployment steps
- **Exponential Backoff Health Checks**: smarter retry strategy
- **Concurrency Groups**: prevents parallel deployments
## Interpreting the metrics
### Good values
- **Load average**: < 1.0 (single core), < number of cores (multi-core)
- **Gitea API response**: < 100 ms
- **Workflow duration**: < 10 minutes (build), < 5 minutes (deploy)
- **Memory usage**: < 80% of available memory
### Warning signs
- **Load average**: > 2.0 (may indicate overload)
- **Gitea API response**: > 500 ms (may indicate Gitea overload)
- **Workflow duration**: > 20 minutes (may indicate inefficiencies)
- **Workflow log entries**: > 1000 per hour (may indicate too many workflows; a threshold check against these values is sketched below)
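A simple way to act on these thresholds is to check the most recent snapshot. This is only a sketch; the metrics directory is assumed to match the server-side layout, and the limits are the warning values listed above:
```bash
#!/bin/bash
# Flag warning conditions in the newest metrics snapshot (sketch, not exhaustive).
METRICS_DIR="${METRICS_DIR:-/home/deploy/monitoring/workflow-metrics}"
LATEST=$(ls -t "$METRICS_DIR"/workflow_metrics_*.json 2>/dev/null | head -1)
[ -z "$LATEST" ] && { echo "no metrics found"; exit 1; }

# Load average above 2.0 may indicate overload.
jq -e '(.system_metrics.load_average | tonumber? // 0) > 2.0' "$LATEST" >/dev/null \
  && echo "⚠️ high load: $(jq -r '.system_metrics.load_average' "$LATEST")"

# Gitea API responses above 500 ms may indicate Gitea overload.
jq -e '(.gitea_metrics.api_response_time_ms | tonumber? // 0) > 500' "$LATEST" >/dev/null \
  && echo "⚠️ slow Gitea API: $(jq -r '.gitea_metrics.api_response_time_ms' "$LATEST")ms"

exit 0
```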
## Troubleshooting
### No metrics collected
1. Check Gitea API access (token, URL) – a quick check is sketched below
2. Check SSH access to the server (for the Ansible playbook)
3. Check whether the monitoring directory exists
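For step 1, the token can be verified against the standard Gitea repository endpoint (`GET /api/v1/repos/{owner}/{repo}`), using the same auth header and variables as the monitoring script:
```bash
# Verify that the token and URL used by the monitoring script work.
GITEA_URL="${GITEA_URL:-https://git.michaelschiemer.de}"
REPO="${GITHUB_REPOSITORY:-michael/michaelschiemer}"
curl -sf -H "Authorization: token ${GITEA_TOKEN}" \
  "${GITEA_URL}/api/v1/repos/${REPO}" > /dev/null \
  && echo "✅ Gitea API access OK" \
  || echo "❌ Gitea API access failed (check GITEA_TOKEN and GITEA_URL)"
```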
### High system load
1. Check running workflows
2. Check the Gitea runner status
3. Check Docker container resource usage
4. Check whether too many parallel deployments are running
### Slow workflows
1. Check whether repository artifacts are being used
2. Check whether helper scripts are cached
3. Check the Docker build cache
4. Check network latency to the registry
## Next steps
1. **Establish a baseline**: collect metrics for 1-2 weeks
2. **Analyze trends**: identify long-term developments
3. **Set up alerts**: warn on critical values
4. **Further optimizations**: based on the collected metrics
## Further resources
- [Gitea Actions Documentation](https://docs.gitea.com/usage/actions)
- [Ansible Best Practices](https://docs.ansible.com/ansible/latest/user_guide/playbooks_best_practices.html)
- [Docker Monitoring](https://docs.docker.com/config/containers/logging/)

scripts/ci/monitor-workflow-performance.sh (new file)

@@ -0,0 +1,180 @@
#!/bin/bash
# Monitor Workflow Performance
# Collects metrics about workflow execution times, Gitea load, and resource usage
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
OUTPUT_DIR="${REPO_ROOT}/monitoring/workflow-metrics"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
METRICS_FILE="${OUTPUT_DIR}/workflow_metrics_${TIMESTAMP}.json"
# Create output directory
mkdir -p "$OUTPUT_DIR"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
echo -e "${BLUE}📊 Workflow Performance Monitor${NC}"
echo "=================================="
echo ""
# Check if Gitea API credentials are available
GITEA_URL="${GITEA_URL:-https://git.michaelschiemer.de}"
GITEA_TOKEN="${GITEA_TOKEN:-${CI_TOKEN:-}}"
REPO="${GITHUB_REPOSITORY:-michael/michaelschiemer}"
if [ -z "$GITEA_TOKEN" ]; then
echo -e "${YELLOW}⚠️ GITEA_TOKEN not set, some metrics will be unavailable${NC}"
fi
# Function to get workflow runs from Gitea API
get_workflow_runs() {
local workflow_name="$1"
local limit="${2:-10}"
if [ -z "$GITEA_TOKEN" ]; then
echo "[]"
return
fi
local api_url="${GITEA_URL}/api/v1/repos/${REPO}/actions/runs"
if [ -n "$workflow_name" ]; then
api_url="${api_url}?workflow=${workflow_name}&limit=${limit}"
else
api_url="${api_url}?limit=${limit}"
fi
curl -sfL \
-H "Authorization: token ${GITEA_TOKEN}" \
-H "Accept: application/json" \
"$api_url" 2>/dev/null || echo "[]"
}
# Function to calculate average duration
calculate_average_duration() {
    local runs_json="$1"
    local total=0
    local count=0
    if [ "$runs_json" = "[]" ] || [ -z "$runs_json" ]; then
        echo "0"
        return
    fi
    # Extract durations (in seconds) from workflow runs
    # Note: This is a simplified parser - in production, use jq
    # Read via process substitution (not a pipe) so that total/count are
    # updated in the current shell instead of being lost in a subshell.
    while read -r duration; do
        if [ -n "$duration" ] && [ "$duration" -gt 0 ]; then
            total=$((total + duration))
            count=$((count + 1))
        fi
    done < <(echo "$runs_json" | grep -o '"duration":[0-9]*' | grep -o '[0-9]*')
    if [ "$count" -eq 0 ]; then
        echo "0"
    else
        echo "$((total / count))"
    fi
}
# Collect metrics
echo -e "${BLUE}📥 Collecting workflow metrics...${NC}"
# Get recent workflow runs
BUILD_WORKFLOW_RUNS=$(get_workflow_runs "build-image.yml" 20)
DEPLOY_WORKFLOW_RUNS=$(get_workflow_runs "manual-deploy.yml" 10)
# Calculate metrics
BUILD_AVG_DURATION=$(calculate_average_duration "$BUILD_WORKFLOW_RUNS")
DEPLOY_AVG_DURATION=$(calculate_average_duration "$DEPLOY_WORKFLOW_RUNS")
# Get system metrics (if running on server)
SYSTEM_LOAD="unknown"
DOCKER_CONTAINERS="unknown"
GITEA_RUNNER_STATUS="unknown"
if command -v uptime >/dev/null 2>&1; then
SYSTEM_LOAD=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | tr -d ' ')
fi
if command -v docker >/dev/null 2>&1; then
DOCKER_CONTAINERS=$(docker ps --format '{{.Names}}' | wc -l)
if docker ps --format '{{.Names}}' | grep -q "gitea-runner"; then
GITEA_RUNNER_STATUS="running"
else
GITEA_RUNNER_STATUS="stopped"
fi
fi
# Create metrics JSON
cat > "$METRICS_FILE" <<EOF
{
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"workflow_metrics": {
"build_image": {
"average_duration_seconds": $BUILD_AVG_DURATION,
"recent_runs": 20
},
"manual_deploy": {
"average_duration_seconds": $DEPLOY_AVG_DURATION,
"recent_runs": 10
}
},
"system_metrics": {
"load_average": "$SYSTEM_LOAD",
"docker_containers": "$DOCKER_CONTAINERS",
"gitea_runner_status": "$GITEA_RUNNER_STATUS"
},
"optimizations": {
"repository_artifact_enabled": true,
"helper_script_caching_enabled": true,
"combined_deployment_playbook": true,
"exponential_backoff_health_checks": true,
"concurrency_groups": true
}
}
EOF
echo -e "${GREEN}✅ Metrics collected${NC}"
echo ""
echo -e "${BLUE}📈 Summary:${NC}"
echo " Build Workflow Avg Duration: ${BUILD_AVG_DURATION}s"
echo " Deploy Workflow Avg Duration: ${DEPLOY_AVG_DURATION}s"
echo " System Load: $SYSTEM_LOAD"
echo " Docker Containers: $DOCKER_CONTAINERS"
echo " Gitea Runner: $GITEA_RUNNER_STATUS"
echo ""
echo -e "${BLUE}💾 Metrics saved to:${NC} $METRICS_FILE"
echo ""
# Display recent workflow runs summary
if [ "$BUILD_WORKFLOW_RUNS" != "[]" ] && [ -n "$BUILD_WORKFLOW_RUNS" ]; then
echo -e "${BLUE}📋 Recent Build Workflow Runs:${NC}"
echo "$BUILD_WORKFLOW_RUNS" | grep -o '"status":"[^"]*"' | head -5 | sed 's/"status":"//g' | sed 's/"//g' | while read -r status; do
case "$status" in
success)
echo -e " ${GREEN}${NC} Success"
;;
failure)
echo -e " ${RED}${NC} Failed"
;;
running)
echo -e " ${YELLOW}${NC} Running"
;;
*)
echo -e " ${BLUE}?${NC} $status"
;;
esac
done
echo ""
fi
echo -e "${GREEN}✅ Monitoring complete${NC}"