Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 10m14s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Failing after 11m25s
Security Vulnerability Scan / Composer Security Audit (push) Has been cancelled
- Remove middleware reference from Gitea Traefik labels (caused routing issues)
- Optimize Gitea connection pool settings (MAX_IDLE_CONNS=30, authentication_timeout=180s)
- Add explicit service reference in Traefik labels
- Fix intermittent 504 timeouts by improving PostgreSQL connection handling

Fixes Gitea unreachability via git.michaelschiemer.de
344 lines
16 KiB
YAML
344 lines
16 KiB
YAML
---
# Diagnose Gitea timeouts live, while a real request is in flight.
# Every check below is run around an actual HTTP request against Gitea.
- name: Diagnose Gitea Timeout During Request
  hosts: production
  # Facts are required: test_timestamp below is built from ansible_date_time.
  gather_facts: true
  become: false
  vars:
    # stacks_base_path and gitea_domain are not defined in this play; they are
    # expected to come from inventory / group vars.
    gitea_stack_path: "{{ stacks_base_path }}/gitea"
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    gitea_url: "https://{{ gitea_domain }}"
    test_duration_seconds: 60  # How long the background collectors run
    # Suffix shared by all temporary files written under /tmp during this run.
    test_timestamp: "{{ ansible_date_time.epoch }}"

  tasks:
# Prints the list of checks this play performs, plus the configured test
# duration and the timestamp used as suffix for the temporary files.
- name: Display diagnostic plan
  ansible.builtin.debug:
    msg: |
      ================================================================================
      GITEA TIMEOUT DIAGNOSE - LIVE WÄHREND REQUEST
      ================================================================================

      Diese Diagnose führt alle Checks während eines tatsächlichen Requests durch:

      1. Docker Stats (CPU/RAM/IO) während Request
      2. Gitea Logs (DB-Timeouts, Panics, "context deadline exceeded")
      3. Postgres Logs (Connection issues)
      4. Traefik Logs ("backend connection error", "EOF")
      5. Direkter Test Traefik → Gitea

      Test-Dauer: {{ test_duration_seconds }} Sekunden
      Timestamp: {{ test_timestamp }}
      ================================================================================
# One-shot `docker stats` snapshot of the four containers, taken before the
# test request. Falls back to a fixed message if the docker call fails.
# The {{ '{{' }} / {{ '}}' }} pairs emit literal Go-template braces for docker.
- name: Get initial container stats (baseline)
  ansible.builtin.shell:
    cmd: >-
      docker stats --no-stream
      --format "table {{ '{{' }}.Name{{ '}}' }}\t{{ '{{' }}.CPUPerc{{ '}}' }}\t{{ '{{' }}.MemUsage{{ '}}' }}\t{{ '{{' }}.NetIO{{ '}}' }}\t{{ '{{' }}.BlockIO{{ '}}' }}"
      gitea gitea-postgres gitea-redis traefik
      2>/dev/null || echo "Stats collection failed"
  changed_when: false
  register: initial_stats
# Streams `docker stats` (CSV: name,cpu,mem,net,block) for the four containers
# into /tmp/gitea_stats_<timestamp>.log for test_duration_seconds, in the
# background, prefixing each line with a millisecond timestamp.
# `IFS= read -r` keeps each line verbatim (plain `read` strips whitespace and
# eats backslashes); printf avoids `echo` interpreting escape sequences.
# The registered PID is that of the last pipeline element (the while loop).
- name: Start collecting Docker stats in background
  ansible.builtin.shell: |
    timeout {{ test_duration_seconds }} docker stats --format "{{ '{{' }}.Name{{ '}}' }},{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.NetIO{{ '}}' }},{{ '{{' }}.BlockIO{{ '}}' }}" gitea gitea-postgres gitea-redis traefik 2>/dev/null | while IFS= read -r line; do
      printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S.%3N')" "$line"
    done > /tmp/gitea_stats_{{ test_timestamp }}.log 2>&1 &
    STATS_PID=$!
    echo $STATS_PID
  register: stats_pid
  changed_when: false
# Follows the gitea service logs for test_duration_seconds in the background
# and writes them, each line prefixed with the time it was read, to
# /tmp/gitea_logs_<timestamp>.log.
# `IFS= read -r` + printf keep log lines verbatim (plain `read`/`echo` would
# strip whitespace and interpret backslashes).
# NOTE(review): `logs -f` without --tail/--since presumably replays existing
# log history first, so old entries get a fresh timestamp - confirm intent.
- name: Start collecting Gitea logs in background
  ansible.builtin.shell: |
    cd {{ gitea_stack_path }}
    timeout {{ test_duration_seconds }} docker compose logs -f gitea 2>&1 | while IFS= read -r line; do
      printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S.%3N')" "$line"
    done > /tmp/gitea_logs_{{ test_timestamp }}.log 2>&1 &
    echo $!
  register: gitea_logs_pid
  changed_when: false
# Follows the gitea-postgres service logs for test_duration_seconds in the
# background and writes them, each line prefixed with the time it was read,
# to /tmp/postgres_logs_<timestamp>.log.
# `IFS= read -r` + printf keep log lines verbatim (plain `read`/`echo` would
# strip whitespace and interpret backslashes).
# NOTE(review): `logs -f` without --tail/--since presumably replays existing
# log history first, so old entries get a fresh timestamp - confirm intent.
- name: Start collecting Postgres logs in background
  ansible.builtin.shell: |
    cd {{ gitea_stack_path }}
    timeout {{ test_duration_seconds }} docker compose logs -f gitea-postgres 2>&1 | while IFS= read -r line; do
      printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S.%3N')" "$line"
    done > /tmp/postgres_logs_{{ test_timestamp }}.log 2>&1 &
    echo $!
  register: postgres_logs_pid
  changed_when: false
# Follows the traefik service logs for test_duration_seconds in the background
# and writes them, each line prefixed with the time it was read, to
# /tmp/traefik_logs_<timestamp>.log.
# `IFS= read -r` + printf keep log lines verbatim (plain `read`/`echo` would
# strip whitespace and interpret backslashes, e.g. in escaped JSON).
# NOTE(review): `logs -f` without --tail/--since presumably replays existing
# log history first, so old entries get a fresh timestamp - confirm intent.
- name: Start collecting Traefik logs in background
  ansible.builtin.shell: |
    cd {{ traefik_stack_path }}
    timeout {{ test_duration_seconds }} docker compose logs -f traefik 2>&1 | while IFS= read -r line; do
      printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S.%3N')" "$line"
    done > /tmp/traefik_logs_{{ test_timestamp }}.log 2>&1 &
    echo $!
  register: traefik_logs_pid
  changed_when: false
# Short fixed delay so the background collectors launched by the preceding
# tasks are running before the test request is sent.
- name: Wait a moment for log collection to start
  ansible.builtin.pause:
    seconds: 2
# Sends one verbose HTTPS request to <gitea_url>/api/healthz. The response
# body goes to /tmp/gitea_response_<timestamp>.log; curl's verbose output and
# the -w summary (HTTP_CODE / TIME_* lines) go to /tmp/gitea_curl_<timestamp>.log.
# --max-time 34 lets curl itself give up just before the outer `timeout 35`
# backstop: curl still prints the -w summary (HTTP_CODE:000) when it aborts,
# whereas a curl killed by `timeout` prints no summary at all.
# The task never fails; the outcome is evaluated from the log files later.
- name: Trigger Gitea request via Traefik (with timeout)
  ansible.builtin.shell: |
    echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Starting request to {{ gitea_url }}/api/healthz"
    timeout 35 curl -k -v -s --max-time 34 -o /tmp/gitea_response_{{ test_timestamp }}.log -w "\nHTTP_CODE:%{http_code}\nTIME_TOTAL:%{time_total}\nTIME_CONNECT:%{time_connect}\nTIME_STARTTRANSFER:%{time_starttransfer}\n" "{{ gitea_url }}/api/healthz" 2>&1 | tee /tmp/gitea_curl_{{ test_timestamp }}.log
    echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Request completed"
  register: gitea_request
  changed_when: false
  failed_when: false
# Calls Gitea's health endpoint from inside the traefik container, bypassing
# Traefik's routing, and stores the result in
# /tmp/traefik_gitea_direct_<timestamp>.log.
# The command's own exit status is tested (not that of `tee`, as a
# `cmd | tee ... || echo` construct would), so a failing or timed-out call
# reliably appends the DIRECT_TEST_FAILED marker that the diagnosis looks for.
# Despite "(parallel)" in the name, this task runs after the request task.
- name: Test direct connection Traefik → Gitea (parallel)
  ansible.builtin.shell: |
    echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Starting direct test Traefik → Gitea"
    cd {{ traefik_stack_path }}
    result_file=/tmp/traefik_gitea_direct_{{ test_timestamp }}.log
    if direct_output=$(timeout 35 docker compose exec -T traefik wget -qO- --timeout=30 http://gitea:3000/api/healthz 2>&1); then
      printf '%s\n' "$direct_output" | tee "$result_file"
    else
      printf '%s\nDIRECT_TEST_FAILED\n' "$direct_output" | tee "$result_file"
    fi
    echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Direct test completed"
  register: traefik_direct_test
  changed_when: false
  failed_when: false
# Waits test_duration_seconds minus 5 seconds so the background collectors
# (each limited by `timeout test_duration_seconds`) can finish.
# `| int` makes the arithmetic work when the variable is overridden as a
# string (e.g. via -e), and the result is clamped at 0 for durations below 5.
# NOTE(review): the request and direct-test tasks have already consumed part
# of the collection window by now, so this wait is likely longer than needed.
- name: Wait for log collection to complete
  ansible.builtin.pause:
    seconds: "{{ [test_duration_seconds | int - 5, 0] | max }}"
# Terminates any collectors that are still running, then waits two seconds.
# The patterns use the "[d]ocker" bracket form on purpose: `pkill -f` matches
# full command lines, and the shell executing this script carries the script
# text in its own command line. An unbracketed pattern would match that shell
# and kill it after the first pkill; the bracketed form matches the real
# processes but not its own pattern text. Do not add shell comments containing
# the unbracketed word inside the script body for the same reason.
# Best effort: missing processes and errors are ignored.
- name: Stop background processes
  ansible.builtin.shell: |
    pkill -f "[d]ocker.*stats.*gitea" || true
    pkill -f "[d]ocker compose logs.*gitea" || true
    pkill -f "[d]ocker compose logs.*postgres" || true
    pkill -f "[d]ocker compose logs.*traefik" || true
    sleep 2
  changed_when: false
  failed_when: false
# Reads the Docker stats log written by the background collector.
# A missing file is tolerated (the task is never marked failed).
# The registered result is not referenced by any later task visible here.
- name: Collect stats results
  ansible.builtin.slurp:
    src: "/tmp/gitea_stats_{{ test_timestamp }}.log"
  failed_when: false
  changed_when: false
  register: stats_results
# Reads the Gitea log capture written by the background collector.
# A missing file is tolerated (the task is never marked failed).
# The registered result is not referenced by any later task visible here.
- name: Collect Gitea logs results
  ansible.builtin.slurp:
    src: "/tmp/gitea_logs_{{ test_timestamp }}.log"
  failed_when: false
  changed_when: false
  register: gitea_logs_results
# Reads the Postgres log capture written by the background collector.
# A missing file is tolerated (the task is never marked failed).
# The registered result is not referenced by any later task visible here.
- name: Collect Postgres logs results
  ansible.builtin.slurp:
    src: "/tmp/postgres_logs_{{ test_timestamp }}.log"
  failed_when: false
  changed_when: false
  register: postgres_logs_results
# Reads the Traefik log capture written by the background collector.
# A missing file is tolerated (the task is never marked failed).
# The registered result is not referenced by any later task visible here.
- name: Collect Traefik logs results
  ansible.builtin.slurp:
    src: "/tmp/traefik_logs_{{ test_timestamp }}.log"
  failed_when: false
  changed_when: false
  register: traefik_logs_results
# Reads the curl transcript of the test request (verbose output plus the
# HTTP_CODE / TIME_* summary lines). The final diagnosis task base64-decodes
# `.content` of this result; a missing file is tolerated.
- name: Get request result
  ansible.builtin.slurp:
    src: "/tmp/gitea_curl_{{ test_timestamp }}.log"
  failed_when: false
  changed_when: false
  register: request_result
# Reads the output of the direct Traefik → Gitea health check. The final
# diagnosis task base64-decodes `.content` of this result; a missing file is
# tolerated.
- name: Get direct test result
  ansible.builtin.slurp:
    src: "/tmp/traefik_gitea_direct_{{ test_timestamp }}.log"
  failed_when: false
  changed_when: false
  register: direct_test_result
# Summarises the collected Docker stats log: sample count, up to 10 rows whose
# CPU column (2nd CSV field) exceeds 80 %, and the last 5 matching rows for
# the memory and network sections.
# Each result is captured first and the fallback text is printed only when it
# is empty. A trailing `|| echo ...` after a pipeline never fires, because
# the pipeline's exit status is that of head/tail, which succeed on no input.
# NOTE(review): the MEMORY and NETWORK sections print the same rows; they are
# kept as in the original output.
- name: Analyze stats for high CPU/Memory/IO
  ansible.builtin.shell: |
    stats_file=/tmp/gitea_stats_{{ test_timestamp }}.log
    if [ -f "$stats_file" ]; then
      echo "=== STATS SUMMARY ==="
      echo "Total samples: $(wc -l < "$stats_file")"
      echo ""
      echo "=== HIGH CPU (>80%) ==="
      rows=$(grep -E "gitea|gitea-postgres" "$stats_file" | awk -F',' '{cpu=$2; gsub(/%/, "", cpu); if (cpu+0 > 80) print $0}' | head -10)
      printf '%s\n' "${rows:-No high CPU usage found}"
      echo ""
      echo "=== MEMORY USAGE ==="
      rows=$(grep -E "gitea" "$stats_file" | tail -5)
      printf '%s\n' "${rows:-No memory stats}"
      echo ""
      echo "=== NETWORK IO ==="
      rows=$(grep -E "gitea" "$stats_file" | tail -5)
      printf '%s\n' "${rows:-No network activity}"
    else
      echo "Stats file not found"
    fi
  register: stats_analysis
  changed_when: false
  failed_when: false
# Greps the captured Gitea log for timeout/connection errors, panics/fatals
# and slow-operation hints, and prints the last 10 captured lines.
# Each result is captured first and the fallback text is printed only when it
# is empty. A trailing `|| echo ...` after `grep | tail` never fires, because
# the pipeline's exit status is tail's, which succeeds on empty input.
- name: Analyze Gitea logs for errors
  ansible.builtin.shell: |
    log_file=/tmp/gitea_logs_{{ test_timestamp }}.log
    if [ -f "$log_file" ]; then
      echo "=== DB-TIMEOUTS / CONNECTION ERRORS ==="
      hits=$(grep -iE "timeout|deadline exceeded|connection.*failed|database.*error|postgres.*error|context.*deadline" "$log_file" | tail -20)
      printf '%s\n' "${hits:-No DB-timeouts found}"
      echo ""
      echo "=== PANICS / FATAL ERRORS ==="
      hits=$(grep -iE "panic|fatal|error.*fatal" "$log_file" | tail -10)
      printf '%s\n' "${hits:-No panics found}"
      echo ""
      echo "=== SLOW QUERIES / PERFORMANCE ==="
      hits=$(grep -iE "slow|performance|took.*ms|duration" "$log_file" | tail -10)
      printf '%s\n' "${hits:-No slow queries found}"
      echo ""
      echo "=== RECENT LOG ENTRIES (last 10) ==="
      hits=$(tail -10 "$log_file")
      printf '%s\n' "${hits:-No recent logs}"
    else
      echo "Gitea logs file not found"
    fi
  register: gitea_logs_analysis
  changed_when: false
  failed_when: false
# Greps the captured Postgres log for errors (including "too many
# connections") and slow-query hints, and prints the last 10 captured lines.
# Each result is captured first and the fallback text is printed only when it
# is empty. A trailing `|| echo ...` after `grep | tail` never fires, because
# the pipeline's exit status is tail's, which succeeds on empty input.
- name: Analyze Postgres logs for errors
  ansible.builtin.shell: |
    log_file=/tmp/postgres_logs_{{ test_timestamp }}.log
    if [ -f "$log_file" ]; then
      echo "=== POSTGRES ERRORS ==="
      hits=$(grep -iE "error|timeout|deadlock|connection.*refused|too many connections" "$log_file" | tail -20)
      printf '%s\n' "${hits:-No Postgres errors found}"
      echo ""
      echo "=== SLOW QUERIES ==="
      hits=$(grep -iE "slow|duration|statement.*took" "$log_file" | tail -10)
      printf '%s\n' "${hits:-No slow queries found}"
      echo ""
      echo "=== RECENT LOG ENTRIES (last 10) ==="
      hits=$(tail -10 "$log_file")
      printf '%s\n' "${hits:-No recent logs}"
    else
      echo "Postgres logs file not found"
    fi
  register: postgres_logs_analysis
  changed_when: false
  failed_when: false
# Greps the captured Traefik log for backend/connection errors and
# timeout/504 entries, and prints the last 10 captured lines.
# Each result is captured first and the fallback text is printed only when it
# is empty. A trailing `|| echo ...` after `grep | tail` never fires, because
# the pipeline's exit status is tail's, which succeeds on empty input.
- name: Analyze Traefik logs for backend errors
  ansible.builtin.shell: |
    log_file=/tmp/traefik_logs_{{ test_timestamp }}.log
    if [ -f "$log_file" ]; then
      echo "=== BACKEND CONNECTION ERRORS ==="
      hits=$(grep -iE "backend.*error|connection.*error|EOF|gitea.*error|git\.michaelschiemer\.de.*error" "$log_file" | tail -20)
      printf '%s\n' "${hits:-No backend errors found}"
      echo ""
      echo "=== TIMEOUT ERRORS ==="
      hits=$(grep -iE "timeout|504|gateway.*timeout" "$log_file" | tail -10)
      printf '%s\n' "${hits:-No timeout errors found}"
      echo ""
      echo "=== RECENT LOG ENTRIES (last 10) ==="
      hits=$(tail -10 "$log_file")
      printf '%s\n' "${hits:-No recent logs}"
    else
      echo "Traefik logs file not found"
    fi
  register: traefik_logs_analysis
  changed_when: false
  failed_when: false
# Prints all collected results followed by an automatic interpretation.
#
# The interpretation only looks at real log rows, not at the report's own
# scaffolding:
#   * *_hits keep only the timestamp-prefixed lines ("[...") that appear
#     before the "RECENT LOG ENTRIES" section of each analysis, lower-cased.
#     Matching the raw analysis text would always succeed, because the section
#     headers themselves contain "BACKEND", "TIMEOUT" and "HIGH CPU".
#   * high_cpu is true only when the HIGH CPU header is directly followed by
#     a data row.
#   * "connection.*error" is evaluated as a regex via the `search` test
#     (the `in` operator would look for that literal text).
#   * The request counts as timed out on HTTP_CODE:504, on HTTP_CODE:000
#     (curl got no response) or when no HTTP_CODE line exists at all (curl was
#     killed by `timeout`, or the transcript is missing). A bare '504'
#     substring is not used because it also matches timing values.
- name: Display comprehensive diagnosis
  ansible.builtin.debug:
    msg: |
      ================================================================================
      GITEA TIMEOUT DIAGNOSE - ERGEBNISSE
      ================================================================================

      BASELINE STATS (vor Request):
      {{ initial_stats.stdout }}

      REQUEST ERGEBNIS:
      {% if request_result.content is defined and request_result.content != '' %}
      {{ request_result.content | b64decode }}
      {% else %}
      Request-Ergebnis nicht verfügbar
      {% endif %}

      DIREKTER TEST TRAEFIK → GITEA:
      {% if direct_test_result.content is defined and direct_test_result.content != '' %}
      {{ direct_test_result.content | b64decode }}
      {% else %}
      Direkter Test-Ergebnis nicht verfügbar
      {% endif %}

      ================================================================================
      STATS-ANALYSE (während Request):
      ================================================================================
      {{ stats_analysis.stdout }}

      ================================================================================
      GITEA LOGS-ANALYSE:
      ================================================================================
      {{ gitea_logs_analysis.stdout }}

      ================================================================================
      POSTGRES LOGS-ANALYSE:
      ================================================================================
      {{ postgres_logs_analysis.stdout }}

      ================================================================================
      TRAEFIK LOGS-ANALYSE:
      ================================================================================
      {{ traefik_logs_analysis.stdout }}

      ================================================================================
      INTERPRETATION:
      ================================================================================

      {% set request_content = request_result.content | default('') | b64decode | default('') %}
      {% set direct_content = direct_test_result.content | default('') | b64decode | default('') %}
      {% set traefik_hits = (traefik_logs_analysis.stdout | default('')).split('=== RECENT LOG ENTRIES')[0] | regex_findall('^\\[.*$', multiline=True) | join('\n') | lower %}
      {% set gitea_hits = (gitea_logs_analysis.stdout | default('')).split('=== RECENT LOG ENTRIES')[0] | regex_findall('^\\[.*$', multiline=True) | join('\n') | lower %}
      {% set postgres_hits = (postgres_logs_analysis.stdout | default('')).split('=== RECENT LOG ENTRIES')[0] | regex_findall('^\\[.*$', multiline=True) | join('\n') | lower %}
      {% set high_cpu = (stats_analysis.stdout | default('')) is search('=== HIGH CPU \\(>80%\\) ===\\n\\[') %}
      {% set request_timed_out = 'HTTP_CODE:504' in request_content or 'HTTP_CODE:000' in request_content or 'HTTP_CODE:' not in request_content %}

      {% if request_timed_out %}
      ⚠️ REQUEST HAT TIMEOUT/504:

      {% if traefik_hits is search('eof|backend|connection.*error') %}
      → Traefik meldet Backend-Connection-Error
      → Gitea antwortet nicht auf Traefik's Verbindungsversuche
      {% endif %}

      {% if 'timeout' in gitea_hits or 'deadline exceeded' in gitea_hits %}
      → Gitea hat DB-Timeouts oder Context-Deadline-Exceeded
      → Postgres könnte blockieren oder zu langsam sein
      {% endif %}

      {% if 'too many connections' in postgres_hits %}
      → Postgres hat zu viele Verbindungen
      → Connection Pool könnte überlastet sein
      {% endif %}

      {% if high_cpu %}
      → Gitea oder Postgres haben hohe CPU-Last
      → Performance-Problem, nicht Timeout-Konfiguration
      {% endif %}

      {% if 'DIRECT_TEST_FAILED' in direct_content or direct_content | trim == '' %}
      → Direkter Test Traefik → Gitea schlägt fehl
      → Problem liegt bei Gitea selbst, nicht bei Traefik-Routing
      {% endif %}
      {% else %}
      ✅ REQUEST WAR ERFOLGREICH:
      → Problem tritt nur intermittierend auf
      → Prüfe Logs auf sporadische Fehler
      {% endif %}

      ================================================================================
      NÄCHSTE SCHRITTE:
      ================================================================================

      1. Prüfe ob hohe CPU/Memory bei Gitea oder Postgres
      2. Prüfe ob DB-Timeouts in Gitea-Logs
      3. Prüfe ob Postgres "too many connections" meldet
      4. Prüfe ob Traefik "backend connection error" oder "EOF" meldet
      5. Prüfe ob direkter Test Traefik → Gitea funktioniert

      ================================================================================
# Removes every temporary file this play wrote under /tmp. The list mirrors
# the file name prefixes used by the collector, request and direct-test tasks;
# the single path /tmp/gitea_<timestamp>.log is never created by any task, so
# deleting only that path would leave all seven files behind.
# Best effort: a failed removal does not fail the play.
- name: Cleanup temporary files
  ansible.builtin.file:
    path: "/tmp/{{ item }}_{{ test_timestamp }}.log"
    state: absent
  loop:
    - gitea_stats
    - gitea_logs
    - postgres_logs
    - traefik_logs
    - gitea_response
    - gitea_curl
    - traefik_gitea_direct
  failed_when: false