diff --git a/deployment/ansible/playbooks/comprehensive-gitea-diagnosis.yml b/deployment/ansible/playbooks/comprehensive-gitea-diagnosis.yml
new file mode 100644
index 00000000..78955cac
--- /dev/null
+++ b/deployment/ansible/playbooks/comprehensive-gitea-diagnosis.yml
@@ -0,0 +1,222 @@
+---
+# Comprehensive Gitea Timeout Diagnosis
+#
+# Read-only diagnosis of the intermittent Gitea timeout problem: every check
+# task is marked changed_when: false, and the final "Summary" task prints the
+# collected output together with a short analysis.
+#
+# Variables referenced but not defined in this playbook:
+#   stacks_base_path - directory containing the gitea/ and traefik/ stacks
+#   gitea_domain     - hostname used to build gitea_url and to filter logs
+- name: Comprehensive Gitea Timeout Diagnosis
+  hosts: production
+  gather_facts: true
+  become: false
+  vars:
+    gitea_stack_path: "{{ stacks_base_path }}/gitea"
+    traefik_stack_path: "{{ stacks_base_path }}/traefik"
+    gitea_url: "https://{{ gitea_domain }}"
+
+  tasks:
+    # docker's --format option takes Go templates, whose braces collide with
+    # Jinja; they are therefore emitted through Jinja string literals.
+    - name: Check Traefik container uptime and restart count
+      ansible.builtin.shell: |
+        docker inspect traefik --format '{{ '{{' }}.State.Status{{ '}}' }}|{{ '{{' }}.State.StartedAt{{ '}}' }}|{{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
+      register: traefik_info
+      changed_when: false
+
+    - name: Check Gitea container uptime and restart count
+      ansible.builtin.shell: |
+        docker inspect gitea --format '{{ '{{' }}.State.Status{{ '}}' }}|{{ '{{' }}.State.StartedAt{{ '}}' }}|{{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
+      register: gitea_info
+      changed_when: false
+
+    # The trailing `grep .` passes matches through unchanged but exits non-zero
+    # when there are none. Without it the pipeline status is that of `tail`,
+    # which succeeds on empty input, so the fallback message never printed.
+    - name: Check Traefik logs for recent restarts (last 2 hours)
+      ansible.builtin.shell: |
+        cd {{ traefik_stack_path }}
+        docker compose logs traefik --since 2h 2>&1 | grep -iE "stopping server gracefully|I have to go|restart|shutdown" | tail -20 | grep . || echo "Keine Restart-Meldungen in den letzten 2 Stunden"
+      register: traefik_restart_logs
+      changed_when: false
+
+    # Same `grep .` guard as above so the fallback message can actually appear.
+    - name: Check Gitea logs for errors/timeouts (last 2 hours)
+      ansible.builtin.shell: |
+        cd {{ gitea_stack_path }}
+        docker compose logs gitea --since 2h 2>&1 | grep -iE "error|timeout|failed|panic|fatal|slow" | tail -30 | grep . || echo "Keine Fehler in den letzten 2 Stunden"
+      register: gitea_error_logs
+      changed_when: false
+
+    # Attempts are listed explicitly: ansible.builtin.shell runs /bin/sh by
+    # default, and a brace range like {1..5} is a bash extension that POSIX
+    # shells such as dash leave unexpanded, so the loop would run only once.
+    - name: Test Gitea direct connection (multiple attempts)
+      ansible.builtin.shell: |
+        cd {{ gitea_stack_path }}
+        for i in 1 2 3 4 5; do
+          echo "=== Attempt $i ==="
+          timeout 5 docker compose exec -T gitea curl -f http://localhost:3000/api/healthz 2>&1 || echo "FAILED"
+          sleep 1
+        done
+      register: gitea_direct_tests
+      changed_when: false
+
+    # -w ends with \n so each status code stays on its own line instead of
+    # running into the next "=== Attempt" header.
+    - name: Test Gitea via Traefik (multiple attempts)
+      ansible.builtin.shell: |
+        for i in 1 2 3 4 5; do
+          echo "=== Attempt $i ==="
+          timeout 10 curl -k -s -o /dev/null -w "%{http_code}\n" {{ gitea_url }}/api/healthz 2>&1 || echo "TIMEOUT"
+          sleep 2
+        done
+      register: gitea_traefik_tests
+      changed_when: false
+
+    # NOTE(review): the Traefik CLI reference lists only the `healthcheck` and
+    # `version` subcommands - confirm that `traefik show providers docker`
+    # works on the deployed version; if it does not, this task and the router
+    # check below always report "not found".
+    - name: Check Traefik service discovery for Gitea (using CLI)
+      ansible.builtin.shell: |
+        cd {{ traefik_stack_path }}
+        docker compose exec -T traefik traefik show providers docker 2>/dev/null | grep -i "gitea" || echo "Gitea service not found in Traefik providers"
+      register: traefik_gitea_service
+      changed_when: false
+      failed_when: false
+
+    # Runs the same command as the service check above; only the fallback
+    # text and the registered variable differ.
+    - name: Check Traefik routers for Gitea (using CLI)
+      ansible.builtin.shell: |
+        cd {{ traefik_stack_path }}
+        docker compose exec -T traefik traefik show providers docker 2>/dev/null | grep -i "gitea" || echo "Gitea router not found in Traefik providers"
+      register: traefik_gitea_router
+      changed_when: false
+      failed_when: false
+
+    - name: Check network connectivity Traefik -> Gitea
+      ansible.builtin.shell: |
+        cd {{ traefik_stack_path }}
+        for i in 1 2 3; do
+          echo "=== Attempt $i ==="
+          docker compose exec -T traefik wget -qO- --timeout=5 http://gitea:3000/api/healthz 2>&1 || echo "CONNECTION_FAILED"
+          sleep 1
+        done
+      register: traefik_gitea_network
+      changed_when: false
+
+    - name: Check Gitea container resources (CPU/Memory)
+      ansible.builtin.shell: |
+        docker stats gitea --no-stream --format 'CPU: {{ '{{' }}.CPUPerc{{ '}}' }} | Memory: {{ '{{' }}.MemUsage{{ '}}' }}' 2>/dev/null || echo "Could not get stats"
+      register: gitea_resources
+      changed_when: false
+      failed_when: false
+
+    - name: Check Traefik container resources (CPU/Memory)
+      ansible.builtin.shell: |
+        docker stats traefik --no-stream --format 'CPU: {{ '{{' }}.CPUPerc{{ '}}' }} | Memory: {{ '{{' }}.MemUsage{{ '}}' }}' 2>/dev/null || echo "Could not get stats"
+      register: traefik_resources
+      changed_when: false
+      failed_when: false
+
+    - name: Check if Gitea is in traefik-public network
+      ansible.builtin.shell: |
+        docker network inspect traefik-public --format '{{ '{{' }}range .Containers{{ '}}' }}{{ '{{' }}.Name{{ '}}' }} {{ '{{' }}end{{ '}}' }}' 2>/dev/null | grep -q gitea && echo "YES" || echo "NO"
+      register: gitea_in_network
+      changed_when: false
+
+    # Filters on gitea_domain (fixed-string match) rather than a hard-coded
+    # hostname, so the check follows the same variable as gitea_url.
+    - name: Check Traefik access logs for Gitea requests (last 100 lines)
+      ansible.builtin.shell: |
+        cd {{ traefik_stack_path }}
+        tail -100 logs/access.log 2>/dev/null | grep -iF "{{ gitea_domain }}" | tail -20 | grep . || echo "Keine Access-Logs gefunden"
+      register: traefik_access_logs
+      changed_when: false
+      failed_when: false
+
+    # Fixed-string patterns: "gitea" also covers the former "error.*gitea"
+    # alternative, and the domain needs no regex escaping.
+    - name: Check Traefik error logs for Gitea-related errors
+      ansible.builtin.shell: |
+        cd {{ traefik_stack_path }}
+        tail -100 logs/traefik.log 2>/dev/null | grep -iF -e "gitea" -e "timeout" -e "{{ gitea_domain }}" | tail -20 | grep . || echo "Keine Gitea-Fehler in Traefik-Logs"
+      register: traefik_error_logs
+      changed_when: false
+      failed_when: false
+
+    - name: Summary
+      ansible.builtin.debug:
+        msg: |
+          ================================================================================
+          UMFASSENDE GITEA TIMEOUT DIAGNOSE:
+          ================================================================================
+
+          Container Status:
+          - Traefik: {{ traefik_info.stdout }}
+          - Gitea: {{ gitea_info.stdout }}
+
+          Traefik Restart-Logs (letzte 2h):
+          {{ traefik_restart_logs.stdout }}
+
+          Gitea Error-Logs (letzte 2h):
+          {{ gitea_error_logs.stdout }}
+
+          Direkte Gitea-Verbindung (5 Versuche):
+          {{ gitea_direct_tests.stdout }}
+
+          Gitea via Traefik (5 Versuche):
+          {{ gitea_traefik_tests.stdout }}
+
+          Traefik Service Discovery:
+          - Gitea Service: {{ traefik_gitea_service.stdout }}
+          - Gitea Router: {{ traefik_gitea_router.stdout }}
+
+          Netzwerk-Verbindung Traefik -> Gitea (3 Versuche):
+          {{ traefik_gitea_network.stdout }}
+
+          Container-Ressourcen:
+          - Gitea: {{ gitea_resources.stdout }}
+          - Traefik: {{ traefik_resources.stdout }}
+
+          Netzwerk:
+          - Gitea in traefik-public: {% if gitea_in_network.stdout == 'YES' %}✅{% else %}❌{% endif %}
+
+          Traefik Access-Logs (letzte 20 Gitea-Requests):
+          {{ traefik_access_logs.stdout }}
+
+          Traefik Error-Logs (Gitea-bezogen):
+          {{ traefik_error_logs.stdout }}
+
+          ================================================================================
+          ANALYSE:
+          ================================================================================
+
+          {% if 'stopping server gracefully' in traefik_restart_logs.stdout | lower or 'i have to go' in traefik_restart_logs.stdout | lower %}
+          ❌ PROBLEM: Traefik wird regelmäßig gestoppt!
+          → Dies ist die Hauptursache für die Timeouts
+          → Führe 'find-traefik-restart-source.yml' aus um die Quelle zu finden
+          {% endif %}
+
+          {% if 'CONNECTION_FAILED' in traefik_gitea_network.stdout %}
+          ❌ PROBLEM: Traefik kann Gitea nicht erreichen
+          → Netzwerk-Problem zwischen Traefik und Gitea
+          → Prüfe ob beide Container im traefik-public Netzwerk sind
+          {% endif %}
+
+          {% if 'not found' in traefik_gitea_service.stdout | lower or 'not found' in traefik_gitea_router.stdout | lower %}
+          ❌ PROBLEM: Gitea nicht in Traefik Service Discovery
+          → Traefik hat Gitea nicht erkannt
+          → Führe 'fix-gitea-timeouts.yml' aus um beide zu restarten
+          {% endif %}
+
+          {% if 'TIMEOUT' in gitea_traefik_tests.stdout %}
+          ⚠️ PROBLEM: Intermittierende Timeouts via Traefik
+          → Mögliche Ursachen: Traefik-Restarts, Gitea-Performance, Netzwerk-Probleme
+          {% endif %}
+
+          ================================================================================
diff --git a/deployment/ansible/playbooks/fix-gitea-complete.yml b/deployment/ansible/playbooks/fix-gitea-complete.yml
index 8e867420..d2b31473 100644
--- a/deployment/ansible/playbooks/fix-gitea-complete.yml +++ b/deployment/ansible/playbooks/fix-gitea-complete.yml @@ -108,7 +108,7 @@ - name: Check if Gitea is in Traefik service discovery ansible.builtin.shell: | cd {{ traefik_stack_path }} - docker compose exec -T traefik wget -qO- http://localhost:8080/api/http/services 2>/dev/null | grep -i "gitea" || echo "NOT_FOUND" + docker compose exec -T traefik traefik show providers docker 2>/dev/null | grep -i "gitea" || echo "NOT_FOUND" register: traefik_gitea_service_check changed_when: false failed_when: false @@ -168,7 +168,7 @@ 1. Warte 1-2 Minuten und teste erneut: curl -k {{ gitea_url }}/api/healthz 2. Prüfe Traefik-Logs: cd {{ traefik_stack_path }} && docker compose logs traefik --tail=50 3. Prüfe Gitea-Logs: cd {{ gitea_stack_path }} && docker compose logs gitea --tail=50 - 4. Prüfe Service Discovery: cd {{ traefik_stack_path }} && docker compose exec -T traefik wget -qO- http://localhost:8080/api/http/services + 4. Prüfe Service Discovery: cd {{ traefik_stack_path }} && docker compose exec -T traefik traefik show providers docker {% endif %} ================================================================================