fix: Gitea Traefik routing and connection pool optimization

- Remove middleware reference from Gitea Traefik labels (caused routing issues)
- Optimize Gitea connection pool settings (MAX_IDLE_CONNS=30, authentication_timeout=180s)
- Add explicit service reference in Traefik labels
- Fix intermittent 504 timeouts by improving PostgreSQL connection handling

Fixes Gitea unreachability via git.michaelschiemer.de
2025-11-09 14:46:15 +01:00
parent 85c369e846
commit 36ef2a1e2c
1366 changed files with 104925 additions and 28719 deletions
--- a/deployment/ansible/playbooks/find-ansible-automation-source.yml
+++ b/deployment/ansible/playbooks/find-ansible-automation-source.yml
@@ -0,0 +1,246 @@
+---
+# Find Ansible Automation Source
+# Locates the source of the external Ansible automation that keeps restarting
+# Traefik at regular intervals.
+- name: Find Ansible Automation Source
+  hosts: production
+  become: true
+  gather_facts: true
+
+  tasks:
+    # Lists processes whose command line mentions Ansible.
+    # On a managed host the module executing this very task is normally started
+    # from a path like .../ansible-tmp-<id>/AnsiballZ_command.py (default,
+    # non-pipelined execution). Those lines are filtered out together with the
+    # grep helper itself; otherwise this playbook would report itself as a
+    # finding on every run and the fallback message could never appear.
+    - name: Check for running Ansible processes
+      ansible.builtin.shell: |
+        ps aux \
+          | grep -E "ansible|ansible-playbook|ansible-pull" \
+          | grep -vE "grep|AnsiballZ_|ansible-tmp-" \
+          || echo "No Ansible processes found"
+      register: ansible_processes
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+
+    # Looks for ansible-pull in the process list (the grep helper itself is
+    # excluded) and prints a fixed fallback line when nothing matches.
+    - name: Check for ansible-pull processes
+      ansible.builtin.shell:
+        cmd: >-
+          ps aux | grep ansible-pull | grep -v grep
+          || echo "No ansible-pull processes found"
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+      register: ansible_pull_processes
+
+    # Searches all systemd timers (active and inactive) for a case-insensitive
+    # "ansible" match and prints a fixed fallback line when none is listed.
+    - name: Check systemd timers for ansible-pull
+      ansible.builtin.shell:
+        cmd: >-
+          systemctl list-timers --all --no-pager
+          | grep -i ansible
+          || echo "No ansible timers found"
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+      register: ansible_timers
+
+    # Scans the personal crontab of every account listed in /etc/passwd for
+    # Ansible invocations and prints the matching entries per user.
+    # The fallback line is printed only when no crontab matched at all. A
+    # `done || echo ...` construct cannot do that here, because the loop itself
+    # always exits successfully, so hits are tracked explicitly in `found`.
+    - name: Check for ansible-pull cronjobs
+      ansible.builtin.shell: |
+        found=""
+        for user in $(cut -f1 -d: /etc/passwd); do
+          entries=$(crontab -u "$user" -l 2>/dev/null | grep -E "ansible-pull|ansible.*playbook")
+          if [ -n "$entries" ]; then
+            echo "=== User: $user ==="
+            # printf instead of echo: crontab lines may contain backslashes.
+            printf '%s\n' "$entries"
+            found=1
+          fi
+        done
+        [ -n "$found" ] || echo "No ansible-pull cronjobs found"
+      register: ansible_cronjobs
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+
+    # Greps the system-wide cron locations for Ansible invocations.
+    # /etc/crontab is included in addition to the cron.* directories, so the
+    # existence test uses -e (file or directory) rather than -d.
+    # Hits are tracked in `found` so the fallback line is printed exactly when
+    # nothing matched; the loop's own exit status cannot signal that.
+    - name: Check system-wide cron for ansible
+      ansible.builtin.shell: |
+        found=""
+        for path in /etc/crontab /etc/cron.d /etc/cron.daily /etc/cron.hourly /etc/cron.weekly /etc/cron.monthly; do
+          [ -e "$path" ] || continue
+          hits=$(grep -rE "ansible-pull|ansible.*playbook" "$path" 2>/dev/null)
+          if [ -n "$hits" ]; then
+            printf '%s\n' "$hits"
+            echo "=== Found in $path ==="
+            found=1
+          fi
+        done
+        [ -n "$found" ] || echo "No ansible in system cron"
+      register: ansible_system_cron
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+
+    # Shows the last 50 journal lines of the past 24 hours that mention Ansible.
+    # The result is captured first because the exit status of a pipeline is
+    # that of its last command (tail), which succeeds even with no input; a
+    # trailing `|| echo ...` would therefore never print the fallback line.
+    # NOTE(review): module invocations of this playbook may show up here as
+    # well if the host logs them to the journal - confirm when reading results.
+    - name: Check journalctl for ansible-ansible processes
+      ansible.builtin.shell: |
+        matches=$(journalctl --since "24 hours ago" --no-pager \
+          | grep -iE "ansible-ansible|ansible-playbook|ansible-pull" \
+          | tail -50)
+        if [ -n "$matches" ]; then
+          printf '%s\n' "$matches"
+        else
+          echo "No ansible processes in journalctl"
+        fi
+      register: ansible_journal
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+
+    # Lists up to 20 paths below /home that are either named *ansible-pull* or
+    # are *ansible*.yml files located inside an ansible-pull directory.
+    # The parentheses spell out find's operator precedence (-a binds tighter
+    # than -o) without changing which paths match.
+    # The result is captured first: `head` always exits successfully, so a
+    # trailing `|| echo ...` would never print the fallback line.
+    - name: Check for ansible-pull configuration files
+      ansible.builtin.shell: |
+        configs=$(find /home \
+          \( -name "*ansible-pull*" -o \( -name "*ansible*.yml" -path "*/ansible-pull/*" \) \) \
+          2>/dev/null | head -20)
+        if [ -n "$configs" ]; then
+          printf '%s\n' "$configs"
+        else
+          echo "No ansible-pull config files found"
+        fi
+      register: ansible_pull_configs
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+
+    # Looks for currently running docker compose / docker restart commands that
+    # target Traefik (the grep helper itself is excluded) and prints a fixed
+    # fallback line when nothing matches.
+    - name: Check for running docker compose commands related to Traefik
+      ansible.builtin.shell:
+        cmd: >-
+          ps aux
+          | grep -E "docker.*compose.*traefik|docker.*restart.*traefik"
+          | grep -v grep
+          || echo "No docker compose traefik commands running"
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+      register: docker_traefik_commands
+
+    # Prints the last 20 die/kill events of the "traefik" container from the
+    # past hour as "<time> <action> <signal>".
+    # - --until takes a Unix timestamp, a date or a Go duration; the current
+    #   epoch time is passed instead of the literal "now", which is none of
+    #   those (its error would be hidden by 2>/dev/null).
+    # - event=kill is queried in addition to event=die because the signal
+    #   attribute printed by the format string belongs to kill events;
+    #   repeating a filter key matches either value.
+    # - The {{ '{{' }} ... {{ '}}' }} sequences are Jinja escapes that render
+    #   to the literal Go-template braces docker expects.
+    # - The result is captured first: `tail` always exits successfully, so a
+    #   trailing `|| echo ...` would never print the fallback line.
+    - name: Check Docker events for Traefik kill events (last hour)
+      ansible.builtin.shell: |
+        events=$(docker events --since 1h --until "$(date +%s)" \
+          --filter container=traefik --filter event=die --filter event=kill \
+          --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.signal{{ '}}' }}" \
+          2>/dev/null | tail -20)
+        if [ -n "$events" ]; then
+          printf '%s\n' "$events"
+        else
+          echo "No Traefik die events in last hour"
+        fi
+      register: traefik_kill_events
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+      # Best effort: a missing or unreachable docker CLI must not abort the run.
+      failed_when: false
+
+    # Shows the last 30 journal lines of the past 24 hours that mention a
+    # docker compose / docker restart command targeting Traefik.
+    # The result is captured first: `tail` always exits successfully, so a
+    # trailing `|| echo ...` would never print the fallback line.
+    - name: Check journalctl for docker compose traefik commands
+      ansible.builtin.shell: |
+        matches=$(journalctl --since "24 hours ago" --no-pager \
+          | grep -iE "docker.*compose.*traefik|docker.*restart.*traefik" \
+          | tail -30)
+        if [ -n "$matches" ]; then
+          printf '%s\n' "$matches"
+        else
+          echo "No docker compose traefik commands in journalctl"
+        fi
+      register: docker_traefik_journal
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+
+    # Lists up to 20 shell/YAML files below /home/deploy whose content looks
+    # like an Ansible playbook run or a docker compose restart of Traefik.
+    # `-exec ... {} +` hands files to grep in batches instead of one process
+    # per file; grep -l still prints one matching file name per line.
+    # The result is captured first: `head` always exits successfully, so a
+    # trailing `|| echo ...` would never print the fallback line.
+    - name: Check for CI/CD scripts that might run Ansible
+      ansible.builtin.shell: |
+        scripts=$(find /home/deploy -type f \
+          \( -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) \
+          -exec grep -lE "ansible.*playbook.*traefik|docker.*compose.*traefik.*restart" {} + \
+          2>/dev/null | head -20)
+        if [ -n "$scripts" ]; then
+          printf '%s\n' "$scripts"
+        else
+          echo "No CI/CD scripts found"
+        fi
+      register: cicd_scripts
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+
+    # Lists up to 10 Gitea/GitHub workflow files below /home/deploy that mention
+    # an Ansible playbook run or a docker compose command for Traefik.
+    # - The path alternatives are grouped so that -type f applies to all of
+    #   them (without the parentheses it only binds to the first one).
+    # - Both .yml and .yaml workflow files are considered.
+    # - `-exec ... {} +` replaces xargs, which splits file names on whitespace.
+    # - The result is captured first: `head` always exits successfully, so a
+    #   trailing `|| echo ...` would never print the fallback line.
+    - name: Check for Gitea Workflows that run Ansible
+      ansible.builtin.shell: |
+        workflows=$(find /home/deploy -type f \
+          \( -path "*/.gitea/workflows/*.yml" -o -path "*/.gitea/workflows/*.yaml" \
+             -o -path "*/.github/workflows/*.yml" -o -path "*/.github/workflows/*.yaml" \) \
+          -exec grep -lE "ansible.*playbook.*traefik|docker.*compose.*traefik" {} + \
+          2>/dev/null | head -10)
+        if [ -n "$workflows" ]; then
+          printf '%s\n' "$workflows"
+        else
+          echo "No Gitea workflows found"
+        fi
+      register: gitea_workflows
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+
+    # Lists up to 10 files below /home/deploy named *monitor* or *health* whose
+    # content mentions a Traefik restart or a docker compose command for Traefik.
+    # - The name alternatives are grouped so that -type f applies to both
+    #   (without the parentheses it only binds to the first one).
+    # - `-exec ... {} +` replaces xargs, which splits file names on whitespace.
+    # - The result is captured first: `head` always exits successfully, so a
+    #   trailing `|| echo ...` would never print the fallback line.
+    - name: Check for monitoring/healthcheck scripts
+      ansible.builtin.shell: |
+        scripts=$(find /home/deploy -type f \
+          \( -name "*monitor*" -o -name "*health*" \) \
+          -exec grep -lE "traefik.*restart|docker.*compose.*traefik" {} + \
+          2>/dev/null | head -10)
+        if [ -n "$scripts" ]; then
+          printf '%s\n' "$scripts"
+        else
+          echo "No monitoring scripts found"
+        fi
+      register: monitoring_scripts
+      # Read-only diagnostic: never report a change.
+      changed_when: false
+
+    # Renders every collected result plus a derived analysis and recommended
+    # actions as a single debug message.
+    # A check counts as a finding only when it produced output AND that output
+    # is not the check's "nothing found" fallback text. The non-empty test
+    # matters because a check can finish successfully without printing
+    # anything; empty output must not be mistaken for a finding. The four
+    # *_found flags are computed once at the top of the message and reused by
+    # the analysis and the solution sections.
+    - name: Summary
+      ansible.builtin.debug:
+        msg: |
+          {% set processes_found = (ansible_processes.stdout | trim | length > 0) and ('No Ansible processes found' not in ansible_processes.stdout) %}
+          {% set pull_found = (ansible_pull_processes.stdout | trim | length > 0) and ('No ansible-pull processes found' not in ansible_pull_processes.stdout) %}
+          {% set timers_found = (ansible_timers.stdout | trim | length > 0) and ('No ansible timers found' not in ansible_timers.stdout) %}
+          {% set cron_found = (ansible_cronjobs.stdout | trim | length > 0) and ('No ansible-pull cronjobs found' not in ansible_cronjobs.stdout) %}
+          ================================================================================
+          ANSIBLE AUTOMATION SOURCE DIAGNOSE:
+          ================================================================================
+
+          Laufende Ansible-Prozesse:
+          {{ ansible_processes.stdout }}
+
+          Ansible-Pull Prozesse:
+          {{ ansible_pull_processes.stdout }}
+
+          Systemd Timers für Ansible:
+          {{ ansible_timers.stdout }}
+
+          Cronjobs für Ansible:
+          {{ ansible_cronjobs.stdout }}
+
+          System-Cron für Ansible:
+          {{ ansible_system_cron.stdout }}
+
+          Ansible-Prozesse in Journalctl (letzte 24h):
+          {{ ansible_journal.stdout }}
+
+          Ansible-Pull Konfigurationsdateien:
+          {{ ansible_pull_configs.stdout }}
+
+          Laufende Docker Compose Traefik-Befehle:
+          {{ docker_traefik_commands.stdout }}
+
+          Traefik Kill-Events (letzte Stunde):
+          {{ traefik_kill_events.stdout }}
+
+          Docker Compose Traefik-Befehle in Journalctl:
+          {{ docker_traefik_journal.stdout }}
+
+          CI/CD Scripts die Traefik restarten:
+          {{ cicd_scripts.stdout }}
+
+          Gitea Workflows die Traefik restarten:
+          {{ gitea_workflows.stdout }}
+
+          Monitoring-Scripts die Traefik restarten:
+          {{ monitoring_scripts.stdout }}
+
+          ================================================================================
+          ANALYSE:
+          ================================================================================
+
+          {% if processes_found %}
+          ⚠️  AKTIVE ANSIBLE-PROZESSE GEFUNDEN:
+          {{ ansible_processes.stdout }}
+
+          → Diese Prozesse könnten Traefik regelmäßig neu starten
+          → Prüfe die Kommandozeile dieser Prozesse um das Playbook zu identifizieren
+          {% endif %}
+
+          {% if pull_found %}
+          ❌ ANSIBLE-PULL LÄUFT:
+          {{ ansible_pull_processes.stdout }}
+
+          → ansible-pull führt regelmäßig Playbooks aus
+          → Dies ist wahrscheinlich die Quelle der Traefik-Restarts
+          {% endif %}
+
+          {% if timers_found %}
+          ❌ ANSIBLE TIMER GEFUNDEN:
+          {{ ansible_timers.stdout }}
+
+          → Ein Systemd-Timer führt regelmäßig Ansible aus
+          → Deaktiviere mit: systemctl disable <timer-name>
+          {% endif %}
+
+          {% if cron_found %}
+          ❌ ANSIBLE CRONJOB GEFUNDEN:
+          {{ ansible_cronjobs.stdout }}
+
+          → Ein Cronjob führt regelmäßig Ansible aus
+          → Entferne oder kommentiere den Cronjob-Eintrag
+          {% endif %}
+
+          {% if cicd_scripts.stdout and 'No CI/CD scripts found' not in cicd_scripts.stdout %}
+          ⚠️  CI/CD SCRIPTS GEFUNDEN:
+          {{ cicd_scripts.stdout }}
+
+          → Diese Scripts könnten Traefik regelmäßig neu starten
+          → Prüfe diese Dateien und entferne/kommentiere Traefik-Restart-Befehle
+          {% endif %}
+
+          {% if gitea_workflows.stdout and 'No Gitea workflows found' not in gitea_workflows.stdout %}
+          ⚠️  GITEA WORKFLOWS GEFUNDEN:
+          {{ gitea_workflows.stdout }}
+
+          → Diese Workflows könnten Traefik regelmäßig neu starten
+          → Prüfe diese Workflows und entferne/kommentiere Traefik-Restart-Schritte
+          {% endif %}
+
+          {% if monitoring_scripts.stdout and 'No monitoring scripts found' not in monitoring_scripts.stdout %}
+          ⚠️  MONITORING SCRIPTS GEFUNDEN:
+          {{ monitoring_scripts.stdout }}
+
+          → Diese Scripts könnten Traefik regelmäßig neu starten
+          → Prüfe diese Scripts und entferne/kommentiere Traefik-Restart-Befehle
+          {% endif %}
+
+          ================================================================================
+          LÖSUNG:
+          ================================================================================
+
+          {% if not (processes_found or pull_found or timers_found or cron_found) %}
+          ℹ️  Keine aktiven Ansible-Automatisierungen gefunden
+
+          Mögliche Ursachen:
+          1. Ansible-Prozesse laufen nur zeitweise (intermittierend)
+          2. Externe CI/CD-Pipeline führt Ansible aus
+          3. Manuelle Ansible-Aufrufe von außen
+
+          Nächste Schritte:
+          1. Beobachte Docker Events in Echtzeit: docker events --filter container=traefik
+          2. Beobachte Ansible-Prozesse: watch -n 1 'ps aux | grep ansible'
+          3. Prüfe ob externe CI/CD-Pipelines Ansible ausführen
+          {% else %}
+
+          SOFORTMASSNAHME:
+
+          {% if pull_found %}
+          1. ❌ Stoppe ansible-pull:
+             pkill -f ansible-pull
+          {% endif %}
+
+          {% if timers_found %}
+          2. ❌ Deaktiviere Ansible-Timer:
+             systemctl stop <timer-name>
+             systemctl disable <timer-name>
+          {% endif %}
+
+          {% if cron_found %}
+          3. ❌ Entferne Ansible-Cronjobs:
+             crontab -u <user> -e
+             (Kommentiere oder entferne die Ansible-Zeilen)
+          {% endif %}
+
+          LANGZEITLÖSUNG:
+
+          1. Prüfe gefundene Scripts/Workflows und entferne Traefik-Restart-Befehle
+          2. Falls Healthchecks nötig sind, setze größere Intervalle (z.B. 5 Minuten statt 30 Sekunden)
+          3. Restarte Traefik nur bei echten Fehlern, nicht präventiv
+          {% endif %}
+
+          ================================================================================
+