---
# Stabilize Traefik
#
# Ensures that Traefik is running, that acme.json has the permissions Traefik
# requires (0600), and reports whether the container restarted while ACME
# challenges were given time to complete.
#
# Tunables (inventory or --extra-vars):
#   traefik_stack_path                directory of the Traefik compose stack
#   traefik_stabilize_wait_minutes    observation window in minutes (default 10)
#   traefik_stabilize_check_interval  seconds between state checks (default 60)
- name: Stabilize Traefik
  hosts: production
  gather_facts: true
  become: false

  vars:
    # Private, resolved values. The public variables are deliberately NOT
    # redefined in terms of themselves here: a play var such as
    #   traefik_stabilize_wait_minutes: "{{ traefik_stabilize_wait_minutes | default(10) }}"
    # templates recursively and fails unless the value is passed with -e.
    _stabilize_stack_dir: "{{ traefik_stack_path | default('/home/deploy/deployment/stacks/traefik') }}"
    _stabilize_acme_json: "{{ _stabilize_stack_dir }}/acme.json"
    _stabilize_file_owner: "{{ ansible_user | default('deploy') }}"
    _stabilize_wait_minutes: "{{ traefik_stabilize_wait_minutes | default(10) | int }}"
    traefik_stabilize_check_interval: 60  # Check every 60 seconds

  tasks:
    - name: Check if Traefik stack directory exists
      ansible.builtin.stat:
        path: "{{ _stabilize_stack_dir }}"
      register: traefik_stack_exists

    - name: Fail if Traefik stack directory does not exist
      ansible.builtin.fail:
        msg: "Traefik stack directory not found at {{ _stabilize_stack_dir }}"
      when: not traefik_stack_exists.stat.exists

    # Best effort on purpose: acme.json may not exist yet (state: file does not
    # create it), so a failure here must not abort the play.
    - name: Fix acme.json permissions first
      ansible.builtin.file:
        path: "{{ _stabilize_acme_json }}"
        state: file
        mode: '0600'
        owner: "{{ _stabilize_file_owner }}"
        group: "{{ _stabilize_file_owner }}"
      ignore_errors: true

    # chdir (instead of "cd ... && cmd" in a shell block) makes the task fail
    # if the directory is unusable rather than running compose elsewhere.
    - name: Ensure Traefik container is running
      ansible.builtin.command:
        cmd: docker compose up -d traefik
        chdir: "{{ _stabilize_stack_dir }}"
      register: traefik_start
      changed_when: traefik_start.rc == 0

    # With neither host/port nor path given, wait_for simply sleeps for
    # `timeout` seconds; this is a fixed grace period, not a readiness probe.
    - name: Wait for Traefik to be ready
      ansible.builtin.wait_for:
        timeout: 30
        delay: 2
      changed_when: false

    - name: Check Traefik container status
      ansible.builtin.command:
        cmd: docker compose ps traefik
        chdir: "{{ _stabilize_stack_dir }}"
      register: traefik_status
      changed_when: false

    - name: Display Traefik status
      ansible.builtin.debug:
        msg: |
          ================================================================================
          Traefik Container Status:
          ================================================================================
          {{ traefik_status.stdout }}
          ================================================================================

    # The marker string is appended on failure so the display task below can
    # tell success from failure without failing the play.
    - name: Check Traefik health
      ansible.builtin.shell:
        cmd: |
          docker compose exec -T traefik traefik healthcheck --ping 2>&1 || echo "HEALTH_CHECK_FAILED"
        chdir: "{{ _stabilize_stack_dir }}"
      register: traefik_health
      changed_when: false
      failed_when: false

    - name: Display Traefik health check
      ansible.builtin.debug:
        msg: |
          ================================================================================
          Traefik Health Check:
          ================================================================================
          {% if 'HEALTH_CHECK_FAILED' not in traefik_health.stdout %}
          ✅ Traefik is healthy
          {% else %}
          ⚠️ Traefik health check failed:
          {{ traefik_health.stdout }}
          {% endif %}
          ================================================================================

    - name: Verify acme.json permissions
      ansible.builtin.stat:
        path: "{{ _stabilize_acme_json }}"
      register: acme_json_precheck

    # stat reports mode as a string such as "0600"; compare it directly.
    # (Stripping the leading zero first made the comparison with '0600'
    # impossible to satisfy.)
    - name: Fix acme.json permissions if needed
      ansible.builtin.file:
        path: "{{ _stabilize_acme_json }}"
        mode: '0600'
        owner: "{{ _stabilize_file_owner }}"
        group: "{{ _stabilize_file_owner }}"
      when:
        - acme_json_precheck.stat.exists
        - acme_json_precheck.stat.mode != '0600'

    # Unconditional on purpose: a skipped task would overwrite the registered
    # variable with a "skipped" result. This is the state reported below.
    - name: Re-read acme.json state after permission fix
      ansible.builtin.stat:
        path: "{{ _stabilize_acme_json }}"
      register: acme_json_stat

    - name: Display acme.json status
      ansible.builtin.debug:
        msg: |
          ================================================================================
          acme.json Status:
          ================================================================================
          Path: {{ acme_json_stat.stat.path | default(_stabilize_acme_json) }}
          Mode: {{ acme_json_stat.stat.mode | default('n/a') }}
          {% if (acme_json_stat.stat.mode | default('')) == '0600' %}
          ✅ acme.json has correct permissions (600)
          {% else %}
          ⚠️ acme.json permissions need to be fixed
          {% endif %}
          ================================================================================

    # Tries ss first and falls back to netstat; purely informational.
    - name: Check Port 80/443 configuration
      ansible.builtin.shell: |
        echo "=== Port 80 ==="
        ss -tlnp 2>/dev/null | grep ":80 " || netstat -tlnp 2>/dev/null | grep ":80 " || echo "Could not check port 80"
        echo ""
        echo "=== Port 443 ==="
        ss -tlnp 2>/dev/null | grep ":443 " || netstat -tlnp 2>/dev/null | grep ":443 " || echo "Could not check port 443"
      register: port_config_check
      changed_when: false

    - name: Display Port configuration
      ansible.builtin.debug:
        msg: |
          ================================================================================
          Port-Konfiguration (80/443):
          ================================================================================
          {{ port_config_check.stdout }}
          ================================================================================

    # {{ '{{' }} / {{ '}}' }} emit literal braces so the Go template reaches
    # docker untouched by Jinja. Falls back to "0" if inspect fails.
    - name: Get initial Traefik restart count
      ansible.builtin.shell: |
        docker inspect traefik --format '{{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "0"
      register: initial_restart_count
      changed_when: false

    - name: Display initial restart count
      ansible.builtin.debug:
        msg: |
          ================================================================================
          Initial Traefik Restart Count: {{ initial_restart_count.stdout }}
          ================================================================================

    # Informational banner only; the actual polling happens in the next task.
    - name: Wait for ACME challenges to complete
      ansible.builtin.debug:
        msg: |
          ================================================================================
          Warte auf ACME-Challenge-Abschluss...
          ================================================================================
          Warte {{ _stabilize_wait_minutes }} Minuten und prüfe alle {{ traefik_stabilize_check_interval }} Sekunden
          ob Traefik stabil läuft und keine Restarts auftreten.
          ================================================================================

    # NOTE(review): `until` stops at the FIRST check that reports "running", so
    # this waits for Traefik to come up (for at most the configured window); it
    # does not observe the container for the whole window. Confirm whether a
    # full-duration observation is intended.
    - name: Monitor Traefik stability
      ansible.builtin.shell:
        cmd: |
          state="$(docker compose ps traefik --format "{{ '{{' }}.State{{ '}}' }}" | head -1)"
          echo "${state:-UNKNOWN}"
        chdir: "{{ _stabilize_stack_dir }}"
      register: traefik_state_check
      changed_when: false
      until: traefik_state_check.stdout == "running"
      retries: "{{ ((_stabilize_wait_minutes | int) * 60 / (traefik_stabilize_check_interval | int)) | int }}"
      delay: "{{ traefik_stabilize_check_interval | int }}"

    - name: Get final Traefik restart count
      ansible.builtin.shell: |
        docker inspect traefik --format '{{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "0"
      register: final_restart_count
      changed_when: false

    - name: Check for Traefik restarts during monitoring
      ansible.builtin.set_fact:
        traefik_restarted: "{{ (final_restart_count.stdout | int) > (initial_restart_count.stdout | int) }}"

    # The fallback text is chosen on empty output: the exit status of the
    # pipeline is that of `tail`, so `|| echo ...` would never fire.
    - name: Check Traefik logs for ACME errors
      ansible.builtin.shell:
        cmd: |
          matches="$(docker compose logs traefik --since {{ _stabilize_wait_minutes }}m 2>&1 | grep -i "acme\|challenge\|certificate" | tail -20)"
          echo "${matches:-No ACME-related messages in logs}"
        chdir: "{{ _stabilize_stack_dir }}"
      register: traefik_acme_logs
      changed_when: false

    - name: Display Traefik ACME logs
      ansible.builtin.debug:
        msg: |
          ================================================================================
          Traefik ACME Logs (letzte {{ _stabilize_wait_minutes }} Minuten):
          ================================================================================
          {{ traefik_acme_logs.stdout }}
          ================================================================================

    - name: Final status check
      ansible.builtin.shell:
        cmd: |
          docker compose ps traefik || echo "Could not get final status"
        chdir: "{{ _stabilize_stack_dir }}"
      register: final_status
      changed_when: false

    - name: Summary
      ansible.builtin.debug:
        msg: |
          ================================================================================
          ZUSAMMENFASSUNG - Traefik Stabilisierung:
          ================================================================================
          Initial Restart Count: {{ initial_restart_count.stdout }}
          Final Restart Count: {{ final_restart_count.stdout }}
          {% if traefik_restarted %}
          ⚠️ WARNUNG: Traefik wurde während der Überwachung neu gestartet!
          Restart Count erhöht sich von {{ initial_restart_count.stdout }} auf {{ final_restart_count.stdout }}

          Nächste Schritte:
          - Führe diagnose-traefik-restarts.yml aus um die Ursache zu finden
          - Prüfe Docker-Events und Logs für Restart-Gründe
          {% else %}
          ✅ Traefik lief stabil während der Überwachung ({{ _stabilize_wait_minutes }} Minuten)
          Keine Restarts aufgetreten.
          {% endif %}

          Final Status:
          {{ final_status.stdout }}

          {% if (acme_json_stat.stat.mode | default('')) == '0600' %}
          ✅ acme.json hat korrekte Berechtigungen
          {% else %}
          ⚠️ acme.json Berechtigungen müssen korrigiert werden
          {% endif %}

          Wichtig:
          - Traefik muss stabil laufen (keine häufigen Restarts)
          - Port 80/443 müssen auf Traefik zeigen
          - acme.json muss beschreibbar sein
          - ACME-Challenges benötigen 5-10 Minuten um abzuschließen

          Nächste Schritte:
          - Prüfe Traefik-Logs regelmäßig auf ACME-Fehler
          - Stelle sicher, dass keine Auto-Restart-Mechanismen aktiv sind
          - Überwache Traefik für weitere {{ _stabilize_wait_minutes }} Minuten
          ================================================================================