---
# Monitor Traefik Continuously
# Watches Traefik logs and Docker events in real time to find the source of
# unexpected restarts.
#
# Flow: record the container state, start two background watchers (Traefik
# log grep and `docker events`), wait for the monitoring period, stop the
# watchers, read their output files and print a summary.
- name: Monitor Traefik Continuously
  hosts: production
  gather_facts: true  # required: ansible_date_time.epoch is used in the log file names
  become: false
  vars:
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    # Default: 30 minutes; can be overridden.
    monitor_duration_minutes: 30
    # Cast to int first: a value passed as a string (e.g. via -e key=value)
    # would otherwise be repeated 60 times instead of multiplied.
    monitor_duration_seconds: "{{ monitor_duration_minutes | int * 60 }}"
    # One fixed path per run, shared by the writer and the reader tasks.
    # The epoch fact is gathered once, so the value is stable for the whole play.
    traefik_monitor_log: "/tmp/traefik_monitor_{{ ansible_date_time.epoch }}.log"
    traefik_docker_events_log: "/tmp/traefik_docker_events_{{ ansible_date_time.epoch }}.log"

  tasks:
    # NOTE(review): item 3 of this banner mentions Docker daemon logs, but no
    # task in this play reads them; the summary only suggests a manual
    # journalctl command.
    - name: Display monitoring information
      ansible.builtin.debug:
        msg: |
          ================================================================================
          TRAEFIK CONTINUOUS MONITORING
          ================================================================================

          Überwachungsdauer: {{ monitor_duration_minutes }} Minuten

          Überwacht:
          1. Traefik-Logs auf "Stopping server gracefully" / "I have to go"
          2. Docker Events für Traefik-Container
          3. Docker Daemon Logs für Container-Stops

          Starte Monitoring...
          ================================================================================

    # Same format string as the final status task so that the two outputs can
    # be compared for equality in the summary.
    - name: Get initial Traefik status
      ansible.builtin.shell: |
        docker inspect traefik --format '{{ '{{' }}.State.Status{{ '}}' }} {{ '{{' }}.State.StartedAt{{ '}}' }} {{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
      register: initial_status
      changed_when: false

    # Runs as a fire-and-forget async job (poll: 0). The registered variable
    # therefore holds the async job handle, not stdout; results are read from
    # the log file afterwards.
    - name: Start monitoring Traefik logs in background
      ansible.builtin.shell:
        chdir: "{{ traefik_stack_path }}"
        cmd: |
          timeout {{ monitor_duration_seconds }} docker compose logs -f traefik 2>&1 |
            grep --line-buffered -iE "stopping server gracefully|I have to go" |
            while read -r line; do
              echo "[$(date '+%Y-%m-%d %H:%M:%S')] $line"
            done > "{{ traefik_monitor_log }}" 2>&1
      register: log_monitor_pid
      changed_when: false
      # One minute of slack so the job is not reaped before `timeout` fires.
      async: "{{ monitor_duration_seconds | int + 60 }}"
      poll: 0

    - name: Start monitoring Docker events in background
      ansible.builtin.shell: |
        timeout {{ monitor_duration_seconds }} docker events --filter container=traefik --filter event=die --format "[{{ '{{' }}.Time{{ '}}' }}] {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.name{{ '}}' }}" > "{{ traefik_docker_events_log }}" 2>&1
      register: docker_events_pid
      changed_when: false
      async: "{{ monitor_duration_seconds | int + 60 }}"
      poll: 0

    - name: Wait for monitoring period
      ansible.builtin.pause:
        minutes: "{{ monitor_duration_minutes | int }}"

    # Best-effort cleanup. The pattern starts with "[d]ocker" so that it does
    # not match the command line of the shell that runs pkill itself.
    - name: Stop log monitoring
      ansible.builtin.shell: |
        pkill -f "[d]ocker compose logs.*traefik" || true
        sleep 2
      changed_when: false
      failed_when: false

    - name: Stop Docker events monitoring
      ansible.builtin.shell: |
        pkill -f "[d]ocker events.*traefik" || true
        sleep 2
      changed_when: false
      failed_when: false

    # slurp returns the file base64-encoded in `.content`; a missing file is
    # tolerated (failed_when: false) and handled with default('') in the summary.
    - name: Read Traefik log monitoring results
      ansible.builtin.slurp:
        src: "{{ traefik_monitor_log }}"
      register: log_results
      changed_when: false
      failed_when: false

    - name: Read Docker events monitoring results
      ansible.builtin.slurp:
        src: "{{ traefik_docker_events_log }}"
      register: docker_events_results
      changed_when: false
      failed_when: false

    - name: Get final Traefik status
      ansible.builtin.shell: |
        docker inspect traefik --format '{{ '{{' }}.State.Status{{ '}}' }} {{ '{{' }}.State.StartedAt{{ '}}' }} {{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
      register: final_status
      changed_when: false

    # Independent cross-check against the container log history, in case the
    # live watcher missed something.
    - name: Check Traefik logs for stop messages during monitoring
      ansible.builtin.shell:
        chdir: "{{ traefik_stack_path }}"
        cmd: |
          docker compose logs traefik --since {{ monitor_duration_minutes | int }}m 2>&1 | grep -iE "stopping server gracefully|I have to go" || echo "Keine Stop-Meldungen gefunden"
      register: traefik_stop_messages
      changed_when: false
      failed_when: false

    - name: Summary
      ansible.builtin.debug:
        msg: |
          ================================================================================
          MONITORING ZUSAMMENFASSUNG ({{ monitor_duration_minutes }} Minuten):
          ================================================================================

          Initial Status: {{ initial_status.stdout }}
          Final Status: {{ final_status.stdout }}

          Traefik Log Monitor ({{ traefik_monitor_log }}):
          {{ (log_results.content | default('') | b64decode | trim) or '(keine Einträge)' }}

          Docker Events ({{ traefik_docker_events_log }}):
          {{ (docker_events_results.content | default('') | b64decode | trim) or '(keine Einträge)' }}

          Traefik Stop-Meldungen während Monitoring:
          {% if traefik_stop_messages.stdout and 'Keine Stop-Meldungen' not in traefik_stop_messages.stdout %}
          ❌ STOP-MELDUNGEN GEFUNDEN:
          {{ traefik_stop_messages.stdout }}

          ⚠️ PROBLEM BESTÄTIGT: Traefik wurde während des Monitorings gestoppt!

          Nächste Schritte:
          1. Prüfe Docker Events Log: {{ traefik_docker_events_log }}
          2. Prüfe Traefik Log Monitor: {{ traefik_monitor_log }}
          3. Prüfe wer den Stop-Befehl ausgeführt hat:
             - journalctl -u docker.service --since "{{ monitor_duration_minutes }} minutes ago"
             - docker events --since "{{ monitor_duration_minutes }} minutes ago" --filter container=traefik
          {% else %}
          ✅ KEINE STOP-MELDUNGEN GEFUNDEN

          Traefik lief stabil während des {{ monitor_duration_minutes }}-minütigen Monitorings.

          {% if initial_status.stdout != final_status.stdout %}
          ⚠️ Status hat sich geändert:
          - Vorher: {{ initial_status.stdout }}
          - Nachher: {{ final_status.stdout }}
          {% endif %}
          {% endif %}

          ================================================================================