Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 10m14s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Failing after 11m25s
Security Vulnerability Scan / Composer Security Audit (push) Has been cancelled
- Remove middleware reference from Gitea Traefik labels (caused routing issues)
- Optimize Gitea connection pool settings (MAX_IDLE_CONNS=30, authentication_timeout=180s)
- Add explicit service reference in Traefik labels
- Fix intermittent 504 timeouts by improving PostgreSQL connection handling

Fixes Gitea unreachability via git.michaelschiemer.de
142 lines
5.9 KiB
YAML
142 lines
5.9 KiB
YAML
---
# Monitor Traefik Continuously
#
# Watches the Traefik logs and the Docker event stream in real time for a
# fixed period in order to find out what is restarting the Traefik container.
#
# Overridable variables:
#   stacks_base_path          - base directory of the compose stacks (defined
#                               outside this playbook, e.g. in inventory)
#   monitor_duration_minutes  - how long to monitor (default: 30)
- name: Monitor Traefik Continuously
  hosts: production
  # Facts are required: ansible_date_time.epoch is used as the run id below.
  gather_facts: true
  become: false

  vars:
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    # Default: 30 minutes, can be overridden (e.g. -e monitor_duration_minutes=60).
    monitor_duration_minutes: 30
    # Extra-vars arrive as strings; without the int cast "30" * 60 would be
    # string repetition instead of multiplication.
    monitor_duration_seconds: "{{ (monitor_duration_minutes | int) * 60 }}"
    # One id per run (time of fact gathering) so that the tasks that write the
    # capture files and the tasks that read them agree on the file names.
    monitor_run_id: "{{ ansible_date_time.epoch }}"
    traefik_log_monitor_file: "/tmp/traefik_monitor_{{ monitor_run_id }}.log"
    traefik_docker_events_file: "/tmp/traefik_docker_events_{{ monitor_run_id }}.log"

  tasks:
    - name: Display monitoring information
      ansible.builtin.debug:
        msg: |
          ================================================================================
          TRAEFIK CONTINUOUS MONITORING
          ================================================================================

          Überwachungsdauer: {{ monitor_duration_minutes }} Minuten

          Überwacht:
          1. Traefik-Logs auf "Stopping server gracefully" / "I have to go"
          2. Docker Events für Traefik-Container
          3. Docker Daemon Logs für Container-Stops

          Starte Monitoring...
          ================================================================================

    # Uses the same format as the final status so that the two strings can be
    # compared directly in the summary.
    - name: Get initial Traefik status
      ansible.builtin.shell: |
        docker inspect traefik --format '{{ '{{' }}.State.Status{{ '}}' }} {{ '{{' }}.State.StartedAt{{ '}}' }} {{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
      register: initial_status
      changed_when: false

    # Runs in the foreground of an Ansible async job (poll: 0), so no shell
    # backgrounding is needed; `timeout` ends the pipeline after the monitoring
    # period. `--tail 0` skips historic log lines, which would otherwise be
    # stamped with the current time.
    - name: Start monitoring Traefik logs in background
      ansible.builtin.shell: |
        timeout {{ monitor_duration_seconds }} docker compose logs --tail 0 -f traefik 2>&1 \
          | grep --line-buffered -iE "stopping server gracefully|I have to go" \
          | while IFS= read -r line; do
              echo "[$(date '+%Y-%m-%d %H:%M:%S')] $line"
            done > {{ traefik_log_monitor_file | quote }} 2>&1
      args:
        chdir: "{{ traefik_stack_path }}"
      register: log_monitor_job
      changed_when: false
      async: "{{ (monitor_duration_seconds | int) + 60 }}"
      poll: 0

    - name: Start monitoring Docker events in background
      ansible.builtin.shell: |
        timeout {{ monitor_duration_seconds }} docker events --filter container=traefik --filter event=die --format "[{{ '{{' }}.Time{{ '}}' }}] {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.name{{ '}}' }}" > {{ traefik_docker_events_file | quote }} 2>&1
      register: docker_events_job
      changed_when: false
      async: "{{ (monitor_duration_seconds | int) + 60 }}"
      poll: 0

    - name: Wait for monitoring period
      ansible.builtin.pause:
        minutes: "{{ monitor_duration_minutes | int }}"

    # Safety net in case a monitor outlives its timeout. The [d] bracket keeps
    # the pattern from matching the `sh -c` command line that runs pkill itself,
    # which would otherwise be killed as well.
    - name: Stop log monitoring
      ansible.builtin.shell: |
        pkill -f "[d]ocker compose logs.*traefik" || true
        sleep 2
      changed_when: false
      failed_when: false

    - name: Stop Docker events monitoring
      ansible.builtin.shell: |
        pkill -f "[d]ocker events.*traefik" || true
        sleep 2
      changed_when: false
      failed_when: false

    # slurp returns the file content base64-encoded in `.content`; a missing
    # file is tolerated and treated as "nothing captured" in the summary.
    - name: Read Traefik log monitoring results
      ansible.builtin.slurp:
        src: "{{ traefik_log_monitor_file }}"
      register: log_results
      changed_when: false
      failed_when: false

    - name: Read Docker events monitoring results
      ansible.builtin.slurp:
        src: "{{ traefik_docker_events_file }}"
      register: docker_events_results
      changed_when: false
      failed_when: false

    - name: Get final Traefik status
      ansible.builtin.shell: |
        docker inspect traefik --format '{{ '{{' }}.State.Status{{ '}}' }} {{ '{{' }}.State.StartedAt{{ '}}' }} {{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
      register: final_status
      changed_when: false

    # Independent cross-check against the container log itself; prints a
    # placeholder line when grep finds nothing.
    - name: Check Traefik logs for stop messages during monitoring
      ansible.builtin.shell: |
        docker compose logs traefik --since {{ monitor_duration_minutes | int }}m 2>&1 | grep -iE "stopping server gracefully|I have to go" || echo "Keine Stop-Meldungen gefunden"
      args:
        chdir: "{{ traefik_stack_path }}"
      register: traefik_stop_messages
      changed_when: false
      failed_when: false

    - name: Summary
      ansible.builtin.debug:
        msg: |
          ================================================================================
          MONITORING ZUSAMMENFASSUNG ({{ monitor_duration_minutes }} Minuten):
          ================================================================================

          Initial Status: {{ initial_status.stdout }}
          Final Status: {{ final_status.stdout }}

          Live mitgeschnittene Traefik Stop-Meldungen ({{ traefik_log_monitor_file }}):
          {{ (log_results.content | default('', true) | b64decode | trim) or 'Keine' }}

          Live mitgeschnittene Docker Events ({{ traefik_docker_events_file }}):
          {{ (docker_events_results.content | default('', true) | b64decode | trim) or 'Keine' }}

          Traefik Stop-Meldungen während Monitoring:
          {% if traefik_stop_messages.stdout and 'Keine Stop-Meldungen' not in traefik_stop_messages.stdout %}
          ❌ STOP-MELDUNGEN GEFUNDEN:
          {{ traefik_stop_messages.stdout }}

          ⚠️ PROBLEM BESTÄTIGT: Traefik wurde während des Monitorings gestoppt!

          Nächste Schritte:
          1. Prüfe Docker Events Log: {{ traefik_docker_events_file }}
          2. Prüfe Traefik Log Monitor: {{ traefik_log_monitor_file }}
          3. Prüfe wer den Stop-Befehl ausgeführt hat:
          - journalctl -u docker.service --since "{{ monitor_duration_minutes }} minutes ago"
          - docker events --since "{{ monitor_duration_minutes }} minutes ago" --filter container=traefik
          {% else %}
          ✅ KEINE STOP-MELDUNGEN GEFUNDEN

          Traefik lief stabil während des {{ monitor_duration_minutes }}-minütigen Monitorings.

          {% if initial_status.stdout != final_status.stdout %}
          ⚠️ Status hat sich geändert:
          - Vorher: {{ initial_status.stdout }}
          - Nachher: {{ final_status.stdout }}
          {% endif %}
          {% endif %}

          ================================================================================