Files
michaelschiemer/deployment/ansible/playbooks/monitor-traefik-continuously.yml
Michael Schiemer 36ef2a1e2c
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 10m14s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Failing after 11m25s
Security Vulnerability Scan / Composer Security Audit (push) Has been cancelled
fix: Gitea Traefik routing and connection pool optimization
- Remove middleware reference from Gitea Traefik labels (caused routing issues)
- Optimize Gitea connection pool settings (MAX_IDLE_CONNS=30, authentication_timeout=180s)
- Add explicit service reference in Traefik labels
- Fix intermittent 504 timeouts by improving PostgreSQL connection handling

Fixes Gitea unreachability via git.michaelschiemer.de
2025-11-09 14:46:15 +01:00

142 lines
5.9 KiB
YAML

---
# Monitor Traefik Continuously
#
# Watches the Traefik logs and the Docker event stream in real time for a fixed
# period in order to find out what is stopping/restarting the Traefik container.
#
# Inputs:
#   stacks_base_path           - must be provided by inventory/group vars.
#   monitor_duration_minutes   - how long to watch (default 30); may be
#                                overridden, e.g. -e monitor_duration_minutes=10.
#
# Outputs (on the target host):
#   /tmp/traefik_monitor_<epoch>.log        - timestamped stop messages
#   /tmp/traefik_docker_events_<epoch>.log  - "die" events of the container
- name: Monitor Traefik Continuously
  hosts: production
  gather_facts: true  # needed: ansible_date_time.epoch names the per-run log files
  become: false
  vars:
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    monitor_duration_minutes: 30  # default: 30 minutes, can be overridden
    # Cast to int first: values passed with -e key=value arrive as strings, and
    # "30" * 60 would repeat the string instead of multiplying.
    monitor_duration_seconds: "{{ (monitor_duration_minutes | int) * 60 }}"
    # The writer tasks and the reader tasks share these paths, so the readers
    # always look at the file the monitors actually wrote.
    traefik_log_monitor_file: "/tmp/traefik_monitor_{{ ansible_date_time.epoch }}.log"
    traefik_docker_events_file: "/tmp/traefik_docker_events_{{ ansible_date_time.epoch }}.log"
  tasks:
    - name: Display monitoring information
      ansible.builtin.debug:
        msg: |
          ================================================================================
          TRAEFIK CONTINUOUS MONITORING
          ================================================================================
          Überwachungsdauer: {{ monitor_duration_minutes }} Minuten
          Überwacht:
          1. Traefik-Logs auf "Stopping server gracefully" / "I have to go"
          2. Docker Events für Traefik-Container
          3. Docker Daemon Logs für Container-Stops
          Starte Monitoring...
          ================================================================================

    # Same format string as "Get final Traefik status" so that the two values
    # can be compared verbatim in the summary.
    - name: Get initial Traefik status
      ansible.builtin.shell: |
        docker inspect traefik --format '{{ '{{' }}.State.Status{{ '}}' }} {{ '{{' }}.State.StartedAt{{ '}}' }} {{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
      register: initial_status
      changed_when: false

    # Fire-and-forget async job: the pipeline runs in the foreground of the job
    # and ends when `timeout` stops `docker compose logs`. The registered value
    # only holds async job metadata, not stdout.
    - name: Start monitoring Traefik logs in background
      ansible.builtin.shell: |
        timeout {{ monitor_duration_seconds }} docker compose logs -f traefik 2>&1 |
          grep --line-buffered -iE "stopping server gracefully|I have to go" |
          while IFS= read -r line; do
            echo "[$(date '+%Y-%m-%d %H:%M:%S')] $line"
          done > "{{ traefik_log_monitor_file }}" 2>&1
      args:
        chdir: "{{ traefik_stack_path }}"
      register: log_monitor_job
      changed_when: false
      async: "{{ (monitor_duration_seconds | int) + 60 }}"
      poll: 0

    - name: Start monitoring Docker events in background
      ansible.builtin.shell: |
        timeout {{ monitor_duration_seconds }} docker events --filter container=traefik --filter event=die --format "[{{ '{{' }}.Time{{ '}}' }}] {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.name{{ '}}' }}" 2>&1 | tee "{{ traefik_docker_events_file }}"
      register: docker_events_job
      changed_when: false
      async: "{{ (monitor_duration_seconds | int) + 60 }}"
      poll: 0

    - name: Wait for monitoring period
      ansible.builtin.pause:
        minutes: "{{ monitor_duration_minutes | int }}"

    # Safety net in case `timeout` has not ended the monitors yet.
    # The [d] bracket keeps the pattern from matching this task's own
    # `/bin/sh -c` command line, which `pkill -f` would otherwise kill.
    - name: Stop log monitoring
      ansible.builtin.shell: |
        pkill -f "[d]ocker compose logs.*traefik" || true
        sleep 2
      changed_when: false
      failed_when: false

    - name: Stop Docker events monitoring
      ansible.builtin.shell: |
        pkill -f "[d]ocker events.*traefik" || true
        sleep 2
      changed_when: false
      failed_when: false

    # slurp returns the file base64-encoded in `.content`; a missing file is
    # tolerated (failed_when: false) and handled with default('') in the summary.
    - name: Read Traefik log monitoring results
      ansible.builtin.slurp:
        src: "{{ traefik_log_monitor_file }}"
      register: log_results
      changed_when: false
      failed_when: false

    - name: Read Docker events monitoring results
      ansible.builtin.slurp:
        src: "{{ traefik_docker_events_file }}"
      register: docker_events_results
      changed_when: false
      failed_when: false

    - name: Get final Traefik status
      ansible.builtin.shell: |
        docker inspect traefik --format '{{ '{{' }}.State.Status{{ '}}' }} {{ '{{' }}.State.StartedAt{{ '}}' }} {{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
      register: final_status
      changed_when: false

    # Independent cross-check against the container log for the same window.
    - name: Check Traefik logs for stop messages during monitoring
      ansible.builtin.shell: |
        docker compose logs traefik --since {{ monitor_duration_minutes | int }}m 2>&1 | grep -iE "stopping server gracefully|I have to go" || echo "Keine Stop-Meldungen gefunden"
      args:
        chdir: "{{ traefik_stack_path }}"
      register: traefik_stop_messages
      changed_when: false
      failed_when: false

    - name: Summary
      ansible.builtin.debug:
        msg: |
          ================================================================================
          MONITORING ZUSAMMENFASSUNG ({{ monitor_duration_minutes }} Minuten):
          ================================================================================
          Initial Status: {{ initial_status.stdout }}
          Final Status: {{ final_status.stdout }}
          Traefik Stop-Meldungen während Monitoring:
          {% if traefik_stop_messages.stdout and 'Keine Stop-Meldungen' not in traefik_stop_messages.stdout %}
          ❌ STOP-MELDUNGEN GEFUNDEN:
          {{ traefik_stop_messages.stdout }}
          ⚠️ PROBLEM BESTÄTIGT: Traefik wurde während des Monitorings gestoppt!
          Nächste Schritte:
          1. Prüfe Docker Events Log: /tmp/traefik_docker_events_*.log
          2. Prüfe Traefik Log Monitor: /tmp/traefik_monitor_*.log
          3. Prüfe wer den Stop-Befehl ausgeführt hat:
          - journalctl -u docker.service --since "{{ monitor_duration_minutes }} minutes ago"
          - docker events --since "{{ monitor_duration_minutes }} minutes ago" --filter container=traefik
          {% else %}
          ✅ KEINE STOP-MELDUNGEN GEFUNDEN
          Traefik lief stabil während des {{ monitor_duration_minutes }}-minütigen Monitorings.
          {% if initial_status.stdout != final_status.stdout %}
          ⚠️ Status hat sich geändert:
          - Vorher: {{ initial_status.stdout }}
          - Nachher: {{ final_status.stdout }}
          {% endif %}
          {% endif %}
          Traefik Log Monitor ({{ traefik_log_monitor_file }}):
          {{ log_results.content | default('') | b64decode | trim | default('(keine Einträge)', true) }}
          Docker Events ({{ traefik_docker_events_file }}):
          {{ docker_events_results.content | default('') | b64decode | trim | default('(keine Einträge)', true) }}
          ================================================================================