Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 10m14s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Failing after 11m25s
Security Vulnerability Scan / Composer Security Audit (push) Has been cancelled
- Remove middleware reference from Gitea Traefik labels (caused routing issues) - Optimize Gitea connection pool settings (MAX_IDLE_CONNS=30, authentication_timeout=180s) - Add explicit service reference in Traefik labels - Fix intermittent 504 timeouts by improving PostgreSQL connection handling Fixes Gitea unreachability via git.michaelschiemer.de
329 lines
15 KiB
YAML
329 lines
15 KiB
YAML
---
# Find Source of Traefik Restarts
# Comprehensive diagnosis to locate the source of the periodic Traefik restarts.
- name: Find Source of Traefik Restarts
  hosts: production
  # Canonical YAML booleans (true/false) instead of the YAML 1.1 yes/no forms.
  gather_facts: true
  become: true
  vars:
    # Directory the tasks `cd` into before calling `docker compose`.
    # stacks_base_path is not defined in this play; it is expected to be
    # supplied from outside (inventory/group vars) - verify before running.
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    # Duration of the live `docker events` watch in seconds
    # (120 = 2 minutes; can be increased).
    monitor_duration_seconds: 120

  tasks:
|
|
- name: Check Traefik container restart count
  # Read-only probe: asks Docker for the RestartCount field of the container
  # named "traefik". If the inspect call fails, "0" is printed instead.
  # The {{ '{{' }} ... {{ '}}' }} sequences emit literal Go-template braces.
  ansible.builtin.shell:
    cmd: >-
      docker inspect traefik
      --format '{{ '{{' }}.RestartCount{{ '}}' }}'
      2>/dev/null || echo "0"
  changed_when: false
  register: traefik_restart_count
|
|
|
|
- name: Check Traefik container start time
  # Read-only probe: prints State.StartedAt of the "traefik" container, or
  # the literal "UNKNOWN" when the inspect call fails.
  ansible.builtin.shell:
    cmd: >-
      docker inspect traefik
      --format '{{ '{{' }}.State.StartedAt{{ '}}' }}'
      2>/dev/null || echo "UNKNOWN"
  changed_when: false
  register: traefik_started_at
|
|
|
|
- name: Analyze Traefik logs for "Stopping server gracefully" messages
  # Collects the last 20 compose log lines (stdout and stderr merged) that
  # mention a graceful shutdown. Never fails and never reports a change.
  ansible.builtin.shell:
    cmd: |
      cd {{ traefik_stack_path }}
      docker compose logs traefik 2>&1 |
        grep -i "stopping server gracefully\|I have to go" |
        tail -20
  failed_when: false
  changed_when: false
  register: traefik_stop_messages
|
|
|
|
- name: Extract timestamps from stop messages
  # Same log filter as the stop-message task, then keeps only the
  # YYYY-MM-DDTHH:MM:SS portion of each line, sorted and de-duplicated.
  ansible.builtin.shell:
    cmd: |
      cd {{ traefik_stack_path }}
      docker compose logs traefik 2>&1 |
        grep -i "stopping server gracefully\|I have to go" |
        tail -20 |
        grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}' |
        sort |
        uniq
  failed_when: false
  changed_when: false
  register: stop_timestamps
|
|
|
|
- name: Check Docker daemon logs for Traefik stop events
  # Scans the last 24 hours of the docker.service journal for lines that
  # pair "traefik" with stop/kill/die wording and keeps the last 30.
  # The grep and its fallback are grouped so the "No Traefik stop events"
  # message is printed when grep finds nothing; attaching `||` after
  # `| tail -30` never triggered because tail exits 0.
  ansible.builtin.shell: |
    {
      journalctl -u docker.service --since "24 hours ago" --no-pager |
        grep -iE "traefik.*stop|traefik.*kill|traefik.*die|container.*traefik.*stopped" ||
        echo "No Traefik stop events in Docker daemon logs"
    } | tail -30
  register: docker_daemon_logs
  changed_when: false
  failed_when: false
|
|
|
|
- name: Check Docker events for Traefik (last 24 hours)
  # Lists up to 20 "die" events of the traefik container from the last 24h.
  # --until takes a Unix timestamp, a date string or a Go duration; the
  # word "now" is none of these, and with stderr discarded the resulting
  # error was invisible. The current epoch time is passed instead.
  # The fallback is decided on the captured output, because `||` after
  # `| tail` never fires (tail exits 0).
  ansible.builtin.shell: |
    events=$(docker events --since 24h --until "$(date +%s)" --filter container=traefik --filter event=die --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.name{{ '}}' }}" 2>/dev/null | tail -20)
    if [ -n "$events" ]; then
      printf '%s\n' "$events"
    else
      echo "No Traefik die events found"
    fi
  register: docker_events_traefik
  changed_when: false
  failed_when: false
|
|
|
|
- name: Check all user crontabs for Traefik/Docker commands
  # Walks every account in /etc/passwd and prints crontab lines that
  # mention traefik. Each crontab is read once and the matches reused.
  # A flag drives the fallback message, because `done || echo ...` could
  # never fire (each iteration ended in `|| true`).
  ansible.builtin.shell: |
    pattern='traefik|docker.*compose.*traefik|docker.*stop.*traefik|docker.*restart.*traefik|docker.*down.*traefik'
    found=0
    for user in $(cut -f1 -d: /etc/passwd); do
      # Users without a crontab or without matches are skipped silently.
      matches=$(crontab -u "$user" -l 2>/dev/null | grep -E "$pattern") || continue
      echo "=== User: $user ==="
      printf '%s\n' "$matches"
      found=1
    done
    [ "$found" -eq 1 ] || echo "No user crontabs with Traefik commands found"
  register: all_user_crontabs
  changed_when: false
|
|
|
|
- name: Check system-wide cron directories
  # Greps /etc/crontab and the /etc/cron.* directories for Traefik-related
  # commands. Output is produced ONLY for locations with matches: printing
  # a header and "No matches" for every clean directory meant that one
  # clean directory made the whole result look like "nothing found" to any
  # consumer testing for the "No matches" substring.
  # Empty stdout therefore means "no system cron entries found".
  ansible.builtin.shell: |
    pattern='traefik|docker.*compose.*traefik|docker.*stop.*traefik|docker.*restart.*traefik|docker.*down.*traefik'
    for target in /etc/crontab /etc/cron.d /etc/cron.daily /etc/cron.hourly /etc/cron.weekly /etc/cron.monthly; do
      [ -e "$target" ] || continue
      matches=$(grep -rE "$pattern" "$target" 2>/dev/null) || continue
      echo "=== $target ==="
      printf '%s\n' "$matches"
    done
    # Keep the task green regardless of the last loop iteration's status.
    exit 0
  register: system_cron_dirs
  changed_when: false
|
|
|
|
- name: Check systemd timers and services
  # Lists systemd timers and services whose listing line mentions traefik
  # or docker compose. A section is printed only when it has matches, and
  # the "No Traefik-related ..." message appears only when BOTH are empty.
  # Previously a hit in one section plus a miss in the other put the "No
  # Traefik-related" text into stdout and hid the hit from the summary.
  ansible.builtin.shell: |
    timers=$(systemctl list-timers --all --no-pager | grep -E "traefik|docker.*compose")
    services=$(systemctl list-units --type=service --all --no-pager | grep -E "traefik|docker.*compose")
    if [ -n "$timers" ]; then
      echo "=== Active Timers ==="
      printf '%s\n' "$timers"
    fi
    if [ -n "$services" ]; then
      echo "=== Custom Services ==="
      printf '%s\n' "$services"
    fi
    if [ -z "$timers$services" ]; then
      echo "No Traefik-related timers or services"
    fi
  register: systemd_services
  changed_when: false
|
|
|
|
- name: Check for scripts in deployment directory that restart Traefik
  # Lists up to 30 *.sh / *.yml / *.yaml files under /home/deploy/deployment
  # whose content matches a Traefik restart/down/stop pattern.
  # Prints nothing when no file matches.
  ansible.builtin.shell:
    cmd: >-
      find /home/deploy/deployment -type f
      \( -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \)
      -exec grep -lE "traefik.*restart|docker.*compose.*traefik.*restart|docker.*compose.*traefik.*down|docker.*compose.*traefik.*stop" {} \;
      2>/dev/null | head -30
  changed_when: false
  register: deployment_scripts
|
|
|
|
- name: Check Ansible roles for traefik_auto_restart or restart tasks
  # Recursively greps the roles tree for restart-related settings (max 20).
  # --exclude-dir=.git skips repository metadata; `grep -v ".git"` treated
  # the dot as "any character" and ran against whole result lines, so it
  # also dropped unrelated hits.
  # The fallback is decided on the captured output, because `||` after
  # `| head` never fires (head exits 0).
  ansible.builtin.shell: |
    matches=$(grep -rE --exclude-dir=.git "traefik_auto_restart|traefik.*restart|docker.*compose.*traefik.*restart" /home/deploy/deployment/ansible/roles/ 2>/dev/null | head -20)
    if [ -n "$matches" ]; then
      printf '%s\n' "$matches"
    else
      echo "No auto-restart settings found"
    fi
  register: ansible_auto_restart
  changed_when: false
|
|
|
|
- name: Check whether the Traefik compose service is running
  # Despite the registered variable name, this task does not detect
  # `docker compose watch`. It only reports whether `docker compose ps`
  # lists a traefik entry, printing "running" or "not_running".
  # The variable name is kept because later tasks reference it.
  ansible.builtin.shell: |
    cd {{ traefik_stack_path }}
    docker compose ps traefik 2>/dev/null | grep -q "traefik" && echo "running" || echo "not_running"
  register: docker_compose_watch
  changed_when: false
  failed_when: false
|
|
|
|
- name: Check if Docker Compose is running in watch mode
  # Searches the process table for a docker compose invocation containing
  # "watch" / "--watch" (excluding the grep itself). Prints a fixed
  # "No Docker Compose watch mode detected" message when none is found.
  ansible.builtin.shell:
    cmd: >-
      ps aux
      | grep -E "docker.*compose.*watch|docker.*compose.*--watch"
      | grep -v grep
      || echo "No Docker Compose watch mode detected"
  changed_when: false
  register: watch_mode_process
|
|
|
|
- name: Check for monitoring/watchdog scripts
  # Finds regular files under /home/deploy whose name contains monitor,
  # watchdog or health and whose content mentions traefik (max 10).
  # The -name alternatives are parenthesised so -type f applies to all of
  # them; without the group, find's -o precedence restricted only the
  # first pattern to regular files.
  # -print0 / xargs -0 keep paths with whitespace intact, and -r skips
  # grep when find returns nothing.
  # The fallback is decided on the captured output, because `||` after
  # `| head` never fires (head exits 0).
  ansible.builtin.shell: |
    matches=$(find /home/deploy -type f \( -name "*monitor*" -o -name "*watchdog*" -o -name "*health*" \) -print0 2>/dev/null | xargs -0 -r grep -lE "traefik|docker.*compose.*traefik" 2>/dev/null | head -10)
    if [ -n "$matches" ]; then
      printf '%s\n' "$matches"
    else
      echo "No monitoring scripts found"
    fi
  register: monitoring_scripts
  changed_when: false
|
|
|
|
- name: Check Gitea Workflows for Traefik restarts
  # Finds Gitea/GitHub workflow files under /home/deploy that contain a
  # Traefik restart command (max 10).
  # The -path alternatives are parenthesised so -type f applies to all of
  # them (find's -o precedence previously bound it to the first only).
  # *.yaml workflow files are now covered in addition to *.yml.
  # -print0 / xargs -0 -r make the hand-off to grep safe for odd paths and
  # empty results.
  # The fallback is decided on the captured output, because `||` after
  # `| head` never fires (head exits 0).
  ansible.builtin.shell: |
    matches=$(find /home/deploy -type f \( -path "*/.gitea/workflows/*.yml" -o -path "*/.gitea/workflows/*.yaml" -o -path "*/.github/workflows/*.yml" -o -path "*/.github/workflows/*.yaml" \) -print0 2>/dev/null | xargs -0 -r grep -lE "traefik.*restart|docker.*compose.*traefik.*restart" 2>/dev/null | head -10)
    if [ -n "$matches" ]; then
      printf '%s\n' "$matches"
    else
      echo "No Gitea workflows found that restart Traefik"
    fi
  register: gitea_workflows
  changed_when: false
|
|
|
|
# Starts a background watch of all Docker events for the traefik container.
# The task name now reports the configured duration; it previously claimed
# "5 minutes" while monitor_duration_seconds controls the actual runtime.
# async/poll 0 returns immediately; the job id is stored in the registered
# variable and allows 10 s of slack on top of the watch time.
- name: "Monitor Docker events in real-time ({{ monitor_duration_seconds }} seconds)"
  ansible.builtin.shell: |
    timeout {{ monitor_duration_seconds }} docker events --filter container=traefik --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.name{{ '}}' }}" 2>&1 || echo "Monitoring completed or timeout"
  register: docker_events_realtime
  changed_when: false
  failed_when: false
  async: "{{ monitor_duration_seconds + 10 }}"
  poll: 0
|
|
|
|
- name: Wait for monitoring to complete
  # Polls the background job registered as docker_events_realtime every
  # 10 seconds until it reports finished. The retry budget is the watch
  # duration divided by the delay, plus 5 spare attempts.
  # Never fails the play, even when polling runs out of retries.
  ansible.builtin.async_status:
    jid: "{{ docker_events_realtime.ansible_job_id }}"
  delay: 10
  retries: "{{ (monitor_duration_seconds / 10) | int + 5 }}"
  until: monitoring_result.finished
  failed_when: false
  register: monitoring_result
|
|
|
|
- name: Check system reboot history
  # Reports host reboots recorded in the last 24 hours (max 10 lines).
  # Only lines starting with "reboot" are kept: `last` normally appends a
  # "wtmp begins ..." footer, which made stdout non-empty even when no
  # reboot happened and caused consumers to report reboots on every run.
  # The fallback is decided on the captured output, because `||` after
  # `| head` never fires (head exits 0).
  ansible.builtin.shell: |
    reboots=$(last reboot --since "24 hours ago" 2>/dev/null | grep '^reboot' | head -10)
    if [ -n "$reboots" ]; then
      printf '%s\n' "$reboots"
    else
      echo "No reboots in last 24 hours"
    fi
  register: reboot_history
  changed_when: false
  failed_when: false
|
|
|
|
- name: Check for at jobs
  # Inspects every queued at(1) job and prints the lines of its script
  # that mention traefik. Each job body is dumped once and reused.
  # A for-loop over the job ids replaces `atq | while read`, so the loop
  # runs in the main shell and the `found` flag survives it. The old
  # `done || echo ...` fallback could never fire.
  ansible.builtin.shell: |
    found=0
    for job_id in $(atq 2>/dev/null | awk '{print $1}'); do
      matches=$(at -c "$job_id" 2>/dev/null | grep -E "traefik|docker.*compose.*traefik") || continue
      echo "=== Job ID: $job_id ==="
      printf '%s\n' "$matches"
      found=1
    done
    [ "$found" -eq 1 ] || echo "No at jobs found or atq not available"
  register: at_jobs
  changed_when: false
|
|
|
|
- name: Check Docker daemon configuration for auto-restart
  # Prints the lines of /etc/docker/daemon.json that mention "restart" or
  # "live-restore" (case-insensitive). When the file is missing or has no
  # such line, a fixed "No restart settings" message is printed instead.
  ansible.builtin.shell:
    cmd: >-
      grep -iE "restart|live-restore" /etc/docker/daemon.json 2>/dev/null
      || echo "No restart settings in daemon.json"
  failed_when: false
  changed_when: false
  register: docker_daemon_config
|
|
|
|
- name: Check if Traefik has restart policy
  # Reads the effective restart policy from the running container rather
  # than grepping the 5 lines after "traefik:" in `docker compose config`.
  # That window rarely reaches the `restart` key, and "traefik:" can also
  # match unrelated entries such as networks.
  # Prints "restart: <policy>", or the fallback message when the container
  # cannot be inspected or its policy is "no".
  ansible.builtin.shell: |
    policy=$(docker inspect traefik --format '{{ '{{' }}.HostConfig.RestartPolicy.Name{{ '}}' }}' 2>/dev/null)
    if [ -n "$policy" ] && [ "$policy" != "no" ]; then
      echo "restart: $policy"
    else
      echo "No explicit restart policy found"
    fi
  register: traefik_restart_policy
  changed_when: false
  failed_when: false
|
|
|
|
- name: Summary
  # Renders the collected diagnostics as one report (report text is German).
  # Changes compared with the previous template:
  #   * Every "was something found?" decision is computed once as a
  #     has_* flag and reused in all sections.
  #   * Deployment scripts are detected by non-empty output. The old
  #     substring test for 'No' hid any result whose path contained "No".
  #   * System cron / systemd results are judged line by line, ignoring
  #     "=== ... ===" headers and "No ..." placeholders, so one empty
  #     section cannot hide hits from another.
  #   * The compose running state is shown under "Traefik Status"; it was
  #     always listed as a restart source before, which it is not.
  #   * The "nothing found" banner now considers all detected sources
  #     except reboots, which it lists itself as a possible cause.
  #   * monitoring_result.finished is read with a default, since the wait
  #     task never fails and may not provide it.
  ansible.builtin.debug:
    msg: |
      {% set user_cron_out = all_user_crontabs.stdout | default('') | trim %}
      {% set has_user_cron = user_cron_out | length > 0 and 'No user crontabs' not in user_cron_out %}
      {% set has_system_cron = system_cron_dirs.stdout_lines | default([]) | reject('match', '^(===.*===|No matches)?$') | list | length > 0 %}
      {% set has_systemd = systemd_services.stdout_lines | default([]) | reject('match', '^(===.*===|No Traefik-related.*)?$') | list | length > 0 %}
      {% set has_deploy_scripts = deployment_scripts.stdout | default('') | trim | length > 0 %}
      {% set auto_restart_out = ansible_auto_restart.stdout | default('') | trim %}
      {% set has_auto_restart = auto_restart_out | length > 0 and 'No auto-restart' not in auto_restart_out %}
      {% set workflows_out = gitea_workflows.stdout | default('') | trim %}
      {% set has_workflows = workflows_out | length > 0 and 'No Gitea workflows' not in workflows_out %}
      {% set monitoring_out = monitoring_scripts.stdout | default('') | trim %}
      {% set has_monitoring = monitoring_out | length > 0 and 'No monitoring scripts' not in monitoring_out %}
      {% set at_out = at_jobs.stdout | default('') | trim %}
      {% set has_at_jobs = at_out | length > 0 and 'No at jobs' not in at_out %}
      {% set watch_out = watch_mode_process.stdout | default('') | trim %}
      {% set has_watch_process = watch_out | length > 0 and 'No Docker Compose watch' not in watch_out %}
      {% set reboot_out = reboot_history.stdout | default('') | trim %}
      {% set has_reboots = reboot_out | length > 0 and 'No reboots' not in reboot_out %}
      {% set any_source = has_user_cron or has_system_cron or has_systemd or has_deploy_scripts or has_auto_restart or has_workflows or has_monitoring or has_at_jobs or has_watch_process %}
      ================================================================================
      TRAEFIK RESTART SOURCE DIAGNOSE - ZUSAMMENFASSUNG:
      ================================================================================

      Traefik Status:
      - Restart Count: {{ traefik_restart_count.stdout }}
      - Started At: {{ traefik_started_at.stdout }}
      - Compose Service: {{ docker_compose_watch.stdout | default('unknown', true) }}
      - Stop Messages gefunden: {{ traefik_stop_messages.stdout_lines | length }} (letzte 20)

      Stop-Zeitstempel (letzte 20):
      {% if stop_timestamps.stdout %}
      {{ stop_timestamps.stdout }}
      {% else %}
      Keine Stop-Zeitstempel gefunden
      {% endif %}

      Docker Events (letzte 24h):
      {% if docker_events_traefik.stdout and 'No Traefik die events' not in docker_events_traefik.stdout %}
      {{ docker_events_traefik.stdout }}
      {% else %}
      Keine Traefik die-Events in den letzten 24 Stunden
      {% endif %}

      Docker Daemon Logs:
      {% if docker_daemon_logs.stdout and 'No Traefik stop events' not in docker_daemon_logs.stdout %}
      {{ docker_daemon_logs.stdout }}
      {% else %}
      Keine Traefik-Stop-Events in Docker-Daemon-Logs
      {% endif %}

      Gefundene Quellen:
      {% if has_user_cron %}
      1. ❌ CRONJOBS (User):
      {{ all_user_crontabs.stdout }}
      {% endif %}

      {% if has_system_cron %}
      2. ❌ SYSTEM CRON:
      {{ system_cron_dirs.stdout }}
      {% endif %}

      {% if has_systemd %}
      3. ❌ SYSTEMD TIMERS/SERVICES:
      {{ systemd_services.stdout }}
      {% endif %}

      {% if has_deploy_scripts %}
      4. ⚠️ DEPLOYMENT SCRIPTS:
      {{ deployment_scripts.stdout }}
      {% endif %}

      {% if has_auto_restart %}
      5. ⚠️ ANSIBLE AUTO-RESTART:
      {{ ansible_auto_restart.stdout }}
      {% endif %}

      {% if has_workflows %}
      6. ⚠️ GITEA WORKFLOWS:
      {{ gitea_workflows.stdout }}
      {% endif %}

      {% if has_monitoring %}
      7. ⚠️ MONITORING SCRIPTS:
      {{ monitoring_scripts.stdout }}
      {% endif %}

      {% if has_at_jobs %}
      8. ❌ AT JOBS:
      {{ at_jobs.stdout }}
      {% endif %}

      {% if has_watch_process %}
      9. ❌ DOCKER COMPOSE WATCH MODE (PROZESS):
      {{ watch_mode_process.stdout }}
      {% endif %}

      {% if has_reboots %}
      10. ⚠️ SYSTEM REBOOTS:
      {{ reboot_history.stdout }}
      {% endif %}

      Real-Time Monitoring ({{ monitor_duration_seconds }} Sekunden):
      {% if monitoring_result.finished | default(false) and monitoring_result.ansible_job_id | default('') %}
      {{ monitoring_result.stdout | default('Keine Events während Monitoring', true) }}
      {% else %}
      Monitoring läuft noch oder wurde unterbrochen
      {% endif %}

      ================================================================================
      NÄCHSTE SCHRITTE:
      ================================================================================

      {% if has_user_cron %}
      1. ❌ CRONJOBS DEAKTIVIEREN:
      - Prüfe gefundene Cronjobs: {{ all_user_crontabs.stdout }}
      - Entferne oder kommentiere die entsprechenden Einträge
      {% endif %}

      {% if has_system_cron %}
      2. ❌ SYSTEM CRON DEAKTIVIEREN:
      - Prüfe gefundene System-Cronjobs: {{ system_cron_dirs.stdout }}
      - Entferne oder benenne die Dateien um
      {% endif %}

      {% if has_systemd %}
      3. ❌ SYSTEMD TIMERS/SERVICES DEAKTIVIEREN:
      - Prüfe gefundene Services/Timers: {{ systemd_services.stdout }}
      - Deaktiviere mit: systemctl disable <service>
      {% endif %}

      {% if has_deploy_scripts %}
      4. ⚠️ DEPLOYMENT SCRIPTS PRÜFEN:
      - Prüfe gefundene Scripts: {{ deployment_scripts.stdout }}
      - Entferne oder kommentiere Traefik-Restart-Befehle
      {% endif %}

      {% if has_auto_restart %}
      5. ⚠️ ANSIBLE AUTO-RESTART PRÜFEN:
      - Prüfe gefundene Einstellungen: {{ ansible_auto_restart.stdout }}
      - Setze traefik_auto_restart: false in group_vars
      {% endif %}

      {% if not any_source %}

      ⚠️ KEINE AUTOMATISCHEN RESTART-MECHANISMEN GEFUNDEN!

      Mögliche Ursachen:
      1. Externer Prozess (nicht über Cron/Systemd)
      2. Docker-Service-Restarts (systemctl restart docker)
      3. Host-Reboots
      4. Manuelle Restarts (von außen)
      5. Monitoring-Service (Portainer, Watchtower, etc.)

      Nächste Schritte:
      1. Führe 'docker events --filter container=traefik' manuell aus und beobachte
      2. Prüfe journalctl -u docker.service für Docker-Service-Restarts
      3. Prüfe ob Portainer oder andere Monitoring-Tools laufen
      4. Prüfe ob Watchtower oder andere Auto-Update-Tools installiert sind
      {% endif %}

      ================================================================================
|
|
|