- Fix Enter key detection: handle multiple Enter key formats (\n, \r, \r\n) - Reduce flickering: lower render frequency from 60 FPS to 30 FPS - Fix menu bar visibility: re-render menu bar after content to prevent overwriting - Fix content positioning: explicit line positioning for categories and commands - Fix line shifting: clear lines before writing, control newlines manually - Limit visible items: prevent overflow with maxVisibleCategories/Commands - Improve CPU usage: increase sleep interval when no events processed This fixes: - Enter key not working for selection - Strong flickering of the application - Menu bar not visible or being overwritten - Top half of selection list not displayed - Lines being shifted/misaligned
230 lines
10 KiB
YAML
230 lines
10 KiB
YAML
---
# Consolidated Traefik Diagnosis Playbook
# Consolidates: diagnose-traefik-restarts.yml, find-traefik-restart-source.yml,
#               monitor-traefik-restarts.yml, monitor-traefik-continuously.yml,
#               verify-traefik-fix.yml
#
# Usage:
#   # Basic diagnosis (default)
#   ansible-playbook -i inventory/production.yml playbooks/diagnose/traefik.yml
#
#   # Find restart source
#   ansible-playbook -i inventory/production.yml playbooks/diagnose/traefik.yml --tags restart-source
#
#   # Monitor restarts
#   ansible-playbook -i inventory/production.yml playbooks/diagnose/traefik.yml --tags monitor

# Read-only diagnosis of the Traefik container: every task only inspects state
# (changed_when: false) and the final "Summary" task prints what was collected.
#
# Tag layout:
#   always          - basic checks and the summary; these run on every
#                     invocation, including runs limited with --tags
#   restart-source  - searches cron, systemd, deployment scripts and Docker
#                     events for whatever restarts Traefik
#   monitor         - counts stop messages in the logs and lists recent reboots
- name: Diagnose Traefik Issues
  hosts: production
  gather_facts: true
  become: true
  vars:
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    traefik_container_name: "traefik"
    # Plain defaults. A variable must not template itself
    # ("{{ x | default(...) }}" assigned to x is a recursive template);
    # override these on the command line with -e, which outranks play vars.
    monitor_duration_seconds: 120
    monitor_lookback_hours: 24

  tasks:
    - name: Display diagnostic plan
      ansible.builtin.debug:
        msg: |
          ================================================================================
          TRAEFIK DIAGNOSIS
          ================================================================================

          Running diagnosis with tags: {{ ansible_run_tags | default(['all']) }}

          Basic checks (always):
            - Container status
            - Restart count
            - Recent logs

          Restart source (--tags restart-source):
            - Find source of restart loops
            - Check cronjobs, systemd, scripts

          Monitor (--tags monitor):
            - Monitor for restarts over time

          ================================================================================
      tags:
        - always

    # ========================================
    # BASIC DIAGNOSIS (always runs)
    # ========================================
    # Tagged "always" so these still execute when the run is limited with
    # --tags; the summary at the end reads their registered results.
    - name: Check Traefik container status
      ansible.builtin.shell: |
        cd {{ traefik_stack_path }}
        docker compose ps {{ traefik_container_name }}
      register: traefik_status
      changed_when: false
      # Best effort: a failing status check must not abort the diagnosis;
      # the summary reports UNKNOWN when no state can be extracted.
      failed_when: false
      tags:
        - always

    - name: Check Traefik container restart count
      ansible.builtin.shell: |
        docker inspect {{ traefik_container_name }} --format '{{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "0"
      register: traefik_restart_count
      changed_when: false
      tags:
        - always

    - name: Check Traefik container start time
      ansible.builtin.shell: |
        docker inspect {{ traefik_container_name }} --format '{{ '{{' }}.State.StartedAt{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
      register: traefik_started_at
      changed_when: false
      tags:
        - always

    - name: Check Traefik logs for recent restarts
      # The matches are captured first because "grep | tail || echo" never
      # reaches the echo: the pipeline's exit status is tail's, which is 0.
      ansible.builtin.shell: |
        cd {{ traefik_stack_path }}
        matches="$(docker compose logs {{ traefik_container_name }} --since 2h 2>&1 | grep -iE "stopping server gracefully|I have to go|restart|shutdown" | tail -20)"
        if [ -n "$matches" ]; then
          printf '%s\n' "$matches"
        else
          echo "No restart messages in last 2 hours"
        fi
      register: traefik_restart_logs
      changed_when: false
      failed_when: false
      tags:
        - always

    - name: Check Traefik logs for errors
      ansible.builtin.shell: |
        cd {{ traefik_stack_path }}
        matches="$(docker compose logs {{ traefik_container_name }} --tail=100 2>&1 | grep -iE "error|warn|fail" | tail -20)"
        if [ -n "$matches" ]; then
          printf '%s\n' "$matches"
        else
          echo "No errors in recent logs"
        fi
      register: traefik_error_logs
      changed_when: false
      failed_when: false
      tags:
        - always

    # ========================================
    # RESTART SOURCE DIAGNOSIS (--tags restart-source)
    # ========================================
    - name: Check all user crontabs for Traefik/Docker commands
      # Iterates over every account in /etc/passwd and prints the matching
      # crontab lines per user. A flag tracks whether anything matched, since
      # the exit status of the loop itself cannot signal "nothing found".
      ansible.builtin.shell: |
        pattern='traefik|docker.*compose.*traefik|docker.*stop.*traefik|docker.*restart.*traefik|docker.*down.*traefik'
        found=0
        for user in $(cut -f1 -d: /etc/passwd); do
          entries="$(crontab -u "$user" -l 2>/dev/null | grep -E "$pattern")" || continue
          echo "=== User: $user ==="
          printf '%s\n' "$entries"
          found=1
        done
        [ "$found" -eq 1 ] || echo "No user crontabs with Traefik commands found"
      register: all_user_crontabs
      changed_when: false
      tags:
        - restart-source

    - name: Check system-wide cron directories
      ansible.builtin.shell: |
        for dir in /etc/cron.d /etc/cron.daily /etc/cron.hourly /etc/cron.weekly /etc/cron.monthly; do
          if [ -d "$dir" ]; then
            echo "=== $dir ==="
            grep -rE "traefik|docker.*compose.*traefik|docker.*stop.*traefik|docker.*restart.*traefik|docker.*down.*traefik" "$dir" 2>/dev/null || echo "No matches"
          fi
        done
      register: system_cron_dirs
      changed_when: false
      tags:
        - restart-source

    - name: Check systemd timers and services
      ansible.builtin.shell: |
        echo "=== Active Timers ==="
        systemctl list-timers --all --no-pager | grep -E "traefik|docker.*compose" || echo "No Traefik-related timers"
        echo ""
        echo "=== Custom Services ==="
        systemctl list-units --type=service --all --no-pager | grep -E "traefik|docker.*compose" || echo "No Traefik-related services"
      register: systemd_services
      changed_when: false
      tags:
        - restart-source

    - name: Check for scripts in deployment directory that restart Traefik
      # Lists (at most 30) shell/YAML files whose content matches a Traefik
      # restart/down/stop command. Empty output means nothing matched; the
      # summary substitutes a placeholder in that case.
      ansible.builtin.shell: |
        find /home/deploy/deployment -type f \( -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) -exec grep -lE "traefik.*restart|docker.*compose.*traefik.*restart|docker.*compose.*traefik.*down|docker.*compose.*traefik.*stop" {} \; 2>/dev/null | head -30
      register: deployment_scripts
      changed_when: false
      tags:
        - restart-source

    - name: Check Ansible roles for traefik_auto_restart or restart tasks
      # --exclude-dir=.git skips repository metadata; filtering the output with
      # grep -v ".git" would also drop any hit containing "<any char>git".
      ansible.builtin.shell: |
        matches="$(grep -rE --exclude-dir=.git "traefik_auto_restart|traefik.*restart|docker.*compose.*traefik.*restart" /home/deploy/deployment/ansible/roles/ 2>/dev/null | head -20)"
        if [ -n "$matches" ]; then
          printf '%s\n' "$matches"
        else
          echo "No auto-restart settings found"
        fi
      register: ansible_auto_restart
      changed_when: false
      tags:
        - restart-source

    - name: Check Docker events for Traefik (last 24 hours)
      # "docker events" keeps streaming after replaying the history, so
      # timeout ends it after 5 seconds.
      ansible.builtin.shell: |
        events="$(timeout 5 docker events --since 24h --filter container={{ traefik_container_name }} --filter event=die --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }}" 2>/dev/null | tail -20)"
        if [ -n "$events" ]; then
          printf '%s\n' "$events"
        else
          echo "No Traefik die events found"
        fi
      register: docker_events_traefik
      changed_when: false
      failed_when: false
      tags:
        - restart-source

    # ========================================
    # MONITOR (--tags monitor)
    # ========================================
    - name: Check Traefik logs for stop messages (lookback period)
      ansible.builtin.shell: |
        cd {{ traefik_stack_path }}
        matches="$(docker compose logs {{ traefik_container_name }} --since {{ monitor_lookback_hours }}h 2>&1 | grep -E "I have to go|Stopping server gracefully" | tail -20)"
        if [ -n "$matches" ]; then
          printf '%s\n' "$matches"
        else
          echo "No stop messages found"
        fi
      register: traefik_stop_messages
      changed_when: false
      tags:
        - monitor

    - name: Count stop messages
      # Counts occurrences in the (at most 20) lines captured above; the
      # "No stop messages found" placeholder contains neither phrase.
      ansible.builtin.set_fact:
        stop_count: "{{ traefik_stop_messages.stdout | regex_findall('I have to go|Stopping server gracefully') | length }}"
      tags:
        - monitor

    - name: Check system reboot history
      ansible.builtin.shell: |
        history="$(last reboot | head -5)"
        if [ -n "$history" ]; then
          printf '%s\n' "$history"
        else
          echo "No reboots found"
        fi
      register: reboots
      changed_when: false
      tags:
        - monitor

    # ========================================
    # SUMMARY
    # ========================================
    # Optional sections are shown when their checks actually ran (i.e. the
    # registered variable exists), which covers both untagged runs and runs
    # limited with --tags. default(..., true) also replaces empty output.
    - name: Summary
      ansible.builtin.debug:
        msg: |
          ================================================================================
          TRAEFIK DIAGNOSIS SUMMARY
          ================================================================================

          Container Status:
            - Status: {{ traefik_status.stdout | default('') | regex_search('Restarting|Up|Exited|Down') | default('UNKNOWN', true) }}
            - Restart Count: {{ traefik_restart_count.stdout | default('UNKNOWN', true) }}
            - Started At: {{ traefik_started_at.stdout | default('UNKNOWN', true) }}

          Recent Logs:
            - Restart Messages (last 2h): {{ traefik_restart_logs.stdout | default('None', true) }}
            - Errors (last 100 lines): {{ traefik_error_logs.stdout | default('None', true) }}

          {% if all_user_crontabs is defined %}
          Restart Source Analysis:
            - User Crontabs: {{ all_user_crontabs.stdout | default('None found', true) }}
            - System Cron: {{ system_cron_dirs.stdout | default('None found', true) }}
            - Systemd Services/Timers: {{ systemd_services.stdout | default('None found', true) }}
            - Deployment Scripts: {{ deployment_scripts.stdout | default('None found', true) }}
            - Ansible Auto-Restart: {{ ansible_auto_restart.stdout | default('None found', true) }}
            - Docker Events: {{ docker_events_traefik.stdout | default('None found', true) }}
          {% endif %}

          {% if traefik_stop_messages is defined %}
          Monitoring (last {{ monitor_lookback_hours }} hours):
            - Stop Messages: {{ stop_count | default(0) }}
            - System Reboots: {{ reboots.stdout | default('None', true) }}
          {% endif %}

          ================================================================================
          RECOMMENDATIONS
          ================================================================================

          {% set restart_log_text = traefik_restart_logs.stdout | default('') | lower %}
          {% if 'stopping server gracefully' in restart_log_text or 'i have to go' in restart_log_text %}
          ❌ PROBLEM: Traefik is being stopped regularly!
             → Run with --tags restart-source to find the source
          {% endif %}

          {% if (traefik_restart_count.stdout | default('0') | int) > 5 %}
          ⚠️ WARNING: High restart count ({{ traefik_restart_count.stdout }})
             → Check restart source: ansible-playbook -i inventory/production.yml playbooks/diagnose/traefik.yml --tags restart-source
          {% endif %}

          ================================================================================
      tags:
        - always