fix(console): comprehensive TUI rendering fixes
- Fix Enter key detection: handle multiple Enter key formats (\n, \r, \r\n) - Reduce flickering: lower render frequency from 60 FPS to 30 FPS - Fix menu bar visibility: re-render menu bar after content to prevent overwriting - Fix content positioning: explicit line positioning for categories and commands - Fix line shifting: clear lines before writing, control newlines manually - Limit visible items: prevent overflow with maxVisibleCategories/Commands - Improve CPU usage: increase sleep interval when no events processed This fixes: - Enter key not working for selection - Strong flickering of the application - Menu bar not visible or being overwritten - Top half of selection list not displayed - Lines being shifted/misaligned
This commit is contained in:
403
deployment/ansible/playbooks/diagnose/gitea.yml
Normal file
403
deployment/ansible/playbooks/diagnose/gitea.yml
Normal file
@@ -0,0 +1,403 @@
|
||||
---
|
||||
# Consolidated Gitea Diagnosis Playbook
|
||||
# Consolidates: diagnose-gitea-timeouts.yml, diagnose-gitea-timeout-deep.yml,
|
||||
# diagnose-gitea-timeout-live.yml, diagnose-gitea-timeouts-complete.yml,
|
||||
# comprehensive-gitea-diagnosis.yml
|
||||
#
|
||||
# Usage:
|
||||
# # Basic diagnosis (default)
|
||||
# ansible-playbook -i inventory/production.yml playbooks/diagnose/gitea.yml
|
||||
#
|
||||
# # Deep diagnosis (includes resource checks, multiple connection tests)
|
||||
# ansible-playbook -i inventory/production.yml playbooks/diagnose/gitea.yml --tags deep
|
||||
#
|
||||
# # Live diagnosis (monitors during request)
|
||||
# ansible-playbook -i inventory/production.yml playbooks/diagnose/gitea.yml --tags live
|
||||
#
|
||||
# # Complete diagnosis (all checks)
|
||||
# ansible-playbook -i inventory/production.yml playbooks/diagnose/gitea.yml --tags complete
|
||||
|
||||
- name: Diagnose Gitea Issues
|
||||
hosts: production
|
||||
gather_facts: yes
|
||||
become: no
|
||||
vars:
|
||||
gitea_stack_path: "{{ stacks_base_path }}/gitea"
|
||||
traefik_stack_path: "{{ stacks_base_path }}/traefik"
|
||||
gitea_url: "https://{{ gitea_domain }}"
|
||||
gitea_container_name: "gitea"
|
||||
traefik_container_name: "traefik"
|
||||
|
||||
tasks:
|
||||
# ========================================
|
||||
# BASIC DIAGNOSIS (always runs)
|
||||
# ========================================
|
||||
- name: Display diagnostic plan
|
||||
ansible.builtin.debug:
|
||||
msg: |
|
||||
================================================================================
|
||||
GITEA DIAGNOSIS
|
||||
================================================================================
|
||||
|
||||
Running diagnosis with tags: {{ ansible_run_tags | default(['all']) }}
|
||||
|
||||
Basic checks (always):
|
||||
- Container status
|
||||
- Health endpoints
|
||||
- Network connectivity
|
||||
- Service discovery
|
||||
|
||||
Deep checks (--tags deep):
|
||||
- Resource usage
|
||||
- Multiple connection tests
|
||||
- Log analysis
|
||||
|
||||
Live checks (--tags live):
|
||||
- Real-time monitoring during request
|
||||
|
||||
Complete checks (--tags complete):
|
||||
- All checks including app.ini, ServersTransport, etc.
|
||||
|
||||
================================================================================
|
||||
|
||||
- name: Check Gitea container status
|
||||
ansible.builtin.shell: |
|
||||
cd {{ gitea_stack_path }}
|
||||
docker compose ps {{ gitea_container_name }}
|
||||
register: gitea_status
|
||||
changed_when: false
|
||||
|
||||
- name: Check Traefik container status
|
||||
ansible.builtin.shell: |
|
||||
cd {{ traefik_stack_path }}
|
||||
docker compose ps {{ traefik_container_name }}
|
||||
register: traefik_status
|
||||
changed_when: false
|
||||
|
||||
- name: Check Gitea health endpoint (direct from container)
|
||||
ansible.builtin.shell: |
|
||||
cd {{ gitea_stack_path }}
|
||||
docker compose exec -T {{ gitea_container_name }} curl -f http://localhost:3000/api/healthz 2>&1 || echo "HEALTH_CHECK_FAILED"
|
||||
register: gitea_health_direct
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Check Gitea health endpoint (via Traefik)
|
||||
ansible.builtin.uri:
|
||||
url: "{{ gitea_url }}/api/healthz"
|
||||
method: GET
|
||||
status_code: [200]
|
||||
validate_certs: false
|
||||
timeout: 10
|
||||
register: gitea_health_traefik
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
|
||||
- name: Check if Gitea is in traefik-public network
|
||||
ansible.builtin.shell: |
|
||||
docker network inspect traefik-public --format '{{ '{{' }}range .Containers{{ '}}' }}{{ '{{' }}.Name{{ '}}' }} {{ '{{' }}end{{ '}}' }}' 2>/dev/null | grep -q {{ gitea_container_name }} && echo "YES" || echo "NO"
|
||||
register: gitea_in_traefik_network
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Test connection from Traefik to Gitea
|
||||
ansible.builtin.shell: |
|
||||
cd {{ traefik_stack_path }}
|
||||
docker compose exec -T {{ traefik_container_name }} wget -qO- --timeout=5 http://{{ gitea_container_name }}:3000/api/healthz 2>&1 || echo "CONNECTION_FAILED"
|
||||
register: traefik_gitea_connection
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Check Traefik service discovery for Gitea
|
||||
ansible.builtin.shell: |
|
||||
cd {{ traefik_stack_path }}
|
||||
docker compose exec -T {{ traefik_container_name }} traefik show providers docker 2>/dev/null | grep -i "gitea" || echo "NOT_FOUND"
|
||||
register: traefik_gitea_service
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
# ========================================
|
||||
# DEEP DIAGNOSIS (--tags deep)
|
||||
# ========================================
|
||||
- name: Check Gitea container resources (CPU/Memory)
|
||||
ansible.builtin.shell: |
|
||||
docker stats {{ gitea_container_name }} --no-stream --format 'CPU: {{ '{{' }}.CPUPerc{{ '}}' }} | Memory: {{ '{{' }}.MemUsage{{ '}}' }}' 2>/dev/null || echo "Could not get stats"
|
||||
register: gitea_resources
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- deep
|
||||
- complete
|
||||
|
||||
- name: Check Traefik container resources (CPU/Memory)
|
||||
ansible.builtin.shell: |
|
||||
docker stats {{ traefik_container_name }} --no-stream --format 'CPU: {{ '{{' }}.CPUPerc{{ '}}' }} | Memory: {{ '{{' }}.MemUsage{{ '}}' }}' 2>/dev/null || echo "Could not get stats"
|
||||
register: traefik_resources
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- deep
|
||||
- complete
|
||||
|
||||
- name: Test Gitea direct connection (multiple attempts)
|
||||
ansible.builtin.shell: |
|
||||
for i in {1..5}; do
|
||||
echo "=== Attempt $i ==="
|
||||
cd {{ gitea_stack_path }}
|
||||
timeout 5 docker compose exec -T {{ gitea_container_name }} curl -f http://localhost:3000/api/healthz 2>&1 || echo "FAILED"
|
||||
sleep 1
|
||||
done
|
||||
register: gitea_direct_tests
|
||||
changed_when: false
|
||||
tags:
|
||||
- deep
|
||||
- complete
|
||||
|
||||
- name: Test Gitea via Traefik (multiple attempts)
|
||||
ansible.builtin.shell: |
|
||||
for i in {1..5}; do
|
||||
echo "=== Attempt $i ==="
|
||||
timeout 10 curl -k -s -o /dev/null -w "%{http_code}" {{ gitea_url }}/api/healthz 2>&1 || echo "TIMEOUT"
|
||||
sleep 2
|
||||
done
|
||||
register: gitea_traefik_tests
|
||||
changed_when: false
|
||||
tags:
|
||||
- deep
|
||||
- complete
|
||||
|
||||
- name: Check Gitea logs for errors/timeouts
|
||||
ansible.builtin.shell: |
|
||||
cd {{ gitea_stack_path }}
|
||||
docker compose logs {{ gitea_container_name }} --tail=50 2>&1 | grep -iE "error|timeout|failed|panic|fatal" | tail -20 || echo "No errors in recent logs"
|
||||
register: gitea_errors
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- deep
|
||||
- complete
|
||||
|
||||
- name: Check Traefik logs for Gitea-related errors
|
||||
ansible.builtin.shell: |
|
||||
cd {{ traefik_stack_path }}
|
||||
docker compose logs {{ traefik_container_name }} --tail=50 2>&1 | grep -iE "gitea|git\.michaelschiemer\.de|timeout|error" | tail -20 || echo "No Gitea-related errors in Traefik logs"
|
||||
register: traefik_gitea_errors
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- deep
|
||||
- complete
|
||||
|
||||
# ========================================
|
||||
# COMPLETE DIAGNOSIS (--tags complete)
|
||||
# ========================================
|
||||
- name: Test Gitea internal port (127.0.0.1:3000)
|
||||
ansible.builtin.shell: |
|
||||
docker exec {{ gitea_container_name }} curl -sS -I http://127.0.0.1:3000/ 2>&1 | head -5
|
||||
register: gitea_internal_test
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Test Traefik to Gitea via Docker DNS (gitea:3000)
|
||||
ansible.builtin.shell: |
|
||||
docker exec {{ traefik_container_name }} sh -lc 'apk add --no-cache curl >/dev/null 2>&1 || true; curl -sS -I http://gitea:3000/ 2>&1' | head -10
|
||||
register: traefik_gitea_dns_test
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Check Traefik logs for 504 errors
|
||||
ansible.builtin.shell: |
|
||||
docker logs {{ traefik_container_name }} --tail=100 2>&1 | grep -i "504\|timeout" | tail -20 || echo "No 504/timeout errors found"
|
||||
register: traefik_504_logs
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Check Gitea Traefik labels
|
||||
ansible.builtin.shell: |
|
||||
docker inspect {{ gitea_container_name }} --format '{{ '{{' }}json .Config.Labels{{ '}}' }}' 2>/dev/null | python3 -m json.tool | grep -E "traefik" || echo "No Traefik labels found"
|
||||
register: gitea_labels
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Verify service port is 3000
|
||||
ansible.builtin.shell: |
|
||||
docker inspect {{ gitea_container_name }} --format '{{ '{{' }}json .Config.Labels{{ '}}' }}' 2>/dev/null | python3 -c "import sys, json; labels = json.load(sys.stdin); print('server.port:', labels.get('traefik.http.services.gitea.loadbalancer.server.port', 'NOT SET'))"
|
||||
register: gitea_service_port
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Check ServersTransport configuration
|
||||
ansible.builtin.shell: |
|
||||
docker inspect {{ gitea_container_name }} --format '{{ '{{' }}json .Config.Labels{{ '}}' }}' 2>/dev/null | python3 -c "
|
||||
import sys, json
|
||||
labels = json.load(sys.stdin)
|
||||
transport = labels.get('traefik.http.services.gitea.loadbalancer.serversTransport', '')
|
||||
if transport:
|
||||
print('ServersTransport:', transport)
|
||||
print('dialtimeout:', labels.get('traefik.http.serverstransports.gitea-transport.forwardingtimeouts.dialtimeout', 'NOT SET'))
|
||||
print('responseheadertimeout:', labels.get('traefik.http.serverstransports.gitea-transport.forwardingtimeouts.responseheadertimeout', 'NOT SET'))
|
||||
print('idleconntimeout:', labels.get('traefik.http.serverstransports.gitea-transport.forwardingtimeouts.idleconntimeout', 'NOT SET'))
|
||||
print('maxidleconnsperhost:', labels.get('traefik.http.serverstransports.gitea-transport.maxidleconnsperhost', 'NOT SET'))
|
||||
else:
|
||||
print('ServersTransport: NOT CONFIGURED')
|
||||
"
|
||||
register: gitea_timeout_config
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Check Gitea app.ini proxy settings
|
||||
ansible.builtin.shell: |
|
||||
cd {{ gitea_stack_path }}
|
||||
docker compose exec -T {{ gitea_container_name }} cat /data/gitea/conf/app.ini 2>/dev/null | grep -E "PROXY_TRUSTED_PROXIES|LOCAL_ROOT_URL|COOKIE_SECURE|SAME_SITE" || echo "Proxy settings not found in app.ini"
|
||||
register: gitea_proxy_settings
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Check if Traefik can resolve Gitea hostname
|
||||
ansible.builtin.shell: |
|
||||
docker exec {{ traefik_container_name }} getent hosts {{ gitea_container_name }} || echo "DNS resolution failed"
|
||||
register: traefik_dns_resolution
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Check Docker networks for Gitea and Traefik
|
||||
ansible.builtin.shell: |
|
||||
docker inspect {{ gitea_container_name }} --format '{{ '{{' }}json .NetworkSettings.Networks{{ '}}' }}' | python3 -c "import sys, json; data=json.load(sys.stdin); print('Gitea networks:', list(data.keys()))"
|
||||
docker inspect {{ traefik_container_name }} --format '{{ '{{' }}json .NetworkSettings.Networks{{ '}}' }}' | python3 -c "import sys, json; data=json.load(sys.stdin); print('Traefik networks:', list(data.keys()))"
|
||||
register: docker_networks_check
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Test long-running endpoint from external
|
||||
ansible.builtin.uri:
|
||||
url: "{{ gitea_url }}/user/events"
|
||||
method: GET
|
||||
status_code: [200, 504]
|
||||
validate_certs: false
|
||||
timeout: 60
|
||||
register: long_running_endpoint_test
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Check Redis connection from Gitea
|
||||
ansible.builtin.shell: |
|
||||
cd {{ gitea_stack_path }}
|
||||
docker compose exec -T {{ gitea_container_name }} sh -c "redis-cli -h redis -a {{ vault_gitea_redis_password | default('gitea_redis_password') }} ping 2>&1" || echo "REDIS_CONNECTION_FAILED"
|
||||
register: gitea_redis_connection
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
- name: Check PostgreSQL connection from Gitea
|
||||
ansible.builtin.shell: |
|
||||
cd {{ gitea_stack_path }}
|
||||
docker compose exec -T {{ gitea_container_name }} sh -c "pg_isready -h postgres -p 5432 -U gitea 2>&1" || echo "POSTGRES_CONNECTION_FAILED"
|
||||
register: gitea_postgres_connection
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- complete
|
||||
|
||||
# ========================================
|
||||
# SUMMARY
|
||||
# ========================================
|
||||
- name: Summary
|
||||
ansible.builtin.debug:
|
||||
msg: |
|
||||
================================================================================
|
||||
GITEA DIAGNOSIS SUMMARY
|
||||
================================================================================
|
||||
|
||||
Container Status:
|
||||
- Gitea: {{ gitea_status.stdout | regex_replace('.*(Up|Down|Restarting).*', '\\1') | default('UNKNOWN') }}
|
||||
- Traefik: {{ traefik_status.stdout | regex_replace('.*(Up|Down|Restarting).*', '\\1') | default('UNKNOWN') }}
|
||||
|
||||
Health Checks:
|
||||
- Gitea (direct): {% if 'HEALTH_CHECK_FAILED' not in gitea_health_direct.stdout %}✅{% else %}❌{% endif %}
|
||||
- Gitea (via Traefik): {% if gitea_health_traefik.status == 200 %}✅{% else %}❌ (Status: {{ gitea_health_traefik.status | default('TIMEOUT') }}){% endif %}
|
||||
|
||||
Network:
|
||||
- Gitea in traefik-public: {% if gitea_in_traefik_network.stdout == 'YES' %}✅{% else %}❌{% endif %}
|
||||
- Traefik → Gitea: {% if 'CONNECTION_FAILED' not in traefik_gitea_connection.stdout %}✅{% else %}❌{% endif %}
|
||||
|
||||
Service Discovery:
|
||||
- Traefik finds Gitea: {% if 'NOT_FOUND' not in traefik_gitea_service.stdout %}✅{% else %}❌{% endif %}
|
||||
|
||||
{% if 'deep' in ansible_run_tags or 'complete' in ansible_run_tags %}
|
||||
Resources:
|
||||
- Gitea: {{ gitea_resources.stdout | default('N/A') }}
|
||||
- Traefik: {{ traefik_resources.stdout | default('N/A') }}
|
||||
|
||||
Connection Tests:
|
||||
- Direct (5 attempts): {{ gitea_direct_tests.stdout | default('N/A') }}
|
||||
- Via Traefik (5 attempts): {{ gitea_traefik_tests.stdout | default('N/A') }}
|
||||
|
||||
Error Logs:
|
||||
- Gitea: {{ gitea_errors.stdout | default('No errors') }}
|
||||
- Traefik: {{ traefik_gitea_errors.stdout | default('No errors') }}
|
||||
{% endif %}
|
||||
|
||||
{% if 'complete' in ansible_run_tags %}
|
||||
Configuration:
|
||||
- Service Port: {{ gitea_service_port.stdout | default('N/A') }}
|
||||
- ServersTransport: {{ gitea_timeout_config.stdout | default('N/A') }}
|
||||
- Proxy Settings: {{ gitea_proxy_settings.stdout | default('N/A') }}
|
||||
- DNS Resolution: {{ traefik_dns_resolution.stdout | default('N/A') }}
|
||||
- Networks: {{ docker_networks_check.stdout | default('N/A') }}
|
||||
|
||||
Long-Running Endpoint:
|
||||
- Status: {{ long_running_endpoint_test.status | default('N/A') }}
|
||||
|
||||
Dependencies:
|
||||
- Redis: {% if 'REDIS_CONNECTION_FAILED' not in gitea_redis_connection.stdout %}✅{% else %}❌{% endif %}
|
||||
- PostgreSQL: {% if 'POSTGRES_CONNECTION_FAILED' not in gitea_postgres_connection.stdout %}✅{% else %}❌{% endif %}
|
||||
{% endif %}
|
||||
|
||||
================================================================================
|
||||
RECOMMENDATIONS
|
||||
================================================================================
|
||||
|
||||
{% if gitea_health_traefik.status != 200 %}
|
||||
❌ Gitea is not reachable via Traefik
|
||||
→ Run: ansible-playbook -i inventory/production.yml playbooks/manage/gitea.yml --tags restart
|
||||
{% endif %}
|
||||
|
||||
{% if gitea_in_traefik_network.stdout != 'YES' %}
|
||||
❌ Gitea is not in traefik-public network
|
||||
→ Restart Gitea container to update network membership
|
||||
{% endif %}
|
||||
|
||||
{% if 'CONNECTION_FAILED' in traefik_gitea_connection.stdout %}
|
||||
❌ Traefik cannot reach Gitea
|
||||
→ Restart both containers
|
||||
{% endif %}
|
||||
|
||||
{% if 'NOT_FOUND' in traefik_gitea_service.stdout %}
|
||||
❌ Gitea not found in Traefik service discovery
|
||||
→ Restart Traefik to refresh service discovery
|
||||
{% endif %}
|
||||
|
||||
================================================================================
|
||||
|
||||
|
||||
229
deployment/ansible/playbooks/diagnose/traefik.yml
Normal file
229
deployment/ansible/playbooks/diagnose/traefik.yml
Normal file
@@ -0,0 +1,229 @@
|
||||
---
|
||||
# Consolidated Traefik Diagnosis Playbook
|
||||
# Consolidates: diagnose-traefik-restarts.yml, find-traefik-restart-source.yml,
|
||||
# monitor-traefik-restarts.yml, monitor-traefik-continuously.yml,
|
||||
# verify-traefik-fix.yml
|
||||
#
|
||||
# Usage:
|
||||
# # Basic diagnosis (default)
|
||||
# ansible-playbook -i inventory/production.yml playbooks/diagnose/traefik.yml
|
||||
#
|
||||
# # Find restart source
|
||||
# ansible-playbook -i inventory/production.yml playbooks/diagnose/traefik.yml --tags restart-source
|
||||
#
|
||||
# # Monitor restarts
|
||||
# ansible-playbook -i inventory/production.yml playbooks/diagnose/traefik.yml --tags monitor
|
||||
|
||||
- name: Diagnose Traefik Issues
|
||||
hosts: production
|
||||
gather_facts: yes
|
||||
become: yes
|
||||
vars:
|
||||
traefik_stack_path: "{{ stacks_base_path }}/traefik"
|
||||
traefik_container_name: "traefik"
|
||||
monitor_duration_seconds: "{{ monitor_duration_seconds | default(120) }}"
|
||||
monitor_lookback_hours: "{{ monitor_lookback_hours | default(24) }}"
|
||||
|
||||
tasks:
|
||||
- name: Display diagnostic plan
|
||||
ansible.builtin.debug:
|
||||
msg: |
|
||||
================================================================================
|
||||
TRAEFIK DIAGNOSIS
|
||||
================================================================================
|
||||
|
||||
Running diagnosis with tags: {{ ansible_run_tags | default(['all']) }}
|
||||
|
||||
Basic checks (always):
|
||||
- Container status
|
||||
- Restart count
|
||||
- Recent logs
|
||||
|
||||
Restart source (--tags restart-source):
|
||||
- Find source of restart loops
|
||||
- Check cronjobs, systemd, scripts
|
||||
|
||||
Monitor (--tags monitor):
|
||||
- Monitor for restarts over time
|
||||
|
||||
================================================================================
|
||||
|
||||
# ========================================
|
||||
# BASIC DIAGNOSIS (always runs)
|
||||
# ========================================
|
||||
- name: Check Traefik container status
|
||||
ansible.builtin.shell: |
|
||||
cd {{ traefik_stack_path }}
|
||||
docker compose ps {{ traefik_container_name }}
|
||||
register: traefik_status
|
||||
changed_when: false
|
||||
|
||||
- name: Check Traefik container restart count
|
||||
ansible.builtin.shell: |
|
||||
docker inspect {{ traefik_container_name }} --format '{{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "0"
|
||||
register: traefik_restart_count
|
||||
changed_when: false
|
||||
|
||||
- name: Check Traefik container start time
|
||||
ansible.builtin.shell: |
|
||||
docker inspect {{ traefik_container_name }} --format '{{ '{{' }}.State.StartedAt{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
|
||||
register: traefik_started_at
|
||||
changed_when: false
|
||||
|
||||
- name: Check Traefik logs for recent restarts
|
||||
ansible.builtin.shell: |
|
||||
cd {{ traefik_stack_path }}
|
||||
docker compose logs {{ traefik_container_name }} --since 2h 2>&1 | grep -iE "stopping server gracefully|I have to go|restart|shutdown" | tail -20 || echo "No restart messages in last 2 hours"
|
||||
register: traefik_restart_logs
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Check Traefik logs for errors
|
||||
ansible.builtin.shell: |
|
||||
cd {{ traefik_stack_path }}
|
||||
docker compose logs {{ traefik_container_name }} --tail=100 2>&1 | grep -iE "error|warn|fail" | tail -20 || echo "No errors in recent logs"
|
||||
register: traefik_error_logs
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
# ========================================
|
||||
# RESTART SOURCE DIAGNOSIS (--tags restart-source)
|
||||
# ========================================
|
||||
- name: Check all user crontabs for Traefik/Docker commands
|
||||
ansible.builtin.shell: |
|
||||
for user in $(cut -f1 -d: /etc/passwd); do
|
||||
crontab -u "$user" -l 2>/dev/null | grep -qE "traefik|docker.*compose.*traefik|docker.*stop.*traefik|docker.*restart.*traefik|docker.*down.*traefik" && echo "=== User: $user ===" && crontab -u "$user" -l 2>/dev/null | grep -E "traefik|docker.*compose.*traefik|docker.*stop.*traefik|docker.*restart.*traefik|docker.*down.*traefik" || true
|
||||
done || echo "No user crontabs with Traefik commands found"
|
||||
register: all_user_crontabs
|
||||
changed_when: false
|
||||
tags:
|
||||
- restart-source
|
||||
|
||||
- name: Check system-wide cron directories
|
||||
ansible.builtin.shell: |
|
||||
for dir in /etc/cron.d /etc/cron.daily /etc/cron.hourly /etc/cron.weekly /etc/cron.monthly; do
|
||||
if [ -d "$dir" ]; then
|
||||
echo "=== $dir ==="
|
||||
grep -rE "traefik|docker.*compose.*traefik|docker.*stop.*traefik|docker.*restart.*traefik|docker.*down.*traefik" "$dir" 2>/dev/null || echo "No matches"
|
||||
fi
|
||||
done
|
||||
register: system_cron_dirs
|
||||
changed_when: false
|
||||
tags:
|
||||
- restart-source
|
||||
|
||||
- name: Check systemd timers and services
|
||||
ansible.builtin.shell: |
|
||||
echo "=== Active Timers ==="
|
||||
systemctl list-timers --all --no-pager | grep -E "traefik|docker.*compose" || echo "No Traefik-related timers"
|
||||
echo ""
|
||||
echo "=== Custom Services ==="
|
||||
systemctl list-units --type=service --all | grep -E "traefik|docker.*compose" || echo "No Traefik-related services"
|
||||
register: systemd_services
|
||||
changed_when: false
|
||||
tags:
|
||||
- restart-source
|
||||
|
||||
- name: Check for scripts in deployment directory that restart Traefik
|
||||
ansible.builtin.shell: |
|
||||
find /home/deploy/deployment -type f \( -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) -exec grep -lE "traefik.*restart|docker.*compose.*traefik.*restart|docker.*compose.*traefik.*down|docker.*compose.*traefik.*stop" {} \; 2>/dev/null | head -30
|
||||
register: deployment_scripts
|
||||
changed_when: false
|
||||
tags:
|
||||
- restart-source
|
||||
|
||||
- name: Check Ansible roles for traefik_auto_restart or restart tasks
|
||||
ansible.builtin.shell: |
|
||||
grep -rE "traefik_auto_restart|traefik.*restart|docker.*compose.*traefik.*restart" /home/deploy/deployment/ansible/roles/ 2>/dev/null | grep -v ".git" | head -20 || echo "No auto-restart settings found"
|
||||
register: ansible_auto_restart
|
||||
changed_when: false
|
||||
tags:
|
||||
- restart-source
|
||||
|
||||
- name: Check Docker events for Traefik (last 24 hours)
|
||||
ansible.builtin.shell: |
|
||||
timeout 5 docker events --since 24h --filter container={{ traefik_container_name }} --filter event=die --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }}" 2>/dev/null | tail -20 || echo "No Traefik die events found"
|
||||
register: docker_events_traefik
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags:
|
||||
- restart-source
|
||||
|
||||
# ========================================
|
||||
# MONITOR (--tags monitor)
|
||||
# ========================================
|
||||
- name: Check Traefik logs for stop messages (lookback period)
|
||||
ansible.builtin.shell: |
|
||||
cd {{ traefik_stack_path }}
|
||||
docker compose logs {{ traefik_container_name }} --since {{ monitor_lookback_hours }}h 2>&1 | grep -E "I have to go|Stopping server gracefully" | tail -20 || echo "No stop messages found"
|
||||
register: traefik_stop_messages
|
||||
changed_when: false
|
||||
tags:
|
||||
- monitor
|
||||
|
||||
- name: Count stop messages
|
||||
ansible.builtin.set_fact:
|
||||
stop_count: "{{ traefik_stop_messages.stdout | regex_findall('I have to go|Stopping server gracefully') | length }}"
|
||||
tags:
|
||||
- monitor
|
||||
|
||||
- name: Check system reboot history
|
||||
ansible.builtin.shell: |
|
||||
last reboot | head -5 || echo "No reboots found"
|
||||
register: reboots
|
||||
changed_when: false
|
||||
tags:
|
||||
- monitor
|
||||
|
||||
# ========================================
|
||||
# SUMMARY
|
||||
# ========================================
|
||||
- name: Summary
|
||||
ansible.builtin.debug:
|
||||
msg: |
|
||||
================================================================================
|
||||
TRAEFIK DIAGNOSIS SUMMARY
|
||||
================================================================================
|
||||
|
||||
Container Status:
|
||||
- Status: {{ traefik_status.stdout | regex_replace('.*(Up|Down|Restarting).*', '\\1') | default('UNKNOWN') }}
|
||||
- Restart Count: {{ traefik_restart_count.stdout }}
|
||||
- Started At: {{ traefik_started_at.stdout }}
|
||||
|
||||
Recent Logs:
|
||||
- Restart Messages (last 2h): {{ traefik_restart_logs.stdout | default('None') }}
|
||||
- Errors (last 100 lines): {{ traefik_error_logs.stdout | default('None') }}
|
||||
|
||||
{% if 'restart-source' in ansible_run_tags %}
|
||||
Restart Source Analysis:
|
||||
- User Crontabs: {{ all_user_crontabs.stdout | default('None found') }}
|
||||
- System Cron: {{ system_cron_dirs.stdout | default('None found') }}
|
||||
- Systemd Services/Timers: {{ systemd_services.stdout | default('None found') }}
|
||||
- Deployment Scripts: {{ deployment_scripts.stdout | default('None found') }}
|
||||
- Ansible Auto-Restart: {{ ansible_auto_restart.stdout | default('None found') }}
|
||||
- Docker Events: {{ docker_events_traefik.stdout | default('None found') }}
|
||||
{% endif %}
|
||||
|
||||
{% if 'monitor' in ansible_run_tags %}
|
||||
Monitoring (last {{ monitor_lookback_hours }} hours):
|
||||
- Stop Messages: {{ stop_count | default(0) }}
|
||||
- System Reboots: {{ reboots.stdout | default('None') }}
|
||||
{% endif %}
|
||||
|
||||
================================================================================
|
||||
RECOMMENDATIONS
|
||||
================================================================================
|
||||
|
||||
{% if 'stopping server gracefully' in traefik_restart_logs.stdout | lower or 'I have to go' in traefik_restart_logs.stdout %}
|
||||
❌ PROBLEM: Traefik is being stopped regularly!
|
||||
→ Run with --tags restart-source to find the source
|
||||
{% endif %}
|
||||
|
||||
{% if (traefik_restart_count.stdout | int) > 5 %}
|
||||
⚠️ WARNING: High restart count ({{ traefik_restart_count.stdout }})
|
||||
→ Check restart source: ansible-playbook -i inventory/production.yml playbooks/diagnose/traefik.yml --tags restart-source
|
||||
{% endif %}
|
||||
|
||||
================================================================================
|
||||
|
||||
|
||||
Reference in New Issue
Block a user