fix: prevent Traefik restart loops and improve Docker registry login

Registry Login Fixes:
- Filter out service names (minio, redis) from registry URL extraction
- Only recognize actual registry URLs (with a TLD, a port, or localhost; see the sketch below)
- Preserve port numbers in registry URLs (e.g. git.michaelschiemer.de:5000)
- Better error messages for failed logins
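
A minimal sketch of the recognition rule (the grep pattern is the one introduced by this commit; the candidate names are illustrative, taken from services and registries that appear elsewhere in this repo):

# Real registries carry a TLD, a port, or are localhost; bare service names have neither.
for reg in git.michaelschiemer.de:5000 registry.michaelschiemer.de minio redis localhost; do
  if echo "$reg" | grep -qE '\.(de|com|org|net|io|dev)|:[0-9]+|localhost'; then
    echo "login candidate: $reg"
  else
    echo "skipped (service name): $reg"
  fi
done
# Output: git.michaelschiemer.de:5000, registry.michaelschiemer.de and localhost
# are kept as login candidates; minio and redis are skipped.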

Traefik Restart Loop Prevention:
- Set traefik_auto_restart default to false in traefik role
- Add traefik_auto_restart, traefik_ssl_restart, gitea_auto_restart to staging vars
- Add guard to fix-gitea-traefik-connection.yml restart task
- Add guard and deprecation warning to update-gitea-traefik-service.yml

This ensures that:
- CI/CD pipelines won't cause Traefik restart loops
- Staging environment uses same safe defaults as production
- Deprecated playbooks fail by default unless explicitly enabled
- Only actual Docker registries are used for login, not service names
2025-11-09 00:03:30 +01:00
parent c6b94b1147
commit ba859d0fdf
5 changed files with 312 additions and 10 deletions

View File

@@ -0,0 +1,113 @@
---
# Staging Deployment - Centralized Variables
# These variables are used across all staging playbooks
# System Maintenance
system_update_packages: true
system_apt_upgrade: dist
system_enable_unattended_upgrades: true
system_enable_unattended_reboot: false
system_unattended_reboot_time: "02:00"
system_enable_unattended_timer: true
system_enable_docker_prune: false
# Deployment Paths
deploy_user_home: "/home/deploy"
stacks_base_path: "/home/deploy/deployment/stacks"
staging_stack_path: "{{ stacks_base_path }}/staging"
postgresql_staging_stack_path: "{{ stacks_base_path }}/postgresql-staging"
backups_path: "{{ deploy_user_home }}/deployment/backups"
# Docker Registry
docker_registry: "localhost:5000"
docker_registry_url: "localhost:5000"
docker_registry_external: "registry.michaelschiemer.de"
docker_registry_username_default: "admin"
# docker_registry_password_default should be set in vault as vault_docker_registry_password
# If not using vault, override via -e docker_registry_password_default="your-password"
docker_registry_password_default: ""
registry_auth_path: "{{ stacks_base_path }}/registry/auth"
# Application Configuration
app_name: "framework"
app_domain: "staging.michaelschiemer.de"
staging_domain: "{{ app_domain }}"
app_image: "{{ docker_registry }}/{{ app_name }}"
app_image_external: "{{ docker_registry_external }}/{{ app_name }}"
# Domain Configuration
gitea_domain: "git.michaelschiemer.de"
# Email Configuration
mail_from_address: "noreply@{{ app_domain }}"
acme_email: "kontakt@michaelschiemer.de"
# SSL Certificate Domains
ssl_domains:
- "{{ gitea_domain }}"
- "{{ app_domain }}"
- "michaelschiemer.de"
# Health Check Configuration
health_check_url: "https://{{ app_domain }}/health"
health_check_retries: 10
health_check_delay: 10
# Rollback Configuration
max_rollback_versions: 3
rollback_timeout: 300
# Wait Timeouts
wait_timeout: 60
# Git Configuration (for sync-code.yml)
git_repository_url_default: "https://{{ gitea_domain }}/michael/michaelschiemer.git"
git_branch_default: "staging"
git_token: "{{ vault_git_token | default('') }}"
git_username: "{{ vault_git_username | default('') }}"
git_password: "{{ vault_git_password | default('') }}"
# Database Configuration
db_user_default: "postgres"
db_name_default: "michaelschiemer_staging"
db_host_default: "postgres-staging"
# MinIO Object Storage Configuration
minio_root_user: "{{ vault_minio_root_user | default('minioadmin') }}"
minio_root_password: "{{ vault_minio_root_password | default('') }}"
minio_api_domain: "minio-api.michaelschiemer.de"
minio_console_domain: "minio.michaelschiemer.de"
# WireGuard Configuration
wireguard_interface: "wg0"
wireguard_config_path: "/etc/wireguard"
wireguard_port_default: 51820
wireguard_network_default: "10.8.0.0/24"
wireguard_server_ip_default: "10.8.0.1"
wireguard_enable_ip_forwarding: true
wireguard_config_file: "{{ wireguard_config_path }}/{{ wireguard_interface }}.conf"
wireguard_private_key_file: "{{ wireguard_config_path }}/{{ wireguard_interface }}_private.key"
wireguard_public_key_file: "{{ wireguard_config_path }}/{{ wireguard_interface }}_public.key"
wireguard_client_configs_path: "{{ wireguard_config_path }}/clients"
# WireGuard DNS Configuration
# DNS server for VPN clients (points to VPN server IP)
# This ensures internal services are resolved to VPN IPs
wireguard_dns_servers:
- "{{ wireguard_server_ip_default }}"
# Traefik Configuration
# Disable automatic restarts after config deployment to prevent restart loops
# Set to true only when explicitly needed (e.g., after major config changes)
traefik_auto_restart: false
# Traefik SSL Configuration
# Disable automatic restarts during SSL certificate setup to prevent restart loops
traefik_ssl_restart: false
# Gitea Auto-Restart Configuration
# Set to false to prevent automatic restarts when healthcheck fails
# This prevents restart loops when Gitea is temporarily unavailable (e.g., during Traefik restarts)
# Set to true only when explicitly needed for remediation
gitea_auto_restart: false
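
For reference, these three flags are consumed by when: guards of the shape below in the playbooks touched by this commit (task abridged from the playbooks further down); with the staging defaults above, the restart is always skipped unless the flag is overridden for a single run, e.g. via -e traefik_auto_restart=true:

- name: Restart Traefik to load new configuration
  shell: |
    docker compose -f {{ traefik_stack_path }}/docker-compose.yml restart traefik
  when: traefik_auto_restart | default(false) | bool  # false by default, so this task is skipped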

View File

@@ -233,21 +233,42 @@
   ignore_errors: yes
   changed_when: false
-- name: Determine actual registry URLs from docker-compose files
+- name: Extract registry URLs from docker-compose files (preserve port if present)
   ansible.builtin.shell: |
     cd {{ application_code_dest }}
-    grep -h "image:" docker-compose.base.yml docker-compose.{{ application_compose_suffix }} 2>/dev/null | sed -E 's/.*image:\s*([^\/]+).*/\1/' | sed 's/:.*//' | sort -u || echo ""
-  register: actual_registry_urls
+    grep -h "image:" docker-compose.base.yml docker-compose.{{ application_compose_suffix }} 2>/dev/null | \
+      sed -E 's/.*image:\s*([^\/]+).*/\1/' | \
+      sed -E 's/:([^:]+)$//' | \
+      grep -E '\.(de|com|org|net|io|dev)|:[0-9]+|localhost' | \
+      sort -u || echo ""
+  register: actual_registry_urls_raw
   changed_when: false
   failed_when: false
-- name: Set list of registries to login to
+- name: Extract full registry URLs with ports from docker-compose files
+  ansible.builtin.shell: |
+    cd {{ application_code_dest }}
+    grep -h "image:" docker-compose.base.yml docker-compose.{{ application_compose_suffix }} 2>/dev/null | \
+      sed -E 's/.*image:\s*([^\/]+).*/\1/' | \
+      sed -E 's/:([^:]+)$//' | \
+      sort -u || echo ""
+  register: actual_registry_urls_full
+  changed_when: false
+  failed_when: false
+- name: Set list of registries to login to (filter out service names, preserve ports)
   ansible.builtin.set_fact:
     registries_to_login: >-
-      {%- set found_registries = actual_registry_urls.stdout | trim | split('\n') | select('match', '.+') | list -%}
+      {%- set found_registries = actual_registry_urls_full.stdout | trim | split('\n') | select('match', '.+') | list -%}
+      {%- set filtered_registries = [] -%}
+      {%- for reg in found_registries -%}
+        {%- if reg | regex_search('\.(de|com|org|net|io|dev)') or reg | regex_search(':[0-9]+') or reg == 'localhost' -%}
+          {%- set _ = filtered_registries.append(reg) -%}
+        {%- endif -%}
+      {%- endfor -%}
       {%- set default_registry = [docker_registry] -%}
-      {%- if found_registries | length > 0 -%}
-      {{ found_registries | unique | list }}
+      {%- if filtered_registries | length > 0 -%}
+      {{ filtered_registries | unique | list }}
       {%- else -%}
       {{ default_registry }}
       {%- endif -%}
@@ -260,14 +281,14 @@
   when:
     - registry_password | string | trim != ''
     - registry_accessible == 'true'
-  loop: "{{ registries_to_login }}"
+  loop: "{{ registries_to_login | default([docker_registry]) }}"
   no_log: yes
   register: docker_login_results
   failed_when: false
 - name: Display login results
   ansible.builtin.debug:
-    msg: "Docker login to {{ item.item }}: {% if item.failed %}FAILED{% else %}SUCCESS{% endif %}"
+    msg: "Docker login to {{ item.item }}: {% if item.failed %}FAILED ({{ item.msg | default('unknown error') }}){% else %}SUCCESS{% endif %}"
   when:
     - registry_password | string | trim != ''
     - registry_accessible == 'true'
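
As a worked example of the fact above (candidate values are illustrative, built from image names used in this repo): if the extraction step yielded the candidates git.michaelschiemer.de:5000, minio and redis on stdout, the filter keeps only the entry with a TLD and port, and the fact evaluates to:

registries_to_login: ['git.michaelschiemer.de:5000']  # minio and redis dropped as service names

If no candidate had matched the TLD/port/localhost test, the fallback branch would return default_registry instead:

registries_to_login: ['localhost:5000']  # docker_registry from the staging vars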

View File

@@ -0,0 +1,89 @@
---
# Ansible Playbook: Fix Gitea-Traefik Connection Issues
# Purpose: Ensure Traefik can reliably reach Gitea by restarting the affected
#          services when the direct connection check fails
# Usage:
#   ansible-playbook -i inventory/production.yml playbooks/fix-gitea-traefik-connection.yml \
#     --vault-password-file secrets/.vault_pass

- name: Fix Gitea-Traefik Connection
  hosts: production
  vars:
    gitea_stack_path: "{{ stacks_base_path }}/gitea"
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    gitea_url: "https://{{ gitea_domain }}"
  tasks:
    - name: Get current Gitea container IP
      shell: |
        docker inspect gitea | grep -A 10 'traefik-public' | grep IPAddress | head -1 | awk '{print $2}' | tr -d '",'
      register: gitea_ip
      changed_when: false
      failed_when: false

    - name: Display Gitea IP
      debug:
        msg: "Gitea container IP in traefik-public network: {{ gitea_ip.stdout }}"

    - name: Test direct connection to Gitea from Traefik container
      shell: |
        docker compose -f {{ traefik_stack_path }}/docker-compose.yml exec -T traefik wget -qO- http://{{ gitea_ip.stdout }}:3000/api/healthz 2>&1 | head -3
      register: traefik_gitea_test
      changed_when: false
      failed_when: false

    - name: Display Traefik-Gitea connection test result
      debug:
        msg: "{{ traefik_gitea_test.stdout }}"

    - name: Restart Gitea container to refresh IP
      shell: |
        docker compose -f {{ gitea_stack_path }}/docker-compose.yml restart gitea
      when: traefik_gitea_test.rc != 0

    - name: Wait for Gitea to be ready
      uri:
        url: "{{ gitea_url }}/api/healthz"
        method: GET
        status_code: [200]
        validate_certs: false
        timeout: 10
      register: gitea_health
      until: gitea_health.status == 200
      retries: 30
      delay: 2
      changed_when: false
      when: traefik_gitea_test.rc != 0

    - name: Restart Traefik to refresh service discovery
      shell: |
        docker compose -f {{ traefik_stack_path }}/docker-compose.yml restart traefik
      when: >
        traefik_gitea_test.rc != 0
        and (traefik_auto_restart | default(false) | bool)

    - name: Wait for Traefik to be ready
      pause:
        seconds: 10
      when: traefik_gitea_test.rc != 0

    - name: Test Gitea via Traefik
      uri:
        url: "{{ gitea_url }}/api/healthz"
        method: GET
        status_code: [200]
        validate_certs: false
        timeout: 10
      register: final_test
      changed_when: false

    - name: Display result
      debug:
        msg: |
          Gitea-Traefik connection test:
          - Direct connection: {{ 'OK' if traefik_gitea_test.rc == 0 else 'FAILED' }}
          - Via Traefik: {{ 'OK' if final_test.status == 200 else 'FAILED' }}
          {% if traefik_gitea_test.rc != 0 %}
          Services were restarted to refresh connections (Traefik only if traefik_auto_restart is enabled).
          {% endif %}
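
As an aside, the grep/awk pipeline used above to extract the container IP could also be expressed with docker inspect's built-in Go templating; a sketch with equivalent intent (not part of this commit):

docker inspect -f '{{ (index .NetworkSettings.Networks "traefik-public").IPAddress }}' gitea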

View File

@@ -0,0 +1,79 @@
---
# Ansible Playbook: Update Gitea Traefik Service with Current IP
#
# ⚠️ DEPRECATED: This playbook is no longer needed since Traefik runs in bridge network mode.
# Service discovery via Docker labels works reliably in bridge mode, so manual IP updates
# are not required. This playbook is kept for reference only.
#
# Purpose: Update Traefik dynamic config with current Gitea container IP
# Usage:
#   ansible-playbook -i inventory/production.yml playbooks/update-gitea-traefik-service.yml \
#     --vault-password-file secrets/.vault_pass

- name: Update Gitea Traefik Service with Current IP
  hosts: production
  vars:
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    gitea_url: "https://{{ gitea_domain }}"
  tasks:
    - name: Fail because this playbook is deprecated
      ansible.builtin.fail:
        msg: |
          ⚠️ This playbook is DEPRECATED and should not be used.
          Traefik service discovery via Docker labels works reliably in bridge mode.
          If you really need to run this, set traefik_auto_restart=true explicitly.
      when: not (traefik_auto_restart | default(false) | bool)

    - name: Get current Gitea container IP in traefik-public network
      shell: |
        docker inspect gitea | grep -A 10 'traefik-public' | grep IPAddress | head -1 | awk '{print $2}' | tr -d '",'
      register: gitea_ip
      changed_when: false

    - name: Display Gitea IP
      debug:
        msg: "Gitea container IP: {{ gitea_ip.stdout }}"

    - name: Create Gitea service configuration with current IP
      copy:
        dest: "{{ traefik_stack_path }}/dynamic/gitea-service.yml"
        content: |
          http:
            services:
              gitea:
                loadBalancer:
                  servers:
                    - url: http://{{ gitea_ip.stdout }}:3000
        mode: '0644'

    - name: Restart Traefik to load new configuration
      shell: |
        docker compose -f {{ traefik_stack_path }}/docker-compose.yml restart traefik
      when: traefik_auto_restart | default(false) | bool

    - name: Wait for Traefik to be ready
      pause:
        seconds: 10

    - name: Test Gitea via Traefik
      uri:
        url: "{{ gitea_url }}/api/healthz"
        method: GET
        status_code: [200]
        validate_certs: false
        timeout: 10
      register: final_test
      retries: 5
      delay: 2
      changed_when: false

    - name: Display result
      debug:
        msg: |
          Gitea-Traefik connection:
          - Gitea IP: {{ gitea_ip.stdout }}
          - Via Traefik: {{ 'OK' if final_test.status == 200 else 'FAILED' }}
          Note: This is a temporary fix. The IP will need to be updated if the container restarts.
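
If this deprecated playbook really must be run, the fail guard can be bypassed explicitly, as its warning message suggests:

ansible-playbook -i inventory/production.yml playbooks/update-gitea-traefik-service.yml \
  -e traefik_auto_restart=true \
  --vault-password-file secrets/.vault_pass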

View File

@@ -18,7 +18,7 @@ traefik_check_health: true
 traefik_show_status: true
 # Config Deployment
-traefik_auto_restart: true # Automatically restart after config deployment
+traefik_auto_restart: false # Automatically restart after config deployment (default: false to prevent restart loops)
 # Logs Configuration
 traefik_logs_tail: 100