fix: prevent Traefik restart loops and improve Docker registry login
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Successful in 31s
Security Vulnerability Scan / Composer Security Audit (push) Has been cancelled
Security Vulnerability Scan / Check for Dependency Changes (push) Has been cancelled
🚀 Build & Deploy Image / Build Docker Image (push) Has been cancelled
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been cancelled
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been cancelled
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been cancelled
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been cancelled
Registry Login Fixes:
- Filter out service names (minio, redis) from registry URL extraction
- Only recognize actual registry URLs (with a TLD or port)
- Preserve port numbers in registry URLs (e.g. git.michaelschiemer.de:5000)
- Better error messages for failed logins

Traefik Restart Loop Prevention:
- Set the traefik_auto_restart default to false in the traefik role
- Add traefik_auto_restart, traefik_ssl_restart, gitea_auto_restart to staging vars
- Add a guard to the restart task in fix-gitea-traefik-connection.yml
- Add a guard and a deprecation warning to update-gitea-traefik-service.yml

This ensures that:
- CI/CD pipelines won't cause Traefik restart loops
- Staging uses the same safe defaults as production
- Deprecated playbooks fail by default unless explicitly enabled
- Only actual Docker registries are used for login, not service names
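A minimal sketch of the new filtering rule (the sample names below are assumed for illustration; the playbook derives the real candidates from the docker-compose files): only candidates containing a known TLD, an explicit port, or localhost count as registries, so bare service names never reach docker login.

    # Which extracted candidates survive the registry filter?
    printf '%s\n' minio redis git.michaelschiemer.de localhost:5000 \
      | grep -E '\.(de|com|org|net|io|dev)|:[0-9]+|localhost'
    # Output: git.michaelschiemer.de
    #         localhost:5000
    # "minio" and "redis" are dropped: no TLD, no port, not localhost.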
deployment/ansible/inventory/group_vars/staging/vars.yml (new file, 113 lines)
@@ -0,0 +1,113 @@
---
# Staging Deployment - Centralized Variables
# These variables are used across all staging playbooks

# System Maintenance
system_update_packages: true
system_apt_upgrade: dist
system_enable_unattended_upgrades: true
system_enable_unattended_reboot: false
system_unattended_reboot_time: "02:00"
system_enable_unattended_timer: true
system_enable_docker_prune: false

# Deployment Paths
deploy_user_home: "/home/deploy"
stacks_base_path: "/home/deploy/deployment/stacks"
staging_stack_path: "{{ stacks_base_path }}/staging"
postgresql_staging_stack_path: "{{ stacks_base_path }}/postgresql-staging"
backups_path: "{{ deploy_user_home }}/deployment/backups"

# Docker Registry
docker_registry: "localhost:5000"
docker_registry_url: "localhost:5000"
docker_registry_external: "registry.michaelschiemer.de"
docker_registry_username_default: "admin"
# docker_registry_password_default should be set in vault as vault_docker_registry_password
# If not using vault, override via -e docker_registry_password_default="your-password"
docker_registry_password_default: ""
registry_auth_path: "{{ stacks_base_path }}/registry/auth"

# Application Configuration
app_name: "framework"
app_domain: "staging.michaelschiemer.de"
staging_domain: "{{ app_domain }}"
app_image: "{{ docker_registry }}/{{ app_name }}"
app_image_external: "{{ docker_registry_external }}/{{ app_name }}"

# Domain Configuration
gitea_domain: "git.michaelschiemer.de"

# Email Configuration
mail_from_address: "noreply@{{ app_domain }}"
acme_email: "kontakt@michaelschiemer.de"

# SSL Certificate Domains
ssl_domains:
  - "{{ gitea_domain }}"
  - "{{ app_domain }}"
  - "michaelschiemer.de"

# Health Check Configuration
health_check_url: "https://{{ app_domain }}/health"
health_check_retries: 10
health_check_delay: 10

# Rollback Configuration
max_rollback_versions: 3
rollback_timeout: 300

# Wait Timeouts
wait_timeout: 60

# Git Configuration (for sync-code.yml)
git_repository_url_default: "https://{{ gitea_domain }}/michael/michaelschiemer.git"
git_branch_default: "staging"
git_token: "{{ vault_git_token | default('') }}"
git_username: "{{ vault_git_username | default('') }}"
git_password: "{{ vault_git_password | default('') }}"

# Database Configuration
db_user_default: "postgres"
db_name_default: "michaelschiemer_staging"
db_host_default: "postgres-staging"

# MinIO Object Storage Configuration
minio_root_user: "{{ vault_minio_root_user | default('minioadmin') }}"
minio_root_password: "{{ vault_minio_root_password | default('') }}"
minio_api_domain: "minio-api.michaelschiemer.de"
minio_console_domain: "minio.michaelschiemer.de"

# WireGuard Configuration
wireguard_interface: "wg0"
wireguard_config_path: "/etc/wireguard"
wireguard_port_default: 51820
wireguard_network_default: "10.8.0.0/24"
wireguard_server_ip_default: "10.8.0.1"
wireguard_enable_ip_forwarding: true
wireguard_config_file: "{{ wireguard_config_path }}/{{ wireguard_interface }}.conf"
wireguard_private_key_file: "{{ wireguard_config_path }}/{{ wireguard_interface }}_private.key"
wireguard_public_key_file: "{{ wireguard_config_path }}/{{ wireguard_interface }}_public.key"
wireguard_client_configs_path: "{{ wireguard_config_path }}/clients"

# WireGuard DNS Configuration
# DNS server for VPN clients (points to VPN server IP)
# This ensures internal services are resolved to VPN IPs
wireguard_dns_servers:
  - "{{ wireguard_server_ip_default }}"

# Traefik Configuration
# Disable automatic restarts after config deployment to prevent restart loops
# Set to true only when explicitly needed (e.g., after major config changes)
traefik_auto_restart: false

# Traefik SSL Configuration
# Disable automatic restarts during SSL certificate setup to prevent restart loops
traefik_ssl_restart: false

# Gitea Auto-Restart Configuration
# Set to false to prevent automatic restarts when healthcheck fails
# This prevents restart loops when Gitea is temporarily unavailable (e.g., during Traefik restarts)
# Set to true only when explicitly needed for remediation
gitea_auto_restart: false
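The comment in the Docker Registry block expects the password in vault as vault_docker_registry_password. A sketch of one way to set it with standard ansible-vault commands (the vault file path and secrets/.vault_pass location are assumptions based on this repo's layout):

    # Edit the staging vault in place (assumed path):
    ansible-vault edit deployment/ansible/inventory/group_vars/staging/vault.yml \
      --vault-password-file secrets/.vault_pass
    # Or produce a single encrypted value to paste into the vault file:
    ansible-vault encrypt_string 'your-password' \
      --name 'vault_docker_registry_password' \
      --vault-password-file secrets/.vault_pass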
Docker registry login tasks:
@@ -233,21 +233,42 @@
   ignore_errors: yes
   changed_when: false

-- name: Determine actual registry URLs from docker-compose files
+- name: Extract registry URLs from docker-compose files (preserve port if present)
   ansible.builtin.shell: |
     cd {{ application_code_dest }}
-    grep -h "image:" docker-compose.base.yml docker-compose.{{ application_compose_suffix }} 2>/dev/null | sed -E 's/.*image:\s*([^\/]+).*/\1/' | sed 's/:.*//' | sort -u || echo ""
-  register: actual_registry_urls
+    grep -h "image:" docker-compose.base.yml docker-compose.{{ application_compose_suffix }} 2>/dev/null | \
+      sed -E 's/.*image:\s*([^\/]+).*/\1/' | \
+      sed -E 's/:([^:]+)$//' | \
+      grep -E '\.(de|com|org|net|io|dev)|:[0-9]+|localhost' | \
+      sort -u || echo ""
+  register: actual_registry_urls_raw
   changed_when: false
   failed_when: false

-- name: Set list of registries to login to
+- name: Extract full registry URLs with ports from docker-compose files
+  ansible.builtin.shell: |
+    cd {{ application_code_dest }}
+    grep -h "image:" docker-compose.base.yml docker-compose.{{ application_compose_suffix }} 2>/dev/null | \
+      sed -E 's/.*image:\s*([^\/]+).*/\1/' | \
+      sed -E 's/:([^:]+)$//' | \
+      sort -u || echo ""
+  register: actual_registry_urls_full
+  changed_when: false
+  failed_when: false
+
+- name: Set list of registries to login to (filter out service names, preserve ports)
   ansible.builtin.set_fact:
     registries_to_login: >-
-      {%- set found_registries = actual_registry_urls.stdout | trim | split('\n') | select('match', '.+') | list -%}
+      {%- set found_registries = actual_registry_urls_full.stdout | trim | split('\n') | select('match', '.+') | list -%}
+      {%- set filtered_registries = [] -%}
+      {%- for reg in found_registries -%}
+      {%- if reg | regex_search('\.(de|com|org|net|io|dev)') or reg | regex_search(':[0-9]+') or reg == 'localhost' -%}
+      {%- set _ = filtered_registries.append(reg) -%}
+      {%- endif -%}
+      {%- endfor -%}
       {%- set default_registry = [docker_registry] -%}
-      {%- if found_registries | length > 0 -%}
-      {{ found_registries | unique | list }}
+      {%- if filtered_registries | length > 0 -%}
+      {{ filtered_registries | unique | list }}
       {%- else -%}
       {{ default_registry }}
       {%- endif -%}
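To see what the extraction pipeline above produces, it can be run by hand against a sample compose file (sample content assumed; GNU sed, where \s is supported):

    cat > /tmp/compose-sample.yml <<'EOF'
    services:
      app:
        image: git.michaelschiemer.de:5000/framework:latest
      cache:
        image: redis:7-alpine
    EOF
    grep -h "image:" /tmp/compose-sample.yml \
      | sed -E 's/.*image:\s*([^\/]+).*/\1/' \
      | sed -E 's/:([^:]+)$//' \
      | grep -E '\.(de|com|org|net|io|dev)|:[0-9]+|localhost' \
      | sort -u
    # Output: git.michaelschiemer.de
    # "redis" is extracted from the second image line but filtered out;
    # the registry host from the first line survives.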
@@ -260,14 +281,14 @@
   when:
     - registry_password | string | trim != ''
    - registry_accessible == 'true'
-  loop: "{{ registries_to_login }}"
+  loop: "{{ registries_to_login | default([docker_registry]) }}"
   no_log: yes
   register: docker_login_results
   failed_when: false

 - name: Display login results
   ansible.builtin.debug:
-    msg: "Docker login to {{ item.item }}: {% if item.failed %}FAILED{% else %}SUCCESS{% endif %}"
+    msg: "Docker login to {{ item.item }}: {% if item.failed %}FAILED ({{ item.msg | default('unknown error') }}){% else %}SUCCESS{% endif %}"
   when:
     - registry_password | string | trim != ''
     - registry_accessible == 'true'
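When a login reports FAILED, the same login can be reproduced by hand to see the full daemon error (the credentials and registry below are placeholders; use the values from your vault):

    # Placeholder credentials; --password-stdin avoids leaking via argv.
    echo "$REGISTRY_PASSWORD" | docker login git.michaelschiemer.de:5000 \
      -u admin --password-stdin
    # A port in the registry URL is significant: git.michaelschiemer.de:5000
    # and git.michaelschiemer.de are distinct entries in ~/.docker/config.json,
    # which is why the extraction above tries to keep the port.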
playbooks/fix-gitea-traefik-connection.yml (new file, 89 lines)
@@ -0,0 +1,89 @@
---
# Ansible Playbook: Fix Gitea-Traefik Connection Issues
# Purpose: Ensure Traefik can reliably reach Gitea by restarting both services
# Usage:
#   ansible-playbook -i inventory/production.yml playbooks/fix-gitea-traefik-connection.yml \
#     --vault-password-file secrets/.vault_pass

- name: Fix Gitea-Traefik Connection
  hosts: production
  vars:
    gitea_stack_path: "{{ stacks_base_path }}/gitea"
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    gitea_url: "https://{{ gitea_domain }}"

  tasks:
    - name: Get current Gitea container IP
      shell: |
        docker inspect gitea | grep -A 10 'traefik-public' | grep IPAddress | head -1 | awk '{print $2}' | tr -d '",'
      register: gitea_ip
      changed_when: false
      failed_when: false

    - name: Display Gitea IP
      debug:
        msg: "Gitea container IP in traefik-public network: {{ gitea_ip.stdout }}"

    - name: Test direct connection to Gitea from Traefik container
      shell: |
        docker compose -f {{ traefik_stack_path }}/docker-compose.yml exec -T traefik wget -qO- http://{{ gitea_ip.stdout }}:3000/api/healthz 2>&1 | head -3
      register: traefik_gitea_test
      changed_when: false
      failed_when: false

    - name: Display Traefik-Gitea connection test result
      debug:
        msg: "{{ traefik_gitea_test.stdout }}"

    - name: Restart Gitea container to refresh IP
      shell: |
        docker compose -f {{ gitea_stack_path }}/docker-compose.yml restart gitea
      when: traefik_gitea_test.rc != 0

    - name: Wait for Gitea to be ready
      uri:
        url: "{{ gitea_url }}/api/healthz"
        method: GET
        status_code: [200]
        validate_certs: false
        timeout: 10
      register: gitea_health
      until: gitea_health.status == 200
      retries: 30
      delay: 2
      changed_when: false
      when: traefik_gitea_test.rc != 0

    - name: Restart Traefik to refresh service discovery
      shell: |
        docker compose -f {{ traefik_stack_path }}/docker-compose.yml restart traefik
      when: >
        traefik_gitea_test.rc != 0
        and (traefik_auto_restart | default(false) | bool)

    - name: Wait for Traefik to be ready
      pause:
        seconds: 10
      when: traefik_gitea_test.rc != 0

    - name: Test Gitea via Traefik
      uri:
        url: "{{ gitea_url }}/api/healthz"
        method: GET
        status_code: [200]
        validate_certs: false
        timeout: 10
      register: final_test
      changed_when: false

    - name: Display result
      debug:
        msg: |
          Gitea-Traefik connection test:
          - Direct connection: {{ 'OK' if traefik_gitea_test.rc == 0 else 'FAILED' }}
          - Via Traefik: {{ 'OK' if final_test.status == 200 else 'FAILED' }}

          {% if traefik_gitea_test.rc != 0 %}
          Both services have been restarted to refresh connections.
          {% endif %}
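With the new guard, the Traefik restart task in this playbook is skipped unless traefik_auto_restart is set. A sketch of an explicit remediation run, based on the usage comment at the top of the playbook, with the guard variable passed as an extra var for this one run:

    ansible-playbook -i inventory/production.yml \
      playbooks/fix-gitea-traefik-connection.yml \
      --vault-password-file secrets/.vault_pass \
      -e traefik_auto_restart=true
    # Without -e traefik_auto_restart=true, the diagnostics and the Gitea
    # restart still run, but the Traefik restart is skipped.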
playbooks/update-gitea-traefik-service.yml (new file, 79 lines)
@@ -0,0 +1,79 @@
---
# Ansible Playbook: Update Gitea Traefik Service with Current IP
#
# ⚠️ DEPRECATED: This playbook is no longer needed since Traefik runs in bridge network mode.
# Service discovery via Docker labels works reliably in bridge mode, so manual IP updates
# are not required. This playbook is kept for reference only.
#
# Purpose: Update Traefik dynamic config with current Gitea container IP
# Usage:
#   ansible-playbook -i inventory/production.yml playbooks/update-gitea-traefik-service.yml \
#     --vault-password-file secrets/.vault_pass

- name: Update Gitea Traefik Service with Current IP
  hosts: production
  vars:
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    gitea_url: "https://{{ gitea_domain }}"

  tasks:
    - name: Warn that this playbook is deprecated
      ansible.builtin.fail:
        msg: |
          ⚠️ This playbook is DEPRECATED and should not be used.
          Traefik service discovery via Docker labels works reliably in bridge mode.
          If you really need to run this, set traefik_auto_restart=true explicitly.
      when: traefik_auto_restart | default(false) | bool == false

    - name: Get current Gitea container IP in traefik-public network
      shell: |
        docker inspect gitea | grep -A 10 'traefik-public' | grep IPAddress | head -1 | awk '{print $2}' | tr -d '",'
      register: gitea_ip
      changed_when: false

    - name: Display Gitea IP
      debug:
        msg: "Gitea container IP: {{ gitea_ip.stdout }}"

    - name: Create Gitea service configuration with current IP
      copy:
        dest: "{{ traefik_stack_path }}/dynamic/gitea-service.yml"
        content: |
          http:
            services:
              gitea:
                loadBalancer:
                  servers:
                    - url: http://{{ gitea_ip.stdout }}:3000
        mode: '0644'

    - name: Restart Traefik to load new configuration
      shell: |
        docker compose -f {{ traefik_stack_path }}/docker-compose.yml restart traefik
      when: traefik_auto_restart | default(false) | bool

    - name: Wait for Traefik to be ready
      pause:
        seconds: 10

    - name: Test Gitea via Traefik
      uri:
        url: "{{ gitea_url }}/api/healthz"
        method: GET
        status_code: [200]
        validate_certs: false
        timeout: 10
      register: final_test
      retries: 5
      delay: 2
      changed_when: false

    - name: Display result
      debug:
        msg: |
          Gitea-Traefik connection:
          - Gitea IP: {{ gitea_ip.stdout }}
          - Via Traefik: {{ 'OK' if final_test.status == 200 else 'FAILED' }}

          Note: This is a temporary fix. The IP will need to be updated if the container restarts.
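Both playbooks parse docker inspect output with grep/awk. A sketch of a more robust equivalent using a Go template, assuming the network really is named traefik-public as the grep does:

    # Ask the Docker engine for the IP directly instead of grepping JSON.
    docker inspect gitea \
      --format '{{ (index .NetworkSettings.Networks "traefik-public").IPAddress }}'
    # Prints e.g. 172.19.0.5, and fails with a clear template error
    # if the network name is wrong, instead of returning an empty string.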
traefik role defaults:
@@ -18,7 +18,7 @@ traefik_check_health: true
 traefik_show_status: true

 # Config Deployment
-traefik_auto_restart: true   # Automatically restart after config deployment
+traefik_auto_restart: false  # Automatically restart after config deployment (default: false to prevent restart loops)

 # Logs Configuration
 traefik_logs_tail: 100
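To confirm the new default actually stops a loop, restart activity can be watched from the host with the standard Docker CLI (the container name traefik is assumed, matching the compose stack above):

    # How many times has the daemon restarted this container?
    docker inspect --format '{{ .RestartCount }}' traefik
    # Stream restart/die events live while a deployment runs:
    docker events --filter 'container=traefik' \
      --filter 'event=restart' --filter 'event=die'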