Files
michaelschiemer/deployment/ansible/playbooks/redeploy-traefik-gitea.yml
Michael Schiemer f548a0322c
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Successful in 28s
Security Vulnerability Scan / Check for Dependency Changes (push) Successful in 35s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Successful in 18s
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
Security Vulnerability Scan / Composer Security Audit (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Successful in 17s
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Failing after 1m9s
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
fix: prevent CI jobs from restarting Traefik
- Add traefik_auto_restart check to fix-gitea-timeouts.yml
- Add traefik_auto_restart check to fix-gitea-ssl-routing.yml
- Add traefik_auto_restart check to fix-gitea-complete.yml
- Set traefik_auto_restart=false in all Gitea workflow Ansible calls
- Set gitea_auto_restart=false in all Gitea workflow Ansible calls
- Add redeploy-traefik-gitea.yml playbook for clean redeployment

This prevents CI/CD pipelines from causing Traefik restart loops by
ensuring all remediation playbooks respect the traefik_auto_restart
flag, which is set to false in group_vars/production/vars.yml.
2025-11-08 23:47:44 +01:00

364 lines
13 KiB
YAML

---
# Redeploy Traefik and Gitea Stacks
# Purpose: Clean redeployment of Traefik and Gitea stacks to fix service discovery issues
# This playbook:
# - Stops and removes containers (but keeps volumes and acme.json)
# - Redeploys both stacks with fresh containers
# - Reinitializes service discovery
# - Verifies everything works
#
# Usage:
# ansible-playbook -i inventory/production.yml playbooks/redeploy-traefik-gitea.yml \
# --vault-password-file secrets/.vault_pass
- name: Redeploy Traefik and Gitea Stacks
  hosts: production
  # Facts must be gathered: tasks in this play read ansible_date_time.epoch.
  gather_facts: true
  # Unprivileged by default; tasks that need root set become themselves.
  become: false
  vars:
    # stacks_base_path and gitea_domain are not defined in this play;
    # presumably they come from inventory/group_vars — TODO confirm.
    traefik_container_name: "traefik"
    gitea_container_name: "gitea"
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    gitea_stack_path: "{{ stacks_base_path }}/gitea"
    gitea_url: "https://{{ gitea_domain }}"
  tasks:
# ========================================
# 1. PREPARATION
# ========================================

# Print the plan first so the operator can see what the run will do.
- name: Display redeployment plan
  ansible.builtin.debug:
    msg: |
      ================================================================================
      TRAEFIK + GITEA REDEPLOYMENT PLAN
      ================================================================================
      This playbook will:
      1. ✅ Sync latest stack configurations
      2. ✅ Stop and remove Traefik containers (keeps acme.json)
      3. ✅ Stop and remove Gitea containers (keeps volumes/data)
      4. ✅ Redeploy Traefik stack
      5. ✅ Redeploy Gitea stack
      6. ✅ Verify service discovery
      7. ✅ Test Gitea accessibility
      ⚠️ IMPORTANT:
      - SSL certificates (acme.json) will be preserved
      - Gitea data (volumes) will be preserved
      - Only containers will be recreated
      - Expected downtime: ~2-5 minutes
      ================================================================================

# Deliberately disabled (when: false): the role-based sync never runs.
# It is kept as a placeholder next to the manual sync.
- name: Sync infrastructure stacks to server
  when: false  # Skip for now, we'll do it manually
  vars:
    traefik_auto_restart: false  # Don't restart during sync
  ansible.builtin.include_role:
    name: traefik
    tasks_from: deploy
# Push the repository's stacks/ tree to the server. Nothing is deleted
# remotely (delete: false), and runtime state is excluded so the sync
# cannot overwrite it: data/volumes directories, logs, acme.json and
# key/pem material.
- name: Sync stacks directory to production server
  ansible.builtin.synchronize:
    src: "{{ playbook_dir }}/../../stacks/"
    dest: "{{ stacks_base_path }}/"
    recursive: true
    delete: false
    rsync_opts:
      # Directories become 755, files 644 on the target.
      - "--chmod=D755,F644"
      - "--exclude=.git"
      - "--exclude=*.log"
      - "--exclude=data/"
      - "--exclude=volumes/"
      - "--exclude=acme.json"  # Preserve SSL certificates
      - "--exclude=*.key"
      - "--exclude=*.pem"
# ========================================
# 2. TRAEFIK REDEPLOYMENT
# ========================================

# Read-only snapshot of the Traefik service before anything is touched.
# Prints NOT_RUNNING when `docker compose ps` itself fails.
- name: Check Traefik container status (before)
  changed_when: false
  register: traefik_status_before
  ansible.builtin.shell: |
    cd {{ traefik_stack_path }}
    docker compose ps {{ traefik_container_name }} 2>/dev/null || echo "NOT_RUNNING"

- name: Display Traefik status (before)
  ansible.builtin.debug:
    msg: |
      Traefik Status (Before):
      {{ traefik_status_before.stdout }}
# Record whether a certificate store exists before the stack is torn down.
- name: Check if acme.json exists
  ansible.builtin.stat:
    path: "{{ traefik_stack_path }}/acme.json"
  register: acme_json_stat

# Timestamped copy of acme.json, taken before any container is stopped.
# The suffix comes from the gathered fact ansible_date_time.epoch, so it
# is the same value for the whole run.
#
# No changed_when here: the copy module does not return `rc`, so the
# former `changed_when: acme_backup.rc == 0` raised an undefined-attribute
# error and failed the task whenever acme.json existed. copy reports
# `changed` correctly on its own.
#
# The backup stays best-effort (failed_when: false), as before.
- name: Backup acme.json (safety measure)
  ansible.builtin.copy:
    src: "{{ traefik_stack_path }}/acme.json"
    dest: "{{ traefik_stack_path }}/acme.json.backup.{{ ansible_date_time.epoch }}"
    remote_src: true
    mode: '0600'
  when: acme_json_stat.stat.exists
  register: acme_backup
  failed_when: false
# Tear down the Traefik compose project. `cd … &&` ensures that
# `docker compose down` never runs in another directory if the stack path
# is missing. Failures are tolerated (failed_when: false) because the
# stack may already be down.
- name: Stop Traefik stack
  ansible.builtin.shell: |
    cd {{ traefik_stack_path }} && docker compose down
  register: traefik_stop
  changed_when: traefik_stop.rc == 0
  failed_when: false

# Force-remove a leftover container with exactly the configured name.
# Docker's `name` filter is a substring/regex match, so the name is
# anchored (`^/?name$`, with or without Docker's leading slash); an
# unanchored filter would also remove any container that merely contains
# the name.
# `docker rm` prints what it removed, so non-empty stdout means "changed".
- name: Remove Traefik containers (if any remain)
  ansible.builtin.shell: |
    docker ps -a --filter 'name=^/?{{ traefik_container_name }}$' --format "{{ '{{' }}.ID{{ '}}' }}" | xargs -r docker rm -f 2>/dev/null || true
  register: traefik_remove
  changed_when: traefik_remove.stdout | default('') | trim | length > 0
  failed_when: false
# Order matters in this group. Touching the file first always creates it,
# which made the old "restore if it was deleted" condition impossible to
# satisfy. The state is now inspected and restored before the touch.

- name: Check acme.json state after stopping Traefik
  ansible.builtin.stat:
    path: "{{ traefik_stack_path }}/acme.json"
  register: acme_json_after_stop

# Look for the backup file written earlier in this run. The name uses the
# same ansible_date_time.epoch fact, which is constant for the whole run.
- name: Check for the acme.json backup taken during this run
  ansible.builtin.stat:
    path: "{{ traefik_stack_path }}/acme.json.backup.{{ ansible_date_time.epoch }}"
  register: acme_json_backup_stat

# Restore only when a non-empty backup exists and the live file is gone or
# empty. Best-effort (failed_when: false), as in the original task.
- name: Restore acme.json from backup if it was deleted
  ansible.builtin.copy:
    src: "{{ traefik_stack_path }}/acme.json.backup.{{ ansible_date_time.epoch }}"
    dest: "{{ traefik_stack_path }}/acme.json"
    remote_src: true
    mode: '0600'
  when:
    - acme_json_backup_stat.stat.exists
    - (acme_json_backup_stat.stat.size | default(0)) > 0
    - >-
      not acme_json_after_stop.stat.exists
      or (acme_json_after_stop.stat.size | default(0)) == 0
  failed_when: false

# Create the file if it is still missing and set its mode to 0600 and its
# owner/group to the connecting user. Needs privilege escalation because
# the existing file may be owned by another user.
- name: Ensure acme.json exists and has correct permissions
  ansible.builtin.file:
    path: "{{ traefik_stack_path }}/acme.json"
    state: touch
    mode: '0600'
    owner: "{{ ansible_user }}"
    group: "{{ ansible_user }}"
  become: true
  register: acme_json_ensure

# Kept so that acme_json_after_ensure stays defined for any consumer.
- name: Check if acme.json exists after ensure
  ansible.builtin.stat:
    path: "{{ traefik_stack_path }}/acme.json"
  register: acme_json_after_ensure
# Bring the Traefik compose project up, always pulling images first.
- name: Deploy Traefik stack
  register: traefik_deploy
  community.docker.docker_compose_v2:
    project_src: "{{ traefik_stack_path }}"
    pull: always
    state: present

# Poll `docker compose ps` until its output contains "Up" or "running":
# up to 12 attempts, 5 seconds apart. The task fails if that never happens.
- name: Wait for Traefik to be ready
  retries: 12
  delay: 5
  until: traefik_ready.rc == 0
  register: traefik_ready
  changed_when: false
  failed_when: traefik_ready.rc != 0
  ansible.builtin.shell: |
    cd {{ traefik_stack_path }}
    docker compose ps {{ traefik_container_name }} | grep -Eiq "Up|running"

# Read-only snapshot used by the status display below.
- name: Check Traefik container status (after)
  changed_when: false
  register: traefik_status_after
  ansible.builtin.shell: |
    cd {{ traefik_stack_path }}
    docker compose ps {{ traefik_container_name }}

- name: Display Traefik status (after)
  ansible.builtin.debug:
    msg: |
      Traefik Status (After):
      {{ traefik_status_after.stdout }}
# ========================================
# 3. GITEA REDEPLOYMENT
# ========================================

# Read-only snapshot of the Gitea service before it is stopped.
# Prints NOT_RUNNING when `docker compose ps` itself fails.
- name: Check Gitea container status (before)
  changed_when: false
  register: gitea_status_before
  ansible.builtin.shell: |
    cd {{ gitea_stack_path }}
    docker compose ps {{ gitea_container_name }} 2>/dev/null || echo "NOT_RUNNING"

- name: Display Gitea status (before)
  ansible.builtin.debug:
    msg: |
      Gitea Status (Before):
      {{ gitea_status_before.stdout }}
# Tear down the Gitea compose project. No volume-removal flag is passed to
# `docker compose down`. `cd … &&` ensures that the command never runs in
# another directory if the stack path is missing. Failures are tolerated
# because the stack may already be down.
- name: Stop Gitea stack (preserves volumes)
  ansible.builtin.shell: |
    cd {{ gitea_stack_path }} && docker compose down
  register: gitea_stop
  changed_when: gitea_stop.rc == 0
  failed_when: false

# Force-remove a leftover container with exactly the configured name.
# Docker's `name` filter is a substring/regex match, so the name is
# anchored (`^/?name$`). An unanchored `name=gitea` would also force-remove
# any other container whose name contains "gitea", and this play never
# redeploys those.
# `docker rm` prints what it removed, so non-empty stdout means "changed".
- name: Remove Gitea containers (if any remain, volumes are preserved)
  ansible.builtin.shell: |
    docker ps -a --filter 'name=^/?{{ gitea_container_name }}$' --format "{{ '{{' }}.ID{{ '}}' }}" | xargs -r docker rm -f 2>/dev/null || true
  register: gitea_remove
  changed_when: gitea_remove.stdout | default('') | trim | length > 0
  failed_when: false
# Bring the Gitea compose project up, always pulling images first.
- name: Deploy Gitea stack
  register: gitea_deploy
  community.docker.docker_compose_v2:
    project_src: "{{ gitea_stack_path }}"
    pull: always
    state: present

# Poll `docker compose ps` until its output contains "Up" or "running":
# up to 12 attempts, 5 seconds apart. The task fails if that never happens.
- name: Wait for Gitea to be ready
  retries: 12
  delay: 5
  until: gitea_ready.rc == 0
  register: gitea_ready
  changed_when: false
  failed_when: gitea_ready.rc != 0
  ansible.builtin.shell: |
    cd {{ gitea_stack_path }}
    docker compose ps {{ gitea_container_name }} | grep -Eiq "Up|running"

# Probe Gitea's /api/healthz from inside the container (30 attempts,
# 2 seconds apart). The command prints HEALTHY or NOT_HEALTHY. The task
# never fails (failed_when: false); the result is only reported later.
- name: Wait for Gitea to be healthy
  retries: 30
  delay: 2
  until: gitea_health.stdout == "HEALTHY"
  register: gitea_health
  changed_when: false
  failed_when: false
  ansible.builtin.shell: |
    cd {{ gitea_stack_path }}
    docker compose exec -T {{ gitea_container_name }} curl -f http://localhost:3000/api/healthz 2>&1 | grep -q "status.*pass" && echo "HEALTHY" || echo "NOT_HEALTHY"

# Read-only snapshot used by the status display below.
- name: Check Gitea container status (after)
  changed_when: false
  register: gitea_status_after
  ansible.builtin.shell: |
    cd {{ gitea_stack_path }}
    docker compose ps {{ gitea_container_name }}

- name: Display Gitea status (after)
  ansible.builtin.debug:
    msg: |
      Gitea Status (After):
      {{ gitea_status_after.stdout }}
# ========================================
# 4. SERVICE DISCOVERY VERIFICATION
# ========================================

# Fixed 15-second grace period before the network is inspected.
- name: Wait for Traefik to discover Gitea (service discovery delay)
  ansible.builtin.pause:
    seconds: 15

# Both checks list the container names attached to the traefik-public
# network and print YES when the configured name occurs in that list,
# otherwise NO. They are read-only, so they never report "changed".
- name: Check if Gitea is in traefik-public network
  changed_when: false
  register: gitea_in_network
  ansible.builtin.shell: |
    docker network inspect traefik-public --format '{{ '{{' }}range .Containers{{ '}}' }}{{ '{{' }}.Name{{ '}}' }} {{ '{{' }}end{{ '}}' }}' 2>/dev/null | grep -q {{ gitea_container_name }} && echo "YES" || echo "NO"

- name: Check if Traefik is in traefik-public network
  changed_when: false
  register: traefik_in_network
  ansible.builtin.shell: |
    docker network inspect traefik-public --format '{{ '{{' }}range .Containers{{ '}}' }}{{ '{{' }}.Name{{ '}}' }} {{ '{{' }}end{{ '}}' }}' 2>/dev/null | grep -q {{ traefik_container_name }} && echo "YES" || echo "NO"
# Fetch Gitea's health endpoint from inside the Traefik container.
#
# The exit status of the probe is tested directly. The previous form,
# `wget … | head -5 || echo CONNECTION_FAILED`, took its status from
# `head`, which always succeeds, so CONNECTION_FAILED was never printed
# and the check always looked healthy.
#
# On failure, the first lines of the error output are still printed for
# diagnosis, followed by the CONNECTION_FAILED marker that the status
# displays look for.
- name: Test direct connection from Traefik to Gitea
  ansible.builtin.shell: |
    cd {{ traefik_stack_path }}
    if response=$(docker compose exec -T {{ traefik_container_name }} wget -qO- --timeout=5 http://{{ gitea_container_name }}:3000/api/healthz 2>&1); then
      printf '%s\n' "$response" | head -5
    else
      printf '%s\n' "$response" | head -5
      echo "CONNECTION_FAILED"
    fi
  register: traefik_gitea_direct
  changed_when: false
  failed_when: false

# Summarise the two network-membership checks and the direct probe.
- name: Display network status
  ansible.builtin.debug:
    msg: |
      Network Status:
      - Gitea in traefik-public: {% if gitea_in_network.stdout == 'YES' %}✅{% else %}❌{% endif %}
      - Traefik in traefik-public: {% if traefik_in_network.stdout == 'YES' %}✅{% else %}❌{% endif %}
      - Traefik → Gitea (direct): {% if 'CONNECTION_FAILED' not in traefik_gitea_direct.stdout %}✅{% else %}❌{% endif %}
# ========================================
# 5. FINAL VERIFICATION
# ========================================

# End-to-end probe through the public URL: up to 20 attempts, 3 seconds
# apart. Certificate validation is off. The task never fails
# (failed_when: false); the outcome is only reported in the summary.
- name: Test Gitea via HTTPS (with retries)
  retries: 20
  delay: 3
  until: gitea_https_test.status == 200
  register: gitea_https_test
  changed_when: false
  failed_when: false
  ansible.builtin.uri:
    url: "{{ gitea_url }}/api/healthz"
    method: GET
    timeout: 10
    validate_certs: false
    status_code:
      - 200

# Report whether acme.json is a regular, non-empty file. Read-only.
- name: Check SSL certificate status
  changed_when: false
  register: ssl_status
  ansible.builtin.shell: |
    cd {{ traefik_stack_path }}
    if [ -f acme.json ] && [ -s acme.json ]; then
    echo "SSL certificates: PRESENT"
    else
    echo "SSL certificates: MISSING or EMPTY"
    fi
# Human-readable report built from the results registered earlier in the
# play.
#
# Status lines: `docker compose ps` output is multi-line (header plus
# rows). The former `regex_replace('.*(Up|…).*', '\\1')` rewrote only the
# matching row and left the header in place, and its `default('UNKNOWN')`
# could not fire because stdout is always defined. `regex_search` extracts
# just the state keyword, and `default(…, true)` supplies UNKNOWN when
# nothing matches.
#
# HTTPS branch: `.status` is read through `default(0)` so that a result
# without a status field selects the "not reachable" branch instead of
# raising a templating error.
- name: Final status summary
  ansible.builtin.debug:
    msg: |
      ================================================================================
      REDEPLOYMENT SUMMARY
      ================================================================================
      Traefik:
      - Status: {{ traefik_status_after.stdout | regex_search('Up|Down|Restarting') | default('UNKNOWN', true) }}
      - SSL Certificates: {{ ssl_status.stdout }}
      Gitea:
      - Status: {{ gitea_status_after.stdout | regex_search('Up|Down|Restarting') | default('UNKNOWN', true) }}
      - Health: {% if gitea_health.stdout == 'HEALTHY' %}✅ Healthy{% else %}❌ Not Healthy{% endif %}
      Service Discovery:
      - Gitea in network: {% if gitea_in_network.stdout == 'YES' %}✅{% else %}❌{% endif %}
      - Traefik in network: {% if traefik_in_network.stdout == 'YES' %}✅{% else %}❌{% endif %}
      - Direct connection: {% if 'CONNECTION_FAILED' not in traefik_gitea_direct.stdout %}✅{% else %}❌{% endif %}
      Gitea Accessibility:
      {% if (gitea_https_test.status | default(0)) == 200 %}
      ✅ Gitea is reachable via HTTPS (Status: 200)
      URL: {{ gitea_url }}
      {% else %}
      ❌ Gitea is NOT reachable via HTTPS (Status: {{ gitea_https_test.status | default('TIMEOUT') }})
      Possible causes:
      1. SSL certificate is still being generated (wait 2-5 minutes)
      2. Service discovery needs more time (wait 1-2 minutes)
      3. Network configuration issue
      Next steps:
      - Wait 2-5 minutes and test again: curl -k {{ gitea_url }}/api/healthz
      - Check Traefik logs: cd {{ traefik_stack_path }} && docker compose logs traefik --tail=50
      - Check Gitea logs: cd {{ gitea_stack_path }} && docker compose logs gitea --tail=50
      {% endif %}
      ================================================================================