---
# Redeploy Traefik and Gitea Stacks
#
# Purpose: Clean redeployment of the Traefik and Gitea stacks to fix service
# discovery issues.
#
# This playbook:
#   - Stops and removes containers (but keeps volumes and acme.json)
#   - Redeploys both stacks with fresh containers
#   - Reinitializes service discovery
#   - Verifies everything works
#
# Required variables (expected from inventory / group_vars):
#   - stacks_base_path: remote directory that holds the stack directories
#   - gitea_domain:     public host name used to build the HTTPS test URL
#   - ansible_user:     owner/group applied to acme.json
#
# Usage:
#   ansible-playbook -i inventory/production.yml playbooks/redeploy-traefik-gitea.yml \
#     --vault-password-file secrets/.vault_pass

- name: Redeploy Traefik and Gitea Stacks
  hosts: production
  # Facts are required: ansible_date_time.epoch names the acme.json backup.
  gather_facts: true
  become: false

  vars:
    traefik_stack_path: "{{ stacks_base_path }}/traefik"
    gitea_stack_path: "{{ stacks_base_path }}/gitea"
    gitea_url: "https://{{ gitea_domain }}"
    traefik_container_name: "traefik"
    gitea_container_name: "gitea"

  tasks:
    # ========================================
    # 1. PREPARATION
    # ========================================
    - name: Display redeployment plan
      ansible.builtin.debug:
        msg: |
          ================================================================================
          TRAEFIK + GITEA REDEPLOYMENT PLAN
          ================================================================================

          This playbook will:
          1. ✅ Sync latest stack configurations
          2. ✅ Stop and remove Traefik containers (keeps acme.json)
          3. ✅ Stop and remove Gitea containers (keeps volumes/data)
          4. ✅ Redeploy Traefik stack
          5. ✅ Redeploy Gitea stack
          6. ✅ Verify service discovery
          7. ✅ Test Gitea accessibility

          ⚠️ IMPORTANT:
          - SSL certificates (acme.json) will be preserved
          - Gitea data (volumes) will be preserved
          - Only containers will be recreated
          - Expected downtime: ~2-5 minutes

          ================================================================================

    # Intentionally disabled (when: false); kept so the role-based sync can be
    # re-enabled later. The rsync task below does the actual sync.
    - name: Sync infrastructure stacks to server
      ansible.builtin.include_role:
        name: traefik
        tasks_from: deploy
      vars:
        traefik_auto_restart: false  # Don't restart during sync
      when: false  # Skip for now, we'll do it manually

    # synchronize lives in the ansible.posix collection, not ansible.builtin.
    - name: Sync stacks directory to production server
      ansible.posix.synchronize:
        src: "{{ playbook_dir }}/../../stacks/"
        dest: "{{ stacks_base_path }}/"
        delete: false
        recursive: true
        rsync_opts:
          - "--chmod=D755,F644"
          - "--exclude=.git"
          - "--exclude=*.log"
          - "--exclude=data/"
          - "--exclude=volumes/"
          - "--exclude=acme.json"  # Preserve SSL certificates
          - "--exclude=*.key"
          - "--exclude=*.pem"

    # ========================================
    # 2. TRAEFIK REDEPLOYMENT
    # ========================================
    - name: Check Traefik container status (before)
      ansible.builtin.shell: |
        cd {{ traefik_stack_path }}
        docker compose ps {{ traefik_container_name }} 2>/dev/null || echo "NOT_RUNNING"
      register: traefik_status_before
      changed_when: false

    - name: Display Traefik status (before)
      ansible.builtin.debug:
        msg: |
          Traefik Status (Before):
          {{ traefik_status_before.stdout }}

    - name: Check if acme.json exists
      ansible.builtin.stat:
        path: "{{ traefik_stack_path }}/acme.json"
      register: acme_json_stat

    # The copy module returns no `rc`, so no changed_when override is used
    # here: referencing `acme_backup.rc` would make the task fail. copy reports
    # `changed` on its own when the backup file is written.
    - name: Backup acme.json (safety measure)
      ansible.builtin.copy:
        src: "{{ traefik_stack_path }}/acme.json"
        dest: "{{ traefik_stack_path }}/acme.json.backup.{{ ansible_date_time.epoch }}"
        remote_src: true
        mode: '0600'
      when: acme_json_stat.stat.exists
      register: acme_backup
      failed_when: false

    - name: Stop Traefik stack
      ansible.builtin.shell: |
        cd {{ traefik_stack_path }}
        docker compose down
      register: traefik_stop
      changed_when: traefik_stop.rc == 0
      failed_when: false

    # NOTE(review): docker's name filter is presumably a substring match, so
    # other containers whose names contain this value would also be removed.
    - name: Remove Traefik containers (if any remain)
      ansible.builtin.shell: |
        docker ps -a --filter "name={{ traefik_container_name }}" --format "{{ '{{' }}.ID{{ '}}' }}" | xargs -r docker rm -f 2>/dev/null || true
      register: traefik_remove
      changed_when: traefik_remove.rc == 0
      failed_when: false

    # Timestamps are preserved so that an existing file with correct
    # ownership/mode is not reported as changed on every run.
    - name: Ensure acme.json exists and has correct permissions
      ansible.builtin.file:
        path: "{{ traefik_stack_path }}/acme.json"
        state: touch
        mode: '0600'
        owner: "{{ ansible_user }}"
        group: "{{ ansible_user }}"
        modification_time: preserve
        access_time: preserve
      become: true
      register: acme_json_ensure

    - name: Check if acme.json exists after ensure
      ansible.builtin.stat:
        path: "{{ traefik_stack_path }}/acme.json"
      register: acme_json_after_ensure

    # The touch above always leaves a file in place, so "file is missing" can
    # never be observed here. A lost acme.json shows up as an *empty* file, so
    # restore when the original had content and the current file has none.
    - name: Restore acme.json from backup if it was deleted
      ansible.builtin.copy:
        src: "{{ traefik_stack_path }}/acme.json.backup.{{ ansible_date_time.epoch }}"
        dest: "{{ traefik_stack_path }}/acme.json"
        remote_src: true
        mode: '0600'
      when:
        - acme_json_stat.stat.exists
        - acme_json_stat.stat.size | default(0) > 0
        - acme_backup is changed
        - acme_json_after_ensure.stat.size | default(0) == 0
      failed_when: false

    - name: Deploy Traefik stack
      community.docker.docker_compose_v2:
        project_src: "{{ traefik_stack_path }}"
        state: present
        pull: always
      register: traefik_deploy

    # Polls up to 12 times, 5 seconds apart, until the container reports
    # Up/running.
    - name: Wait for Traefik to be ready
      ansible.builtin.shell: |
        cd {{ traefik_stack_path }}
        docker compose ps {{ traefik_container_name }} | grep -Eiq "Up|running"
      register: traefik_ready
      changed_when: false
      until: traefik_ready.rc == 0
      retries: 12
      delay: 5
      failed_when: traefik_ready.rc != 0

    - name: Check Traefik container status (after)
      ansible.builtin.shell: |
        cd {{ traefik_stack_path }}
        docker compose ps {{ traefik_container_name }}
      register: traefik_status_after
      changed_when: false

    - name: Display Traefik status (after)
      ansible.builtin.debug:
        msg: |
          Traefik Status (After):
          {{ traefik_status_after.stdout }}

    # ========================================
    # 3. GITEA REDEPLOYMENT
    # ========================================
    - name: Check Gitea container status (before)
      ansible.builtin.shell: |
        cd {{ gitea_stack_path }}
        docker compose ps {{ gitea_container_name }} 2>/dev/null || echo "NOT_RUNNING"
      register: gitea_status_before
      changed_when: false

    - name: Display Gitea status (before)
      ansible.builtin.debug:
        msg: |
          Gitea Status (Before):
          {{ gitea_status_before.stdout }}

    - name: Stop Gitea stack (preserves volumes)
      ansible.builtin.shell: |
        cd {{ gitea_stack_path }}
        docker compose down
      register: gitea_stop
      changed_when: gitea_stop.rc == 0
      failed_when: false

    # NOTE(review): same name-filter caveat as the Traefik removal task.
    - name: Remove Gitea containers (if any remain, volumes are preserved)
      ansible.builtin.shell: |
        docker ps -a --filter "name={{ gitea_container_name }}" --format "{{ '{{' }}.ID{{ '}}' }}" | xargs -r docker rm -f 2>/dev/null || true
      register: gitea_remove
      changed_when: gitea_remove.rc == 0
      failed_when: false

    - name: Deploy Gitea stack
      community.docker.docker_compose_v2:
        project_src: "{{ gitea_stack_path }}"
        state: present
        pull: always
      register: gitea_deploy

    - name: Wait for Gitea to be ready
      ansible.builtin.shell: |
        cd {{ gitea_stack_path }}
        docker compose ps {{ gitea_container_name }} | grep -Eiq "Up|running"
      register: gitea_ready
      changed_when: false
      until: gitea_ready.rc == 0
      retries: 12
      delay: 5
      failed_when: gitea_ready.rc != 0

    # Best-effort: never fails the play; the result is reported in the summary.
    - name: Wait for Gitea to be healthy
      ansible.builtin.shell: |
        cd {{ gitea_stack_path }}
        docker compose exec -T {{ gitea_container_name }} curl -f http://localhost:3000/api/healthz 2>&1 | grep -q "status.*pass" && echo "HEALTHY" || echo "NOT_HEALTHY"
      register: gitea_health
      changed_when: false
      until: gitea_health.stdout == "HEALTHY"
      retries: 30
      delay: 2
      failed_when: false

    - name: Check Gitea container status (after)
      ansible.builtin.shell: |
        cd {{ gitea_stack_path }}
        docker compose ps {{ gitea_container_name }}
      register: gitea_status_after
      changed_when: false

    - name: Display Gitea status (after)
      ansible.builtin.debug:
        msg: |
          Gitea Status (After):
          {{ gitea_status_after.stdout }}

    # ========================================
    # 4. SERVICE DISCOVERY VERIFICATION
    # ========================================
    - name: Wait for Traefik to discover Gitea (service discovery delay)
      ansible.builtin.pause:
        seconds: 15

    # The {{ '{{' }} ... {{ '}}' }} sequences emit literal Go-template braces
    # for docker's --format option after Jinja rendering.
    - name: Check if Gitea is in traefik-public network
      ansible.builtin.shell: |
        docker network inspect traefik-public --format '{{ '{{' }}range .Containers{{ '}}' }}{{ '{{' }}.Name{{ '}}' }} {{ '{{' }}end{{ '}}' }}' 2>/dev/null | grep -q {{ gitea_container_name }} && echo "YES" || echo "NO"
      register: gitea_in_network
      changed_when: false

    - name: Check if Traefik is in traefik-public network
      ansible.builtin.shell: |
        docker network inspect traefik-public --format '{{ '{{' }}range .Containers{{ '}}' }}{{ '{{' }}.Name{{ '}}' }} {{ '{{' }}end{{ '}}' }}' 2>/dev/null | grep -q {{ traefik_container_name }} && echo "YES" || echo "NO"
      register: traefik_in_network
      changed_when: false

    # The wget exit status is tested directly. Piping straight into `head`
    # would mask a failed request, because a pipeline's status is that of its
    # last command, and CONNECTION_FAILED would never be printed.
    - name: Test direct connection from Traefik to Gitea
      ansible.builtin.shell: |
        cd {{ traefik_stack_path }}
        if output=$(docker compose exec -T {{ traefik_container_name }} wget -qO- --timeout=5 http://{{ gitea_container_name }}:3000/api/healthz 2>&1); then
          printf '%s\n' "$output" | head -5
        else
          echo "CONNECTION_FAILED"
          printf '%s\n' "$output" | head -5
        fi
      register: traefik_gitea_direct
      changed_when: false
      failed_when: false

    - name: Display network status
      ansible.builtin.debug:
        msg: |
          Network Status:
          - Gitea in traefik-public: {% if gitea_in_network.stdout == 'YES' %}✅{% else %}❌{% endif %}

          - Traefik in traefik-public: {% if traefik_in_network.stdout == 'YES' %}✅{% else %}❌{% endif %}

          - Traefik → Gitea (direct): {% if 'CONNECTION_FAILED' not in traefik_gitea_direct.stdout %}✅{% else %}❌{% endif %}

    # ========================================
    # 5. FINAL VERIFICATION
    # ========================================
    # Certificate validation is disabled and failures are tolerated: the
    # outcome is only reported in the summary below.
    - name: Test Gitea via HTTPS (with retries)
      ansible.builtin.uri:
        url: "{{ gitea_url }}/api/healthz"
        method: GET
        status_code: [200]
        validate_certs: false
        timeout: 10
      register: gitea_https_test
      until: gitea_https_test.status == 200
      retries: 20
      delay: 3
      changed_when: false
      failed_when: false

    - name: Check SSL certificate status
      ansible.builtin.shell: |
        cd {{ traefik_stack_path }}
        if [ -f acme.json ] && [ -s acme.json ]; then
          echo "SSL certificates: PRESENT"
        else
          echo "SSL certificates: MISSING or EMPTY"
        fi
      register: ssl_status
      changed_when: false

    # regex_search extracts only the state keyword from the multi-line
    # `docker compose ps` output; default(..., true) also covers "no match".
    - name: Final status summary
      ansible.builtin.debug:
        msg: |
          ================================================================================
          REDEPLOYMENT SUMMARY
          ================================================================================

          Traefik:
          - Status: {{ traefik_status_after.stdout | regex_search('Up|Down|Restarting') | default('UNKNOWN', true) }}
          - SSL Certificates: {{ ssl_status.stdout }}

          Gitea:
          - Status: {{ gitea_status_after.stdout | regex_search('Up|Down|Restarting') | default('UNKNOWN', true) }}
          - Health: {% if gitea_health.stdout == 'HEALTHY' %}✅ Healthy{% else %}❌ Not Healthy{% endif %}

          Service Discovery:
          - Gitea in network: {% if gitea_in_network.stdout == 'YES' %}✅{% else %}❌{% endif %}

          - Traefik in network: {% if traefik_in_network.stdout == 'YES' %}✅{% else %}❌{% endif %}

          - Direct connection: {% if 'CONNECTION_FAILED' not in traefik_gitea_direct.stdout %}✅{% else %}❌{% endif %}

          Gitea Accessibility:
          {% if gitea_https_test.status == 200 %}
          ✅ Gitea is reachable via HTTPS (Status: 200)
          URL: {{ gitea_url }}
          {% else %}
          ❌ Gitea is NOT reachable via HTTPS (Status: {{ gitea_https_test.status | default('TIMEOUT') }})

          Possible causes:
          1. SSL certificate is still being generated (wait 2-5 minutes)
          2. Service discovery needs more time (wait 1-2 minutes)
          3. Network configuration issue

          Next steps:
          - Wait 2-5 minutes and test again: curl -k {{ gitea_url }}/api/healthz
          - Check Traefik logs: cd {{ traefik_stack_path }} && docker compose logs traefik --tail=50
          - Check Gitea logs: cd {{ gitea_stack_path }} && docker compose logs gitea --tail=50
          {% endif %}

          ================================================================================