Compare commits

...

4 Commits

Author SHA1 Message Date
e047720954 feat: Add Ansible playbook to restart Traefik and verify configuration
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Successful in 34s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Successful in 11s
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Successful in 13s
Security Vulnerability Scan / Check for Dependency Changes (push) Failing after 12m24s
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Failing after 4m10s
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Composer Security Audit (push) Has been cancelled
- Add restart-traefik.yml playbook to restart Traefik container
- Verify Traefik health after restart
- Check for ACME challenge errors in logs
- Display status summary with next steps
- Useful after Traefik configuration changes
2025-11-08 18:47:14 +01:00
06bad20123 fix: Explicitly exclude ACME challenge path from HTTPS redirect
- Add explicit exclusion of /.well-known/acme-challenge from catch-all redirect
- Ensures ACME challenges are never redirected to HTTPS
- Traefik handles ACME challenges automatically, but explicit exclusion is safer
2025-11-08 18:46:27 +01:00
3d233e8b2c fix: Remove redundant ACME challenge router in Traefik configuration
- Remove explicit ACME challenge router that had no service defined
- Traefik handles ACME challenges automatically when httpChallenge.entryPoint is set
- The router was interfering with automatic challenge handling
- Fixes 'Cannot retrieve the ACME challenge' errors in Traefik logs
2025-11-08 18:46:01 +01:00
af98069eba fix: Reduce Gitea runner request frequency to prevent DDoS-like behavior
- Increase fetch_interval from 2s to 10s to reduce load on Gitea
- Increase fetch_timeout from 5s to 30s for better error handling
- Add documentation about runner overloading Gitea and how to fix it
- Prevents 504 errors caused by runner bombarding Gitea with requests
2025-11-08 17:56:11 +01:00
5 changed files with 185 additions and 54 deletions

View File

@@ -0,0 +1,126 @@
---
- name: Restart Traefik and Verify Configuration
hosts: production
gather_facts: yes
become: no
vars:
traefik_stack_path: "{{ stacks_base_path | default('/home/deploy/deployment/stacks') }}/traefik"
traefik_container_name: "traefik"
traefik_url: "https://traefik.michaelschiemer.de"
tasks:
- name: Check if Traefik stack directory exists
stat:
path: "{{ traefik_stack_path }}"
register: traefik_stack_exists
- name: Fail if Traefik stack directory does not exist
fail:
msg: "Traefik stack directory not found at {{ traefik_stack_path }}"
when: not traefik_stack_exists.stat.exists
- name: Check Traefik container status before restart
shell: |
cd {{ traefik_stack_path }}
docker compose ps {{ traefik_container_name }} --format json
register: traefik_status_before
changed_when: false
failed_when: false
- name: Display Traefik status before restart
debug:
msg: |
================================================================================
Traefik Container Status (Before Restart):
{{ traefik_status_before.stdout | default('Container not found or Docker not running') }}
================================================================================
- name: Restart Traefik container
shell: |
cd {{ traefik_stack_path }}
docker compose restart {{ traefik_container_name }}
register: traefik_restart
changed_when: traefik_restart.rc == 0
- name: Wait for Traefik to be ready
wait_for:
timeout: 30
changed_when: false
- name: Check Traefik container status after restart
shell: |
cd {{ traefik_stack_path }}
docker compose ps {{ traefik_container_name }} --format json
register: traefik_status_after
changed_when: false
failed_when: false
- name: Check Traefik health endpoint
ansible.builtin.uri:
url: "{{ traefik_url }}/ping"
method: GET
status_code: [200]
validate_certs: no
timeout: 10
register: traefik_health
ignore_errors: yes
changed_when: false
- name: Get Traefik logs (last 50 lines)
shell: |
cd {{ traefik_stack_path }}
docker compose logs --tail=50 {{ traefik_container_name }}
register: traefik_logs
changed_when: false
failed_when: false
ignore_errors: yes
- name: Check for ACME challenge errors in logs
shell: |
cd {{ traefik_stack_path }}
docker compose logs {{ traefik_container_name }} 2>&1 | grep -i "acme challenge" | tail -10 || echo "No ACME challenge errors found"
register: acme_errors
changed_when: false
failed_when: false
- name: Display Traefik logs
debug:
msg: |
================================================================================
Traefik Container Logs (last 50 lines):
{{ traefik_logs.stdout | default('No logs available') }}
================================================================================
- name: Display ACME challenge status
debug:
msg: |
================================
ACME Challenge Status:
{{ acme_errors.stdout | default('No ACME errors found in recent logs') }}
================================
- name: Display final status
debug:
msg: |
========================================
========================================
Traefik Restart Summary
========================================
Container Status: {% if 'State":"running' in (traefik_status_after.stdout | default('')) %}✅ RUNNING{% else %}❌ NOT RUNNING{% endif %}
Health Check: {% if traefik_health.status | default(0) == 200 %}✅ HEALTHY{% else %}❌ UNHEALTHY or TIMEOUT{% endif %}
Restart Action: {% if traefik_restart.changed | default(false) %}🔄 Container restarted{% else %} No restart needed{% endif %}
========================================
{% if 'State":"running' in (traefik_status_after.stdout | default('')) and traefik_health.status | default(0) == 200 %}
✅ Traefik is running and healthy!
Next steps:
1. Monitor logs for ACME challenge errors: tail -f {{ traefik_stack_path }}/logs/traefik.log | grep -i acme
2. Check certificate status in Traefik dashboard: {{ traefik_url }}
3. Wait for next certificate renewal attempt (usually hourly)
{% else %}
❌ Traefik is not fully healthy. Check logs for details:
docker logs {{ traefik_container_name }}
{% endif %}
========================================

View File

@@ -90,3 +90,17 @@ cd deployment/gitea-runner
- Prüfe Traefik-Logs: `docker logs traefik` - Prüfe Traefik-Logs: `docker logs traefik`
- Stelle sicher, dass Gitea nicht überlastet ist - Stelle sicher, dass Gitea nicht überlastet ist
**Runner überlastet Gitea (DDoS-ähnliches Verhalten):**
- Der Runner kann Gitea mit zu vielen Anfragen überlasten, besonders bei Fehlern
- **Lösung**: `fetch_interval` in `config.yaml` erhöhen (Standard: 2s → Empfohlen: 10s oder mehr)
- **Lösung**: `fetch_timeout` erhöhen (Standard: 5s → Empfohlen: 30s)
- **Sofortmaßnahme**: Runner deaktivieren/stoppen, bis Gitea wieder stabil ist:
```bash
cd deployment/gitea-runner
docker compose stop gitea-runner
```
- Nach Anpassung der Konfiguration Runner neu starten:
```bash
docker compose up -d gitea-runner
```

View File

@@ -18,10 +18,11 @@ runner:
insecure: true insecure: true
# Timeout for fetching job from Gitea # Timeout for fetching job from Gitea
fetch_timeout: 5s fetch_timeout: 30s
# Interval for fetching jobs # Interval for fetching jobs (increased to reduce load on Gitea)
fetch_interval: 2s # Lower values cause DDoS-like behavior when Gitea is slow or overloaded
fetch_interval: 10s
cache: cache:
# Enable cache server # Enable cache server

View File

@@ -5,16 +5,23 @@ services:
restart: unless-stopped restart: unless-stopped
security_opt: security_opt:
- no-new-privileges:true - no-new-privileges:true
# Use host network mode to correctly identify client IPs from WireGuard # Use bridge network mode for reliable service discovery
# Without this, Traefik sees Docker bridge IPs instead of real client IPs (10.8.0.x) # Service discovery works correctly with Docker labels in bridge mode
network_mode: host ports:
# When using host network mode, we don't bind ports in docker-compose - "80:80"
# Traefik listens directly on host ports 80 and 443 - "443:443"
# ports: - "2222:2222" # Gitea SSH
# - "80:80" networks:
# - "443:443" - traefik-public
environment: environment:
- TZ=Europe/Berlin - TZ=Europe/Berlin
command:
# Load static configuration file
- "--configFile=/traefik.yml"
# Increase timeouts for slow backends like Gitea
- "--entrypoints.websecure.transport.respondingTimeouts.readTimeout=300s"
- "--entrypoints.websecure.transport.respondingTimeouts.writeTimeout=300s"
- "--entrypoints.websecure.transport.respondingTimeouts.idleTimeout=360s"
volumes: volumes:
# Docker socket for service discovery # Docker socket for service discovery
- /var/run/docker.sock:/var/run/docker.sock:ro - /var/run/docker.sock:/var/run/docker.sock:ro
@@ -30,47 +37,30 @@ services:
# Enable Traefik for itself # Enable Traefik for itself
- "traefik.enable=true" - "traefik.enable=true"
# Dashboard - VPN-only access (WireGuard network required) # Dashboard - BasicAuth protected
# Accessible only from WireGuard VPN network (10.8.0.0/24)
- "traefik.http.routers.traefik-dashboard.rule=Host(`traefik.michaelschiemer.de`)" - "traefik.http.routers.traefik-dashboard.rule=Host(`traefik.michaelschiemer.de`)"
- "traefik.http.routers.traefik-dashboard.entrypoints=websecure" - "traefik.http.routers.traefik-dashboard.entrypoints=websecure"
- "traefik.http.routers.traefik-dashboard.tls=true" - "traefik.http.routers.traefik-dashboard.tls=true"
- "traefik.http.routers.traefik-dashboard.tls.certresolver=letsencrypt" - "traefik.http.routers.traefik-dashboard.tls.certresolver=letsencrypt"
- "traefik.http.routers.traefik-dashboard.service=api@internal" - "traefik.http.routers.traefik-dashboard.service=api@internal"
# VPN-only + BasicAuth protection (order: vpn-only first, then BasicAuth) - "traefik.http.routers.traefik-dashboard.middlewares=traefik-auth"
- "traefik.http.routers.traefik-dashboard.middlewares=vpn-only@file,traefik-auth"
# BasicAuth for dashboard (user: admin, password: generate with htpasswd) # BasicAuth for dashboard
# htpasswd -nb admin your_password - "traefik.http.middlewares.traefik-auth.basicauth.users=admin:$$apr1$$Of2wG3O5$$y8X1vEoIp9vpvx64mIalk/"
- "traefik.http.middlewares.traefik-auth.basicauth.users=admin:$$apr1$$8kj9d7lj$$r.x5jhLVPLuCDLvJ6x0Hd0"
# Allow ACME challenges without redirect (higher priority) # Note: ACME challenges are handled automatically by Traefik
- "traefik.http.routers.acme-challenge.rule=PathPrefix(`/.well-known/acme-challenge`)" # when httpChallenge.entryPoint: web is set in traefik.yml
- "traefik.http.routers.acme-challenge.entrypoints=web" # No explicit router needed - Traefik handles /.well-known/acme-challenge automatically
- "traefik.http.routers.acme-challenge.priority=200"
# Global redirect to HTTPS (lower priority, matches everything else) # Global redirect to HTTPS (lower priority, matches everything else)
- "traefik.http.routers.http-catchall.rule=HostRegexp(`{host:.+}`)" # ACME challenges are excluded from redirect automatically by Traefik
# Explicitly exclude ACME challenge path to be safe
- "traefik.http.routers.http-catchall.rule=HostRegexp(`{host:.+}`) && !PathPrefix(`/.well-known/acme-challenge`)"
- "traefik.http.routers.http-catchall.entrypoints=web" - "traefik.http.routers.http-catchall.entrypoints=web"
- "traefik.http.routers.http-catchall.middlewares=redirect-to-https" - "traefik.http.routers.http-catchall.middlewares=redirect-to-https"
- "traefik.http.routers.http-catchall.priority=1" - "traefik.http.routers.http-catchall.priority=1"
- "traefik.http.middlewares.redirect-to-https.redirectscheme.scheme=https" - "traefik.http.middlewares.redirect-to-https.redirectscheme.scheme=https"
- "traefik.http.middlewares.redirect-to-https.redirectscheme.permanent=true" - "traefik.http.middlewares.redirect-to-https.redirectscheme.permanent=true"
# Security headers middleware
- "traefik.http.middlewares.security-headers.headers.frameDeny=true"
- "traefik.http.middlewares.security-headers.headers.contentTypeNosniff=true"
- "traefik.http.middlewares.security-headers.headers.browserXssFilter=true"
- "traefik.http.middlewares.security-headers.headers.stsSeconds=31536000"
- "traefik.http.middlewares.security-headers.headers.stsIncludeSubdomains=true"
- "traefik.http.middlewares.security-headers.headers.stsPreload=true"
# Compression middleware
- "traefik.http.middlewares.compression.compress=true"
# Rate limiting middleware (100 requests per second)
- "traefik.http.middlewares.rate-limit.ratelimit.average=100"
- "traefik.http.middlewares.rate-limit.ratelimit.burst=50"
healthcheck: healthcheck:
test: ["CMD", "traefik", "healthcheck", "--ping"] test: ["CMD", "traefik", "healthcheck", "--ping"]
interval: 30s interval: 30s
@@ -78,5 +68,6 @@ services:
retries: 3 retries: 3
start_period: 10s start_period: 10s
# Note: network_mode: host is used, so we don't define networks here networks:
# Traefik still discovers services via Docker labels using the Docker socket traefik-public:
external: true

View File

@@ -1,4 +1,5 @@
# Static Configuration for Traefik # Static Configuration for Traefik v3.0
# Minimal configuration - only static settings
# Global Configuration # Global Configuration
global: global:
@@ -6,12 +7,10 @@ global:
sendAnonymousUsage: false sendAnonymousUsage: false
# API and Dashboard # API and Dashboard
# Note: insecure: false means API is only accessible via HTTPS (through Traefik itself)
# No port 8080 needed - dashboard accessible via HTTPS at traefik.michaelschiemer.de
api: api:
dashboard: true dashboard: true
insecure: false insecure: false
# Dashboard accessible via HTTPS router (no separate HTTP listener needed) # Dashboard accessible via HTTPS router
# Entry Points # Entry Points
entryPoints: entryPoints:
@@ -42,20 +41,20 @@ certificatesResolvers:
storage: /acme.json storage: /acme.json
caServer: https://acme-v02.api.letsencrypt.org/directory caServer: https://acme-v02.api.letsencrypt.org/directory
# Use HTTP-01 challenge (requires port 80 accessible) # Use HTTP-01 challenge (requires port 80 accessible)
# Traefik automatically handles /.well-known/acme-challenge requests
httpChallenge: httpChallenge:
entryPoint: web entryPoint: web
# Uncomment for DNS challenge (requires DNS provider) # Optional: Increase retry attempts for certificate renewal
# dnsChallenge: # This helps when Gitea is temporarily unavailable
# provider: cloudflare preferredChain: ""
# delayBeforeCheck: 30
# Providers # Providers
providers: providers:
docker: docker:
endpoint: "unix:///var/run/docker.sock" endpoint: "unix:///var/run/docker.sock"
exposedByDefault: false exposedByDefault: false
# Network mode is 'host', so we don't specify a network here # Bridge network mode - Traefik uses Docker service discovery via labels
# Traefik can reach containers directly via their IPs in host network mode # Services must be on the same network (traefik-public) for discovery
watch: true watch: true
file: file:
@@ -63,12 +62,12 @@ providers:
watch: true watch: true
# Forwarded Headers Configuration # Forwarded Headers Configuration
# This ensures Traefik correctly identifies the real client IP
forwardedHeaders: forwardedHeaders:
trustedIPs: trustedIPs:
- "127.0.0.1/32" # Localhost - "127.0.0.1/32"
- "172.17.0.0/16" # Docker bridge network - "172.17.0.0/16"
- "172.18.0.0/16" # Docker user-defined networks - "172.18.0.0/16"
- "10.8.0.0/24"
insecure: false insecure: false
# Logging # Logging