fix: Gitea Traefik routing and connection pool optimization
Some checks failed
🚀 Build & Deploy Image / Determine Build Necessity (push) Failing after 10m14s
🚀 Build & Deploy Image / Build Runtime Base Image (push) Has been skipped
🚀 Build & Deploy Image / Build Docker Image (push) Has been skipped
🚀 Build & Deploy Image / Run Tests & Quality Checks (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Staging (push) Has been skipped
🚀 Build & Deploy Image / Auto-deploy to Production (push) Has been skipped
Security Vulnerability Scan / Check for Dependency Changes (push) Failing after 11m25s
Security Vulnerability Scan / Composer Security Audit (push) Has been cancelled

- Remove middleware reference from Gitea Traefik labels (caused routing issues)
- Optimize Gitea connection pool settings (MAX_IDLE_CONNS=30, authentication_timeout=180s)
- Add explicit service reference in Traefik labels
- Fix intermittent 504 timeouts by improving PostgreSQL connection handling

Fixes Gitea unreachability via git.michaelschiemer.de
2025-11-09 14:46:15 +01:00
parent 85c369e846
commit 36ef2a1e2c
1366 changed files with 104925 additions and 28719 deletions
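The routing and pool changes summarized above could look roughly as follows in the Gitea stack's Compose file. This is a minimal sketch, not the deployed configuration: the service layout, entrypoint and resolver names, and the companion pool value are assumptions; only `MAX_IDLE_CONNS=30`, the 180s timeout, and the explicit service reference come from the commit message.

```yaml
# Sketch only - service names, ports, and values not quoted in the commit
# message are assumptions, not the repository's actual stack definition.
services:
  gitea:
    image: gitea/gitea:latest
    environment:
      # Gitea [database] connection pool settings (MAX_IDLE_CONNS from the commit message)
      - GITEA__database__MAX_IDLE_CONNS=30
      - GITEA__database__CONN_MAX_LIFETIME=180s   # assumed companion setting
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.gitea.rule=Host(`git.michaelschiemer.de`)"
      - "traefik.http.routers.gitea.entrypoints=websecure"          # assumed entrypoint name
      - "traefik.http.routers.gitea.tls.certresolver=letsencrypt"   # assumed resolver name
      # no middleware reference; route straight to an explicitly named service
      - "traefik.http.routers.gitea.service=gitea"
      - "traefik.http.services.gitea.loadbalancer.server.port=3000"

  postgres:
    image: postgres:16
    # authentication_timeout=180s from the commit message; PostgreSQL accepts it as a -c flag
    command: ["postgres", "-c", "authentication_timeout=180s"]
```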

.gitattributes (new file, +27 lines)

@@ -0,0 +1,27 @@
# Ensure shell scripts use LF line endings
*.sh text eol=lf
docker/**/*.sh text eol=lf
**/*.sh text eol=lf
# PHP files
*.php text eol=lf
# Configuration files
*.yml text eol=lf
*.yaml text eol=lf
*.json text eol=lf
*.conf text eol=lf
*.ini text eol=lf
# Docker files
Dockerfile* text eol=lf
docker-compose*.yml text eol=lf
*.dockerfile text eol=lf
# Scripts
*.bash text eol=lf
*.zsh text eol=lf
# Default for text files
* text=auto


@@ -96,7 +96,7 @@ jobs:
          chmod +x /tmp/ci-tools/clone_repo.sh
      - name: Upload CI helpers as artifact
-       uses: actions/upload-artifact@v4
+       uses: actions/upload-artifact@v3
        with:
          name: ci-helpers
          path: /tmp/ci-tools/clone_repo.sh
@@ -242,7 +242,7 @@ jobs:
          echo "needs_runtime_build=$RUNTIME_BUILD" >> "$GITHUB_OUTPUT"
      - name: Upload repository as artifact
-       uses: actions/upload-artifact@v4
+       uses: actions/upload-artifact@v3
        with:
          name: repository
          path: /workspace/repo
@@ -270,7 +270,7 @@ jobs:
      - name: Download CI helpers from artifact
        if: ${{ steps.decision.outputs.should_build == 'true' }}
-       uses: actions/download-artifact@v4
+       uses: actions/download-artifact@v3
        with:
          name: ci-helpers
          path: /tmp/ci-tools
@@ -316,7 +316,7 @@ jobs:
      - name: Download repository artifact
        if: steps.decision.outputs.should_build == 'true'
-       uses: actions/download-artifact@v4
+       uses: actions/download-artifact@v3
        with:
          name: repository
          path: /workspace
@@ -499,7 +499,7 @@ jobs:
    runs-on: php-ci
    steps:
      - name: Download CI helpers from artifact
-       uses: actions/download-artifact@v4
+       uses: actions/download-artifact@v3
        with:
          name: ci-helpers
          path: /tmp/ci-tools
@@ -529,7 +529,7 @@ jobs:
          chmod +x /tmp/ci-tools/clone_repo.sh
      - name: Download repository artifact
-       uses: actions/download-artifact@v4
+       uses: actions/download-artifact@v3
        with:
          name: repository
          path: /workspace
@@ -605,7 +605,7 @@ jobs:
      - name: Download CI helpers from artifact
        if: ${{ env.SHOULD_BUILD == 'true' }}
-       uses: actions/download-artifact@v4
+       uses: actions/download-artifact@v3
        with:
          name: ci-helpers
          path: /tmp/ci-tools
@@ -636,7 +636,7 @@ jobs:
      - name: Download repository artifact
        if: ${{ env.SHOULD_BUILD == 'true' }}
-       uses: actions/download-artifact@v4
+       uses: actions/download-artifact@v3
        with:
          name: repository
          path: /workspace
@@ -987,7 +987,7 @@ jobs:
      - name: Upload repository as artifact
        if: ${{ env.SHOULD_BUILD == 'true' }}
-       uses: actions/upload-artifact@v4
+       uses: actions/upload-artifact@v3
        with:
          name: repository
          path: /workspace/repo
@@ -1023,7 +1023,7 @@ jobs:
          echo "📋 Branch: $REF_NAME"
      - name: Download repository artifact
-       uses: actions/download-artifact@v4
+       uses: actions/download-artifact@v3
        with:
          name: repository
          path: /workspace
@@ -1182,7 +1182,7 @@ jobs:
          echo "📋 Branch: $REF_NAME"
      - name: Download repository artifact
-       uses: actions/download-artifact@v4
+       uses: actions/download-artifact@v3
        with:
          name: repository
          path: /workspace


@@ -124,33 +124,14 @@ jobs:
            chmod 600 /tmp/vault_pass
          fi
-     - name: Deploy Application Code to Staging
+     - name: Deploy to Staging (Complete)
        run: |
          cd /workspace/repo/deployment/ansible
          ansible-playbook -i inventory/production.yml \
-           playbooks/deploy-application-code.yml \
+           playbooks/deploy-complete.yml \
            -e "deployment_environment=staging" \
            -e "deployment_hosts=production" \
            -e "git_branch=${{ steps.branch.outputs.BRANCH }}" \
-           --vault-password-file /tmp/vault_pass \
-           --private-key ~/.ssh/production
-     - name: Install Composer Dependencies
-       run: |
-         cd /workspace/repo/deployment/ansible
-         ansible-playbook -i inventory/production.yml \
-           playbooks/install-composer-dependencies.yml \
-           -e "deployment_environment=staging" \
-           --vault-password-file /tmp/vault_pass \
-           --private-key ~/.ssh/production
-     - name: Deploy Docker Image to Staging
-       run: |
-         cd /workspace/repo/deployment/ansible
-         ansible-playbook -i inventory/production.yml \
-           playbooks/deploy-image.yml \
-           -e "deployment_environment=staging" \
-           -e "deployment_hosts=production" \
            -e "image_tag=${{ needs.determine-image.outputs.image_tag }}" \
            -e "docker_registry=${{ needs.determine-image.outputs.registry_host }}" \
            -e "docker_registry_username=${{ secrets.REGISTRY_USER }}" \
@@ -164,15 +145,24 @@ jobs:
      - name: Health check
        id: health
        run: |
-         for i in {1..10}; do
-           if curl -f -k https://staging.michaelschiemer.de/health; then
-             echo "✅ Health check passed"
+         echo "🔍 Performing health checks with exponential backoff..."
+         DELAY=2
+         MAX_DELAY=60
+         MAX_ATTEMPTS=5
+         for i in $(seq 1 $MAX_ATTEMPTS); do
+           if curl -f -k -s https://staging.michaelschiemer.de/health > /dev/null 2>&1; then
+             echo "✅ Health check passed (attempt $i/$MAX_ATTEMPTS)"
              exit 0
            fi
-           echo "⏳ Waiting for staging service... (attempt $i/10)"
-           sleep 10
+           if [ $i -lt $MAX_ATTEMPTS ]; then
+             echo "⏳ Waiting for staging service... (attempt $i/$MAX_ATTEMPTS, delay ${DELAY}s)"
+             sleep $DELAY
+             DELAY=$((DELAY * 2))
+             [ $DELAY -gt $MAX_DELAY ] && DELAY=$MAX_DELAY
+           fi
          done
-         echo "❌ Health check failed"
+         echo "❌ Health check failed after $MAX_ATTEMPTS attempts"
          exit 1
      - name: Notify deployment success
@@ -187,6 +177,9 @@ jobs:
    needs: determine-image
    if: inputs.environment == 'production'
    runs-on: ubuntu-latest
+   concurrency:
+     group: deploy-production
+     cancel-in-progress: false
    environment:
      name: production
      url: https://michaelschiemer.de
@@ -243,33 +236,14 @@ jobs:
            chmod 600 /tmp/vault_pass
          fi
-     - name: Deploy Application Code to Production
+     - name: Deploy to Production (Complete)
        run: |
          cd /workspace/repo/deployment/ansible
          ansible-playbook -i inventory/production.yml \
-           playbooks/deploy-application-code.yml \
+           playbooks/deploy-complete.yml \
            -e "deployment_environment=production" \
            -e "deployment_hosts=production" \
            -e "git_branch=${{ steps.branch.outputs.BRANCH }}" \
-           --vault-password-file /tmp/vault_pass \
-           --private-key ~/.ssh/production
-     - name: Install Composer Dependencies
-       run: |
-         cd /workspace/repo/deployment/ansible
-         ansible-playbook -i inventory/production.yml \
-           playbooks/install-composer-dependencies.yml \
-           -e "deployment_environment=production" \
-           --vault-password-file /tmp/vault_pass \
-           --private-key ~/.ssh/production
-     - name: Deploy Docker Image to Production
-       run: |
-         cd /workspace/repo/deployment/ansible
-         ansible-playbook -i inventory/production.yml \
-           playbooks/deploy-image.yml \
-           -e "deployment_environment=production" \
-           -e "deployment_hosts=production" \
            -e "image_tag=${{ needs.determine-image.outputs.image_tag }}" \
            -e "docker_registry=${{ needs.determine-image.outputs.registry_host }}" \
            -e "docker_registry_username=${{ secrets.REGISTRY_USER }}" \
@@ -283,15 +257,24 @@ jobs:
      - name: Health check
        id: health
        run: |
-         for i in {1..10}; do
-           if curl -f -k https://michaelschiemer.de/health; then
-             echo "✅ Health check passed"
+         echo "🔍 Performing health checks with exponential backoff..."
+         DELAY=2
+         MAX_DELAY=60
+         MAX_ATTEMPTS=5
+         for i in $(seq 1 $MAX_ATTEMPTS); do
+           if curl -f -k -s https://michaelschiemer.de/health > /dev/null 2>&1; then
+             echo "✅ Health check passed (attempt $i/$MAX_ATTEMPTS)"
              exit 0
            fi
-           echo "⏳ Waiting for production service... (attempt $i/10)"
-           sleep 10
+           if [ $i -lt $MAX_ATTEMPTS ]; then
+             echo "⏳ Waiting for production service... (attempt $i/$MAX_ATTEMPTS, delay ${DELAY}s)"
+             sleep $DELAY
+             DELAY=$((DELAY * 2))
+             [ $DELAY -gt $MAX_DELAY ] && DELAY=$MAX_DELAY
+           fi
          done
-         echo "❌ Health check failed"
+         echo "❌ Health check failed after $MAX_ATTEMPTS attempts"
          exit 1
      - name: Notify deployment success


@@ -80,7 +80,7 @@ jobs:
          fi
      - name: Upload metrics as artifact
-       uses: actions/upload-artifact@v4
+       uses: actions/upload-artifact@v3
        with:
          name: workflow-metrics
          path: /tmp/combined_metrics.json

.gitignore (+9 lines)

@@ -1,5 +1,6 @@
# Editor / IDE
.idea/
+.vscode/
# System
.DS_Store
@@ -68,5 +69,13 @@ secrets/*.txt
!secrets/*.example
deployment/ansible/secrets/local.vault.yml
+# Ansible Vault password files and generated password backups
+deployment/ansible/secrets/.vault_pass
+deployment/ansible/secrets/.vault-passwords.txt
+deployment/ansible/secrets/production.vault.yml
# SSL/TLS certificates
**/acme.json
+# Documentation backups
+backups/docs-backup-*/


@@ -6,6 +6,7 @@ This file provides comprehensive guidance for AI agents (like Claude Code, Curso
## Quick Reference
+- **Permission Requirement**: Obtain explicit approval from the project owner before making any code changes
- **Framework**: Custom PHP Framework (PHP 8.5+)
- **Local URL**: https://localhost (HTTPS required)
- **Docker**: Use `make up` to start containers
@@ -405,18 +406,21 @@ final readonly class DatabaseConfig
## Additional Documentation
For detailed information, see:
+- `docs/claude/README.md` - AI-specific documentation overview
- `docs/claude/guidelines.md` - Detailed coding guidelines
- `docs/claude/architecture.md` - Architecture documentation
- `docs/claude/development-commands.md` - Command reference
-- `docs/claude/common-workflows.md` - Common development workflows
+- `docs/guides/common-workflows.md` - Common development workflows
-- `docs/claude/error-handling.md` - Error handling patterns
+- `docs/features/error-handling/guide.md` - Error handling patterns
-- `docs/claude/security-patterns.md` - Security patterns
+- `docs/features/security/patterns.md` - Security patterns
- `docs/claude/mcp-integration.md` - MCP integration details
-- And other files in `docs/claude/`
+- `docs/README.md` - Main framework documentation
+- And other files in `docs/claude/` and `docs/`
## Quick Checklist for AI Agents
Before making changes:
+- [ ] Obtain explicit permission from the project owner before modifying any code
- [ ] Follow framework principles (no inheritance, readonly, final, immutable)
- [ ] Use Value Objects instead of primitives/arrays
- [ ] Place test files in `tests/` directory


@@ -10,16 +10,16 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
@docs/claude/scheduler-queue-pipeline-persona.md
@docs/claude/magiclinks-system.md
@docs/claude/guidelines.md
-@docs/claude/common-workflows.md
+@docs/guides/common-workflows.md
-@docs/claude/error-handling.md
+@docs/features/error-handling/guide.md
-@docs/claude/security-patterns.md
+@docs/features/security/patterns.md
-@docs/claude/queue-system.md
+@docs/features/queue/system.md
-@docs/claude/event-system.md
+@docs/features/events/system.md
@docs/claude/async-components.md
@docs/claude/console-commands.md
-@docs/claude/database-patterns.md
+@docs/features/database/patterns.md
@docs/claude/performance-monitoring.md
-@docs/claude/troubleshooting.md
+@docs/guides/troubleshooting.md
## MCP Server Integration 🤖

CODEX.md (new file, +8 lines)

@@ -0,0 +1,8 @@
# Codex Agent Reference
Codex agents should follow `AGENTS.md` for the full project rule set. Additional Codex-oriented guidance lives in `docs/codex/guidelines.md`.
- Always secure explicit approval from the project owner before modifying any files.
- Adhere to the workflow and tooling requirements documented in the Codex guidelines.
Keep this file in sync with updates to `docs/codex/guidelines.md` and the broader agent documentation.


@@ -1,6 +1,6 @@
# Production Dockerfile - Multi-Stage Build
-ARG PHP_VERSION=8.5.0RC3
+ARG PHP_VERSION=8.5.0RC4
# Override via --build-arg PHP_VERSION=8.5.0RCX to track upstream releases
ARG RUNTIME_IMAGE=runtime-base
@@ -57,7 +57,7 @@ COPY tsconfig.json ./
RUN npm run build
# Stage: Runtime Base Image (shared)
-ARG PHP_VERSION=8.5.0RC3
+ARG PHP_VERSION=8.5.0RC4
FROM php:${PHP_VERSION}-fpm AS runtime-base
# Install system dependencies + nginx for production
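If the RC4 bump also needs to be pinned where the image is built via Compose rather than `--build-arg` on the command line, the build arg can be set in the stack file. A minimal sketch, assuming a service named `php` built from `Dockerfile.production` (both names appear elsewhere in this commit; the actual stack file is not shown here):

```yaml
# Sketch only: overriding the PHP_VERSION build arg from a Compose file
# instead of passing --build-arg on the docker build command line.
services:
  php:
    build:
      context: .
      dockerfile: Dockerfile.production
      args:
        PHP_VERSION: "8.5.0RC4"
```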


@@ -158,6 +158,10 @@ help: ## Zeigt diese Hilfe an
console: ## Run console commands (Usage: make console ARGS="command arguments")
	docker exec -it php php console.php $(ARGS)
+design-tokens: ## Generate design tokens CSS from PHP
+	@echo "🎨 Generating design tokens..."
+	docker exec php php console.php design:generate-tokens
composer: ## Use Composer


@@ -53,8 +53,7 @@
  },
  "suggest": {
    "ext-apcu": "For better caching performance (not yet available for PHP 8.5)",
-   "ext-redis": "For Redis cache driver support (not yet available for PHP 8.5)",
+   "ext-redis": "For Redis cache driver support (not yet available for PHP 8.5)"
-   "ext-zend-opcache": "For improved performance (built-in for PHP 8.5)"
  },
  "scripts": {


@@ -5,6 +5,9 @@ declare(strict_types=1);
require_once __DIR__ . '/vendor/autoload.php';
require __DIR__ . '/src/Framework/Debug/helpers.php';
+putenv('APP_BASE_PATH=' . __DIR__);
+$_ENV['APP_BASE_PATH'] = __DIR__;
// Detect MCP Server mode early (before any logging initialization)
// This allows LoggerInitializer to use NullHandler and suppress all output
if (in_array('mcp:server', $argv, true)) {


@@ -68,6 +68,8 @@ Developer → git push
## Directory Structure
+### Local Repository Structure
```
deployment/
├── ansible/   # Ansible config, playbooks, inventory, templates
@@ -90,6 +92,22 @@ deployment/
└── README.md (this document)
```
+### Server Directory Structure
+Two main directories exist on the production server:
+```
+/home/deploy/
+├── deployment/          # Infrastructure as Code (24M)
+│   ├── stacks/          # Docker Compose stacks
+│   └── backups/         # Backup files
+└── michaelschiemer/     # Application code (491M)
+    ├── current/         # Symlink → currently deployed version
+    └── .archive/        # Older versions (rollback)
+```
+**📖 Detailed explanation:** see [docs/server-directory-structure.md](docs/server-directory-structure.md)
## Getting Started
### 🧪 Preparing pipeline tests


@@ -328,7 +328,7 @@ All zentralen Variablen werden in `group_vars/production.yml` gepflegt und könn
|----------|--------------|--------------|
| `deploy_user_home` | Home directory of the deploy user | `/home/deploy` |
| `stacks_base_path` | Base path for Docker Compose stacks | `/home/deploy/deployment/stacks` |
-| `app_stack_path` | Path to the application stack | `/home/deploy/deployment/stacks/application` |
+| `app_stack_path` | Path to the application stack | `/home/deploy/deployment/stacks/production` |
| `backups_path` | Storage location for deployment backups | `/home/deploy/deployment/backups` |
| `docker_registry` | Internal registry endpoint (local) | `localhost:5000` |
| `docker_registry_external` | External registry endpoint | `registry.michaelschiemer.de` |


@@ -10,6 +10,8 @@ fact_caching = jsonfile
fact_caching_connection = /tmp/ansible_facts
fact_caching_timeout = 3600
roles_path = roles
+stdout_callback = default_with_clean_msg
+callback_plugins = ./callback_plugins
[ssh_connection]
pipelining = True


@@ -0,0 +1,282 @@
# Ansible Callback Plugin - default_with_clean_msg
**Stand:** 2025-11-07
**Status:** Dokumentation des Custom Callback Plugins
---
## Übersicht
Das `default_with_clean_msg` Callback Plugin erweitert Ansible's Standard-Output mit verbesserter Formatierung für multiline `msg` Felder. Multiline Nachrichten werden als lesbare Blöcke mit Borders angezeigt, anstatt als escaped Newline-Zeichen.
**Datei:** `deployment/ansible/callback_plugins/default_with_clean_msg.py`
---
## Zweck
### Problem
Ansible's Standard Callback Plugin zeigt multiline `msg` Felder so an:
```
"msg": "Line 1\nLine 2\nLine 3"
```
Dies macht es schwierig, multiline Debug-Ausgaben zu lesen und zu kopieren.
### Lösung
Das Custom Plugin formatiert multiline Nachrichten als lesbare Blöcke:
```
================================================================================
Line 1
Line 2
Line 3
================================================================================
```
---
## Funktionalität
### Multiline Message Formatting
**Automatische Erkennung:**
- Nur Nachrichten mit mehr als einer Zeile werden formatiert
- Einzeilige Nachrichten bleiben unverändert
**Format:**
- Border oben und unten (aus `=` Zeichen)
- Maximale Border-Breite: 80 Zeichen
- Farbcodierung entsprechend Task-Status
### Method Overrides
Das Plugin überschreibt folgende Methoden des Default Callbacks:
- `v2_playbook_on_task_start` - Task Start
- `v2_runner_on_start` - Runner Start
- `v2_runner_on_ok` - Erfolgreiche Tasks
- `v2_runner_on_failed` - Fehlgeschlagene Tasks
- `v2_runner_on_skipped` - Übersprungene Tasks
- `v2_runner_on_unreachable` - Unerreichbare Hosts
**Grund:** Diese Methoden werden überschrieben, um Warnings zu vermeiden, die auftreten, wenn `get_option()` vor der vollständigen Initialisierung aufgerufen wird.
---
## Konfiguration
### ansible.cfg
**Datei:** `deployment/ansible/ansible.cfg`
```ini
[defaults]
stdout_callback = default_with_clean_msg
callback_plugins = ./callback_plugins
```
**Wichtig:**
- `stdout_callback` aktiviert das Plugin als Standard-Output
- `callback_plugins` gibt den Pfad zu den Plugin-Dateien an
### Plugin-Datei
**Pfad:** `deployment/ansible/callback_plugins/default_with_clean_msg.py`
**Struktur:**
- Erbt von `ansible.plugins.callback.default.CallbackModule`
- Überschreibt spezifische Methoden
- Fügt `_print_clean_msg()` Methode hinzu
---
## Verwendung
### Automatisch
Das Plugin wird automatisch verwendet, wenn `ansible.cfg` korrekt konfiguriert ist:
```bash
cd deployment/ansible
ansible-playbook -i inventory/production.yml playbooks/setup-infrastructure.yml
```
### Manuell
Falls das Plugin nicht automatisch geladen wird:
```bash
ansible-playbook \
--callback-plugin ./callback_plugins \
--stdout-callback default_with_clean_msg \
-i inventory/production.yml \
playbooks/setup-infrastructure.yml
```
---
## Beispiel-Ausgabe
### Vorher (Standard Callback)
```
ok: [server] => {
"msg": "Container Status:\nNAME IMAGE COMMAND SERVICE CREATED STATUS PORTS\nproduction-php-1 localhost:5000/framework:latest \"/usr/local/bin/entr…\" php About a minute ago Restarting (255) 13 seconds ago"
}
```
### Nachher (Custom Callback)
```
ok: [server] => {
================================================================================
Container Status:
NAME IMAGE COMMAND SERVICE CREATED STATUS PORTS
production-php-1 localhost:5000/framework:latest "/usr/local/bin/entr…" php About a minute ago Restarting (255) 13 seconds ago
================================================================================
}
```
---
## Bekannte Limitationen
### Warnings bei Option Access
**Problem:** Frühere Versionen des Plugins riefen `get_option()` auf, bevor Ansible's Optionen vollständig initialisiert waren, was zu Warnings führte:
```
[WARNING]: Failure using method (v2_playbook_on_task_start) in callback plugin: 'display_skipped_hosts'
```
**Lösung:** Das Plugin überschreibt die problematischen Methoden direkt, ohne `get_option()` aufzurufen.
### Option-Checks
**Aktuell:** Das Plugin zeigt immer alle Hosts an (ok, changed, skipped), ohne Option-Checks.
**Grund:** Option-Checks würden `get_option()` erfordern, was Warnings verursacht.
**Workaround:** Falls Option-Checks benötigt werden, können sie nach vollständiger Initialisierung implementiert werden.
---
## Technische Details
### Inheritance
```python
from ansible.plugins.callback.default import CallbackModule as DefaultCallbackModule
class CallbackModule(DefaultCallbackModule):
CALLBACK_NAME = 'default_with_clean_msg'
def _print_clean_msg(self, result, color=C.COLOR_VERBOSE):
# Custom formatting logic
```
**Vorteil:** Erbt alle Standard-Funktionalität und erweitert nur die Formatierung.
### Method Overrides
**Warum Overrides?**
Die Standard-Methoden rufen `get_option()` auf, um zu prüfen, ob bestimmte Hosts angezeigt werden sollen. Dies schlägt fehl, wenn Optionen noch nicht initialisiert sind.
**Lösung:** Direkte Implementierung ohne Option-Checks:
```python
def v2_runner_on_ok(self, result):
# Eigene Implementierung ohne get_option()
# ...
self._print_clean_msg(result, color=color)
```
---
## Entwicklung
### Plugin testen
```bash
# Plugin-Verzeichnis
cd deployment/ansible/callback_plugins
# Syntax prüfen
python3 -m py_compile default_with_clean_msg.py
# Mit Ansible testen
cd ..
ansible-playbook -i inventory/production.yml playbooks/check-container-status.yml
```
### Plugin erweitern
**Neue Formatierung hinzufügen:**
1. `_print_clean_msg()` Methode erweitern
2. Neue Formatierungslogik implementieren
3. Tests durchführen
**Beispiel:**
```python
def _print_clean_msg(self, result, color=C.COLOR_VERBOSE):
msg_body = result._result.get('msg')
if isinstance(msg_body, str) and msg_body.strip():
# Custom formatting logic here
# ...
```
---
## Troubleshooting
### Plugin wird nicht geladen
**Problem:** Plugin wird nicht verwendet, Standard-Output bleibt
**Lösung:**
1. `ansible.cfg` prüfen:
```ini
stdout_callback = default_with_clean_msg
callback_plugins = ./callback_plugins
```
2. Plugin-Datei prüfen:
```bash
ls -la deployment/ansible/callback_plugins/default_with_clean_msg.py
```
3. Syntax prüfen:
```bash
python3 -m py_compile default_with_clean_msg.py
```
### Warnings erscheinen
**Problem:** Warnings wie `'display_skipped_hosts'`
**Lösung:** Plugin-Version prüfen - sollte Method Overrides ohne `get_option()` verwenden.
**Aktueller Stand:** Plugin verwendet direkte Overrides ohne Option-Checks.
---
## Referenz
- [Ansible Callback Plugins Documentation](https://docs.ansible.com/ansible/latest/plugins/callback.html)
- [Ansible Callback Development Guide](https://docs.ansible.com/ansible/latest/dev_guide/developing_plugins.html#callback-plugins)
- [Initial Deployment Troubleshooting](../docs/troubleshooting/initial-deployment-issues.md) - Problem 8: Ansible Debug Messages
---
## Changelog
### 2025-11-07
- Initial Version erstellt
- Multiline Message Formatting implementiert
- Method Overrides ohne Option-Checks
- Warnings behoben


@@ -0,0 +1,181 @@
# (c) 2012-2014, Michael DeHaan <michael.dehaan@gmail.com>
# (c) 2017 Ansible Project
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
# Modified to add clean multiline msg formatting
#
# This plugin extends the default callback with enhanced multiline message formatting.
# It suppresses warnings by implementing its own versions of methods that would
# otherwise call get_option() before options are initialized.
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
DOCUMENTATION = '''
name: default_with_clean_msg
type: stdout
short_description: default Ansible screen output with clean multiline messages
version_added: historical
description:
This is the default output callback for ansible-playbook with enhanced
multiline message formatting. Multiline messages in msg fields are
displayed as clean, readable blocks instead of escaped newline characters.
'''
from ansible import constants as C
from ansible.plugins.callback.default import CallbackModule as DefaultCallbackModule
class CallbackModule(DefaultCallbackModule):
'''
This extends the default callback with enhanced multiline message formatting.
Multiline messages in 'msg' fields are displayed as clean, readable blocks.
'''
CALLBACK_NAME = 'default_with_clean_msg'
def _print_clean_msg(self, result, color=C.COLOR_VERBOSE):
'''
Print multiline messages in a clean, readable format with borders.
This makes it easy to read and copy multiline content from debug outputs.
'''
msg_body = result._result.get('msg')
if isinstance(msg_body, str) and msg_body.strip():
lines = msg_body.strip().splitlines()
if len(lines) > 1: # Only format if multiline
max_len = max(len(line) for line in lines if line.strip())
if max_len > 0:
border = "=" * min(max_len, 80) # Limit border width
self._display.display("\n" + border, color=color)
for line in lines:
self._display.display(line, color=color)
self._display.display(border + "\n", color=color)
def v2_playbook_on_task_start(self, task, is_conditional):
# Suppress warnings by implementing our own version that doesn't call get_option early
# Initialize state if needed
if not hasattr(self, '_play'):
self._play = None
if not hasattr(self, '_last_task_banner'):
self._last_task_banner = None
if not hasattr(self, '_task_type_cache'):
self._task_type_cache = {}
# Cache task prefix
self._task_type_cache[task._uuid] = 'TASK'
# Store task name
if self._play and hasattr(self._play, 'strategy'):
from ansible.utils.fqcn import add_internal_fqcns
if self._play.strategy in add_internal_fqcns(('free', 'host_pinned')):
self._last_task_name = None
else:
self._last_task_name = task.get_name().strip()
else:
self._last_task_name = task.get_name().strip()
# Print task banner (only if we should display it)
# We skip the parent's check for display_skipped_hosts/display_ok_hosts to avoid warnings
if self._play and hasattr(self._play, 'strategy'):
from ansible.utils.fqcn import add_internal_fqcns
if self._play.strategy not in add_internal_fqcns(('free', 'host_pinned')):
self._last_task_banner = task._uuid
self._display.banner('TASK [%s]' % task.get_name().strip())
def v2_runner_on_start(self, host, task):
# Suppress warnings by not calling parent if options aren't ready
# This method is optional and only shows per-host start messages
# We can safely skip it to avoid warnings
pass
def v2_runner_on_ok(self, result):
# Suppress warnings by implementing our own version that doesn't call get_option
host_label = self.host_label(result)
# Handle TaskInclude separately
from ansible.playbook.task_include import TaskInclude
if isinstance(result._task, TaskInclude):
if self._last_task_banner != result._task._uuid:
self.v2_playbook_on_task_start(result._task, False)
return
# Clean results and handle warnings
self._clean_results(result._result, result._task.action)
self._handle_warnings(result._result)
# Handle loop results
if result._task.loop and 'results' in result._result:
self._process_items(result)
return
# Determine status and color
if result._result.get('changed', False):
if self._last_task_banner != result._task._uuid:
self.v2_playbook_on_task_start(result._task, False)
self._display.display("changed: [%s]" % host_label, color=C.COLOR_CHANGED)
color = C.COLOR_CHANGED
else:
# Always display ok hosts (skip get_option check to avoid warnings)
if self._last_task_banner != result._task._uuid:
self.v2_playbook_on_task_start(result._task, False)
self._display.display("ok: [%s]" % host_label, color=C.COLOR_OK)
color = C.COLOR_OK
# Add our clean message formatting
self._print_clean_msg(result, color=color)
def v2_runner_on_failed(self, result, ignore_errors=False):
# Suppress warnings by implementing our own version
host_label = self.host_label(result)
self._clean_results(result._result, result._task.action)
if self._last_task_banner != result._task._uuid:
self.v2_playbook_on_task_start(result._task, False)
self._handle_exception(result._result, use_stderr=False)
self._handle_warnings(result._result)
if result._task.loop and 'results' in result._result:
self._process_items(result)
else:
msg = "fatal: [%s]: FAILED! => %s" % (host_label, self._dump_results(result._result))
self._display.display(msg, color=C.COLOR_ERROR)
if ignore_errors:
self._display.display("...ignoring", color=C.COLOR_SKIP)
# Add our clean message formatting
self._print_clean_msg(result, color=C.COLOR_ERROR)
def v2_runner_on_skipped(self, result):
# Suppress warnings by implementing our own version
# Always display skipped hosts (skip get_option check to avoid warnings)
self._clean_results(result._result, result._task.action)
if self._last_task_banner != result._task._uuid:
self.v2_playbook_on_task_start(result._task, False)
if result._task.loop is not None and 'results' in result._result:
self._process_items(result)
else:
msg = "skipping: [%s]" % result._host.get_name()
if self._run_is_verbose(result):
msg += " => %s" % self._dump_results(result._result)
self._display.display(msg, color=C.COLOR_SKIP)
# Add our clean message formatting
self._print_clean_msg(result, color=C.COLOR_SKIP)
def v2_runner_on_unreachable(self, result):
# Suppress warnings by implementing our own version
if self._last_task_banner != result._task._uuid:
self.v2_playbook_on_task_start(result._task, False)
host_label = self.host_label(result)
msg = "fatal: [%s]: UNREACHABLE! => %s" % (host_label, self._dump_results(result._result))
self._display.display(msg, color=C.COLOR_UNREACHABLE)
if result._task.ignore_unreachable:
self._display.display("...ignoring", color=C.COLOR_SKIP)
# Add our clean message formatting
self._print_clean_msg(result, color=C.COLOR_UNREACHABLE)


@@ -1 +0,0 @@
../../group_vars/production.yml


@@ -0,0 +1 @@
../../../secrets/production.vault.yml


@@ -0,0 +1,81 @@
# Build Initial Image - Guide
## Overview
This playbook builds the initial Docker image for the framework and pushes it to the local registry (`localhost:5000`).
## Prerequisites
1. **Registry must be running**: The registry must already be deployed (via `setup-infrastructure.yml`)
2. **Vault password**: `vault_docker_registry_password` must be set in the vault file
3. **Git access**: The server must have access to the Git repository
## Usage
### Standard (main branch)
```bash
cd deployment/ansible
ansible-playbook -i inventory/production.yml \
  playbooks/build-initial-image.yml \
  --vault-password-file secrets/.vault_pass
```
### With a specific branch
```bash
ansible-playbook -i inventory/production.yml \
  playbooks/build-initial-image.yml \
  --vault-password-file secrets/.vault_pass \
  -e "build_repo_branch=staging"
```
### With a specific image tag
```bash
ansible-playbook -i inventory/production.yml \
  playbooks/build-initial-image.yml \
  --vault-password-file secrets/.vault_pass \
  -e "build_image_tag=v1.0.0"
```
## What the playbook does
(A rough sketch of the core build-and-push step follows the list.)
1. ✅ Loads vault secrets (registry credentials)
2. ✅ Clones/updates the Git repository
3. ✅ Checks that `Dockerfile.production` exists
4. ✅ Logs in to the registry
5. ✅ Builds the Docker image
6. ✅ Pushes the image to the registry
7. ✅ Verifies that the image exists
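The build-and-push step could be expressed roughly like this. This is a minimal sketch, assuming the `community.docker` collection and a prior registry login; the variable names (`build_image_tag`, `build_repo_dest`) are illustrative, not necessarily the playbook's actual ones.

```yaml
# Sketch only - assumes a prior docker login to localhost:5000 and the
# community.docker collection; variable names are illustrative.
- name: Build the framework image and push it to the local registry
  community.docker.docker_image:
    name: "localhost:5000/framework"
    tag: "{{ build_image_tag | default('latest') }}"
    build:
      path: "{{ build_repo_dest | default('/home/deploy/build/framework') }}"
      dockerfile: Dockerfile.production
    source: build
    push: true
```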
## After the build
After a successful build you can deploy the application stack:
```bash
ansible-playbook -i inventory/production.yml \
  playbooks/setup-infrastructure.yml \
  --vault-password-file secrets/.vault_pass
```
## Troubleshooting
### Registry login fails
- Check that `vault_docker_registry_password` is set in the vault file
- Check that the registry is running: `docker ps | grep registry`
- Check that the registry is reachable: `curl http://localhost:5000/v2/`
### Dockerfile.production not found
- Check that the branch exists: `git ls-remote --heads <repo-url>`
- Check that `Dockerfile.production` exists in the repository
### Build fails
- Check the Docker logs on the server
- Check that enough disk space is available: `df -h`
- Check that Docker Buildx is installed: `docker buildx version`


@@ -2,10 +2,12 @@
## Available Playbooks
> **Note**: Most playbooks have been refactored into reusable roles. The playbooks are now wrappers that call the corresponding role tasks. This improves reusability and maintainability and follows Ansible best practices.
### Infrastructure Setup
- **`setup-infrastructure.yml`** - Deploys all stacks (Traefik, PostgreSQL, Redis, Registry, Gitea, Monitoring, Production)
- **`setup-production-secrets.yml`** - Deploys secrets to production
-- **`setup-ssl-certificates.yml`** - SSL certificate setup
+- **`setup-ssl-certificates.yml`** - SSL certificate setup (wrapper for the `traefik` role, `tasks_from: ssl`)
- **`setup-wireguard-host.yml`** - WireGuard VPN setup
- **`sync-stacks.yml`** - Synchronizes stack configurations to the server
@@ -14,40 +16,50 @@
- **`backup.yml`** - Creates backups of PostgreSQL, application data, Gitea, and the registry
- **`deploy-image.yml`** - Docker image deployment (used by the CI/CD workflows)
### Traefik Management (role-based)
- **`restart-traefik.yml`** - Restart the Traefik container (wrapper for the `traefik` role, `tasks_from: restart`)
- **`recreate-traefik.yml`** - Recreate the Traefik container (wrapper for the `traefik` role, `tasks_from: restart` with `traefik_restart_action: recreate`)
- **`deploy-traefik-config.yml`** - Deploy Traefik configuration files (wrapper for the `traefik` role, `tasks_from: config`)
- **`check-traefik-acme-logs.yml`** - Check Traefik ACME challenge logs (wrapper for the `traefik` role, `tasks_from: logs`)
- **`setup-ssl-certificates.yml`** - Set up Let's Encrypt SSL certificates (wrapper for the `traefik` role, `tasks_from: ssl`)
### Gitea Management (role-based)
- **`check-and-restart-gitea.yml`** - Check Gitea and restart it if unhealthy (wrapper for the `gitea` role, `tasks_from: restart`)
- **`fix-gitea-runner-config.yml`** - Fix the Gitea runner configuration (wrapper for the `gitea` role, `tasks_from: runner` with `gitea_runner_action: fix`)
- **`register-gitea-runner.yml`** - Register a Gitea runner (wrapper for the `gitea` role, `tasks_from: runner` with `gitea_runner_action: register`)
- **`update-gitea-config.yml`** - Update the Gitea configuration (wrapper for the `gitea` role, `tasks_from: config`)
- **`setup-gitea-initial-config.yml`** - Set up the initial Gitea configuration (wrapper for the `gitea` role, `tasks_from: setup`)
- **`setup-gitea-repository.yml`** - Set up the Gitea repository (wrapper for the `gitea` role, `tasks_from: repository`)
### Application Deployment (role-based)
- **`deploy-application-code.yml`** - Deploy application code via Git (wrapper for the `application` role, `tasks_from: deploy_code` with `application_deployment_method: git`)
- **`sync-application-code.yml`** - Synchronize application code via rsync (wrapper for the `application` role, `tasks_from: deploy_code` with `application_deployment_method: rsync`)
- **`install-composer-dependencies.yml`** - Install Composer dependencies (wrapper for the `application` role, `tasks_from: composer`)
### Application Container Management (role-based)
- **`check-container-status.yml`** - Check container status (wrapper for the `application` role, `tasks_from: health_check`)
- **`check-container-logs.yml`** - Check container logs (wrapper for the `application` role, `tasks_from: logs`)
- **`check-worker-logs.yml`** - Check worker and scheduler logs (wrapper for the `application` role, `tasks_from: logs` with `application_logs_check_vendor: true`)
- **`check-final-status.yml`** - Check the final container status (wrapper for the `application` role, `tasks_from: health_check` with `application_health_check_final: true`)
- **`fix-container-issues.yml`** - Fix container issues (wrapper for the `application` role, `tasks_from: containers` with `application_container_action: fix`)
- **`fix-web-container.yml`** - Fix web container permissions (wrapper for the `application` role, `tasks_from: containers` with `application_container_action: fix-web`)
- **`recreate-containers-with-env.yml`** - Recreate containers with environment variables (wrapper for the `application` role, `tasks_from: containers` with `application_container_action: recreate-with-env`)
- **`sync-and-recreate-containers.yml`** - Sync and recreate containers (wrapper for the `application` role, `tasks_from: containers` with `application_container_action: sync-recreate`)
### Maintenance
- **`cleanup-all-containers.yml`** - Stops and removes all containers and cleans up networks and volumes (for a full server reset)
- **`system-maintenance.yml`** - System updates, unattended upgrades, Docker pruning
- **`troubleshoot.yml`** - Unified troubleshooting with tags
-- **`update-gitea-config.yml`** - Updates the Gitea configuration and restarts it
### WireGuard
- **`generate-wireguard-client.yml`** - Generates a WireGuard client config
- **`wireguard-routing.yml`** - Configures WireGuard routing
### Initial Deployment
-- **`sync-application-code.yml`** - Rsync-based code sync for the initial deployment (synchronizes code from the local repository to the server)
-- **`deploy-application-code.yml`** - Git-based code deployment (for CI/CD and future deployments)
-- **`install-composer-dependencies.yml`** - Installs Composer dependencies in the PHP container
- **`build-initial-image.yml`** - Build and push of the initial Docker image (for the first deployment)
### Code Deployment
- **`sync-application-code.yml`** - Rsync-based code sync (initial deployment)
- **`deploy-application-code.yml`** - Git-based code deployment (CI/CD)
- **`deploy-image.yml`** - Docker image deployment to the application stack
### Troubleshooting & Diagnostics
- **`check-container-logs.yml`** - Check container logs (queue-worker, web, scheduler)
- **`check-container-status.yml`** - Check container status
- **`check-final-status.yml`** - Final status check of all containers
- **`fix-container-issues.yml`** - Fix container issues (Composer dependencies, permissions)
- **`fix-web-container.yml`** - Fix web container permissions
- **`recreate-containers-with-env.yml`** - Recreate containers with env_file
- **`sync-and-recreate-containers.yml`** - Docker Compose sync and container recreate
### CI/CD & Development
- **`setup-gitea-runner-ci.yml`** - Gitea runner CI setup
-- **`setup-gitea-initial-config.yml`** - Initial Gitea setup (automated via app.ini + CLI)
-- **`setup-gitea-repository.yml`** - Creates the repository in Gitea and configures the Git remote (automated via the API)
-- **`update-gitea-config.yml`** - Updates the Gitea configuration (cache, connection pooling) to fix performance problems
- **`install-docker.yml`** - Docker installation on the server
## Removed/Legacy Playbooks
@@ -59,8 +71,83 @@ Die folgenden Playbooks wurden entfernt, da sie nicht mehr benötigt werden:
## Usage
### Standard usage
```bash
cd deployment/ansible
-ansible-playbook -i inventory/production.yml playbooks/<playbook>.yml
+ansible-playbook -i inventory/production.yml playbooks/<playbook>.yml --vault-password-file secrets/.vault_pass
```
### Role-based playbooks
Most playbooks are now wrappers that use roles. The functionality stays the same, but the implementation is organized in reusable roles:
**Example: Traefik restart**
```bash
# Old method (still works, but now calls the role):
ansible-playbook -i inventory/production.yml playbooks/restart-traefik.yml --vault-password-file secrets/.vault_pass
# Direct role usage (alternative method):
ansible-playbook -i inventory/production.yml -e "traefik_restart_action=restart" -e "traefik_show_status=true" playbooks/restart-traefik.yml
```
**Example: Gitea runner fix**
```bash
ansible-playbook -i inventory/production.yml playbooks/fix-gitea-runner-config.yml --vault-password-file secrets/.vault_pass
```
**Example: Application code deployment**
```bash
# Git-based (default):
ansible-playbook -i inventory/production.yml playbooks/deploy-application-code.yml \
  -e "deployment_environment=staging" \
  -e "git_branch=staging" \
  --vault-password-file secrets/.vault_pass
# Rsync-based (for the initial deployment):
ansible-playbook -i inventory/production.yml playbooks/sync-application-code.yml \
  --vault-password-file secrets/.vault_pass
```
### Using tags
Many playbooks support tags for selective execution:
```bash
# Only Traefik-related tasks:
ansible-playbook -i inventory/production.yml playbooks/restart-traefik.yml --tags traefik,restart
# Only Gitea-related tasks:
ansible-playbook -i inventory/production.yml playbooks/check-and-restart-gitea.yml --tags gitea,restart
# Only application-related tasks:
ansible-playbook -i inventory/production.yml playbooks/deploy-application-code.yml --tags application,deploy
```
## Role Structure
The playbooks now use the following roles:
### `traefik` role
- **Tasks**: `restart`, `config`, `logs`, `ssl`
- **Location**: `roles/traefik/tasks/`
- **Defaults**: `roles/traefik/defaults/main.yml`
### `gitea` role
- **Tasks**: `restart`, `runner`, `config`, `setup`, `repository`
- **Location**: `roles/gitea/tasks/`
- **Defaults**: `roles/gitea/defaults/main.yml`
### `application` role
- **Tasks**: `deploy_code`, `composer`, `containers`, `health_check`, `logs`, `deploy`
- **Location**: `roles/application/tasks/`
- **Defaults**: `roles/application/defaults/main.yml`
## Benefits of the Role-Based Structure
1. **Reusability**: Tasks can be used in multiple playbooks
2. **Maintainability**: Changes are made centrally in roles
3. **Testability**: Roles can be tested in isolation
4. **Clarity**: Clear structure by component
5. **Best practices**: Follows Ansible recommendations
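A minimal sketch of what such a wrapper playbook can look like, based on the role layout described above (the exact variable names and host group used by the real playbooks may differ):

```yaml
# Hypothetical wrapper in the style of restart-traefik.yml: it only sets the
# action variable and delegates the actual work to the traefik role.
---
- name: Restart Traefik Container
  hosts: production
  gather_facts: false
  vars:
    traefik_restart_action: restart
  tasks:
    - name: Run the traefik role restart tasks
      ansible.builtin.include_role:
        name: traefik
        tasks_from: restart
```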


@@ -0,0 +1,157 @@
# Traefik Restart Loop - Diagnosis & Solution
## Problem
Traefik is stopped regularly with the messages:
- "I have to go..."
- "Stopping server gracefully"
This leads to:
- ACME challenge errors
- External timeouts
- Interruptions of SSL certificate renewal
## Diagnosis Performed
### 1. Extended diagnosis (`diagnose-traefik-restarts.yml`)
**Areas checked:**
- ✅ Systemd timers (none found that stop Traefik)
- ✅ All user crontabs (none found)
- ✅ System-wide cron jobs (none found)
- ✅ Gitea workflows (found: `build-image.yml`, `manual-deploy.yml` - but they do not trigger Traefik restarts)
- ✅ Custom systemd services/timers (none found)
- ✅ At jobs (none found)
- ✅ Docker Compose watch mode (not enabled)
- ✅ Ansible `traefik_auto_restart` setting (can be checked)
- ✅ Port configuration (ports 80/443 correctly mapped to Traefik)
- ✅ Network configuration (checked)
**Results:**
- ❌ No automatic restart mechanisms found
- ✅ Ports 80/443 are configured correctly
- ✅ Traefik runs stably (no restarts during 2 minutes of monitoring)
### 2. acme.json permissions (`fix-traefik-acme-permissions.yml`)
**Results:**
- ✅ acme.json has correct permissions (chmod 600)
- ✅ Owner/group correct (deploy:deploy)
- ✅ The Traefik container can write to acme.json
### 3. Auto-restart mechanisms (`disable-traefik-auto-restarts.yml`)
**Results:**
- ❌ No cron jobs found that restart Traefik
- ❌ No systemd timers/services found
- The Ansible `traefik_auto_restart` setting can be overridden in group_vars
### 4. Traefik stabilization (`stabilize-traefik.yml`)
**Results:**
- ✅ Traefik runs stably (41 minutes uptime)
- ✅ No restarts during 2 minutes of monitoring
- ✅ Traefik is healthy
- ✅ Ports 80/443 configured correctly
## Possible Causes (not found, but worth checking)
1. **Docker service restarts**: On 08.11. at 16:12:58 the Docker service was stopped, which stopped all containers
   - Check: `journalctl -u docker.service` for regular stops
   - Check: system reboots or kernel updates
2. **Unattended upgrades**: Can lead to reboots
   - Check: `journalctl -u unattended-upgrades`
3. **Manual restarts**: Someone could be restarting Traefik manually
   - Check: Docker events for stop events
   - Check: SSH login history
4. **Gitea workflows**: Can influence Traefik indirectly
   - `build-image.yml`: Calls `deploy-image.yml` (no Traefik restarts)
   - `manual-deploy.yml`: Calls `deploy-image.yml` (no Traefik restarts)
## Available Playbooks
### Diagnosis
```bash
# Run the extended diagnosis
ansible-playbook -i inventory/production.yml \
  playbooks/diagnose-traefik-restarts.yml \
  --vault-password-file secrets/.vault_pass
```
### acme.json permissions
```bash
# Check and fix acme.json permissions
ansible-playbook -i inventory/production.yml \
  playbooks/fix-traefik-acme-permissions.yml \
  --vault-password-file secrets/.vault_pass
```
### Disable auto-restarts
```bash
# Check auto-restart mechanisms
ansible-playbook -i inventory/production.yml \
  playbooks/disable-traefik-auto-restarts.yml \
  --vault-password-file secrets/.vault_pass
```
### Stabilize Traefik
```bash
# Stabilize and monitor Traefik (10 minutes)
ansible-playbook -i inventory/production.yml \
  playbooks/stabilize-traefik.yml \
  -e "traefik_stabilize_wait_minutes=10" \
  --vault-password-file secrets/.vault_pass
```
## Recommended Next Steps
1. **Longer monitoring**: Run `stabilize-traefik.yml` with 10 minutes to see whether restarts occur
   ```bash
   ansible-playbook -i inventory/production.yml \
     playbooks/stabilize-traefik.yml \
     -e "traefik_stabilize_wait_minutes=10" \
     --vault-password-file secrets/.vault_pass
   ```
2. **Monitor Docker events**: Check Docker events for stop events
   ```bash
   docker events --filter container=traefik --format "{{.Time}} {{.Action}}"
   ```
3. **Check the Traefik logs**: Look for stop messages
   ```bash
   cd /home/deploy/deployment/stacks/traefik
   docker compose logs traefik | grep -E "I have to go|Stopping server gracefully|SIGTERM|SIGINT"
   ```
4. **Check the Docker service logs**: Check whether the Docker service is stopped regularly
   ```bash
   journalctl -u docker.service --since "7 days ago" | grep -i "stop\|restart"
   ```
5. **Check for system reboots**: Check whether regular reboots occur
   ```bash
   last reboot
   uptime
   ```
## Key Findings
- ✅ **No automatic restart mechanisms found**: No cron jobs, systemd timers, or services that stop Traefik regularly
- ✅ **acme.json is configured correctly**: Permissions (600) and container access are correct
- ✅ **Ports are correct**: Ports 80/443 point to Traefik
- ✅ **Traefik runs stably**: No restarts during the 2 minutes of monitoring
- ⚠️ **The Docker service was stopped once**: On 08.11. at 16:12:58 - this could be the cause
## Conclusion
The diagnosis shows that **no automatic restart mechanisms** are active. The "I have to go..." messages most likely come from:
1. The one-off Docker service stop (08.11. 16:12:58)
2. System reboots (not visible in the history, but possible)
3. Manual restarts (not provable)
**Recommendation**: Monitor Traefik for 10-30 minutes with `stabilize-traefik.yml` to see whether further restarts occur. If none occur, the problem was most likely the one-off Docker service stop.


@@ -0,0 +1,268 @@
---
- name: Create Comprehensive Backups
hosts: production
gather_facts: yes
become: no
vars:
backup_retention_days: "{{ backup_retention_days | default(7) }}"
pre_tasks:
- name: Ensure backup directory exists
file:
path: "{{ backups_path }}"
state: directory
mode: '0755'
become: yes
- name: Create timestamp for backup
set_fact:
backup_timestamp: "{{ ansible_date_time.epoch }}"
backup_date: "{{ ansible_date_time.date }}"
backup_time: "{{ ansible_date_time.time }}"
- name: Create backup directory for this run
file:
path: "{{ backups_path }}/backup_{{ backup_date }}_{{ backup_time }}"
state: directory
mode: '0755'
register: backup_dir
become: yes
- name: Set backup directory path
set_fact:
current_backup_dir: "{{ backup_dir.path }}"
tasks:
- name: Backup PostgreSQL Database
when: backup_postgresql | default(true) | bool
block:
- name: Check if PostgreSQL stack is running
shell: docker compose -f {{ stacks_base_path }}/postgresql/docker-compose.yml ps --format json | jq -r '.[] | select(.Service=="postgres") | .State' | grep -q "running"
register: postgres_running
changed_when: false
failed_when: false
- name: Get PostgreSQL container name
shell: docker compose -f {{ stacks_base_path }}/postgresql/docker-compose.yml ps --format json | jq -r '.[] | select(.Service=="postgres") | .Name'
register: postgres_container
changed_when: false
when: postgres_running.rc == 0
- name: Read PostgreSQL environment variables
shell: |
cd {{ stacks_base_path }}/postgresql
grep -E "^POSTGRES_(DB|USER|PASSWORD)=" .env 2>/dev/null || echo ""
register: postgres_env
changed_when: false
failed_when: false
no_log: true
- name: Extract PostgreSQL credentials
set_fact:
postgres_db: "{{ postgres_env.stdout | regex_search('POSTGRES_DB=([^\\n]+)', '\\1') | first | default('michaelschiemer') }}"
postgres_user: "{{ postgres_env.stdout | regex_search('POSTGRES_USER=([^\\n]+)', '\\1') | first | default('postgres') }}"
postgres_password: "{{ postgres_env.stdout | regex_search('POSTGRES_PASSWORD=([^\\n]+)', '\\1') | first | default('') }}"
when: postgres_running.rc == 0
no_log: true
- name: Create PostgreSQL backup
shell: |
cd {{ stacks_base_path }}/postgresql
PGPASSWORD="{{ postgres_password }}" docker compose exec -T postgres pg_dump \
-U {{ postgres_user }} \
-d {{ postgres_db }} \
--clean \
--if-exists \
--create \
--no-owner \
--no-privileges \
| gzip > {{ current_backup_dir }}/postgresql_{{ postgres_db }}_{{ backup_date }}_{{ backup_time }}.sql.gz
when: postgres_running.rc == 0
no_log: true
- name: Verify PostgreSQL backup
stat:
path: "{{ current_backup_dir }}/postgresql_{{ postgres_db }}_{{ backup_date }}_{{ backup_time }}.sql.gz"
register: postgres_backup_file
when: postgres_running.rc == 0
- name: Display PostgreSQL backup status
debug:
msg: "PostgreSQL backup: {{ 'SUCCESS' if (postgres_running.rc == 0 and postgres_backup_file.stat.exists) else 'SKIPPED (PostgreSQL not running)' }}"
- name: Backup Application Data
when: backup_application_data | default(true) | bool
block:
- name: Check if production stack is running
shell: docker compose -f {{ stacks_base_path }}/production/docker-compose.base.yml -f {{ stacks_base_path }}/production/docker-compose.production.yml ps --format json | jq -r '.[] | select(.Service=="php") | .State' | grep -q "running"
register: app_running
changed_when: false
failed_when: false
- name: Backup application storage directory
archive:
path: "{{ stacks_base_path }}/production/storage"
dest: "{{ current_backup_dir }}/application_storage_{{ backup_date }}_{{ backup_time }}.tar.gz"
format: gz
when: app_running.rc == 0
ignore_errors: yes
- name: Backup application logs
archive:
path: "{{ stacks_base_path }}/production/storage/logs"
dest: "{{ current_backup_dir }}/application_logs_{{ backup_date }}_{{ backup_time }}.tar.gz"
format: gz
when: app_running.rc == 0
ignore_errors: yes
- name: Backup application .env file
copy:
src: "{{ stacks_base_path }}/production/.env"
dest: "{{ current_backup_dir }}/application_env_{{ backup_date }}_{{ backup_time }}.env"
remote_src: yes
when: app_running.rc == 0
ignore_errors: yes
- name: Display application backup status
debug:
msg: "Application data backup: {{ 'SUCCESS' if app_running.rc == 0 else 'SKIPPED (Application not running)' }}"
- name: Backup Gitea Data
when: backup_gitea | default(true) | bool
block:
- name: Check if Gitea stack is running
shell: docker compose -f {{ stacks_base_path }}/gitea/docker-compose.yml ps --format json | jq -r '.[] | select(.Service=="gitea") | .State' | grep -q "running"
register: gitea_running
changed_when: false
failed_when: false
- name: Get Gitea volume name
shell: docker compose -f {{ stacks_base_path }}/gitea/docker-compose.yml config --volumes | head -1
register: gitea_volume
changed_when: false
when: gitea_running.rc == 0
- name: Backup Gitea volume
shell: |
docker run --rm \
-v {{ gitea_volume.stdout }}:/source:ro \
-v {{ current_backup_dir }}:/backup \
alpine tar czf /backup/gitea_data_{{ backup_date }}_{{ backup_time }}.tar.gz -C /source .
when: gitea_running.rc == 0 and gitea_volume.stdout != ""
ignore_errors: yes
- name: Display Gitea backup status
debug:
msg: "Gitea backup: {{ 'SUCCESS' if (gitea_running.rc == 0 and gitea_volume.stdout != '') else 'SKIPPED (Gitea not running)' }}"
- name: Backup Docker Registry Images (Optional)
when: backup_registry | default(false) | bool
block:
- name: Check if registry stack is running
shell: docker compose -f {{ stacks_base_path }}/registry/docker-compose.yml ps --format json | jq -r '.[] | select(.Service=="registry") | .State' | grep -q "running"
register: registry_running
changed_when: false
failed_when: false
- name: List registry images
shell: |
cd {{ stacks_base_path }}/registry
docker compose exec -T registry registry garbage-collect --dry-run /etc/docker/registry/config.yml 2>&1 | grep -E "repository|tag" || echo "No images found"
register: registry_images
changed_when: false
when: registry_running.rc == 0
ignore_errors: yes
- name: Save registry image list
copy:
content: "{{ registry_images.stdout }}"
dest: "{{ current_backup_dir }}/registry_images_{{ backup_date }}_{{ backup_time }}.txt"
when: registry_running.rc == 0 and registry_images.stdout != ""
ignore_errors: yes
- name: Display registry backup status
debug:
msg: "Registry backup: {{ 'SUCCESS' if registry_running.rc == 0 else 'SKIPPED (Registry not running)' }}"
- name: Create backup metadata
copy:
content: |
Backup Date: {{ backup_date }} {{ backup_time }}
Backup Timestamp: {{ backup_timestamp }}
Host: {{ inventory_hostname }}
Components Backed Up:
- PostgreSQL: {{ 'YES' if ((backup_postgresql | default(true) | bool) and (postgres_running.rc | default(1) == 0)) else 'NO' }}
- Application Data: {{ 'YES' if ((backup_application_data | default(true) | bool) and (app_running.rc | default(1) == 0)) else 'NO' }}
- Gitea: {{ 'YES' if ((backup_gitea | default(true) | bool) and (gitea_running.rc | default(1) == 0)) else 'NO' }}
- Registry: {{ 'YES' if ((backup_registry | default(false) | bool) and (registry_running.rc | default(1) == 0)) else 'NO' }}
Backup Location: {{ current_backup_dir }}
dest: "{{ current_backup_dir }}/backup_metadata.txt"
mode: '0644'
- name: Verify backup files
when: verify_backups | default(true) | bool
block:
- name: List all backup files
find:
paths: "{{ current_backup_dir }}"
file_type: file
register: backup_files
- name: Check backup file sizes
stat:
path: "{{ item.path }}"
register: backup_file_stats
loop: "{{ backup_files.files }}"
- name: Display backup summary
debug:
msg: |
Backup Summary:
- Total files: {{ backup_files.files | length }}
- Total size: {{ backup_file_stats.results | map(attribute='stat.size') | sum | int / 1024 / 1024 }} MB
- Location: {{ current_backup_dir }}
- name: Fail if no backup files created
fail:
msg: "No backup files were created in {{ current_backup_dir }}"
when: backup_files.files | length == 0
- name: Cleanup old backups
block:
- name: Find old backup directories
find:
paths: "{{ backups_path }}"
patterns: "backup_*"
file_type: directory
register: backup_dirs
- name: Calculate cutoff date
set_fact:
cutoff_timestamp: "{{ (ansible_date_time.epoch | int) - (backup_retention_days | int * 86400) }}"
- name: Remove old backup directories
file:
path: "{{ item.path }}"
state: absent
loop: "{{ backup_dirs.files }}"
when: item.mtime | int < cutoff_timestamp | int
become: yes
- name: Display cleanup summary
debug:
msg: "Cleaned up backups older than {{ backup_retention_days }} days"
post_tasks:
- name: Display final backup status
debug:
msg: |
==========================================
Backup completed successfully!
==========================================
Backup location: {{ current_backup_dir }}
Retention: {{ backup_retention_days }} days
==========================================
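A minimal invocation sketch for this backup play; the playbook path, inventory file, and backup directory below are illustrative assumptions, not taken from this commit:
# Full backup with defaults (all components, 7-day retention); playbook path is assumed
ansible-playbook -i inventory/production.ini playbooks/backup-all.yml
# PostgreSQL only, with a 14-day retention window
ansible-playbook -i inventory/production.ini playbooks/backup-all.yml \
  -e "backup_application_data=false backup_gitea=false backup_retention_days=14"
# Restore sketch: the dump is created with --clean/--create, so psql can recreate the database
BACKUP_DIR=/home/deploy/backups/backup_2025-11-09_03:00:00   # assumed backups_path layout
gunzip -c "$BACKUP_DIR"/postgresql_*.sql.gz | \
  docker compose -f /home/deploy/deployment/stacks/postgresql/docker-compose.yml exec -T postgres psql -U postgres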

View File

@@ -0,0 +1,432 @@
---
- name: Build and Push Initial Docker Image
hosts: production
become: no
gather_facts: yes
vars:
vault_file: "{{ playbook_dir }}/../secrets/production.vault.yml"
build_repo_path: "/home/deploy/michaelschiemer"
build_repo_url: "{{ git_repository_url_default | default('https://git.michaelschiemer.de/michael/michaelschiemer.git') }}"
build_repo_branch_default: "main"
# Local repository path for cloning (temporary)
local_repo_path: "/tmp/michaelschiemer-build-{{ ansible_date_time.epoch }}"
# Check if local repository exists (project root)
local_repo_check_path: "{{ playbook_dir | default('') | dirname | dirname | dirname }}"
image_name: "{{ app_name | default('framework') }}"
image_tag_default: "latest"
registry_url: "{{ docker_registry | default('localhost:5000') }}"
registry_username: "{{ vault_docker_registry_username | default(docker_registry_username_default | default('admin')) }}"
registry_password: "{{ vault_docker_registry_password | default('') }}"
pre_tasks:
- name: Verify vault file exists
ansible.builtin.stat:
path: "{{ vault_file }}"
register: vault_stat
delegate_to: localhost
become: no
- name: Load vault secrets
ansible.builtin.include_vars:
file: "{{ vault_file }}"
when: vault_stat.stat.exists
no_log: yes
ignore_errors: yes
delegate_to: localhost
become: no
- name: Verify registry password is set
ansible.builtin.fail:
msg: |
Registry password is required!
Please set vault_docker_registry_password in:
{{ vault_file }}
Or pass it via extra vars:
-e "registry_password=your-password"
when: registry_password | string | trim == ''
tasks:
- name: Set build variables
ansible.builtin.set_fact:
build_repo_branch: "{{ build_repo_branch | default(build_repo_branch_default) }}"
image_tag: "{{ build_image_tag | default(image_tag_default) }}"
- name: Display build information
ansible.builtin.debug:
msg: |
Building Docker Image:
- Repository: {{ build_repo_url }}
- Branch: {{ build_repo_branch }}
- Build Path: {{ build_repo_path }}
- Registry: {{ registry_url }}
- Image: {{ image_name }}:{{ image_tag }}
- Username: {{ registry_username }}
- name: Check if local repository exists (project root)
ansible.builtin.stat:
path: "{{ local_repo_check_path }}/.git"
delegate_to: localhost
register: local_repo_exists
become: no
- name: Configure Git to skip SSL verification for git.michaelschiemer.de (local)
ansible.builtin.command: |
git config --global http.https://git.michaelschiemer.de.sslVerify false
delegate_to: localhost
changed_when: false
failed_when: false
become: no
when: not local_repo_exists.stat.exists
- name: Determine Git URL with authentication if token is available
ansible.builtin.set_fact:
git_repo_url_with_auth: >-
{%- if vault_git_token is defined and vault_git_token | string | trim != '' -%}
https://{{ vault_git_token }}@git.michaelschiemer.de/michael/michaelschiemer.git
{%- elif vault_git_username is defined and vault_git_username | string | trim != '' and vault_git_password is defined and vault_git_password | string | trim != '' -%}
https://{{ vault_git_username }}:{{ vault_git_password }}@git.michaelschiemer.de/michael/michaelschiemer.git
{%- else -%}
{{ build_repo_url }}
{%- endif -%}
no_log: yes
- name: Debug Git URL (without credentials)
ansible.builtin.debug:
msg: |
Git Repository Configuration:
- Original URL: {{ build_repo_url }}
- Local repo exists: {{ 'YES' if local_repo_exists.stat.exists else 'NO' }}
- Local repo path: {{ local_repo_check_path }}
- Using authentication: {{ 'YES' if (vault_git_token is defined and vault_git_token | string | trim != '') or (vault_git_username is defined and vault_git_username | string | trim != '') else 'NO' }}
- Auth method: {{ 'Token' if (vault_git_token is defined and vault_git_token | string | trim != '') else 'Username/Password' if (vault_git_username is defined and vault_git_username | string | trim != '') else 'None' }}
no_log: yes
- name: Use existing local repository or clone to temporary location
block:
- name: Clone repository to temporary location (local)
ansible.builtin.git:
repo: "{{ git_repo_url_with_auth }}"
dest: "{{ local_repo_path }}"
version: "{{ build_repo_branch }}"
force: yes
update: yes
delegate_to: localhost
register: git_result
changed_when: git_result.changed
environment:
GIT_SSL_NO_VERIFY: "1"
no_log: yes
when: not local_repo_exists.stat.exists
- name: Set local repository path to existing project root
ansible.builtin.set_fact:
source_repo_path: "{{ local_repo_check_path }}"
when: local_repo_exists.stat.exists
- name: Set local repository path to cloned temporary location
ansible.builtin.set_fact:
source_repo_path: "{{ local_repo_path }}"
when: not local_repo_exists.stat.exists
- name: Display repository source
ansible.builtin.debug:
msg: |
Repository source:
- Using existing local repo: {{ 'YES' if local_repo_exists.stat.exists else 'NO' }}
- Source path: {{ source_repo_path }}
- Branch: {{ build_repo_branch }}
- name: Ensure build directory exists on server
ansible.builtin.file:
path: "{{ build_repo_path }}"
state: directory
mode: '0755'
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
- name: Copy repository to server (excluding .git and build artifacts)
ansible.builtin.synchronize:
src: "{{ source_repo_path }}/"
dest: "{{ build_repo_path }}/"
delete: no
recursive: yes
rsync_opts:
- "--chmod=D755,F644"
- "--exclude=.git"
- "--exclude=.gitignore"
- "--exclude=node_modules"
- "--exclude=vendor"
- "--exclude=.env"
- "--exclude=.env.*"
- "--exclude=*.log"
- "--exclude=.idea"
- "--exclude=.vscode"
- "--exclude=.DS_Store"
- "--exclude=*.swp"
- "--exclude=*.swo"
- "--exclude=*~"
- "--exclude=.phpunit.result.cache"
- "--exclude=coverage"
- "--exclude=.phpunit.cache"
- "--exclude=public/assets"
- "--exclude=storage/logs"
- "--exclude=storage/framework/cache"
- "--exclude=storage/framework/sessions"
- "--exclude=storage/framework/views"
- name: Clean up temporary cloned repository
ansible.builtin.file:
path: "{{ local_repo_path }}"
state: absent
delegate_to: localhost
become: no
when:
- not local_repo_exists.stat.exists
- local_repo_path is defined
- name: Check if Dockerfile.production exists on server
ansible.builtin.stat:
path: "{{ build_repo_path }}/Dockerfile.production"
register: dockerfile_stat
- name: Fail if Dockerfile.production not found
ansible.builtin.fail:
msg: |
Dockerfile.production not found at {{ build_repo_path }}/Dockerfile.production
Please verify:
1. The repository was copied successfully to {{ build_repo_path }}
2. The Dockerfile.production file exists in the repository
3. The source repository path is correct: {{ source_repo_path }}
when: not dockerfile_stat.stat.exists
- name: Check if entrypoint script exists on server
ansible.builtin.stat:
path: "{{ build_repo_path }}/docker/entrypoint.sh"
register: entrypoint_stat
- name: Display entrypoint script status
ansible.builtin.debug:
msg: |
Entrypoint Script Check:
- Path: {{ build_repo_path }}/docker/entrypoint.sh
- Exists: {{ entrypoint_stat.stat.exists | default(false) }}
{% if entrypoint_stat.stat.exists %}
- Mode: {{ entrypoint_stat.stat.mode | default('unknown') }}
- Size: {{ entrypoint_stat.stat.size | default(0) }} bytes
{% endif %}
when: not ansible_check_mode
- name: Fail if entrypoint script not found
ansible.builtin.fail:
msg: |
Entrypoint script not found at {{ build_repo_path }}/docker/entrypoint.sh
This file is required for the Docker image build!
Please verify:
1. The file exists in the source repository: {{ source_repo_path }}/docker/entrypoint.sh
2. The rsync operation copied the file successfully
when: not entrypoint_stat.stat.exists
- name: Convert entrypoint script to LF line endings
ansible.builtin.shell: |
sed -i 's/\r$//' "{{ build_repo_path }}/docker/entrypoint.sh"
when: entrypoint_stat.stat.exists
changed_when: false
failed_when: false
- name: Verify entrypoint script has LF line endings
ansible.builtin.shell: |
if head -1 "{{ build_repo_path }}/docker/entrypoint.sh" | od -c | grep -q "\\r"; then
echo "CRLF_DETECTED"
else
echo "LF_ONLY"
fi
register: line_ending_check
changed_when: false
when: entrypoint_stat.stat.exists
- name: Display line ending check result
ansible.builtin.debug:
msg: |
Entrypoint Script Line Endings:
- Status: {{ line_ending_check.stdout | default('unknown') }}
{% if 'CRLF_DETECTED' in line_ending_check.stdout %}
⚠️ WARNING: Script still has CRLF line endings after conversion attempt!
{% else %}
✅ Script has LF line endings
{% endif %}
when:
- entrypoint_stat.stat.exists
- not ansible_check_mode
- name: Login to Docker registry
community.docker.docker_login:
registry_url: "{{ registry_url }}"
username: "{{ registry_username }}"
password: "{{ registry_password }}"
no_log: yes
register: login_result
- name: Verify registry login
ansible.builtin.debug:
msg: "✅ Successfully logged in to {{ registry_url }}"
when: not login_result.failed | default(false)
- name: Fail if registry login failed
ansible.builtin.fail:
msg: |
Failed to login to Docker registry!
Registry: {{ registry_url }}
Username: {{ registry_username }}
Please verify:
1. The registry is running and accessible
2. The username and password are correct
3. The registry URL is correct
when: login_result.failed | default(false)
- name: Verify Docker Buildx is available
ansible.builtin.command: docker buildx version
register: buildx_check
changed_when: false
failed_when: false
- name: Warn if Buildx is not available
ansible.builtin.debug:
msg: |
⚠️ Docker Buildx not found. BuildKit features may not work.
Install with: apt-get install docker-buildx-plugin
when: buildx_check.rc != 0
- name: Set build cache option
ansible.builtin.set_fact:
build_no_cache: "{{ build_no_cache | default('false') | bool }}"
- name: Build and push Docker image with BuildKit
ansible.builtin.shell: |
set -e
BUILD_ARGS=""
{% if build_no_cache | bool %}
BUILD_ARGS="--no-cache"
{% endif %}
DOCKER_BUILDKIT=1 docker buildx build \
--platform linux/amd64 \
--file {{ build_repo_path }}/Dockerfile.production \
--tag {{ registry_url }}/{{ image_name }}:{{ image_tag }} \
--push \
--progress=plain \
$BUILD_ARGS \
{{ build_repo_path }}
register: build_result
environment:
DOCKER_BUILDKIT: "1"
changed_when: build_result.rc == 0
failed_when: build_result.rc != 0
async: 3600
poll: 10
- name: Display build result
ansible.builtin.debug:
msg: |
Build result:
- Image: {{ registry_url }}/{{ image_name }}:{{ image_tag }}
- Return code: {{ build_result.rc | default('unknown') }}
- Changed: {{ build_result.changed | default(false) }}
- Failed: {{ build_result.failed | default(false) }}
when: build_result is defined
- name: Display build output on failure
ansible.builtin.debug:
msg: |
Build failed! Output:
{{ build_result.stdout_lines | default([]) | join('\n') }}
Error output:
{{ build_result.stderr_lines | default([]) | join('\n') }}
when:
- build_result is defined
- build_result.rc | default(0) != 0
- name: Verify image exists locally
ansible.builtin.command: |
docker image inspect {{ registry_url }}/{{ image_name }}:{{ image_tag }}
register: image_check
changed_when: false
failed_when: false
- name: Verify entrypoint script in built image
shell: |
CONTAINER_ID=$(docker create {{ registry_url }}/{{ image_name }}:{{ image_tag }} 2>/dev/null) && \
if docker cp $CONTAINER_ID:/usr/local/bin/entrypoint.sh /tmp/entrypoint_verify.sh 2>&1; then \
echo "FILE_EXISTS"; \
ls -la /tmp/entrypoint_verify.sh; \
head -3 /tmp/entrypoint_verify.sh; \
file /tmp/entrypoint_verify.sh; \
rm -f /tmp/entrypoint_verify.sh; \
else \
echo "FILE_NOT_FOUND"; \
fi && \
docker rm $CONTAINER_ID >/dev/null 2>&1 || true
register: image_entrypoint_check
changed_when: false
failed_when: false
- name: Display entrypoint verification result
debug:
msg: |
==========================================
Entrypoint Script Verification in Built Image
==========================================
Image: {{ registry_url }}/{{ image_name }}:{{ image_tag }}
Verification Result:
{{ image_entrypoint_check.stdout | default('Check failed') }}
{% if 'FILE_NOT_FOUND' in image_entrypoint_check.stdout %}
⚠️ CRITICAL: Entrypoint script NOT FOUND in built image!
This means the Docker build did not copy the entrypoint script.
Possible causes:
1. The COPY command in Dockerfile.production failed silently
2. The docker/entrypoint.sh file was not in the build context
3. There was an issue with the multi-stage build
Please check:
1. Build logs above for COPY errors
2. Verify docker/entrypoint.sh exists: ls -la {{ build_repo_path }}/docker/entrypoint.sh
3. Rebuild with verbose output to see COPY step
{% elif 'FILE_EXISTS' in image_entrypoint_check.stdout %}
✅ Entrypoint script found in built image
{% endif %}
==========================================
- name: Display image information
ansible.builtin.debug:
msg: |
✅ Image built and pushed successfully!
Registry: {{ registry_url }}
Image: {{ image_name }}:{{ image_tag }}
Local: {{ 'Available' if image_check.rc == 0 else 'Not found locally' }}
Next steps:
1. Run setup-infrastructure.yml to deploy the application stack
2. Or manually deploy using docker-compose
when: image_check.rc == 0
- name: Warn if image not found locally
ansible.builtin.debug:
msg: |
⚠️ Image was pushed but not found locally.
This is normal if the image was pushed to a remote registry.
Verify the image exists in the registry:
curl -u {{ registry_username }}:{{ registry_password }} http://{{ registry_url }}/v2/{{ image_name }}/tags/list
when: image_check.rc != 0
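A sketch of how this build play might be driven; the playbook filename and inventory path are assumptions, while build_image_tag, build_no_cache and registry_password map to the vars defined above:
# Default build (tag "latest", cached layers), registry password taken from the vault file
ansible-playbook -i inventory/production.ini playbooks/build-and-push-initial-image.yml --ask-vault-pass
# Pinned tag, clean build, password supplied ad hoc instead of via vault
ansible-playbook -i inventory/production.ini playbooks/build-and-push-initial-image.yml \
  -e "build_image_tag=v1.2.3 build_no_cache=true registry_password=REGISTRY_PASSWORD"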

View File

@@ -1,78 +0,0 @@
---
- name: Check Traefik Logs After Grafana Access
hosts: production
gather_facts: no
become: no
tasks:
- name: Instructions
debug:
msg:
- "=== ANWEISUNG ==="
- "Bitte mache JETZT einen Zugriff auf https://grafana.michaelschiemer.de im Browser"
- "Dann pr?fe ich die Logs und sage dir, ob Traffic ?ber VPN kommt"
- ""
- name: Wait for access
pause:
seconds: 15
- name: Get last 20 Grafana requests
shell: |
cd ~/deployment/stacks/traefik
tail -500 logs/access.log | grep -i grafana | tail -20
args:
executable: /bin/bash
register: recent_grafana_logs
ignore_errors: yes
failed_when: false
- name: Extract and display client IPs with timestamps
shell: |
cd ~/deployment/stacks/traefik
tail -100 logs/access.log | grep -i grafana | tail -10 | while IFS= read -r line; do
time=$(echo "$line" | grep -oP '"time":"[^"]*"' | sed 's/"time":"//;s/"//' | cut -d'T' -f2 | cut -d'+' -f1)
client=$(echo "$line" | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//')
status=$(echo "$line" | grep -oP '"DownstreamStatus":[0-9]+' | sed 's/"DownstreamStatus"://')
if [[ "$client" =~ ^10\.8\.0\.[0-9]+$ ]]; then
echo "$time | ClientHost: $client | Status: $status ? VPN-IP (Traffic kommt ?ber VPN!)"
elif [[ "$client" == "89.246.96.244" ]]; then
echo "$time | ClientHost: $client | Status: $status ? ?ffentliche IP (Traffic kommt NICHT ?ber VPN)"
else
echo "$time | ClientHost: $client | Status: $status ? Unbekannte IP"
fi
done
args:
executable: /bin/bash
register: analysis_result
ignore_errors: yes
failed_when: false
- name: Display analysis
debug:
msg: "{{ analysis_result.stdout_lines }}"
- name: Get unique client IPs from last 10 requests
shell: |
cd ~/deployment/stacks/traefik
tail -100 logs/access.log | grep -i grafana | tail -10 | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//' | sort -u
args:
executable: /bin/bash
register: unique_ips
ignore_errors: yes
failed_when: false
- name: Display unique IPs
debug:
msg: "{{ unique_ips.stdout_lines }}"
- name: Final verdict
debug:
msg:
- ""
- "=== ERGEBNIS ==="
- "Pr?fe die obigen Zeilen:"
- "- ? Wenn ClientHost: 10.8.0.7 ? Traffic kommt ?ber VPN!"
- "- ? Wenn ClientHost: 89.246.96.244 ? Traffic kommt NICHT ?ber VPN"
- ""
- "N?chster Schritt: Wenn VPN funktioniert, entfernen wir die tempor?re IP-Erlaubnis!"

View File

@@ -1,126 +1,14 @@
New playbook (wrapper):
---
# Check and Restart Gitea if Unhealthy
# Wrapper Playbook for gitea role restart tasks
- hosts: production
gather_facts: yes
become: no
tasks:
- name: Include gitea restart tasks
ansible.builtin.include_role:
name: gitea
tasks_from: restart
tags:
- gitea
- restart
Removed playbook (previous inline version, continued below):
---
- name: Check and Restart Gitea if Unhealthy
hosts: production
gather_facts: yes
become: no
vars:
gitea_stack_path: "{{ stacks_base_path | default('/home/deploy/deployment/stacks') }}/gitea"
gitea_url: "https://git.michaelschiemer.de"
gitea_container_name: "gitea"
tasks:
- name: Check if Gitea stack directory exists
stat:
path: "{{ gitea_stack_path }}"
register: gitea_stack_exists
- name: Fail if Gitea stack directory does not exist
fail:
msg: "Gitea stack directory not found at {{ gitea_stack_path }}"
when: not gitea_stack_exists.stat.exists
- name: Check Gitea container status
shell: |
cd {{ gitea_stack_path }}
docker compose ps {{ gitea_container_name }} --format json
register: gitea_container_status
changed_when: false
failed_when: false
- name: Display Gitea container status
debug:
msg: |
Gitea Container Status:
{{ gitea_container_status.stdout | default('Container not found or error') }}
- name: Check Gitea health endpoint
uri:
url: "{{ gitea_url }}/api/healthz"
method: GET
status_code: [200]
validate_certs: false
timeout: 10
register: gitea_health
ignore_errors: yes
changed_when: false
- name: Display Gitea health check result
debug:
msg: |
Gitea Health Check:
- Status Code: {{ gitea_health.status | default('UNREACHABLE') }}
- Response Time: {{ gitea_health.elapsed | default('N/A') }}s
{% if gitea_health.status == 200 %}
- Status: ✅ HEALTHY
{% else %}
- Status: ❌ UNHEALTHY or TIMEOUT
{% endif %}
- name: Get Gitea container logs (last 50 lines)
shell: |
cd {{ gitea_stack_path }}
docker compose logs --tail=50 {{ gitea_container_name }} 2>&1 || echo "LOGS_NOT_AVAILABLE"
register: gitea_logs
changed_when: false
failed_when: false
- name: Display Gitea container logs
debug:
msg: |
Gitea Container Logs (last 50 lines):
{{ gitea_logs.stdout | default('No logs available') }}
- name: Check if Gitea container is running
set_fact:
gitea_is_running: "{{ '\"State\":\"running\"' in (gitea_container_status.stdout | default('')) }}"
- name: Check if Gitea is healthy
set_fact:
gitea_is_healthy: "{{ gitea_health.status | default(0) == 200 }}"
- name: Restart Gitea container if unhealthy or not running
shell: |
cd {{ gitea_stack_path }}
docker compose restart {{ gitea_container_name }}
when: not gitea_is_healthy or not gitea_is_running
register: gitea_restart
changed_when: gitea_restart.rc == 0
- name: Wait for Gitea to be ready after restart
uri:
url: "{{ gitea_url }}/api/healthz"
method: GET
status_code: [200]
validate_certs: false
timeout: 10
register: gitea_health_after_restart
until: gitea_health_after_restart.status == 200
retries: 30
delay: 2
when: not gitea_is_healthy or not gitea_is_running
ignore_errors: yes
- name: Display final status
debug:
msg: |
========================================
Gitea Status Summary
========================================
Container Running: {{ '✅ YES' if gitea_is_running else '❌ NO' }}
Health Check: {{ '✅ HEALTHY' if gitea_is_healthy else '❌ UNHEALTHY' }}
{% if not gitea_is_healthy or not gitea_is_running %}
Action Taken: 🔄 Container restarted
Final Status: {{ '✅ HEALTHY' if gitea_health_after_restart.status | default(0) == 200 else '❌ STILL UNHEALTHY' }}
{% else %}
Action Taken: No action needed
{% endif %}
========================================
{% if gitea_health_after_restart.status | default(0) == 200 %}
✅ Gitea is now accessible and healthy!
{% elif not gitea_is_healthy and not gitea_is_running %}
⚠️ Gitea container was restarted but may still be starting up.
Please check logs manually: docker compose -f {{ gitea_stack_path }}/docker-compose.yml logs gitea
{% endif %}
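A sketch of invoking the new wrapper playbook shown above, assuming it is saved as playbooks/check-gitea.yml and that the gitea role provides a restart task file (both paths are assumptions):
# Run only the Gitea restart tasks from the wrapper (assumed playbook and inventory paths)
ansible-playbook -i inventory/production.ini playbooks/check-gitea.yml --tags gitea,restart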

View File

@@ -0,0 +1,14 @@
---
# Check Container Logs for Troubleshooting
# Wrapper Playbook for application role logs tasks
- hosts: production
gather_facts: no
become: no
tasks:
- name: Include application logs tasks
ansible.builtin.include_role:
name: application
tasks_from: logs
tags:
- application
- logs

View File

@@ -0,0 +1,15 @@
---
# Check Container Status After Code Sync
# Wrapper Playbook for application role health_check tasks
- hosts: production
gather_facts: no
become: no
tasks:
- name: Include application health_check tasks
ansible.builtin.include_role:
name: application
tasks_from: health_check
tags:
- application
- health
- status
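As a sketch, this wrapper can be scoped via its tags or limited to the production group at run time (playbook and inventory paths are assumptions):
# Health-only run, restricted to the production hosts
ansible-playbook -i inventory/production.ini playbooks/check-container-status.yml --tags health --limit production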

View File

@@ -1,40 +0,0 @@
---
- name: Check Docker Compose Logs for JSON Output
hosts: production
gather_facts: yes
become: no
tasks:
- name: Get recent docker compose logs for staging-app (JSON format check)
shell: |
cd ~/deployment/stacks/staging
echo "=== Last 100 lines of staging-app logs ==="
docker compose logs --tail=100 staging-app 2>&1 | tail -50
echo ""
echo "=== Checking for JSON logs ==="
docker compose logs --tail=200 staging-app 2>&1 | grep -E '^{"|^\{' | head -5 || echo "No JSON logs found (or logs are in plain text)"
args:
executable: /bin/bash
register: compose_logs
ignore_errors: yes
failed_when: false
- name: Display compose logs
debug:
msg: "{{ compose_logs.stdout_lines }}"
- name: Get all recent logs from all staging services
shell: |
cd ~/deployment/stacks/staging
echo "=== All staging services logs (last 30 lines each) ==="
docker compose logs --tail=30 2>&1
args:
executable: /bin/bash
register: all_logs
ignore_errors: yes
failed_when: false
- name: Display all logs
debug:
msg: "{{ all_logs.stdout_lines }}"
when: all_logs.stdout_lines is defined

View File

@@ -1,63 +0,0 @@
---
- name: Check Entrypoint Script Execution
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check when nginx container started
shell: |
cd ~/deployment/stacks/staging
docker compose ps staging-nginx --format "{{ '{{' }}.Status{{ '}}' }}" || echo "Container not running"
args:
executable: /bin/bash
register: container_status
ignore_errors: yes
failed_when: false
- name: Display container status
debug:
msg: "{{ container_status.stdout }}"
- name: Check entrypoint logs
shell: |
cd ~/deployment/stacks/staging
echo "=== Entrypoint logs (startup) ==="
docker compose logs staging-nginx 2>&1 | grep -E "(??|Fixing|PHP-FPM|upstream)" | head -20
args:
executable: /bin/bash
register: entrypoint_logs
ignore_errors: yes
failed_when: false
- name: Display entrypoint logs
debug:
msg: "{{ entrypoint_logs.stdout_lines }}"
- name: Check if sites-available/default is a volume mount
shell: |
cd ~/deployment/stacks/staging
docker inspect staging-nginx 2>&1 | grep -A 20 "Mounts" | grep "sites-available\|sites-enabled" || echo "No volume mounts for sites-available"
args:
executable: /bin/bash
register: volume_check
ignore_errors: yes
failed_when: false
- name: Display volume check
debug:
msg: "{{ volume_check.stdout_lines }}"
- name: Check when sites-available/default was last modified
shell: |
cd ~/deployment/stacks/staging
docker compose exec -T staging-nginx stat -c "%y" /etc/nginx/sites-available/default 2>&1 || echo "Could not get file stat"
args:
executable: /bin/bash
register: file_stat
ignore_errors: yes
failed_when: false
- name: Display file modification time
debug:
msg: "{{ file_stat.stdout_lines }}"

View File

@@ -0,0 +1,83 @@
---
- name: Check .env File and Environment Variables
hosts: production
gather_facts: no
become: no
vars:
application_stack_dest: "{{ app_stack_path | default(stacks_base_path + '/production') }}"
application_compose_suffix: "production.yml"
tasks:
- name: Check if .env file exists
stat:
path: "{{ application_stack_dest }}/.env"
delegate_to: "{{ inventory_hostname }}"
register: env_file_exists
- name: Display .env file status
debug:
msg: ".env file exists: {{ env_file_exists.stat.exists }}"
- name: Read .env file content (first 50 lines)
shell: |
head -50 {{ application_stack_dest }}/.env 2>&1 || echo "FILE_NOT_FOUND"
delegate_to: "{{ inventory_hostname }}"
register: env_file_content
changed_when: false
when: env_file_exists.stat.exists
- name: Display .env file content
debug:
msg: |
.env file content:
{{ env_file_content.stdout }}
- name: Check for DB_DATABASE in .env file
shell: |
grep -E "^DB_DATABASE=|^DB_NAME=" {{ application_stack_dest }}/.env 2>&1 || echo "NOT_FOUND"
delegate_to: "{{ inventory_hostname }}"
register: db_database_check
changed_when: false
when: env_file_exists.stat.exists
- name: Display DB_DATABASE check
debug:
msg: "DB_DATABASE in .env: {{ db_database_check.stdout }}"
- name: Check environment variables in queue-worker container
shell: |
docker compose -f {{ application_stack_dest }}/docker-compose.base.yml -f {{ application_stack_dest }}/docker-compose.{{ application_compose_suffix }} exec -T queue-worker env | grep -E "^DB_" | sort
register: queue_worker_env
changed_when: false
failed_when: false
ignore_errors: yes
- name: Display queue-worker environment variables
debug:
msg: |
Queue-Worker DB Environment Variables:
{{ queue_worker_env.stdout | default('CONTAINER_NOT_RUNNING') }}
- name: Check docker-compose project directory
shell: |
cd {{ application_stack_dest }} && pwd
delegate_to: "{{ inventory_hostname }}"
register: project_dir
changed_when: false
- name: Display project directory
debug:
msg: "Docker Compose project directory: {{ project_dir.stdout }}"
- name: Check if .env file is in project directory
shell: |
test -f {{ application_stack_dest }}/.env && echo "EXISTS" || echo "MISSING"
delegate_to: "{{ inventory_hostname }}"
register: env_in_project_dir
changed_when: false
- name: Display .env file location check
debug:
msg: ".env file in project directory: {{ env_in_project_dir.stdout }}"

View File

@@ -0,0 +1,18 @@
---
# Check Final Container Status
# Wrapper Playbook for application role health_check tasks (final status)
- hosts: production
gather_facts: no
become: no
vars:
application_health_check_final: true
application_health_check_logs_tail: 5
tasks:
- name: Include application health_check tasks (final)
ansible.builtin.include_role:
name: application
tasks_from: health_check
tags:
- application
- health
- final
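A sketch of overriding this wrapper's vars at run time, for example to pull a longer log tail into the final status report (playbook path is an assumption):
# Show the last 50 log lines per container in the final health check
ansible-playbook -i inventory/production.ini playbooks/check-final-status.yml \
  -e "application_health_check_logs_tail=50"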

View File

@@ -1,68 +0,0 @@
---
- name: Check Git Deployment Logs
hosts: production
gather_facts: yes
become: no
tasks:
- name: Get full container logs
shell: |
docker logs app --tail 100
args:
executable: /bin/bash
register: container_logs
changed_when: false
- name: Get Git-related logs
shell: |
docker logs app --tail 100 | grep -E "(Git|Clone|Pull|✅|❌|📥|📦|🔄|🗑️)" || echo "No Git-related logs found"
args:
executable: /bin/bash
register: git_logs
changed_when: false
- name: Check GIT_REPOSITORY_URL environment variable
shell: |
docker exec app env | grep GIT_REPOSITORY_URL || echo "GIT_REPOSITORY_URL not set"
args:
executable: /bin/bash
register: git_env
changed_when: false
ignore_errors: yes
- name: Check if .git directory exists
shell: |
docker exec app test -d /var/www/html/.git && echo "✅ Git repo present" || echo "❌ Git repo missing"
args:
executable: /bin/bash
register: git_repo_check
changed_when: false
ignore_errors: yes
- name: Check entrypoint script for Git functionality
shell: |
docker exec app cat /usr/local/bin/entrypoint.sh | grep -A 5 "GIT_REPOSITORY_URL" | head -10 || echo "Entrypoint script not found or no Git functionality"
args:
executable: /bin/bash
register: entrypoint_check
changed_when: false
ignore_errors: yes
- name: Display Git-related logs
debug:
msg:
- "=== Git-Related Logs ==="
- "{{ git_logs.stdout }}"
- ""
- "=== Git Environment Variable ==="
- "{{ git_env.stdout }}"
- ""
- "=== Git Repository Check ==="
- "{{ git_repo_check.stdout }}"
- ""
- "=== Entrypoint Git Check ==="
- "{{ entrypoint_check.stdout }}"
- name: Display full logs (last 50 lines)
debug:
msg: "{{ container_logs.stdout_lines[-50:] | join('\n') }}"

View File

@@ -1,192 +0,0 @@
---
- name: Diagnose Gitea Bad Gateway Issue
hosts: production
gather_facts: yes
become: no
vars:
gitea_stack_path: "{{ stacks_base_path }}/gitea"
tasks:
- name: Check if Gitea stack directory exists
stat:
path: "{{ gitea_stack_path }}"
register: gitea_stack_dir
- name: Display Gitea stack directory status
debug:
msg: "Gitea stack path: {{ gitea_stack_path }} - Exists: {{ gitea_stack_dir.stat.exists }}"
- name: Check Gitea container status
shell: |
cd {{ gitea_stack_path }}
echo "=== Gitea Container Status ==="
docker compose ps 2>&1 || echo "Could not check container status"
args:
executable: /bin/bash
register: gitea_status
ignore_errors: yes
failed_when: false
when: gitea_stack_dir.stat.exists
- name: Display Gitea container status
debug:
msg: "{{ gitea_status.stdout_lines }}"
when: gitea_stack_dir.stat.exists
- name: Check if Gitea container is running
shell: |
docker ps --filter name=gitea --format "{{ '{{' }}.Names{{ '}}' }}: {{ '{{' }}.Status{{ '}}' }}"
register: gitea_running
ignore_errors: yes
failed_when: false
- name: Display Gitea running status
debug:
msg: "{{ gitea_running.stdout_lines if gitea_running.stdout else 'Gitea container not found' }}"
- name: Check Gitea logs (last 50 lines)
shell: |
cd {{ gitea_stack_path }}
echo "=== Gitea Logs (Last 50 lines) ==="
docker compose logs --tail=50 gitea 2>&1 || echo "Could not read Gitea logs"
args:
executable: /bin/bash
register: gitea_logs
ignore_errors: yes
failed_when: false
when: gitea_stack_dir.stat.exists
- name: Display Gitea logs
debug:
msg: "{{ gitea_logs.stdout_lines }}"
when: gitea_stack_dir.stat.exists
- name: Check Gitea container health
shell: |
docker inspect gitea --format '{{ '{{' }}.State.Health.Status{{ '}}' }}' 2>&1 || echo "Could not check health"
register: gitea_health
ignore_errors: yes
failed_when: false
- name: Display Gitea health status
debug:
msg: "Gitea health: {{ gitea_health.stdout }}"
- name: Test Gitea health endpoint from container
shell: |
docker exec gitea curl -f http://localhost:3000/api/healthz 2>&1 || echo "Health check failed"
register: gitea_internal_health
ignore_errors: yes
failed_when: false
- name: Display internal health check result
debug:
msg: "{{ gitea_internal_health.stdout_lines }}"
- name: Check if Gitea is reachable from Traefik network
shell: |
docker exec traefik curl -f http://gitea:3000/api/healthz 2>&1 || echo "Could not reach Gitea from Traefik network"
register: gitea_from_traefik
ignore_errors: yes
failed_when: false
- name: Display Traefik to Gitea connectivity
debug:
msg: "{{ gitea_from_traefik.stdout_lines }}"
- name: Check Traefik logs for Gitea errors
shell: |
cd {{ stacks_base_path }}/traefik
echo "=== Traefik Logs - Gitea related (Last 30 lines) ==="
docker compose logs --tail=100 traefik 2>&1 | grep -i "gitea" | tail -30 || echo "No Gitea-related logs found"
args:
executable: /bin/bash
register: traefik_gitea_logs
ignore_errors: yes
failed_when: false
- name: Display Traefik Gitea logs
debug:
msg: "{{ traefik_gitea_logs.stdout_lines }}"
- name: Check Docker networks
shell: |
echo "=== Docker Networks ==="
docker network ls
echo ""
echo "=== Traefik Network Details ==="
docker network inspect traefik-public 2>&1 | grep -E "(Name|Subnet|Containers|gitea)" || echo "Could not inspect traefik-public network"
args:
executable: /bin/bash
register: network_info
ignore_errors: yes
failed_when: false
- name: Display network info
debug:
msg: "{{ network_info.stdout_lines }}"
- name: Check if Gitea is in traefik-public network
shell: |
docker network inspect traefik-public 2>&1 | grep -i "gitea" || echo "Gitea not found in traefik-public network"
register: gitea_in_network
ignore_errors: yes
failed_when: false
- name: Display Gitea network membership
debug:
msg: "{{ gitea_in_network.stdout_lines }}"
- name: Check Gitea container configuration
shell: |
echo "=== Gitea Container Labels ==="
docker inspect gitea --format '{{ '{{' }}range .Config.Labels{{ '}}' }}{{ '{{' }}.Key{{ '}}' }}={{ '{{' }}.Value{{ '}}' }}{{ '{{' }}\n{{ '}}' }}{{ '{{' }}end{{ '}}' }}' 2>&1 | grep -i traefik || echo "No Traefik labels found"
register: gitea_labels
ignore_errors: yes
failed_when: false
- name: Display Gitea labels
debug:
msg: "{{ gitea_labels.stdout_lines }}"
- name: Check Traefik service registration
shell: |
docker exec traefik wget -qO- http://localhost:8080/api/http/services 2>&1 | grep -i gitea || echo "Gitea service not found in Traefik API"
register: traefik_service
ignore_errors: yes
failed_when: false
- name: Display Traefik service registration
debug:
msg: "{{ traefik_service.stdout_lines }}"
- name: Test external Gitea access
shell: |
echo "=== Testing External Gitea Access ==="
curl -k -H "User-Agent: Mozilla/5.0" -s -o /dev/null -w "HTTP Status: %{http_code}\n" https://git.michaelschiemer.de/ 2>&1 || echo "Connection failed"
args:
executable: /bin/bash
register: external_test
ignore_errors: yes
failed_when: false
- name: Display external test result
debug:
msg: "{{ external_test.stdout_lines }}"
- name: Summary
debug:
msg:
- "=== DIAGNOSIS SUMMARY ==="
- "1. Check if Gitea container is running"
- "2. Check if Gitea is in traefik-public network"
- "3. Check Gitea health endpoint (port 3000)"
- "4. Check Traefik can reach Gitea"
- "5. Check Traefik logs for errors"
- ""
- "Common issues:"
- "- Container not running: Restart with 'docker compose up -d' in {{ gitea_stack_path }}"
- "- Not in network: Recreate container or add to network"
- "- Health check failing: Check Gitea logs for errors"
- "- Traefik can't reach: Check network configuration"

View File

@@ -1,68 +0,0 @@
---
- name: Check Grafana Logs After Test
hosts: production
gather_facts: no
become: no
tasks:
- name: Check last 20 Grafana access attempts
shell: |
cd ~/deployment/stacks/traefik
tail -200 logs/access.log | grep -i grafana | tail -20
args:
executable: /bin/bash
register: latest_logs
ignore_errors: yes
failed_when: false
- name: Extract client IPs with timestamps
shell: |
cd ~/deployment/stacks/traefik
tail -100 logs/access.log | grep -i grafana | tail -10 | while IFS= read -r line; do
time=$(echo "$line" | grep -oP '"time":"[^"]*"' | sed 's/"time":"//;s/"//' | cut -d'T' -f2 | cut -d'+' -f1)
client=$(echo "$line" | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//')
status=$(echo "$line" | grep -oP '"DownstreamStatus":[0-9]+' | sed 's/"DownstreamStatus"://')
if [[ "$client" =~ ^10\.8\.0\.[0-9]+$ ]]; then
echo "$time | ClientHost: $client | Status: $status ? VPN-IP (Traffic kommt ?ber VPN!)"
elif [[ "$client" == "89.246.96.244" ]]; then
echo "$time | ClientHost: $client | Status: $status ? ?ffentliche IP (Traffic kommt NICHT ?ber VPN)"
else
echo "$time | ClientHost: $client | Status: $status ? Unbekannt"
fi
done
args:
executable: /bin/bash
register: analysis
ignore_errors: yes
failed_when: false
- name: Display analysis
debug:
msg: "{{ analysis.stdout_lines }}"
- name: Get unique client IPs
shell: |
cd ~/deployment/stacks/traefik
tail -100 logs/access.log | grep -i grafana | tail -10 | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//' | sort -u
args:
executable: /bin/bash
register: unique_ips
ignore_errors: yes
failed_when: false
- name: Display unique IPs
debug:
msg: "{{ unique_ips.stdout_lines }}"
- name: Final result
debug:
msg:
- ""
- "=== ERGEBNIS ==="
- "Pr?fe die obigen Zeilen:"
- ""
- "? Wenn ClientHost: 10.8.0.7 ? Traffic kommt ?ber VPN!"
- " ? Dann k?nnen wir die tempor?re IP-Erlaubnis entfernen!"
- ""
- "? Wenn ClientHost: 89.246.96.244 ? Traffic kommt NICHT ?ber VPN"
- " ? Dann m?ssen wir weiter debuggen"

View File

@@ -1,55 +0,0 @@
---
- name: Check Latest Grafana Access - Client IP Analysis
hosts: production
gather_facts: no
become: no
tasks:
- name: Get latest Grafana access logs
shell: |
cd ~/deployment/stacks/traefik
echo "=== Latest 5 Grafana Access Logs ==="
tail -100 logs/access.log | grep -i grafana | tail -5
args:
executable: /bin/bash
register: latest_logs
ignore_errors: yes
failed_when: false
- name: Extract client IPs from latest logs
shell: |
cd ~/deployment/stacks/traefik
tail -50 logs/access.log | grep -i grafana | tail -10 | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//' | sort -u
args:
executable: /bin/bash
register: client_ips
ignore_errors: yes
failed_when: false
- name: Display latest logs
debug:
msg: "{{ latest_logs.stdout_lines }}"
- name: Display client IPs
debug:
msg: "{{ client_ips.stdout_lines }}"
- name: Analyze if traffic comes from VPN
shell: |
cd ~/deployment/stacks/traefik
if tail -20 logs/access.log | grep -i grafana | tail -5 | grep -oP '"ClientHost":"[^"]*"' | grep -q "10.8.0"; then
echo "? Traffic kommt ?ber VPN! (ClientHost: 10.8.0.x)"
elif tail -20 logs/access.log | grep -i grafana | tail -5 | grep -oP '"ClientHost":"[^"]*"' | grep -q "89.246.96.244"; then
echo "? Traffic kommt NICHT ?ber VPN (ClientHost: 89.246.96.244 - ?ffentliche IP)"
else
echo "?? Keine aktuellen Grafana-Logs gefunden. Bitte mache einen Zugriff auf https://grafana.michaelschiemer.de"
fi
args:
executable: /bin/bash
register: analysis
ignore_errors: yes
failed_when: false
- name: Display analysis
debug:
msg: "{{ analysis.stdout_lines }}"

View File

@@ -1,78 +0,0 @@
---
- name: Check PHP Files and PHP-FPM Workers
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check if public/index.php exists
shell: |
cd ~/deployment/stacks/staging
echo "=== Check public/index.php ==="
docker compose exec -T staging-app ls -la /var/www/html/public/index.php 2>&1 || echo "index.php not found"
echo ""
echo "=== Check public directory ==="
docker compose exec -T staging-app ls -la /var/www/html/public/ 2>&1 | head -20 || echo "public directory not found"
echo ""
echo "=== Check if code directory exists ==="
docker compose exec -T staging-app ls -la /var/www/html/ 2>&1 | head -20 || echo "Code directory not found"
args:
executable: /bin/bash
register: file_check
ignore_errors: yes
failed_when: false
- name: Display file check results
debug:
msg: "{{ file_check.stdout_lines }}"
- name: Check PHP-FPM worker processes in detail
shell: |
cd ~/deployment/stacks/staging
echo "=== All processes in staging-app ==="
docker compose exec -T staging-app ps aux 2>&1 || echo "Could not get processes"
echo ""
echo "=== Check PHP-FPM master and worker processes ==="
docker compose exec -T staging-app sh -c "ps aux | grep -E '[p]hp|[f]pm' || echo 'No PHP-FPM processes found'" || echo "Process check failed"
args:
executable: /bin/bash
register: process_check
ignore_errors: yes
failed_when: false
- name: Display process check results
debug:
msg: "{{ process_check.stdout_lines }}"
- name: Test PHP execution directly
shell: |
cd ~/deployment/stacks/staging
echo "=== Test PHP CLI ==="
docker compose exec -T staging-app php -v 2>&1 || echo "PHP CLI failed"
echo ""
echo "=== Test if we can include index.php ==="
docker compose exec -T staging-app php -r "if(file_exists('/var/www/html/public/index.php')) { echo 'index.php exists\n'; } else { echo 'index.php NOT FOUND\n'; }" 2>&1 || echo "PHP test failed"
args:
executable: /bin/bash
register: php_test
ignore_errors: yes
failed_when: false
- name: Display PHP test results
debug:
msg: "{{ php_test.stdout_lines }}"
- name: Check PHP-FPM pool status using status page
shell: |
cd ~/deployment/stacks/staging
echo "=== Try to get PHP-FPM status ==="
docker compose exec -T staging-app sh -c "SCRIPT_NAME=/status SCRIPT_FILENAME=/status REQUEST_METHOD=GET timeout 2 php -r \"\\\$socket = socket_create(AF_INET, SOCK_STREAM, SOL_TCP); if (socket_connect(\\\$socket, '127.0.0.1', 9000)) { socket_write(\\\$socket, 'GET /status HTTP/1.0\\r\\nHost: localhost\\r\\n\\r\\n'); \\\$response = socket_read(\\\$socket, 1024); echo \\\$response; socket_close(\\\$socket); } else { echo 'Could not connect to PHP-FPM'; }\" 2>&1" || echo "Status check failed"
args:
executable: /bin/bash
register: fpm_status
ignore_errors: yes
failed_when: false
- name: Display PHP-FPM status
debug:
msg: "{{ fpm_status.stdout_lines }}"

View File

@@ -1,80 +0,0 @@
---
- name: Check PHP-FPM Configuration in Detail
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check PHP-FPM pool configuration
shell: |
cd ~/deployment/stacks/staging
echo "=== PHP-FPM www.conf listen configuration ==="
docker compose exec -T staging-app cat /usr/local/etc/php-fpm.d/www.conf 2>&1 | grep -E "(listen|listen.allowed_clients|listen.owner|listen.group|listen.mode|pm)" | head -20
echo ""
echo "=== Check PHP-FPM processes ==="
docker compose exec -T staging-app ps aux | grep php-fpm || echo "No php-fpm processes found"
echo ""
echo "=== Check PHP-FPM status page ==="
docker compose exec -T staging-app sh -c "SCRIPT_NAME=/status SCRIPT_FILENAME=/status REQUEST_METHOD=GET cgi-fcgi -bind -connect 127.0.0.1:9000 2>&1 || echo 'Status check failed'"
args:
executable: /bin/bash
register: phpfpm_config
ignore_errors: yes
failed_when: false
- name: Display PHP-FPM configuration
debug:
msg: "{{ phpfpm_config.stdout_lines }}"
- name: Check what interface PHP-FPM is listening on
shell: |
cd ~/deployment/stacks/staging
echo "=== Check listening interface ==="
docker compose exec -T staging-app netstat -tlnp 2>/dev/null | grep 9000 || \
docker compose exec -T staging-app ss -tlnp 2>/dev/null | grep 9000 || \
echo "Could not check listening interface"
echo ""
echo "=== Try to connect from nginx using FastCGI protocol ==="
docker compose exec -T staging-nginx sh -c "echo -e 'REQUEST_METHOD=GET\nSCRIPT_FILENAME=/var/www/html/public/index.php\n' | cgi-fcgi -bind -connect staging-app:9000 2>&1 | head -20" || echo "FastCGI test failed"
args:
executable: /bin/bash
register: listen_check
ignore_errors: yes
failed_when: false
- name: Display listening interface check
debug:
msg: "{{ listen_check.stdout_lines }}"
- name: Check PHP-FPM error logs
shell: |
cd ~/deployment/stacks/staging
echo "=== PHP-FPM error log ==="
docker compose exec -T staging-app tail -50 /var/log/php-fpm.log 2>&1 || \
docker compose exec -T staging-app tail -50 /usr/local/var/log/php-fpm.log 2>&1 || \
docker compose logs --tail=100 staging-app 2>&1 | grep -iE "(fpm|error|warning)" | tail -20 || \
echo "No PHP-FPM error logs found"
args:
executable: /bin/bash
register: phpfpm_errors
ignore_errors: yes
failed_when: false
- name: Display PHP-FPM errors
debug:
msg: "{{ phpfpm_errors.stdout_lines }}"
- name: Test actual request from outside
shell: |
cd ~/deployment/stacks/staging
echo "=== Test request from nginx to PHP-FPM ==="
docker compose exec -T staging-nginx sh -c "curl -v http://127.0.0.1/ 2>&1 | head -30" || echo "Request test failed"
args:
executable: /bin/bash
register: request_test
ignore_errors: yes
failed_when: false
- name: Display request test
debug:
msg: "{{ request_test.stdout_lines }}"

View File

@@ -1,147 +0,0 @@
---
- name: Check Production Server Status
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check server uptime and basic info
shell: |
echo "=== Server Uptime ==="
uptime
echo ""
echo "=== Disk Space ==="
df -h
echo ""
echo "=== Memory Usage ==="
free -h
echo ""
echo "=== Docker Status ==="
docker --version || echo "Docker not found"
docker ps || echo "Docker not running"
args:
executable: /bin/bash
register: server_info
ignore_errors: yes
failed_when: false
- name: Display server info
debug:
msg: "{{ server_info.stdout_lines }}"
- name: Check all Docker stacks status
shell: |
echo "=== Traefik Stack ==="
cd ~/deployment/stacks/traefik && docker compose ps 2>&1 || echo "Traefik stack not found or not running"
echo ""
echo "=== Application Stack ==="
cd ~/deployment/stacks/application && docker compose ps 2>&1 || echo "Application stack not found or not running"
echo ""
echo "=== PostgreSQL Stack ==="
cd ~/deployment/stacks/postgresql && docker compose ps 2>&1 || echo "PostgreSQL stack not found or not running"
echo ""
echo "=== Monitoring Stack ==="
cd ~/deployment/stacks/monitoring && docker compose ps 2>&1 || echo "Monitoring stack not found or not running"
echo ""
echo "=== Gitea Stack ==="
cd ~/deployment/stacks/gitea && docker compose ps 2>&1 || echo "Gitea stack not found or not running"
echo ""
echo "=== Registry Stack ==="
cd ~/deployment/stacks/registry && docker compose ps 2>&1 || echo "Registry stack not found or not running"
args:
executable: /bin/bash
register: stacks_status
ignore_errors: yes
failed_when: false
- name: Display stacks status
debug:
msg: "{{ stacks_status.stdout_lines }}"
- name: Check Traefik logs for errors
shell: |
cd ~/deployment/stacks/traefik
echo "=== Traefik Logs (Last 30 lines) ==="
docker compose logs --tail=30 traefik 2>&1 | tail -30 || echo "Could not read Traefik logs"
args:
executable: /bin/bash
register: traefik_logs
ignore_errors: yes
failed_when: false
- name: Display Traefik logs
debug:
msg: "{{ traefik_logs.stdout_lines }}"
- name: Check Application stack logs
shell: |
cd ~/deployment/stacks/application
echo "=== Application Nginx Logs (Last 20 lines) ==="
docker compose logs --tail=20 web 2>&1 | tail -20 || echo "Could not read web logs"
echo ""
echo "=== Application PHP Logs (Last 20 lines) ==="
docker compose logs --tail=20 php 2>&1 | tail -20 || echo "Could not read PHP logs"
args:
executable: /bin/bash
register: app_logs
ignore_errors: yes
failed_when: false
- name: Display application logs
debug:
msg: "{{ app_logs.stdout_lines }}"
- name: Test HTTP connectivity
shell: |
echo "=== Testing HTTP Connectivity ==="
echo "Test 1: HTTPS to michaelschiemer.de"
curl -k -H "User-Agent: Mozilla/5.0" -s -o /dev/null -w "HTTP %{http_code}\n" https://michaelschiemer.de/health || echo "Connection failed"
echo ""
echo "Test 2: Direct localhost"
curl -k -H "User-Agent: Mozilla/5.0" -s -o /dev/null -w "HTTP %{http_code}\n" https://localhost/health || echo "Connection failed"
args:
executable: /bin/bash
register: http_tests
ignore_errors: yes
failed_when: false
- name: Display HTTP test results
debug:
msg: "{{ http_tests.stdout_lines }}"
- name: Check network connectivity
shell: |
echo "=== Network Interfaces ==="
ip addr show | grep -E "(inet |state)" | head -10
echo ""
echo "=== Docker Networks ==="
docker network ls
echo ""
echo "=== Traefik Network Connectivity ==="
docker network inspect traefik-public 2>&1 | grep -E "(Name|Subnet|Containers)" | head -10 || echo "Traefik network not found"
args:
executable: /bin/bash
register: network_info
ignore_errors: yes
failed_when: false
- name: Display network info
debug:
msg: "{{ network_info.stdout_lines }}"
- name: Check firewall status
shell: |
echo "=== Firewall Status ==="
sudo ufw status || echo "UFW not installed or not configured"
echo ""
echo "=== Listening Ports ==="
sudo netstat -tlnp | grep -E "(80|443|8080|3000)" | head -10 || ss -tlnp | grep -E "(80|443|8080|3000)" | head -10 || echo "Could not check listening ports"
args:
executable: /bin/bash
register: firewall_info
ignore_errors: yes
failed_when: false
- name: Display firewall info
debug:
msg: "{{ firewall_info.stdout_lines }}"

View File

@@ -1,193 +0,0 @@
---
- name: Check Redis Connection and Environment Variables in PHP Container
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check if application stack is running
shell: |
cd ~/deployment/stacks/application
docker compose ps
args:
executable: /bin/bash
register: stack_status
ignore_errors: yes
failed_when: false
- name: Display stack status
debug:
msg: "{{ stack_status.stdout_lines }}"
- name: Check PHP container exists
shell: |
docker ps --filter "name=app"
args:
executable: /bin/bash
register: php_container
ignore_errors: yes
failed_when: false
- name: Display PHP container status
debug:
msg: "{{ php_container.stdout_lines }}"
- name: Check Environment Variables in PHP Container
shell: |
echo "=== Redis Environment Variables in PHP Container ==="
docker exec app env | grep -E "(REDIS_|CACHE_|SESSION_|QUEUE_)" || echo "Container not accessible or no Redis vars found"
echo ""
echo "=== All Environment Variables in PHP Container ==="
docker exec app env | sort | head -50 || echo "Container not accessible"
args:
executable: /bin/bash
register: env_vars
ignore_errors: yes
failed_when: false
- name: Display environment variables
debug:
msg: "{{ env_vars.stdout_lines }}"
- name: Test Redis Connection from PHP Container
shell: |
echo "=== Testing Redis Connection from PHP Container ==="
echo "Test 1: Check if Redis is reachable"
docker exec app php -r "
\$redis_host = getenv('REDIS_HOST') ?: 'redis';
\$redis_port = (int)(getenv('REDIS_PORT') ?: 6379);
\$redis_password = getenv('REDIS_PASSWORD');
echo \"REDIS_HOST: \" . \$redis_host . \"\\n\";
echo \"REDIS_PORT: \" . \$redis_port . \"\\n\";
echo \"REDIS_PASSWORD: \" . (\$redis_password ? 'SET (length: ' . strlen(\$redis_password) . ')' : 'NOT SET') . \"\\n\";
// Test TCP connection
\$socket = @fsockopen(\$redis_host, \$redis_port, \$errno, \$errstr, 2);
if (\$socket) {
echo \"TCP Connection: OK\\n\";
fclose(\$socket);
} else {
echo \"TCP Connection: FAILED (errno: \$errno, errstr: \$errstr)\\n\";
}
// Test with Predis if available
if (class_exists('Predis\\Client')) {
try {
\$client = new Predis\\Client([
'scheme' => 'tcp',
'host' => \$redis_host,
'port' => \$redis_port,
'password' => \$redis_password ?: null,
]);
\$client->connect();
echo \"Predis Connection: OK\\n\";
echo \"Redis PING: \" . \$client->ping() . \"\\n\";
\$client->disconnect();
} catch (Exception \$e) {
echo \"Predis Connection: FAILED - \" . \$e->getMessage() . \"\\n\";
}
} else {
echo \"Predis not available\\n\";
}
" || echo "Could not execute PHP test"
args:
executable: /bin/bash
register: redis_test
ignore_errors: yes
failed_when: false
- name: Display Redis connection test results
debug:
msg: "{{ redis_test.stdout_lines }}"
- name: Check Redis Container Configuration
shell: |
echo "=== Redis Container Status ==="
docker ps --filter "name=redis"
echo ""
echo "=== Redis Container Environment ==="
docker exec redis env | grep -E "(REDIS_|REQUIREPASS)" || echo "No Redis env vars found"
echo ""
echo "=== Test Redis Password ==="
REDIS_PASSWORD=$(cd ~/deployment/stacks/application && grep REDIS_PASSWORD .env | cut -d '=' -f2 | tr -d ' ' || echo "")
if [ -n "$REDIS_PASSWORD" ]; then
PASSWORD_LEN=$(echo -n "$REDIS_PASSWORD" | wc -c)
echo "REDIS_PASSWORD from .env file: SET (length: $PASSWORD_LEN)"
docker exec redis redis-cli -a "$REDIS_PASSWORD" PING || echo "Redis password test failed"
else
echo "REDIS_PASSWORD from .env file: NOT SET"
docker exec redis redis-cli PING || echo "Redis connection test failed (no password)"
fi
args:
executable: /bin/bash
register: redis_config
ignore_errors: yes
failed_when: false
- name: Display Redis container configuration
debug:
msg: "{{ redis_config.stdout_lines }}"
- name: Check Docker Network Connectivity
shell: |
echo "=== Docker Network: app-internal ==="
docker network inspect app-internal 2>&1 | grep -E "(Name|Subnet|Containers)" | head -20 || echo "Network not found"
echo ""
echo "=== Testing Network Connectivity ==="
echo "From PHP container to Redis:"
docker exec app ping -c 2 redis 2>&1 || echo "Ping test failed"
echo ""
echo "From PHP container to Redis (port 6379):"
docker exec app nc -zv redis 6379 2>&1 || echo "Port test failed"
args:
executable: /bin/bash
register: network_test
ignore_errors: yes
failed_when: false
- name: Display network connectivity test
debug:
msg: "{{ network_test.stdout_lines }}"
- name: Check Application Logs for Redis Errors
shell: |
cd ~/deployment/stacks/application
echo "=== Application Logs (Last 50 lines, Redis-related) ==="
docker compose logs app --tail=50 2>&1 | grep -i redis || echo "No Redis-related logs found"
args:
executable: /bin/bash
register: app_logs
ignore_errors: yes
failed_when: false
- name: Display application logs
debug:
msg: "{{ app_logs.stdout_lines }}"
- name: Check .env file configuration
shell: |
cd ~/deployment/stacks/application
echo "=== .env file Redis Configuration ==="
if [ -f .env ]; then
grep -E "(REDIS_|CACHE_|SESSION_|QUEUE_)" .env | grep -v "^#" || echo "No Redis config found in .env"
else
echo ".env file not found"
fi
echo ""
echo "=== Checking for application.env file ==="
if [ -f application.env ]; then
echo "application.env exists"
grep -E "(REDIS_|CACHE_|SESSION_|QUEUE_)" application.env | grep -v "^#" || echo "No Redis config found in application.env"
else
echo "application.env file not found"
fi
args:
executable: /bin/bash
register: env_file_config
ignore_errors: yes
failed_when: false
- name: Display .env file configuration
debug:
msg: "{{ env_file_config.stdout_lines }}"

View File

@@ -1,49 +0,0 @@
---
- name: Check Staging 500 Error
hosts: production
gather_facts: yes
become: no
tasks:
- name: Get recent PHP errors from staging-app
shell: |
cd ~/deployment/stacks/staging
echo "=== Recent PHP errors (last 50 lines) ==="
docker compose exec -T staging-app tail -100 /var/www/html/storage/logs/php-errors.log 2>&1 | tail -50
args:
executable: /bin/bash
register: php_errors
ignore_errors: yes
failed_when: false
- name: Display PHP errors
debug:
msg: "{{ php_errors.stdout_lines }}"
- name: Get docker compose logs for staging-app
shell: |
cd ~/deployment/stacks/staging
echo "=== Recent staging-app container logs ==="
docker compose logs --tail=50 staging-app 2>&1 | tail -50
args:
executable: /bin/bash
register: container_logs
ignore_errors: yes
failed_when: false
- name: Display container logs
debug:
msg: "{{ container_logs.stdout_lines }}"
- name: Test health endpoint
shell: |
curl -H "User-Agent: Mozilla/5.0" -s https://staging.michaelschiemer.de/health 2>&1
args:
executable: /bin/bash
register: health_test
ignore_errors: yes
failed_when: false
- name: Display health endpoint result
debug:
msg: "{{ health_test.stdout }}"

View File

@@ -1,66 +0,0 @@
---
- name: Check Staging Container Logs
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check staging container status
shell: |
cd ~/deployment/stacks/staging && docker compose ps
args:
executable: /bin/bash
register: container_status
changed_when: false
- name: Get staging-app logs
shell: |
cd ~/deployment/stacks/staging && docker compose logs --tail=100 staging-app
args:
executable: /bin/bash
register: staging_app_logs
changed_when: false
- name: Get staging-nginx logs
shell: |
cd ~/deployment/stacks/staging && docker compose logs --tail=100 staging-nginx
args:
executable: /bin/bash
register: staging_nginx_logs
changed_when: false
- name: Get staging-queue-worker logs
shell: |
cd ~/deployment/stacks/staging && docker compose logs --tail=100 staging-queue-worker
args:
executable: /bin/bash
register: staging_queue_logs
changed_when: false
- name: Get staging-scheduler logs
shell: |
cd ~/deployment/stacks/staging && docker compose logs --tail=100 staging-scheduler
args:
executable: /bin/bash
register: staging_scheduler_logs
changed_when: false
- name: Display container status
debug:
msg: "{{ container_status.stdout_lines }}"
- name: Display staging-app logs
debug:
msg: "{{ staging_app_logs.stdout_lines }}"
- name: Display staging-nginx logs
debug:
msg: "{{ staging_nginx_logs.stdout_lines }}"
- name: Display staging-queue-worker logs
debug:
msg: "{{ staging_queue_logs.stdout_lines }}"
- name: Display staging-scheduler logs
debug:
msg: "{{ staging_scheduler_logs.stdout_lines }}"

View File

@@ -1,66 +0,0 @@
---
- name: Check Staging PHP Logs in Volume
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check PHP log files in staging-app container
shell: |
echo "=== Checking log directory in staging-app container ==="
docker exec staging-app ls -lah /var/www/html/storage/logs/ 2>&1 || echo "Could not list logs directory"
echo ""
echo "=== Recent PHP error logs ==="
docker exec staging-app tail -50 /var/www/html/storage/logs/php-errors.log 2>&1 | tail -30 || echo "Could not read php-errors.log"
echo ""
echo "=== Recent application logs ==="
docker exec staging-app find /var/www/html/storage/logs -name "*.log" -type f -exec ls -lh {} \; 2>&1 | head -20 || echo "Could not find log files"
echo ""
echo "=== Staging log file (if exists) ==="
docker exec staging-app tail -50 /var/www/html/storage/logs/staging.log 2>&1 | tail -30 || echo "staging.log not found or empty"
echo ""
echo "=== Check log volume mount ==="
docker exec staging-app df -h /var/www/html/storage/logs 2>&1 || echo "Could not check volume"
args:
executable: /bin/bash
register: php_logs_check
ignore_errors: yes
failed_when: false
- name: Display PHP logs check results
debug:
msg: "{{ php_logs_check.stdout_lines }}"
- name: Check if we can access log volume directly
shell: |
echo "=== Docker volume inspect ==="
docker volume inspect staging-logs 2>&1 || echo "Volume not found"
echo ""
echo "=== Try to access volume through temporary container ==="
docker run --rm -v staging-logs:/logs alpine ls -lah /logs 2>&1 | head -30 || echo "Could not access volume"
args:
executable: /bin/bash
register: volume_check
ignore_errors: yes
failed_when: false
- name: Display volume check results
debug:
msg: "{{ volume_check.stdout_lines }}"
- name: Check PHP configuration for logging
shell: |
echo "=== PHP error_log setting ==="
docker exec staging-app php -i | grep -E "(error_log|log_errors)" || echo "Could not get PHP config"
echo ""
echo "=== PHP-FPM error log location ==="
docker exec staging-app grep -E "(error_log|catch_workers_output)" /usr/local/etc/php-fpm.d/www.conf | head -5 || echo "Could not read PHP-FPM config"
args:
executable: /bin/bash
register: php_config_check
ignore_errors: yes
failed_when: false
- name: Display PHP configuration
debug:
msg: "{{ php_config_check.stdout_lines }}"

View File

@@ -1,255 +0,0 @@
---
- name: Check Redis Connection and Environment Variables in Staging PHP Container
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check if staging stack is running
shell: |
cd ~/deployment/stacks/staging
docker compose ps
args:
executable: /bin/bash
register: stack_status
ignore_errors: yes
failed_when: false
- name: Display stack status
debug:
msg: "{{ stack_status.stdout_lines }}"
- name: Check PHP container exists
shell: |
docker ps --filter "name=staging-app"
args:
executable: /bin/bash
register: php_container
ignore_errors: yes
failed_when: false
- name: Display PHP container status
debug:
msg: "{{ php_container.stdout_lines }}"
- name: Check Environment Variables in Staging PHP Container
shell: |
echo "=== Redis Environment Variables in Staging PHP Container ==="
docker exec staging-app env | grep -E "(REDIS_|CACHE_|SESSION_|QUEUE_)" || echo "Container not accessible or no Redis vars found"
echo ""
echo "=== All Environment Variables in Staging PHP Container ==="
docker exec staging-app env | sort | head -50 || echo "Container not accessible"
args:
executable: /bin/bash
register: env_vars
ignore_errors: yes
failed_when: false
- name: Display environment variables
debug:
msg: "{{ env_vars.stdout_lines }}"
- name: Test Redis Connection from Staging PHP Container
shell: |
echo "=== Testing Redis Connection from Staging PHP Container ==="
echo "Test 1: Check if Redis is reachable"
docker exec staging-app php -r "
\$redis_host = getenv('REDIS_HOST') ?: 'staging-redis';
\$redis_port = (int)(getenv('REDIS_PORT') ?: 6379);
\$redis_password = getenv('REDIS_PASSWORD');
echo \\\"REDIS_HOST: \\\" . \$redis_host . \\\"\\\\n\\\";
echo \\\"REDIS_PORT: \\\" . \$redis_port . \\\"\\\\n\\\";
echo \\\"REDIS_PASSWORD: \\\" . (\$redis_password ? 'SET (length: ' . strlen(\$redis_password) . ')' : 'NOT SET') . \\\"\\\\n\\\";
// Test TCP connection
\$socket = @fsockopen(\$redis_host, \$redis_port, \$errno, \$errstr, 2);
if (\$socket) {
echo \\\"TCP Connection: OK\\\\n\\\";
fclose(\$socket);
} else {
echo \\\"TCP Connection: FAILED (errno: \$errno, errstr: \$errstr)\\\\n\\\";
}
" || echo "Could not execute PHP test"
args:
executable: /bin/bash
register: redis_test
ignore_errors: yes
failed_when: false
- name: Display Redis connection test results
debug:
msg: "{{ redis_test.stdout_lines }}"
- name: Test Redis connection with actual php-redis extension
shell: |
docker exec staging-app php -r "
// Get environment variables
\$redis_host = getenv('REDIS_HOST') ?: 'staging-redis';
\$redis_port = (int)(getenv('REDIS_PORT') ?: 6379);
\$redis_password = getenv('REDIS_PASSWORD');
echo '=== Staging Redis Connection Test ===' . PHP_EOL;
echo 'REDIS_HOST: ' . \$redis_host . PHP_EOL;
echo 'REDIS_PORT: ' . \$redis_port . PHP_EOL;
echo 'REDIS_PASSWORD: ' . (\$redis_password ? 'SET (length: ' . strlen(\$redis_password) . ')' : 'NOT SET') . PHP_EOL;
echo PHP_EOL;
if (!extension_loaded('redis')) {
echo 'ERROR: php-redis extension is not loaded!' . PHP_EOL;
exit(1);
}
if (!class_exists('Redis')) {
echo 'ERROR: Redis class is not available!' . PHP_EOL;
exit(1);
}
try {
\$redis = new Redis();
echo 'Created Redis instance' . PHP_EOL;
// Connect
\$success = \$redis->connect(\$redis_host, \$redis_port, 2.0);
if (!\$success) {
echo 'ERROR: Failed to connect to Redis server' . PHP_EOL;
echo 'Host: ' . \$redis_host . ', Port: ' . \$redis_port . PHP_EOL;
exit(1);
}
echo 'Connected to Redis server' . PHP_EOL;
// Authenticate if password is provided
if (\$redis_password) {
\$auth_result = \$redis->auth(\$redis_password);
if (!\$auth_result) {
echo 'ERROR: Redis authentication failed' . PHP_EOL;
echo 'Password used: ' . substr(\$redis_password, 0, 5) . '...' . PHP_EOL;
exit(1);
}
echo 'Authenticated with Redis' . PHP_EOL;
}
// Test PING
\$ping_result = \$redis->ping();
echo 'Redis PING: ' . \$ping_result . PHP_EOL;
// Test SET/GET
\$test_key = 'test_connection_' . time();
\$test_value = 'test_value';
\$set_result = \$redis->set(\$test_key, \$test_value);
echo 'SET test: ' . (\$set_result ? 'OK' : 'FAILED') . PHP_EOL;
\$get_result = \$redis->get(\$test_key);
echo 'GET test: ' . (\$get_result === \$test_value ? 'OK' : 'FAILED') . PHP_EOL;
// Cleanup
\$redis->del(\$test_key);
\$redis->close();
echo PHP_EOL . 'All tests passed!' . PHP_EOL;
} catch (Exception \$e) {
echo 'ERROR: ' . \$e->getMessage() . PHP_EOL;
echo 'Exception type: ' . get_class(\$e) . PHP_EOL;
exit(1);
}
"
args:
executable: /bin/bash
register: redis_direct_test
ignore_errors: yes
failed_when: false
- name: Display Redis direct connection test results
debug:
msg: "{{ redis_direct_test.stdout_lines }}"
- name: Check Staging Redis Container Configuration
shell: |
echo "=== Staging Redis Container Status ==="
docker ps --filter "name=staging-redis"
echo ""
echo "=== Staging Redis Container Environment ==="
docker exec staging-redis env | grep -E "(REDIS_|REQUIREPASS)" || echo "No Redis env vars found"
echo ""
echo "=== Test Redis Password ==="
REDIS_PASSWORD=$(cd ~/deployment/stacks/staging && grep REDIS_PASSWORD .env | cut -d '=' -f2 | tr -d ' ' || echo "")
if [ -n "$REDIS_PASSWORD" ]; then
PASSWORD_LEN=$(echo -n "$REDIS_PASSWORD" | wc -c)
echo "REDIS_PASSWORD from .env file: SET (length: $PASSWORD_LEN)"
docker exec staging-redis redis-cli -a "$REDIS_PASSWORD" PING || echo "Redis password test failed"
else
echo "REDIS_PASSWORD from .env file: NOT SET"
docker exec staging-redis redis-cli PING || echo "Redis connection test failed (no password)"
fi
args:
executable: /bin/bash
register: redis_config
ignore_errors: yes
failed_when: false
- name: Display Redis container configuration
debug:
msg: "{{ redis_config.stdout_lines }}"
- name: Check Docker Network Connectivity for Staging
shell: |
echo "=== Docker Network: staging-internal ==="
docker network inspect staging-internal 2>&1 | grep -E "(Name|Subnet|Containers)" | head -20 || echo "Network not found"
echo ""
echo "=== Testing Network Connectivity ==="
echo "From Staging PHP container to Redis:"
docker exec staging-app php -r "echo gethostbyname('staging-redis') . PHP_EOL;" 2>&1 || echo "DNS test failed"
echo ""
echo "Testing connection from staging-app to staging-redis:"
docker exec staging-app php -r "\$socket = @fsockopen('staging-redis', 6379, \$errno, \$errstr, 2); if (\$socket) { echo 'Port 6379: OK' . PHP_EOL; fclose(\$socket); } else { echo 'Port 6379: FAILED (errno: ' . \$errno . ', errstr: ' . \$errstr . ')' . PHP_EOL; }"
args:
executable: /bin/bash
register: network_test
ignore_errors: yes
failed_when: false
- name: Display network connectivity test
debug:
msg: "{{ network_test.stdout_lines }}"
- name: Check Staging Application Logs for Redis Errors
shell: |
cd ~/deployment/stacks/staging
echo "=== Staging Application Logs (Last 50 lines, Redis-related) ==="
docker compose logs staging-app --tail=50 2>&1 | grep -i -E "(redis|connection|error)" | tail -20 || echo "No Redis-related logs found"
args:
executable: /bin/bash
register: app_logs
ignore_errors: yes
failed_when: false
- name: Display application logs
debug:
msg: "{{ app_logs.stdout_lines }}"
- name: Check Staging .env file configuration
shell: |
cd ~/deployment/stacks/staging
echo "=== .env file Redis Configuration ==="
if [ -f .env ]; then
grep -E "(REDIS_|CACHE_|SESSION_|QUEUE_)" .env | grep -v "^#" || echo "No Redis config found in .env"
else
echo ".env file not found"
fi
echo ""
echo "=== Checking for application.env file ==="
if [ -f application.env ]; then
echo "application.env exists"
grep -E "(REDIS_|CACHE_|SESSION_|QUEUE_)" application.env | grep -v "^#" || echo "No Redis config found in application.env"
else
echo "application.env file not found"
fi
args:
executable: /bin/bash
register: env_file_config
ignore_errors: yes
failed_when: false
- name: Display .env file configuration
debug:
msg: "{{ env_file_config.stdout_lines }}"

View File

@@ -1,135 +0,0 @@
---
- name: Check Staging Redis Environment File and Container Password
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check .env file exists and contains REDIS_PASSWORD
shell: |
cd ~/deployment/stacks/staging
echo "=== Checking .env file ==="
if [ -f .env ]; then
echo ".env file exists"
echo ""
echo "=== REDIS_PASSWORD from .env ==="
REDIS_PASSWORD_FROM_ENV=$(grep "^REDIS_PASSWORD=" .env | cut -d '=' -f2- | tr -d ' ' || echo "")
if [ -n "$REDIS_PASSWORD_FROM_ENV" ]; then
PASSWORD_LEN=$(echo -n "$REDIS_PASSWORD_FROM_ENV" | wc -c)
echo "REDIS_PASSWORD found in .env (length: $PASSWORD_LEN)"
echo "First 10 chars: ${REDIS_PASSWORD_FROM_ENV:0:10}..."
echo "Last 10 chars: ...${REDIS_PASSWORD_FROM_ENV: -10}"
else
echo "REDIS_PASSWORD NOT FOUND in .env file!"
fi
else
echo ".env file NOT FOUND!"
fi
args:
executable: /bin/bash
register: env_file_check
ignore_errors: yes
failed_when: false
- name: Display .env file check
debug:
msg: "{{ env_file_check.stdout_lines }}"
- name: Check how Redis container was started
shell: |
echo "=== Checking Redis container command ==="
docker inspect staging-redis --format '{{ '{{' }}.Config.Cmd{{ '}}' }}' || echo "Could not inspect container"
echo ""
echo "=== Checking if Redis actually requires password ==="
# Try without password first
docker exec staging-redis redis-cli PING 2>&1 || echo "Connection failed (expected if password required)"
echo ""
# Try with password from .env
cd ~/deployment/stacks/staging
REDIS_PASSWORD=$(grep "^REDIS_PASSWORD=" .env | cut -d '=' -f2- | tr -d ' ' || echo "")
if [ -n "$REDIS_PASSWORD" ]; then
echo "Testing with password from .env:"
docker exec staging-redis redis-cli -a "$REDIS_PASSWORD" PING 2>&1 || echo "Password test failed"
else
echo "Cannot test with password - REDIS_PASSWORD not found in .env"
fi
args:
executable: /bin/bash
register: redis_startup_check
ignore_errors: yes
failed_when: false
- name: Display Redis startup check
debug:
msg: "{{ redis_startup_check.stdout_lines }}"
- name: Test actual connection from PHP container
shell: |
cd ~/deployment/stacks/staging
REDIS_PASSWORD_ENV=$(grep "^REDIS_PASSWORD=" .env | cut -d '=' -f2- | tr -d ' ' || echo "")
docker exec staging-app php -r "
\$redis_host = 'staging-redis';
\$redis_port = 6379;
\$redis_password = getenv('REDIS_PASSWORD');
\$redis_password_env_file = '$REDIS_PASSWORD_ENV';
echo '=== Password Comparison ===' . PHP_EOL;
echo 'REDIS_PASSWORD from environment: ' . (\$redis_password ? 'SET (length: ' . strlen(\$redis_password) . ')' : 'NOT SET') . PHP_EOL;
echo 'REDIS_PASSWORD from .env file: ' . (\$redis_password_env_file ? 'SET (length: ' . strlen(\$redis_password_env_file) . ')' : 'NOT SET') . PHP_EOL;
if (\$redis_password && \$redis_password_env_file) {
if (\$redis_password === \$redis_password_env_file) {
echo 'Passwords MATCH!' . PHP_EOL;
} else {
echo 'Passwords DO NOT MATCH!' . PHP_EOL;
echo 'Env password first 10: ' . substr(\$redis_password, 0, 10) . PHP_EOL;
echo '.env password first 10: ' . substr(\$redis_password_env_file, 0, 10) . PHP_EOL;
}
}
echo PHP_EOL . '=== Connection Test ===' . PHP_EOL;
if (!extension_loaded('redis')) {
echo 'ERROR: php-redis extension not loaded!' . PHP_EOL;
exit(1);
}
try {
\$redis = new Redis();
\$success = \$redis->connect(\$redis_host, \$redis_port, 2.0);
if (!\$success) {
echo 'ERROR: Failed to connect to Redis' . PHP_EOL;
exit(1);
}
echo 'Connected to Redis' . PHP_EOL;
if (\$redis_password) {
\$auth_result = \$redis->auth(\$redis_password);
if (\$auth_result) {
echo 'Authentication: SUCCESS' . PHP_EOL;
\$ping = \$redis->ping();
echo 'PING: ' . \$ping . PHP_EOL;
} else {
echo 'Authentication: FAILED' . PHP_EOL;
echo 'Tried password: ' . substr(\$redis_password, 0, 10) . '...' . PHP_EOL;
}
} else {
echo 'WARNING: No password set in environment!' . PHP_EOL;
}
\$redis->close();
} catch (Exception \$e) {
echo 'ERROR: ' . \$e->getMessage() . PHP_EOL;
exit(1);
}
"
args:
executable: /bin/bash
register: password_comparison
ignore_errors: yes
failed_when: false
- name: Display password comparison
debug:
msg: "{{ password_comparison.stdout_lines }}"

View File

@@ -1,52 +0,0 @@
---
- name: Check Staging Status Complete
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check nginx error log for recent 502s
shell: |
cd ~/deployment/stacks/staging
docker compose exec -T staging-nginx tail -20 /var/log/nginx/error.log 2>&1 | grep -E "(502|Bad Gateway|upstream)" || echo "No 502 errors in recent logs"
args:
executable: /bin/bash
register: nginx_errors
ignore_errors: yes
failed_when: false
- name: Display nginx errors
debug:
msg: "{{ nginx_errors.stdout_lines }}"
- name: Verify upstream configuration one more time
shell: |
cd ~/deployment/stacks/staging
docker compose exec -T staging-nginx grep -A 3 "upstream php-upstream" /etc/nginx/sites-available/default
docker compose exec -T staging-nginx grep "fastcgi_pass" /etc/nginx/sites-available/default | head -3
args:
executable: /bin/bash
register: upstream_check
ignore_errors: yes
failed_when: false
- name: Display upstream check
debug:
msg: "{{ upstream_check.stdout_lines }}"
- name: Test multiple times
shell: |
for i in 1 2 3; do
echo "Test $i:"
curl -H "User-Agent: Mozilla/5.0" -H "Cache-Control: no-cache" -s -o /dev/null -w " HTTP %{http_code}\n" https://staging.michaelschiemer.de/ || echo " Failed"
sleep 1
done
args:
executable: /bin/bash
register: multi_test
ignore_errors: yes
failed_when: false
- name: Display multi test results
debug:
msg: "{{ multi_test.stdout_lines }}"

View File

@@ -0,0 +1,15 @@
---
# Check Traefik ACME Challenge Logs
# Wrapper Playbook for traefik role logs tasks
- hosts: production
gather_facts: yes
become: no
tasks:
- name: Include traefik logs tasks
ansible.builtin.include_role:
name: traefik
tasks_from: logs
tags:
- traefik
- logs
- acme

View File

@@ -1,70 +0,0 @@
---
- name: Check Traefik Gitea Configuration
hosts: production
gather_facts: yes
become: no
vars:
traefik_stack_path: "{{ stacks_base_path }}/traefik"
tasks:
- name: Check Traefik logs for Gitea errors
shell: |
cd {{ traefik_stack_path }}
echo "=== Traefik Logs - Gitea errors (Last 50 lines) ==="
docker compose logs --tail=100 traefik 2>&1 | grep -i "gitea\|502\|bad gateway" | tail -50 || echo "No Gitea-related errors found"
args:
executable: /bin/bash
register: traefik_errors
ignore_errors: yes
failed_when: false
- name: Display Traefik errors
debug:
msg: "{{ traefik_errors.stdout_lines }}"
- name: Check dynamic Gitea configuration on server
shell: |
cat {{ traefik_stack_path }}/dynamic/gitea.yml 2>&1 || echo "File not found"
register: gitea_dynamic_config
ignore_errors: yes
failed_when: false
- name: Display dynamic Gitea config
debug:
msg: "{{ gitea_dynamic_config.stdout_lines }}"
- name: Test if Traefik can resolve gitea hostname
shell: |
docker exec traefik getent hosts gitea 2>&1 || echo "Cannot resolve gitea hostname"
register: traefik_resolve
ignore_errors: yes
failed_when: false
- name: Display Traefik resolve result
debug:
msg: "{{ traefik_resolve.stdout_lines }}"
- name: Get Gitea container IP
shell: |
docker inspect gitea --format '{{ '{{' }}range.NetworkSettings.Networks{{ '}}' }}{{ '{{' }}if eq .NetworkID (printf "%s" (docker network inspect traefik-public --format "{{ '{{' }}.Id{{ '}}' }}")){{ '}}' }}{{ '{{' }}.IPAddress{{ '}}' }}{{ '{{' }}end{{ '}}' }}{{ '{{' }}end{{ '}}' }}' 2>&1 || echo "Could not get IP"
register: gitea_ip
ignore_errors: yes
failed_when: false
- name: Display Gitea IP
debug:
msg: "Gitea IP in traefik-public network: {{ gitea_ip.stdout }}"
- name: Test connectivity from Traefik to Gitea IP
shell: |
GITEA_IP="{{ gitea_ip.stdout | default('172.21.0.3') }}"
docker exec traefik wget -qO- --timeout=5 "http://$GITEA_IP:3000/api/healthz" 2>&1 || echo "Cannot connect to Gitea at $GITEA_IP:3000"
register: traefik_connect
ignore_errors: yes
failed_when: false
when: gitea_ip.stdout is defined and gitea_ip.stdout != ""
- name: Display connectivity result
debug:
msg: "{{ traefik_connect.stdout_lines }}"

View File

@@ -1,63 +0,0 @@
---
- name: Check VPN Routing Status - Client-Side Diagnostics
hosts: production
gather_facts: no
become: no
tasks:
- name: Instructions for client-side DNS check
debug:
msg:
- "=== CLIENT-SIDE DNS CHECK ==="
- "Bitte f?hre diese Befehle auf deinem Client-System aus (nicht auf dem Server):"
- ""
- "1. Pr?fe aktive DNS-Server:"
- " Linux/Mac: cat /etc/resolv.conf"
- " Windows: Get-DnsClientServerAddress | Select-Object InterfaceAlias, ServerAddresses"
- ""
- "2. Teste DNS-Aufl?sung:"
- " dig +short grafana.michaelschiemer.de"
- " Oder: nslookup grafana.michaelschiemer.de"
- ""
- "3. Teste DNS-Aufl?sung ?ber VPN-DNS:"
- " dig +short grafana.michaelschiemer.de @10.8.0.1"
- " Sollte zur?ckgeben: 10.8.0.1"
- ""
- "4. Pr?fe WireGuard Config:"
- " ?ffne deine WireGuard-Config und pr?fe:"
- " [Interface]"
- " DNS = 10.8.0.1"
- ""
- "Teile mir die Ergebnisse mit, dann kann ich dir helfen, das zu fixen!"
- name: Check server-side WireGuard configuration
shell: |
echo "=== Server-Side WireGuard Config ==="
sudo cat /etc/wireguard/wg0.conf | grep -A 5 "\[Interface\]" | head -10
args:
executable: /bin/bash
register: wg_server_config
ignore_errors: yes
failed_when: false
- name: Display server-side WireGuard config
debug:
msg: "{{ wg_server_config.stdout_lines }}"
- name: Check server-side DNS resolution
shell: |
echo "=== Server-Side DNS Tests ==="
echo "1. Grafana via VPN DNS (10.8.0.1):"
dig +short grafana.michaelschiemer.de @10.8.0.1 2>&1 || echo "Failed"
echo ""
echo "2. Grafana via public DNS (8.8.8.8):"
dig +short grafana.michaelschiemer.de @8.8.8.8 2>&1 || echo "Failed"
args:
executable: /bin/bash
register: server_dns
ignore_errors: yes
failed_when: false
- name: Display server-side DNS tests
debug:
msg: "{{ server_dns.stdout_lines }}"

View File

@@ -1,116 +0,0 @@
---
- name: Check VPN Test Results from Client
hosts: production
gather_facts: no
become: no
tasks:
- name: Check latest Grafana access attempts
shell: |
cd ~/deployment/stacks/traefik
echo "=== Last 20 Grafana Access Attempts ==="
tail -500 logs/access.log | grep -i grafana | tail -20 | while IFS= read -r line; do
time=$(echo "$line" | grep -oP '"time":"[^"]*"' | sed 's/"time":"//;s/"//' | cut -d'T' -f2 | cut -d'+' -f1 | cut -d':' -f1-2)
client=$(echo "$line" | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//')
status=$(echo "$line" | grep -oP '"DownstreamStatus":[0-9]+' | sed 's/"DownstreamStatus"://')
method=$(echo "$line" | grep -oP '"RequestMethod":"[^"]*"' | sed 's/"RequestMethod":"//;s/"//')
path=$(echo "$line" | grep -oP '"RequestPath":"[^"]*"' | sed 's/"RequestPath":"//;s/"//')
if [[ "$client" =~ ^10\.8\.0\.[0-9]+$ ]]; then
echo "? $time | ClientHost: $client | Status: $status | $method $path ? VPN-IP (Traffic kommt ?ber VPN!)"
elif [[ "$client" == "89.246.96.244" ]]; then
echo "? $time | ClientHost: $client | Status: $status | $method $path ? ?ffentliche IP (Traffic kommt NICHT ?ber VPN)"
else
echo "? $time | ClientHost: $client | Status: $status | $method $path ? Unbekannt"
fi
done
args:
executable: /bin/bash
register: recent_access
ignore_errors: yes
failed_when: false
- name: Display recent access attempts
debug:
msg: "{{ recent_access.stdout_lines }}"
- name: Extract unique client IPs from recent requests
shell: |
cd ~/deployment/stacks/traefik
tail -100 logs/access.log | grep -i grafana | tail -20 | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//' | sort -u
args:
executable: /bin/bash
register: unique_ips
ignore_errors: yes
failed_when: false
- name: Display unique client IPs
debug:
msg: "{{ unique_ips.stdout_lines }}"
- name: Analyze client IP distribution
shell: |
cd ~/deployment/stacks/traefik
echo "=== Client IP Analysis (Last 20 requests) ==="
VPN_COUNT=$(tail -100 logs/access.log | grep -i grafana | tail -20 | grep -oP '"ClientHost":"10\.8\.0\.[0-9]+"' | wc -l)
PUBLIC_COUNT=$(tail -100 logs/access.log | grep -i grafana | tail -20 | grep -oP '"ClientHost":"89\.246\.96\.244"' | wc -l)
TOTAL=$(tail -100 logs/access.log | grep -i grafana | tail -20 | wc -l)
echo "Total Grafana requests (last 20): $TOTAL"
echo "VPN IP requests (10.8.0.x): $VPN_COUNT"
echo "Public IP requests (89.246.96.244): $PUBLIC_COUNT"
if [ "$VPN_COUNT" -gt 0 ]; then
echo ""
echo "? SUCCESS: Traffic is coming through VPN!"
echo " $VPN_COUNT out of $TOTAL requests used VPN IP"
elif [ "$PUBLIC_COUNT" -gt 0 ]; then
echo ""
echo "? PROBLEM: Traffic is NOT coming through VPN"
echo " $PUBLIC_COUNT out of $TOTAL requests used public IP"
echo " Check VPN routing configuration on client"
else
echo ""
echo "?? No recent Grafana requests found"
fi
args:
executable: /bin/bash
register: ip_analysis
ignore_errors: yes
failed_when: false
- name: Display IP analysis
debug:
msg: "{{ ip_analysis.stdout_lines }}"
- name: Check WireGuard peer status for test-client
shell: |
echo "=== WireGuard Peer Status ==="
sudo wg show | grep -A 5 "YbjBipkOHjLfcIYQKDReJ1swseczqHolTCRv7+LHnmw=" || echo "Peer not found or not connected"
args:
executable: /bin/bash
register: wg_peer_status
ignore_errors: yes
failed_when: false
- name: Display WireGuard peer status
debug:
msg: "{{ wg_peer_status.stdout_lines }}"
- name: Final recommendations
debug:
msg:
- ""
- "=== ERGEBNIS ==="
- ""
- "Pr?fe die obigen Zeilen:"
- ""
- "? Wenn ClientHost: 10.8.0.x ? Traffic kommt ?ber VPN!"
- " ? Dann k?nnen wir die tempor?re IP-Erlaubnis entfernen!"
- ""
- "? Wenn ClientHost: 89.246.96.244 ? Traffic kommt NICHT ?ber VPN"
- " ? Dann m?ssen wir das VPN-Routing auf dem Client pr?fen"
- ""
- "N?chste Schritte:"
- "1. Wenn VPN funktioniert: Temporary IP-Erlaubnis entfernen"
- "2. Wenn VPN nicht funktioniert: Route-Tabellen auf Client pr?fen"

View File

@@ -0,0 +1,17 @@
---
# Check Worker and Scheduler Logs
# Wrapper Playbook for application role logs tasks
- hosts: production
gather_facts: no
become: no
vars:
application_logs_check_vendor: true
tasks:
- name: Include application logs tasks (worker)
ansible.builtin.include_role:
name: application
tasks_from: logs
tags:
- application
- logs
- worker

View File

@@ -0,0 +1,216 @@
---
- name: Cleanup All Containers and Networks on Production Server
hosts: production
become: no
gather_facts: yes
vars:
cleanup_volumes: true # Set to false to preserve volumes
tasks:
- name: Set stacks_base_path if not defined
set_fact:
stacks_base_path: "{{ stacks_base_path | default('/home/deploy/deployment/stacks') }}"
- name: Display cleanup warning
debug:
msg:
- "=== WARNING: This will stop and remove ALL containers ==="
- "Volumes will be removed: {{ cleanup_volumes }}"
- "This will cause downtime for all services"
- "Stacks path: {{ stacks_base_path }}"
- ""
- name: List all running containers before cleanup
command: docker ps --format 'table {{ "{{" }}.Names{{ "}}" }}\t{{ "{{" }}.Status{{ "}}" }}\t{{ "{{" }}.Ports{{ "}}" }}'
register: containers_before
changed_when: false
- name: Display running containers
debug:
msg: "{{ containers_before.stdout_lines }}"
# Stop all Docker Compose stacks
- name: Stop Traefik stack
command: docker compose -f {{ stacks_base_path }}/traefik/docker-compose.yml down
args:
chdir: "{{ stacks_base_path }}/traefik"
ignore_errors: yes
register: traefik_stop
- name: Stop Gitea stack
command: docker compose -f {{ stacks_base_path }}/gitea/docker-compose.yml down
args:
chdir: "{{ stacks_base_path }}/gitea"
ignore_errors: yes
- name: Stop PostgreSQL Production stack
command: docker compose -f {{ stacks_base_path }}/postgresql-production/docker-compose.yml down
args:
chdir: "{{ stacks_base_path }}/postgresql-production"
ignore_errors: yes
- name: Stop PostgreSQL Staging stack
command: docker compose -f {{ stacks_base_path }}/postgresql-staging/docker-compose.yml down
args:
chdir: "{{ stacks_base_path }}/postgresql-staging"
ignore_errors: yes
- name: Stop Redis stack
command: docker compose -f {{ stacks_base_path }}/redis/docker-compose.yml down
args:
chdir: "{{ stacks_base_path }}/redis"
ignore_errors: yes
- name: Stop Docker Registry stack
command: docker compose -f {{ stacks_base_path }}/registry/docker-compose.yml down
args:
chdir: "{{ stacks_base_path }}/registry"
ignore_errors: yes
- name: Stop MinIO stack
command: docker compose -f {{ stacks_base_path }}/minio/docker-compose.yml down
args:
chdir: "{{ stacks_base_path }}/minio"
ignore_errors: yes
- name: Stop Monitoring stack
command: docker compose -f {{ stacks_base_path }}/monitoring/docker-compose.yml down
args:
chdir: "{{ stacks_base_path }}/monitoring"
ignore_errors: yes
- name: Stop Production stack
command: docker compose -f {{ stacks_base_path }}/production/docker-compose.base.yml -f {{ stacks_base_path }}/production/docker-compose.production.yml down
args:
chdir: "{{ stacks_base_path }}/production"
ignore_errors: yes
- name: Stop Staging stack
command: docker compose -f {{ stacks_base_path }}/staging/docker-compose.base.yml -f {{ stacks_base_path }}/staging/docker-compose.staging.yml down
args:
chdir: "{{ stacks_base_path }}/staging"
ignore_errors: yes
- name: Stop WireGuard stack
command: docker compose -f {{ stacks_base_path }}/wireguard/docker-compose.yml down
args:
chdir: "{{ stacks_base_path }}/wireguard"
ignore_errors: yes
# Remove all containers (including stopped ones)
- name: Get all container IDs
command: docker ps -a -q
register: all_containers
changed_when: false
- name: Remove all containers
command: docker rm -f {{ item }}
loop: "{{ all_containers.stdout_lines }}"
when: all_containers.stdout_lines | length > 0
ignore_errors: yes
# Check for port conflicts
- name: Check what's using port 80
command: sudo ss -tlnp 'sport = :80'
register: port_80_check
changed_when: false
ignore_errors: yes
- name: Display port 80 status
debug:
msg: "{{ port_80_check.stdout_lines if port_80_check.rc == 0 else 'Port 80 is free or cannot be checked' }}"
- name: Check what's using port 443
command: sudo ss -tlnp 'sport = :443'
register: port_443_check
changed_when: false
ignore_errors: yes
- name: Display port 443 status
debug:
msg: "{{ port_443_check.stdout_lines if port_443_check.rc == 0 else 'Port 443 is free or cannot be checked' }}"
# Clean up networks
- name: Remove traefik-public network
community.docker.docker_network:
name: traefik-public
state: absent
ignore_errors: yes
- name: Remove app-internal network
community.docker.docker_network:
name: app-internal
state: absent
ignore_errors: yes
- name: Get all custom networks
command: docker network ls --format '{{ "{{" }}.Name{{ "}}" }}'
register: all_networks
changed_when: false
- name: Remove custom networks (except default ones)
community.docker.docker_network:
name: "{{ item }}"
state: absent
loop: "{{ all_networks.stdout_lines }}"
when:
- item not in ['bridge', 'host', 'none']
- item not in ['traefik-public', 'app-internal'] # Already removed above
ignore_errors: yes
# Clean up volumes (if requested)
- name: Get all volumes
command: docker volume ls -q
register: all_volumes
changed_when: false
when: cleanup_volumes | bool
- name: Remove all volumes
command: docker volume rm {{ item }}
loop: "{{ all_volumes.stdout_lines }}"
when:
- cleanup_volumes | bool
- all_volumes.stdout_lines | length > 0
ignore_errors: yes
# Final verification
- name: List remaining containers
command: docker ps -a
register: containers_after
changed_when: false
- name: Display remaining containers
debug:
msg: "{{ containers_after.stdout_lines }}"
- name: List remaining networks
command: docker network ls
register: networks_after
changed_when: false
- name: Display remaining networks
debug:
msg: "{{ networks_after.stdout_lines }}"
- name: Verify ports 80 and 443 are free
command: sudo ss -tlnp 'sport = :{{ item }}'
register: port_check
changed_when: false
failed_when: port_check.rc == 0 and port_check.stdout_lines | length > 0
loop:
- 80
- 443
- name: Display cleanup summary
debug:
msg:
- "=== Cleanup Complete ==="
- "All containers stopped and removed"
- "Networks cleaned up"
- "Volumes removed: {{ cleanup_volumes }}"
- ""
- "Next steps:"
- "1. Run sync-stacks.yml to sync configurations"
- "2. Run setup-infrastructure.yml to deploy fresh infrastructure"

View File

@@ -1,125 +0,0 @@
---
- name: Debug Grafana 403 Error
hosts: production
gather_facts: yes
become: no
# This playbook requires the production inventory file
# Run with: ansible-playbook -i ../inventory/production.yml debug-grafana-403.yml
tasks:
- name: Check Traefik logs for recent Grafana access attempts
shell: |
cd ~/deployment/stacks/traefik
echo "=== Recent Traefik Access Logs (last 50 lines with grafana) ==="
docker compose logs --tail=100 traefik 2>&1 | grep -i grafana | tail -50 || echo "No grafana entries found"
args:
executable: /bin/bash
register: traefik_logs
ignore_errors: yes
failed_when: false
- name: Display Traefik logs
debug:
msg: "{{ traefik_logs.stdout_lines }}"
- name: Check Traefik access log file
shell: |
cd ~/deployment/stacks/traefik
echo "=== Recent Traefik Access Log (last 50 lines) ==="
tail -50 logs/access.log 2>&1 | tail -50 || echo "Access log not found"
args:
executable: /bin/bash
register: access_log
ignore_errors: yes
failed_when: false
- name: Display access log
debug:
msg: "{{ access_log.stdout_lines }}"
- name: Check Grafana container status
shell: |
cd ~/deployment/stacks/monitoring
docker compose ps grafana
args:
executable: /bin/bash
register: grafana_status
ignore_errors: yes
failed_when: false
- name: Display Grafana status
debug:
msg: "{{ grafana_status.stdout_lines }}"
- name: Check Grafana Traefik labels
shell: |
cd ~/deployment/stacks/monitoring
docker compose config | grep -A 20 "grafana:" | grep -E "(ipwhitelist|middleware|sourcerange)" || echo "No IP whitelist labels found"
args:
executable: /bin/bash
register: grafana_labels
ignore_errors: yes
failed_when: false
- name: Display Grafana labels
debug:
msg: "{{ grafana_labels.stdout_lines }}"
- name: Check CoreDNS configuration
shell: |
cd ~/deployment/stacks/dns
echo "=== CoreDNS Corefile ==="
cat Corefile 2>&1 || echo "Corefile not found"
args:
executable: /bin/bash
register: coredns_config
ignore_errors: yes
failed_when: false
- name: Display CoreDNS configuration
debug:
msg: "{{ coredns_config.stdout_lines }}"
- name: Check monitoring stack environment variables
shell: |
cd ~/deployment/stacks/monitoring
echo "=== MONITORING_VPN_IP_WHITELIST ==="
grep MONITORING_VPN_IP_WHITELIST .env 2>&1 || echo "Variable not found in .env"
args:
executable: /bin/bash
register: monitoring_env
ignore_errors: yes
failed_when: false
- name: Display monitoring environment
debug:
msg: "{{ monitoring_env.stdout_lines }}"
- name: Test DNS resolution for grafana.michaelschiemer.de
shell: |
echo "=== DNS Resolution Test ==="
dig +short grafana.michaelschiemer.de @10.8.0.1 2>&1 || echo "DNS resolution failed"
args:
executable: /bin/bash
register: dns_test
ignore_errors: yes
failed_when: false
- name: Display DNS test result
debug:
msg: "{{ dns_test.stdout_lines }}"
- name: Check WireGuard interface status
shell: |
echo "=== WireGuard Interface Status ==="
sudo wg show 2>&1 || echo "WireGuard not running or no permissions"
args:
executable: /bin/bash
register: wg_status
ignore_errors: yes
failed_when: false
- name: Display WireGuard status
debug:
msg: "{{ wg_status.stdout_lines }}"

View File

@@ -1,250 +0,0 @@
---
- name: Debug Staging Redis Secrets Configuration
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check staging stack directory
shell: |
cd ~/deployment/stacks/staging
echo "=== Staging Stack Directory ==="
pwd
ls -la
register: dir_check
ignore_errors: yes
- name: Display directory contents
debug:
msg: "{{ dir_check.stdout_lines }}"
- name: Check if docker-compose files exist
stat:
path: "{{ item }}"
vars:
deployment_path: "~/deployment/stacks/staging"
with_items:
- "{{ deployment_path }}/docker-compose.base.yml"
- "{{ deployment_path }}/docker-compose.staging.yml"
register: compose_files
- name: Display compose file status
debug:
msg: "{{ item.stat.exists | default(false) }}: {{ item.item }}"
with_items: "{{ compose_files.results }}"
- name: Check docker-compose.staging.yml configuration
shell: |
cd ~/deployment/stacks/staging
echo "=== Checking docker-compose.staging.yml for Redis secrets ==="
if [ -f docker-compose.staging.yml ]; then
echo "--- REDIS_PASSWORD_FILE in environment ---"
grep -A 5 "staging-app:" docker-compose.staging.yml | grep -A 10 "environment:" | grep "REDIS_PASSWORD_FILE" || echo "REDIS_PASSWORD_FILE not found in staging-app environment"
echo ""
echo "--- Secrets section for staging-app ---"
grep -A 10 "staging-app:" docker-compose.staging.yml | grep -A 15 "secrets:" | head -10 || echo "Secrets section not found"
echo ""
echo "--- Secrets definitions at bottom ---"
tail -30 docker-compose.staging.yml | grep -A 5 "redis_password:" || echo "redis_password secret definition not found"
else
echo "docker-compose.staging.yml NOT FOUND"
fi
register: compose_config
ignore_errors: yes
- name: Display compose configuration
debug:
msg: "{{ compose_config.stdout_lines }}"
- name: Check if secrets directory and files exist
shell: |
cd ~/deployment/stacks/staging
echo "=== Secrets Directory ==="
if [ -d secrets ]; then
echo "secrets/ directory exists"
ls -la secrets/
echo ""
echo "--- redis_password.txt content check ---"
if [ -f secrets/redis_password.txt ]; then
echo "secrets/redis_password.txt exists"
FILE_SIZE=$(stat -f%z secrets/redis_password.txt 2>/dev/null || stat -c%s secrets/redis_password.txt 2>/dev/null || echo "unknown")
CONTENT_LENGTH=$(wc -c < secrets/redis_password.txt | tr -d ' ')
echo "File size: $FILE_SIZE bytes"
echo "Content length: $CONTENT_LENGTH bytes"
# Show first 5 chars (for debugging)
FIRST_CHARS=$(head -c 5 secrets/redis_password.txt)
echo "First 5 chars: $FIRST_CHARS"
else
echo "secrets/redis_password.txt NOT FOUND"
fi
else
echo "secrets/ directory DOES NOT EXIST"
fi
register: secrets_check
ignore_errors: yes
- name: Display secrets check
debug:
msg: "{{ secrets_check.stdout_lines }}"
- name: Check if staging-app container is running
shell: |
docker ps --filter "name=staging-app" --format "{{.Names}}\t{{.Status}}\t{{.Image}}"
register: container_status
ignore_errors: yes
- name: Display container status
debug:
msg: "{{ container_status.stdout_lines }}"
- name: Check Docker secrets mounted in staging-app container
shell: |
echo "=== Docker Secrets in staging-app Container ==="
if docker ps --filter "name=staging-app" --format "{{.Names}}" | grep -q staging-app; then
echo "--- Checking /run/secrets/ directory ---"
docker exec staging-app ls -la /run/secrets/ 2>&1 || echo "Cannot access /run/secrets/"
echo ""
echo "--- Checking redis_password secret file ---"
docker exec staging-app cat /run/secrets/redis_password 2>&1 | head -c 20 || echo "redis_password secret NOT FOUND or NOT READABLE"
echo "..."
echo ""
echo "--- File exists check ---"
docker exec staging-app test -f /run/secrets/redis_password && echo "redis_password file EXISTS" || echo "redis_password file DOES NOT EXIST"
docker exec staging-app test -r /run/secrets/redis_password && echo "redis_password file is READABLE" || echo "redis_password file is NOT READABLE"
else
echo "staging-app container is NOT RUNNING"
fi
register: secrets_mounted
ignore_errors: yes
- name: Display secrets mount status
debug:
msg: "{{ secrets_mounted.stdout_lines }}"
- name: Check Environment Variables in staging-app container
shell: |
echo "=== Environment Variables in staging-app ==="
if docker ps --filter "name=staging-app" --format "{{.Names}}" | grep -q staging-app; then
echo "--- Redis-related environment variables ---"
docker exec staging-app env | grep -E "(REDIS_|CACHE_|SESSION_|QUEUE_)" || echo "No Redis env vars found"
echo ""
echo "--- *_FILE environment variables ---"
docker exec staging-app env | grep "_FILE" || echo "No _FILE env vars found"
echo ""
echo "--- All environment variables (first 50) ---"
docker exec staging-app env | sort | head -50 || echo "Cannot read environment"
else
echo "Container not running"
fi
register: env_vars
ignore_errors: yes
- name: Display environment variables
debug:
msg: "{{ env_vars.stdout_lines }}"
- name: Test PHP environment resolution (check DockerSecretsResolver)
shell: |
echo "=== Testing PHP Environment Resolution ==="
docker exec staging-app php -r "
// Simulate what the Framework does
echo '=== System Environment Check ===' . PHP_EOL;
echo 'getenv(\"REDIS_PASSWORD_FILE\"): ' . (getenv('REDIS_PASSWORD_FILE') ?: 'NOT SET') . PHP_EOL;
echo 'getenv(\"REDIS_PASSWORD\"): ' . (getenv('REDIS_PASSWORD') ? 'SET (length: ' . strlen(getenv('REDIS_PASSWORD')) . ')' : 'NOT SET') . PHP_EOL;
echo PHP_EOL;
echo '=== $_ENV Check ===' . PHP_EOL;
echo 'isset($_ENV[\"REDIS_PASSWORD_FILE\"]): ' . (isset(\$_ENV['REDIS_PASSWORD_FILE']) ? 'YES: ' . \$_ENV['REDIS_PASSWORD_FILE'] : 'NO') . PHP_EOL;
echo 'isset($_ENV[\"REDIS_PASSWORD\"]): ' . (isset(\$_ENV['REDIS_PASSWORD']) ? 'YES (length: ' . strlen(\$_ENV['REDIS_PASSWORD']) . ')' : 'NO') . PHP_EOL;
echo PHP_EOL;
echo '=== $_SERVER Check ===' . PHP_EOL;
echo 'isset($_SERVER[\"REDIS_PASSWORD_FILE\"]): ' . (isset(\$_SERVER['REDIS_PASSWORD_FILE']) ? 'YES: ' . \$_SERVER['REDIS_PASSWORD_FILE'] : 'NO') . PHP_EOL;
echo 'isset($_SERVER[\"REDIS_PASSWORD\"]): ' . (isset(\$_SERVER['REDIS_PASSWORD']) ? 'YES (length: ' . strlen(\$_SERVER['REDIS_PASSWORD']) . ')' : 'NO') . PHP_EOL;
echo PHP_EOL;
echo '=== Docker Secrets File Check ===' . PHP_EOL;
\$secret_file = '/run/secrets/redis_password';
echo 'File path: ' . \$secret_file . PHP_EOL;
echo 'File exists: ' . (file_exists(\$secret_file) ? 'YES' : 'NO') . PHP_EOL;
if (file_exists(\$secret_file)) {
echo 'File readable: ' . (is_readable(\$secret_file) ? 'YES' : 'NO') . PHP_EOL;
\$content = file_get_contents(\$secret_file);
if (\$content !== false) {
echo 'File content length: ' . strlen(trim(\$content)) . PHP_EOL;
echo 'File content (first 10 chars): ' . substr(trim(\$content), 0, 10) . '...' . PHP_EOL;
} else {
echo 'File content: COULD NOT READ' . PHP_EOL;
}
}
echo PHP_EOL;
// Test DockerSecretsResolver logic
echo '=== DockerSecretsResolver Simulation ===' . PHP_EOL;
\$variables = getenv();
\$file_key = 'REDIS_PASSWORD_FILE';
if (isset(\$variables[\$file_key])) {
\$file_path = \$variables[\$file_key];
echo 'REDIS_PASSWORD_FILE found: ' . \$file_path . PHP_EOL;
if (file_exists(\$file_path) && is_readable(\$file_path)) {
\$secret_value = trim(file_get_contents(\$file_path));
echo 'Secret resolved: YES (length: ' . strlen(\$secret_value) . ')' . PHP_EOL;
echo 'Secret value (first 10 chars): ' . substr(\$secret_value, 0, 10) . '...' . PHP_EOL;
} else {
echo 'Secret resolved: NO (file not accessible)' . PHP_EOL;
}
} else {
echo 'REDIS_PASSWORD_FILE NOT FOUND in environment' . PHP_EOL;
}
" 2>&1
register: php_test
ignore_errors: yes
- name: Display PHP environment test
debug:
msg: "{{ php_test.stdout_lines }}"
- name: Check staging-redis container configuration
shell: |
echo "=== Staging Redis Container ==="
docker ps --filter "name=staging-redis" --format "{{.Names}}\t{{.Status}}"
echo ""
echo "=== Redis password requirement ==="
docker exec staging-redis redis-cli CONFIG GET requirepass 2>&1 || echo "Cannot check Redis config"
echo ""
echo "=== Test Redis connection without password ==="
docker exec staging-redis redis-cli PING 2>&1 || echo "Connection failed (password required)"
register: redis_config
ignore_errors: yes
- name: Display Redis configuration
debug:
msg: "{{ redis_config.stdout_lines }}"
- name: Check recent staging-app logs for Redis errors
shell: |
cd ~/deployment/stacks/staging
echo "=== Recent staging-app logs (Redis-related) ==="
docker compose -f docker-compose.base.yml -f docker-compose.staging.yml logs staging-app --tail=100 2>&1 | grep -i -E "(redis|password|secret|auth|noauth)" | tail -30 || echo "No Redis-related logs found"
register: app_logs
ignore_errors: yes
- name: Display application logs
debug:
msg: "{{ app_logs.stdout_lines }}"
- name: Summary and Recommendations
debug:
msg:
- "========================================"
- "DEBUG SUMMARY"
- "========================================"
- "Check the output above for:"
- "1. docker-compose.staging.yml has REDIS_PASSWORD_FILE=/run/secrets/redis_password"
- "2. secrets/redis_password.txt exists and is readable"
- "3. Container has /run/secrets/redis_password file mounted"
- "4. Container environment has REDIS_PASSWORD_FILE variable set"
- "5. PHP can read the secret file and resolve REDIS_PASSWORD"
- "6. Redis container requires password (requirepass set)"
- ""
- "If any check fails, the issue is identified above."

View File

@@ -1,167 +1,18 @@
---
# Deploy Application Code via Git
# Wrapper Playbook for application role deploy_code tasks
- hosts: "{{ deployment_hosts | default('production') }}"
gather_facts: yes
become: no
vars:
application_deployment_method: git
deployment_environment: "{{ deployment_environment | default('production') }}"
tasks:
- name: Include application deploy_code tasks
ansible.builtin.include_role:
name: application
tasks_from: deploy_code
tags:
- application
- deploy
- code
---
- name: Deploy Application Code via Git
hosts: "{{ deployment_hosts | default('production') }}"
gather_facts: yes
become: no
vars:
application_code_dest: "/home/deploy/michaelschiemer/current"
git_repository_url_default: "https://git.michaelschiemer.de/michael/michaelschiemer.git"
# Determine branch based on environment
git_branch: >-
{%- if deployment_environment == 'staging' -%}
{{ git_branch | default('staging') }}
{%- else -%}
{{ git_branch | default('main') }}
{%- endif -%}
git_token: "{{ git_token | default('') }}"
# Deployment environment (staging or production)
deployment_environment: "{{ deployment_environment | default('production') }}"
tasks:
- name: Set git_repo_url from provided value or default
set_fact:
git_repo_url: "{{ git_repository_url if (git_repository_url is defined and git_repository_url != '') else git_repository_url_default }}"
- name: Ensure Git is installed
ansible.builtin.apt:
name: git
state: present
update_cache: no
become: yes
- name: Ensure application code directory exists
file:
path: "{{ application_code_dest }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
become: yes
- name: Check if repository already exists
stat:
path: "{{ application_code_dest }}/.git"
register: git_repo_exists
- name: Check if destination directory exists
stat:
path: "{{ application_code_dest }}"
register: dest_dir_exists
- name: Remove destination directory if it exists but is not a git repo
file:
path: "{{ application_code_dest }}"
state: absent
when: dest_dir_exists.stat.exists and not git_repo_exists.stat.exists
become: yes
- name: Clone repository (if not exists)
ansible.builtin.git:
repo: "{{ git_repo_url }}"
dest: "{{ application_code_dest }}"
version: "{{ git_branch }}"
force: no
update: no
when: not git_repo_exists.stat.exists
environment:
GIT_TERMINAL_PROMPT: "0"
vars:
ansible_become: no
register: git_clone_result
retries: 5
delay: 10
until: git_clone_result is succeeded
ignore_errors: yes
- name: Fail if git clone failed after retries
fail:
msg: "Failed to clone repository after 5 retries. Gitea may be unreachable or overloaded. Last error: {{ git_clone_result.msg | default('Unknown error') }}"
when:
- not git_repo_exists.stat.exists
- git_clone_result is failed
- name: Update repository (if exists)
ansible.builtin.git:
repo: "{{ git_repo_url }}"
dest: "{{ application_code_dest }}"
version: "{{ git_branch }}"
force: yes
update: yes
when: git_repo_exists.stat.exists
environment:
GIT_TERMINAL_PROMPT: "0"
vars:
ansible_become: no
register: git_update_result
retries: 5
delay: 10
until: git_update_result is succeeded
ignore_errors: yes
- name: Fail if git update failed after retries
fail:
msg: "Failed to update repository after 5 retries. Gitea may be unreachable or overloaded. Last error: {{ git_update_result.msg | default('Unknown error') }}"
when:
- git_repo_exists.stat.exists
- git_update_result is failed
- name: Set ownership of repository files
file:
path: "{{ application_code_dest }}"
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
recurse: yes
become: yes
- name: Ensure executable permissions on PHP scripts
file:
path: "{{ application_code_dest }}/{{ item }}"
mode: '0755'
loop:
- worker.php
- console.php
ignore_errors: yes
- name: Verify worker.php exists
stat:
path: "{{ application_code_dest }}/worker.php"
register: worker_php_stat
- name: Verify console.php exists
stat:
path: "{{ application_code_dest }}/console.php"
register: console_php_stat
- name: Verify composer.json exists
stat:
path: "{{ application_code_dest }}/composer.json"
register: composer_json_stat
- name: Get current Git commit hash
shell: |
cd {{ application_code_dest }} && git rev-parse HEAD
register: git_commit_hash
changed_when: false
when: git_repo_exists.stat.exists
- name: Display file verification results
debug:
msg: |
File Verification:
- worker.php: {{ 'EXISTS' if worker_php_stat.stat.exists else 'MISSING' }}
- console.php: {{ 'EXISTS' if console_php_stat.stat.exists else 'MISSING' }}
- composer.json: {{ 'EXISTS' if composer_json_stat.stat.exists else 'MISSING' }}
- Git Branch: {{ git_branch }}
- Git Commit: {{ git_commit_hash.stdout | default('N/A') }}
- name: Fail if critical files are missing
fail:
msg: |
Critical files are missing after Git deployment:
{% if not worker_php_stat.stat.exists %}- worker.php{% endif %}
{% if not console_php_stat.stat.exists %}- console.php{% endif %}
{% if not composer_json_stat.stat.exists %}- composer.json{% endif %}
when:
- not worker_php_stat.stat.exists or not console_php_stat.stat.exists or not composer_json_stat.stat.exists

View File

@@ -0,0 +1,30 @@
---
# Complete Deployment Playbook
# Combines all deployment steps into a single Ansible run:
# 1. Deploy Application Code
# 2. Deploy Docker Image
# 3. Install Composer Dependencies
#
# This reduces SSH connections and Ansible overhead compared to running
# three separate playbook calls.
#
# Usage:
# ansible-playbook -i inventory/production.yml playbooks/deploy-complete.yml \
# -e "deployment_environment=staging" \
# -e "deployment_hosts=production" \
# -e "image_tag=latest" \
# -e "docker_registry=registry.michaelschiemer.de" \
# -e "docker_registry_username=admin" \
# -e "docker_registry_password=password" \
# -e "git_branch=staging" \
# --vault-password-file /tmp/vault_pass
# Step 1: Deploy Application Code
- import_playbook: deploy-application-code.yml
# Step 2: Deploy Docker Image
- import_playbook: deploy-image.yml
# Step 3: Install Composer Dependencies
- import_playbook: install-composer-dependencies.yml

View File

@@ -0,0 +1,60 @@
---
# Deploy Gitea Stack Configuration
# Updates docker-compose.yml and restarts containers with new settings
- name: Deploy Gitea Stack Configuration
hosts: production
gather_facts: yes
become: no
vars:
gitea_stack_path: "{{ stacks_base_path }}/gitea"
traefik_auto_restart: false
gitea_auto_restart: false
tasks:
- name: Check if Gitea stack directory exists
ansible.builtin.stat:
path: "{{ gitea_stack_path }}"
register: gitea_stack_dir
failed_when: false
- name: Fail if Gitea stack directory does not exist
ansible.builtin.fail:
msg: "Gitea stack directory does not exist: {{ gitea_stack_path }}"
when: not gitea_stack_dir.stat.exists
- name: Sync Gitea docker-compose.yml
ansible.builtin.synchronize:
src: "{{ playbook_dir }}/../../stacks/gitea/docker-compose.yml"
dest: "{{ gitea_stack_path }}/docker-compose.yml"
mode: push
register: compose_synced
- name: Restart Gitea and Postgres containers to apply new configuration
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
docker compose up -d --force-recreate gitea postgres
register: gitea_restart
changed_when: gitea_restart.rc == 0
when: compose_synced.changed | default(false) | bool
- name: Wait for Gitea to be ready
ansible.builtin.wait_for:
timeout: 60
delay: 5
when: gitea_restart.changed | default(false) | bool
- name: Display result
ansible.builtin.debug:
msg: |
================================================================================
GITEA STACK CONFIGURATION DEPLOYED
================================================================================
Changes applied:
- Gitea Connection Pool: MAX_OPEN_CONNS=50, MAX_IDLE_CONNS=30, CONN_MAX_LIFETIME=600, CONN_MAX_IDLE_TIME=300
- Postgres Timeouts: authentication_timeout=180s, statement_timeout=30s, idle_in_transaction_timeout=30s
Containers restarted: {{ 'YES' if (gitea_restart.changed | default(false) | bool) else 'NO (no changes)' }}
================================================================================
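The values in this summary are applied via stacks/gitea/docker-compose.yml (not shown in this hunk). A hedged sketch of how they are typically expressed, assuming Gitea's GITEA__section__KEY environment convention and a postgres command override; CONN_MAX_IDLE_TIME would follow the same pattern if the Gitea version in use supports it, and the full Postgres name of the idle-in-transaction setting is idle_in_transaction_session_timeout:

services:
  gitea:
    environment:
      # assumption: these map to Gitea's [database] settings
      GITEA__database__MAX_OPEN_CONNS: "50"
      GITEA__database__MAX_IDLE_CONNS: "30"
      GITEA__database__CONN_MAX_LIFETIME: "600"
  postgres:
    command: >-
      postgres
      -c authentication_timeout=180s
      -c statement_timeout=30s
      -c idle_in_transaction_session_timeout=30s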

View File

@@ -1,72 +1,14 @@
---
# Deploy Traefik Configuration Files
# Wrapper Playbook for traefik role config tasks
- hosts: production
gather_facts: yes
become: no
tasks:
- name: Include traefik config tasks
ansible.builtin.include_role:
name: traefik
tasks_from: config
tags:
- traefik
- config
---
- name: Deploy Traefik Configuration Files
hosts: production
gather_facts: yes
become: no
vars:
traefik_stack_path: "{{ stacks_base_path | default('/home/deploy/deployment/stacks') }}/traefik"
local_traefik_path: "{{ playbook_dir }}/../../stacks/traefik"
tasks:
- name: Check if local Traefik config directory exists
stat:
path: "{{ local_traefik_path }}"
register: local_traefik_exists
delegate_to: localhost
run_once: true
- name: Fail if local Traefik config directory does not exist
fail:
msg: "Local Traefik config directory not found at {{ local_traefik_path }}"
when: not local_traefik_exists.stat.exists
delegate_to: localhost
run_once: true
- name: Check if remote Traefik stack directory exists
stat:
path: "{{ traefik_stack_path }}"
register: traefik_stack_exists
- name: Fail if remote Traefik stack directory does not exist
fail:
msg: "Remote Traefik stack directory not found at {{ traefik_stack_path }}"
when: not traefik_stack_exists.stat.exists
- name: Deploy docker-compose.yml
copy:
src: "{{ local_traefik_path }}/docker-compose.yml"
dest: "{{ traefik_stack_path }}/docker-compose.yml"
mode: '0644'
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
register: docker_compose_deployed
- name: Deploy traefik.yml
copy:
src: "{{ local_traefik_path }}/traefik.yml"
dest: "{{ traefik_stack_path }}/traefik.yml"
mode: '0644'
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
register: traefik_yml_deployed
- name: Display deployment status
debug:
msg: |
========================================
Traefik Configuration Deployment
========================================
docker-compose.yml: {{ '✅ DEPLOYED' if docker_compose_deployed.changed else ' No changes' }}
traefik.yml: {{ '✅ DEPLOYED' if traefik_yml_deployed.changed else ' No changes' }}
========================================
{% if docker_compose_deployed.changed or traefik_yml_deployed.changed %}
✅ Configuration files deployed successfully!
Next step: Restart Traefik to apply changes:
ansible-playbook -i inventory/production.yml playbooks/restart-traefik.yml --vault-password-file secrets/.vault_pass
{% else %}
Configuration files are already up to date.
{% endif %}

View File

@@ -1,240 +0,0 @@
---
- name: Deploy Application Update via Docker Compose
hosts: production
gather_facts: yes
become: no
vars:
# These should be passed via -e from CI/CD
image_tag: "{{ image_tag | default('latest') }}"
git_commit_sha: "{{ git_commit_sha | default('unknown') }}"
deployment_timestamp: "{{ deployment_timestamp | default(ansible_date_time.iso8601) }}"
# app_stack_path is now defined in group_vars/production.yml
pre_tasks:
- name: Optionally load registry credentials from encrypted vault
include_vars:
file: "{{ playbook_dir }}/../secrets/production.vault.yml"
no_log: yes
ignore_errors: yes
delegate_to: localhost
become: no
- name: Derive docker registry credentials from vault when not provided
set_fact:
docker_registry_username: "{{ docker_registry_username | default(vault_docker_registry_username | default(docker_registry_username_default)) }}"
docker_registry_password: "{{ docker_registry_password | default(vault_docker_registry_password | default(docker_registry_password_default)) }}"
- name: Ensure system packages are up to date
include_role:
name: system
when: system_update_packages | bool
- name: Verify Docker is running
systemd:
name: docker
state: started
register: docker_service
become: yes
- name: Fail if Docker is not running
fail:
msg: "Docker service is not running"
when: docker_service.status.ActiveState != 'active'
- name: Ensure application stack directory exists
file:
path: "{{ app_stack_path }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
- name: Determine application environment
set_fact:
application_environment: "{{ APP_ENV | default('production') }}"
application_compose_suffix: "{{ 'staging.yml' if application_environment == 'staging' else 'production.yml' }}"
- name: Check if docker-compose.base.yml exists in application stack
stat:
path: "{{ app_stack_path }}/docker-compose.base.yml"
register: compose_base_exists
when: not (application_sync_files | default(false) | bool)
- name: Check if docker-compose override file exists in application stack (production or staging)
stat:
path: "{{ app_stack_path }}/docker-compose.{{ application_compose_suffix }}"
register: compose_override_exists
when: not (application_sync_files | default(false) | bool)
- name: Fail if docker-compose files don't exist
fail:
msg: |
Application Stack docker-compose files not found at {{ app_stack_path }}
Required files:
- docker-compose.base.yml
- docker-compose.{{ application_compose_suffix }}
The Application Stack must be deployed first via:
ansible-playbook -i inventory/production.yml playbooks/setup-infrastructure.yml
This will create the application stack with docker-compose files and .env file.
when:
- not (application_sync_files | default(false) | bool)
- (not compose_base_exists.stat.exists or not compose_override_exists.stat.exists)
- name: Create backup directory
file:
path: "{{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
tasks:
- name: Verify docker-compose files exist
stat:
path: "{{ app_stack_path }}/docker-compose.base.yml"
register: compose_base_check
when: not (application_sync_files | default(false) | bool)
- name: Verify docker-compose override file exists (production or staging)
stat:
path: "{{ app_stack_path }}/docker-compose.{{ application_compose_suffix }}"
register: compose_override_check
when: not (application_sync_files | default(false) | bool)
- name: Fail if docker-compose files don't exist
fail:
msg: |
Application Stack docker-compose files not found at {{ app_stack_path }}
Required files:
- docker-compose.base.yml
- docker-compose.{{ application_compose_suffix }}
The Application Stack must be deployed first via:
ansible-playbook -i inventory/production.yml playbooks/setup-infrastructure.yml
This will create the application stack with docker-compose files and .env file.
when:
- not (application_sync_files | default(false) | bool)
- (not compose_base_check.stat.exists or not compose_override_check.stat.exists)
- name: Backup current deployment metadata
shell: |
docker compose -f {{ app_stack_path }}/docker-compose.base.yml -f {{ app_stack_path }}/docker-compose.{{ application_compose_suffix }} ps --format json 2>/dev/null > {{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}/current_containers.json || true
docker compose -f {{ app_stack_path }}/docker-compose.base.yml -f {{ app_stack_path }}/docker-compose.{{ application_compose_suffix }} config 2>/dev/null > {{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}/docker-compose-config.yml || true
args:
executable: /bin/bash
changed_when: false
ignore_errors: yes
when:
- not (application_sync_files | default(false) | bool)
- compose_base_exists.stat.exists | default(false)
- compose_override_exists.stat.exists | default(false)
- name: Login to Docker registry (if credentials provided)
community.docker.docker_login:
registry_url: "{{ docker_registry_url }}"
username: "{{ docker_registry_username }}"
password: "{{ docker_registry_password }}"
no_log: yes
ignore_errors: yes
when:
- docker_registry_username is defined
- docker_registry_password is defined
- docker_registry_username | length > 0
- docker_registry_password | length > 0
register: registry_login
- name: Pull new Docker image
community.docker.docker_image:
name: "{{ app_image }}"
tag: "{{ image_tag }}"
source: pull
force_source: yes
register: image_pull
- name: Verify image was pulled successfully
fail:
msg: "Failed to pull image {{ app_image }}:{{ image_tag }}"
when: image_pull.failed
# Sync files first if application_sync_files=true (before updating docker-compose.production.yml)
- name: Sync application stack files
import_role:
name: application
vars:
application_sync_files: "{{ application_sync_files | default(false) }}"
application_compose_recreate: "never" # Don't recreate yet, just sync files
application_remove_orphans: false
when: application_sync_files | default(false) | bool
- name: Update docker-compose override file with new image tag (all services)
replace:
path: "{{ app_stack_path }}/docker-compose.{{ application_compose_suffix }}"
# Match both localhost:5000 and registry.michaelschiemer.de (or any registry URL)
regexp: '^(\s+image:\s+)(localhost:5000|registry\.michaelschiemer\.de|{{ docker_registry }})/{{ app_name }}:.*$'
replace: '\1{{ app_image }}:{{ image_tag }}'
# Always update to ensure localhost:5000 is used (registry only accessible via localhost)
when: true
register: compose_updated
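# Illustration of the replace above with hypothetical values (not taken from the real compose file):
#   before: image: registry.michaelschiemer.de/myapp:2024-10-01
#   after:  image: localhost:5000/myapp:<image_tag>
# assuming app_name resolves to "myapp" and app_image to "localhost:5000/myapp".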
- name: Redeploy application stack with new image
import_role:
name: application
vars:
application_sync_files: false # Already synced above, don't sync again
application_compose_recreate: "always"
application_remove_orphans: true
- name: Get deployed image information
shell: |
docker compose -f {{ app_stack_path }}/docker-compose.base.yml -f {{ app_stack_path }}/docker-compose.production.yml config | grep -E "^\s+image:" | head -1 | awk '{print $2}' || echo "unknown"
args:
executable: /bin/bash
register: deployed_image
changed_when: false
- name: Record deployment metadata
copy:
content: |
Deployment Timestamp: {{ deployment_timestamp }}
Git Commit: {{ git_commit_sha }}
Image Tag: {{ image_tag }}
Deployed Image: {{ deployed_image.stdout }}
Image Pull: {{ 'SUCCESS' if image_pull.changed else 'SKIPPED (already exists)' }}
Stack Deploy: {{ 'UPDATED' if application_stack_changed else 'NO_CHANGE' }}
Health Status: {{ application_health_output if application_health_output != '' else 'All services healthy' }}
Health Check HTTP Status: {{ application_healthcheck_status }}
dest: "{{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}/deployment_metadata.txt"
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0644'
- name: Cleanup old backups (keep last {{ max_rollback_versions }})
shell: |
cd {{ backups_path }}
ls -dt */ 2>/dev/null | tail -n +{{ max_rollback_versions + 1 }} | xargs -r rm -rf
args:
executable: /bin/bash
changed_when: false
ignore_errors: yes
post_tasks:
- name: Display deployment summary
debug:
msg:
- "=== Deployment Summary ==="
- "Image: {{ app_image }}:{{ image_tag }}"
- "Commit: {{ git_commit_sha }}"
- "Timestamp: {{ deployment_timestamp }}"
- "Image Pull: {{ 'SUCCESS' if image_pull.changed else 'SKIPPED' }}"
- "Stack Deploy: {{ 'UPDATED' if application_stack_changed else 'NO_CHANGE' }}"
- "Health Output: {{ application_health_output if application_health_output != '' else 'All services healthy' }}"
- "Health Check HTTP Status: {{ application_healthcheck_status }}"
- "Health Check URL: {{ health_check_url }}"
- ""
- "Next: Verify application is healthy"

View File

@@ -1,239 +0,0 @@
---
- name: Deploy Application Update to Production via Docker Compose
hosts: production
gather_facts: yes
become: no
vars:
# These should be passed via -e from CI/CD
application_environment: production
application_compose_suffix: production.yml
# app_stack_path is now defined in group_vars/production.yml
pre_tasks:
- name: Set deployment variables
set_fact:
image_tag: "{{ image_tag | default('latest') }}"
git_commit_sha: "{{ git_commit_sha | default('unknown') }}"
deployment_timestamp: "{{ deployment_timestamp | default(ansible_date_time.iso8601) }}"
- name: Optionally load registry credentials from encrypted vault
include_vars:
file: "{{ playbook_dir }}/../../secrets/production.vault.yml"
no_log: yes
ignore_errors: yes
delegate_to: localhost
become: no
- name: Derive docker registry credentials from vault when not provided
set_fact:
docker_registry_username: "{{ docker_registry_username | default(vault_docker_registry_username | default(docker_registry_username_default)) }}"
docker_registry_password: "{{ docker_registry_password | default(vault_docker_registry_password | default(docker_registry_password_default)) }}"
- name: Ensure system packages are up to date
include_role:
name: system
when: system_update_packages | bool
- name: Verify Docker is running
systemd:
name: docker
state: started
register: docker_service
become: yes
- name: Fail if Docker is not running
fail:
msg: "Docker service is not running"
when: docker_service.status.ActiveState != 'active'
- name: Ensure application stack directory exists
file:
path: "{{ app_stack_path }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
- name: Check if docker-compose.base.yml exists in application stack
stat:
path: "{{ app_stack_path }}/docker-compose.base.yml"
register: compose_base_exists
when: not (application_sync_files | default(false) | bool)
- name: Check if docker-compose.production.yml exists in application stack
stat:
path: "{{ app_stack_path }}/docker-compose.production.yml"
register: compose_override_exists
when: not (application_sync_files | default(false) | bool)
- name: Fail if docker-compose files don't exist
fail:
msg: |
Application Stack docker-compose files not found at {{ app_stack_path }}
Required files:
- docker-compose.base.yml
- docker-compose.production.yml
The Application Stack must be deployed first via:
ansible-playbook -i inventory/production.yml playbooks/setup-infrastructure.yml
This will create the application stack with docker-compose files and .env file.
when:
- not (application_sync_files | default(false) | bool)
- (not compose_base_exists.stat.exists or not compose_override_exists.stat.exists)
- name: Create backup directory
file:
path: "{{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
tasks:
- name: Verify docker-compose files exist
stat:
path: "{{ app_stack_path }}/docker-compose.base.yml"
register: compose_base_check
when: not (application_sync_files | default(false) | bool)
- name: Verify docker-compose.production.yml exists
stat:
path: "{{ app_stack_path }}/docker-compose.production.yml"
register: compose_override_check
when: not (application_sync_files | default(false) | bool)
- name: Fail if docker-compose files don't exist
fail:
msg: |
Application Stack docker-compose files not found at {{ app_stack_path }}
Required files:
- docker-compose.base.yml
- docker-compose.production.yml
The Application Stack must be deployed first via:
ansible-playbook -i inventory/production.yml playbooks/setup-infrastructure.yml
This will create the application stack with docker-compose files and .env file.
when:
- not (application_sync_files | default(false) | bool)
- (not compose_base_check.stat.exists or not compose_override_check.stat.exists)
- name: Backup current deployment metadata
shell: |
docker compose -f {{ app_stack_path }}/docker-compose.base.yml -f {{ app_stack_path }}/docker-compose.production.yml ps --format json 2>/dev/null > {{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}/current_containers.json || true
docker compose -f {{ app_stack_path }}/docker-compose.base.yml -f {{ app_stack_path }}/docker-compose.production.yml config 2>/dev/null > {{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}/docker-compose-config.yml || true
args:
executable: /bin/bash
changed_when: false
ignore_errors: yes
when:
- not (application_sync_files | default(false) | bool)
- compose_base_exists.stat.exists | default(false)
- compose_override_exists.stat.exists | default(false)
- name: Login to Docker registry (if credentials provided)
community.docker.docker_login:
registry_url: "{{ docker_registry_url }}"
username: "{{ docker_registry_username }}"
password: "{{ docker_registry_password }}"
no_log: yes
ignore_errors: yes
when:
- docker_registry_username is defined
- docker_registry_password is defined
- docker_registry_username | length > 0
- docker_registry_password | length > 0
register: registry_login
- name: Pull new Docker image
community.docker.docker_image:
name: "{{ app_image }}"
tag: "{{ image_tag }}"
source: pull
force_source: yes
register: image_pull
- name: Verify image was pulled successfully
fail:
msg: "Failed to pull image {{ app_image }}:{{ image_tag }}"
when: image_pull.failed
# Sync files first if application_sync_files=true (before updating docker-compose.production.yml)
- name: Sync application stack files
import_role:
name: application
vars:
application_sync_files: "{{ application_sync_files | default(false) }}"
application_compose_recreate: "never" # Don't recreate yet, just sync files
application_remove_orphans: false
when: application_sync_files | default(false) | bool
- name: Update docker-compose.production.yml with new image tag (all services)
replace:
path: "{{ app_stack_path }}/docker-compose.production.yml"
# Match both localhost:5000 and registry.michaelschiemer.de (or any registry URL)
regexp: '^(\s+image:\s+)(localhost:5000|registry\.michaelschiemer\.de|{{ docker_registry }})/{{ app_name }}:.*$'
replace: '\1{{ app_image }}:{{ image_tag }}'
# Always update to ensure localhost:5000 is used (registry only accessible via localhost)
when: true
register: compose_updated
- name: Redeploy application stack with new image
import_role:
name: application
vars:
application_sync_files: false # Already synced above, don't sync again
application_compose_recreate: "always"
application_remove_orphans: true
- name: Get deployed image information
shell: |
docker compose -f {{ app_stack_path }}/docker-compose.base.yml -f {{ app_stack_path }}/docker-compose.production.yml config | grep -E "^\s+image:" | head -1 | awk '{print $2}' || echo "unknown"
args:
executable: /bin/bash
register: deployed_image
changed_when: false
- name: Record deployment metadata
copy:
content: |
Deployment Timestamp: {{ deployment_timestamp }}
Git Commit: {{ git_commit_sha }}
Image Tag: {{ image_tag }}
Deployed Image: {{ deployed_image.stdout }}
Image Pull: {{ 'SUCCESS' if image_pull.changed else 'SKIPPED (already exists)' }}
Stack Deploy: {{ 'UPDATED' if application_stack_changed else 'NO_CHANGE' }}
Health Status: {{ application_health_output if application_health_output != '' else 'All services healthy' }}
Health Check HTTP Status: {{ application_healthcheck_status }}
dest: "{{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}/deployment_metadata.txt"
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0644'
- name: Cleanup old backups (keep last {{ max_rollback_versions | default(5) }})
shell: |
cd {{ backups_path }}
ls -dt */ 2>/dev/null | tail -n +{{ max_rollback_versions | default(5) + 1 }} | xargs -r rm -rf
args:
executable: /bin/bash
changed_when: false
ignore_errors: yes
post_tasks:
- name: Display deployment summary
debug:
msg:
- "=== Production Deployment Summary ==="
- "Image: {{ app_image }}:{{ image_tag }}"
- "Commit: {{ git_commit_sha }}"
- "Timestamp: {{ deployment_timestamp }}"
- "Image Pull: {{ 'SUCCESS' if image_pull.changed else 'SKIPPED' }}"
- "Stack Deploy: {{ 'UPDATED' if application_stack_changed else 'NO_CHANGE' }}"
- "Health Output: {{ application_health_output if application_health_output != '' else 'All services healthy' }}"
- "Health Check HTTP Status: {{ application_healthcheck_status }}"
- "Health Check URL: {{ health_check_url | default('https://michaelschiemer.de/health') }}"
- ""
- "Next: Verify application is healthy"

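For a quick manual check after such a run, assuming the default health URL shown in the summary above:
curl -fsS -o /dev/null -w "%{http_code}\n" https://michaelschiemer.de/health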
View File

@@ -1,226 +0,0 @@
---
- name: Deploy Application Update to Staging via Docker Compose
hosts: production
gather_facts: yes
become: no
vars:
# These should be passed via -e from CI/CD
application_environment: staging
application_compose_suffix: staging.yml
# app_stack_path is now defined in group_vars/production.yml
pre_tasks:
- name: Set deployment variables
set_fact:
image_tag: "{{ image_tag | default('latest') }}"
git_commit_sha: "{{ git_commit_sha | default('unknown') }}"
deployment_timestamp: "{{ deployment_timestamp | default(ansible_date_time.iso8601) }}"
- name: Optionally load registry credentials from encrypted vault
include_vars:
file: "{{ playbook_dir }}/../../secrets/production.vault.yml"
no_log: yes
ignore_errors: yes
delegate_to: localhost
become: no
- name: Derive docker registry credentials from vault when not provided
set_fact:
docker_registry_username: "{{ docker_registry_username | default(vault_docker_registry_username | default(docker_registry_username_default)) }}"
docker_registry_password: "{{ docker_registry_password | default(vault_docker_registry_password | default(docker_registry_password_default)) }}"
- name: Ensure system packages are up to date
include_role:
name: system
when: system_update_packages | bool
- name: Verify Docker is running
systemd:
name: docker
state: started
register: docker_service
become: yes
- name: Fail if Docker is not running
fail:
msg: "Docker service is not running"
when: docker_service.status.ActiveState != 'active'
- name: Set staging stack path
set_fact:
app_stack_path: "{{ staging_stack_path | default(stacks_base_path + '/staging') }}"
backups_path: "{{ backups_base_path | default('~/deployment/backups') }}"
- name: Ensure application stack directory exists
file:
path: "{{ app_stack_path }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
- name: Check if docker-compose.base.yml exists in staging stack
stat:
path: "{{ app_stack_path }}/docker-compose.base.yml"
register: compose_base_exists
- name: Check if docker-compose.staging.yml exists in staging stack
stat:
path: "{{ app_stack_path }}/docker-compose.staging.yml"
register: compose_override_exists
- name: Fail if docker-compose files don't exist
fail:
msg: |
Staging Stack docker-compose files not found at {{ app_stack_path }}
Required files:
- docker-compose.base.yml
- docker-compose.staging.yml
The Staging Stack must be deployed first via:
ansible-playbook -i inventory/production.yml playbooks/setup-infrastructure.yml
This will create the staging stack with docker-compose files and .env file.
when:
- not compose_base_exists.stat.exists or not compose_override_exists.stat.exists
- name: Create backup directory
file:
path: "{{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
tasks:
- name: Verify docker-compose files exist
stat:
path: "{{ app_stack_path }}/docker-compose.base.yml"
register: compose_base_check
- name: Verify docker-compose.staging.yml exists
stat:
path: "{{ app_stack_path }}/docker-compose.staging.yml"
register: compose_override_check
- name: Fail if docker-compose files don't exist
fail:
msg: |
Staging Stack docker-compose files not found at {{ app_stack_path }}
Required files:
- docker-compose.base.yml
- docker-compose.staging.yml
The Staging Stack must be deployed first via:
ansible-playbook -i inventory/production.yml playbooks/setup-infrastructure.yml
This will create the staging stack with docker-compose files and .env file.
when:
- not compose_base_check.stat.exists or not compose_override_check.stat.exists
- name: Backup current deployment metadata
shell: |
docker compose -f {{ app_stack_path }}/docker-compose.base.yml -f {{ app_stack_path }}/docker-compose.staging.yml ps --format json 2>/dev/null > {{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}/current_containers.json || true
docker compose -f {{ app_stack_path }}/docker-compose.base.yml -f {{ app_stack_path }}/docker-compose.staging.yml config 2>/dev/null > {{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}/docker-compose-config.yml || true
args:
executable: /bin/bash
changed_when: false
ignore_errors: yes
- name: Login to Docker registry (if credentials provided)
community.docker.docker_login:
registry_url: "{{ docker_registry_url }}"
username: "{{ docker_registry_username }}"
password: "{{ docker_registry_password }}"
no_log: yes
ignore_errors: yes
when:
- docker_registry_username is defined
- docker_registry_password is defined
- docker_registry_username | length > 0
- docker_registry_password | length > 0
register: registry_login
- name: Pull new Docker image
community.docker.docker_image:
name: "{{ app_image }}"
tag: "{{ image_tag }}"
source: pull
force_source: yes
register: image_pull
- name: Verify image was pulled successfully
fail:
msg: "Failed to pull image {{ app_image }}:{{ image_tag }}"
when: image_pull.failed
- name: Update docker-compose.staging.yml with new image tag (all services)
replace:
path: "{{ app_stack_path }}/docker-compose.staging.yml"
# Match both localhost:5000 and registry.michaelschiemer.de (or any registry URL)
regexp: '^(\s+image:\s+)(localhost:5000|registry\.michaelschiemer\.de|{{ docker_registry }})/{{ app_name }}:.*$'
replace: '\1{{ app_image }}:{{ image_tag }}'
register: compose_updated
- name: Redeploy staging stack with new image
import_role:
name: application
vars:
application_sync_files: false
application_compose_recreate: "always"
application_remove_orphans: true
application_stack_path: "{{ app_stack_path }}"
application_compose_files:
- "{{ app_stack_path }}/docker-compose.base.yml"
- "{{ app_stack_path }}/docker-compose.staging.yml"
- name: Get deployed image information
shell: |
docker compose -f {{ app_stack_path }}/docker-compose.base.yml -f {{ app_stack_path }}/docker-compose.staging.yml config | grep -E "^\s+image:" | head -1 | awk '{print $2}' || echo "unknown"
args:
executable: /bin/bash
register: deployed_image
changed_when: false
- name: Record deployment metadata
copy:
content: |
Deployment Timestamp: {{ deployment_timestamp }}
Git Commit: {{ git_commit_sha }}
Image Tag: {{ image_tag }}
Deployed Image: {{ deployed_image.stdout }}
Image Pull: {{ 'SUCCESS' if image_pull.changed else 'SKIPPED (already exists)' }}
Stack Deploy: {{ 'UPDATED' if application_stack_changed else 'NO_CHANGE' }}
Health Status: {{ application_health_output if application_health_output != '' else 'All services healthy' }}
Health Check HTTP Status: {{ application_healthcheck_status }}
dest: "{{ backups_path }}/{{ deployment_timestamp | regex_replace(':', '-') }}/deployment_metadata.txt"
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0644'
- name: Cleanup old backups (keep last {{ max_rollback_versions | default(5) }})
shell: |
cd {{ backups_path }}
ls -dt */ 2>/dev/null | tail -n +{{ max_rollback_versions | default(5) + 1 }} | xargs -r rm -rf
args:
executable: /bin/bash
changed_when: false
ignore_errors: yes
post_tasks:
- name: Display deployment summary
debug:
msg:
- "=== Staging Deployment Summary ==="
- "Image: {{ app_image }}:{{ image_tag }}"
- "Commit: {{ git_commit_sha }}"
- "Timestamp: {{ deployment_timestamp }}"
- "Image Pull: {{ 'SUCCESS' if image_pull.changed else 'SKIPPED' }}"
- "Stack Deploy: {{ 'UPDATED' if application_stack_changed else 'NO_CHANGE' }}"
- "Health Output: {{ application_health_output if application_health_output != '' else 'All services healthy' }}"
- "Health Check HTTP Status: {{ application_healthcheck_status }}"
- "Health Check URL: {{ health_check_url | default('https://staging.michaelschiemer.de/health') }}"
- ""
- "Next: Verify application is healthy"

View File

@@ -0,0 +1,499 @@
---
# Diagnose Gitea timeout - deep analysis during a live request
# Runs all checks while an actual request is in flight, incl. pg_stat_activity, Redis and backpressure tests
- name: Diagnose Gitea Timeout Deep Analysis During Request
hosts: production
gather_facts: yes
become: no
vars:
gitea_stack_path: "{{ stacks_base_path }}/gitea"
traefik_stack_path: "{{ stacks_base_path }}/traefik"
gitea_url: "https://{{ gitea_domain }}"
test_duration_seconds: 60 # How long the test window runs
test_timestamp: "{{ ansible_date_time.epoch }}"
postgres_max_connections: 300
tasks:
- name: Display diagnostic plan
ansible.builtin.debug:
msg: |
================================================================================
GITEA TIMEOUT DEEP DIAGNOSIS - LIVE DURING REQUEST
================================================================================
This extended diagnosis runs all checks while an actual request is in flight:
1. Docker stats (CPU/RAM/IO) during the request
2. pg_stat_activity: connection count vs max_connections ({{ postgres_max_connections }})
3. Redis ping check (session store blockage)
4. Gitea localhost test (backpressure analysis)
5. Gitea logs (DB timeouts, panics, "context deadline exceeded", SESSION: context canceled)
6. Postgres logs (connection issues, authentication timeouts)
7. Traefik logs ("backend connection error", "EOF")
8. Runner status and git-upload-pack/git gc jobs
Test duration: {{ test_duration_seconds }} seconds
Timestamp: {{ test_timestamp }}
================================================================================
- name: Get initial container stats (baseline)
ansible.builtin.shell: |
docker stats --no-stream --format "table {{ '{{' }}.Name{{ '}}' }}\t{{ '{{' }}.CPUPerc{{ '}}' }}\t{{ '{{' }}.MemUsage{{ '}}' }}\t{{ '{{' }}.NetIO{{ '}}' }}\t{{ '{{' }}.BlockIO{{ '}}' }}" gitea gitea-postgres gitea-redis traefik 2>/dev/null || echo "Stats collection failed"
register: initial_stats
changed_when: false
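# Note on the {{ '{{' }} ... {{ '}}' }} pattern used in the docker stats commands: Ansible would otherwise
# try to resolve docker's Go template fields (.Name, .CPUPerc, ...) as Jinja2 variables, so the literal
# braces are produced via Jinja string expressions and docker receives the plain --format template.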
- name: Get initial PostgreSQL connection count
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
docker compose exec -T postgres psql -U gitea -d gitea -c "SELECT count(*) as connection_count FROM pg_stat_activity;" 2>&1 | grep -E "^[[:space:]]*[0-9]+" | head -1 || echo "0"
register: initial_pg_connections
changed_when: false
failed_when: false
- name: Start collecting Docker stats in background
ansible.builtin.shell: |
timeout {{ test_duration_seconds }} docker stats --format "{{ '{{' }}.Name{{ '}}' }},{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.NetIO{{ '}}' }},{{ '{{' }}.BlockIO{{ '}}' }}" gitea gitea-postgres gitea-redis traefik 2>/dev/null | while read line; do
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] $line"
done > /tmp/gitea_stats_{{ test_timestamp }}.log 2>&1 &
STATS_PID=$!
echo $STATS_PID
register: stats_pid
changed_when: false
- name: Start collecting Gitea logs in background
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
timeout {{ test_duration_seconds }} docker compose logs -f gitea 2>&1 | while read line; do
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] $line"
done > /tmp/gitea_logs_{{ test_timestamp }}.log 2>&1 &
echo $!
register: gitea_logs_pid
changed_when: false
- name: Start collecting Postgres logs in background
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
timeout {{ test_duration_seconds }} docker compose logs -f postgres 2>&1 | while read line; do
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] $line"
done > /tmp/postgres_logs_{{ test_timestamp }}.log 2>&1 &
echo $!
register: postgres_logs_pid
changed_when: false
- name: Start collecting Traefik logs in background
ansible.builtin.shell: |
cd {{ traefik_stack_path }}
timeout {{ test_duration_seconds }} docker compose logs -f traefik 2>&1 | while read line; do
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] $line"
done > /tmp/traefik_logs_{{ test_timestamp }}.log 2>&1 &
echo $!
register: traefik_logs_pid
changed_when: false
- name: Start monitoring pg_stat_activity in background
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
for i in $(seq 1 {{ (test_duration_seconds / 5) | int }}); do
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] $(docker compose exec -T postgres psql -U gitea -d gitea -t -c 'SELECT count(*) FROM pg_stat_activity;' 2>&1 | tr -d ' ' || echo 'ERROR')"
sleep 5
done > /tmp/pg_stat_activity_{{ test_timestamp }}.log 2>&1 &
echo $!
register: pg_stat_pid
changed_when: false
- name: Wait a moment for log collection to start
ansible.builtin.pause:
seconds: 2
- name: Trigger Gitea request via Traefik (with timeout)
ansible.builtin.shell: |
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Starting request to {{ gitea_url }}/api/healthz"
timeout 35 curl -k -v -s -o /tmp/gitea_response_{{ test_timestamp }}.log -w "\nHTTP_CODE:%{http_code}\nTIME_TOTAL:%{time_total}\nTIME_CONNECT:%{time_connect}\nTIME_STARTTRANSFER:%{time_starttransfer}\n" "{{ gitea_url }}/api/healthz" 2>&1 | tee /tmp/gitea_curl_{{ test_timestamp }}.log
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Request completed"
register: gitea_request
changed_when: false
failed_when: false
- name: Test Gitea localhost (Backpressure-Test)
ansible.builtin.shell: |
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Starting localhost test"
cd {{ gitea_stack_path }}
timeout 35 docker compose exec -T gitea curl -f -s -w "\nHTTP_CODE:%{http_code}\nTIME_TOTAL:%{time_total}\n" http://localhost:3000/api/healthz 2>&1 | tee /tmp/gitea_localhost_{{ test_timestamp }}.log || echo "LOCALHOST_TEST_FAILED" > /tmp/gitea_localhost_{{ test_timestamp }}.log
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Localhost test completed"
register: gitea_localhost_test
changed_when: false
failed_when: false
- name: Test direct connection Traefik → Gitea (parallel)
ansible.builtin.shell: |
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Starting direct test Traefik → Gitea"
cd {{ traefik_stack_path }}
timeout 35 docker compose exec -T traefik wget -qO- --timeout=30 http://gitea:3000/api/healthz 2>&1 | tee /tmp/traefik_gitea_direct_{{ test_timestamp }}.log || echo "DIRECT_TEST_FAILED" > /tmp/traefik_gitea_direct_{{ test_timestamp }}.log
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Direct test completed"
register: traefik_direct_test
changed_when: false
failed_when: false
- name: Test Redis connection during request
ansible.builtin.shell: |
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Testing Redis connection"
cd {{ gitea_stack_path }}
docker compose exec -T redis redis-cli ping 2>&1 | tee /tmp/redis_ping_{{ test_timestamp }}.log || echo "REDIS_PING_FAILED" > /tmp/redis_ping_{{ test_timestamp }}.log
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Redis ping completed"
register: redis_ping_test
changed_when: false
failed_when: false
- name: Check Gitea Runner status
ansible.builtin.shell: |
docker ps --format "{{ '{{' }}.Names{{ '}}' }}" | grep -q "gitea-runner" && echo "RUNNING" || echo "STOPPED"
register: runner_status
changed_when: false
failed_when: false
- name: Wait for log collection to complete
ansible.builtin.pause:
seconds: "{{ test_duration_seconds - 5 }}"
- name: Stop background processes
ansible.builtin.shell: |
pkill -f "docker.*stats.*gitea" || true
pkill -f "docker compose logs.*gitea" || true
pkill -f "docker compose logs.*postgres" || true
pkill -f "docker compose logs.*traefik" || true
pkill -f "pg_stat_activity" || true
sleep 2
changed_when: false
failed_when: false
- name: Get final PostgreSQL connection count
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
docker compose exec -T postgres psql -U gitea -d gitea -c "SELECT count(*) as connection_count FROM pg_stat_activity;" 2>&1 | grep -E "^[[:space:]]*[0-9]+" | head -1 || echo "0"
register: final_pg_connections
changed_when: false
failed_when: false
- name: Collect stats results
ansible.builtin.slurp:
src: "/tmp/gitea_stats_{{ test_timestamp }}.log"
register: stats_results
changed_when: false
failed_when: false
- name: Collect pg_stat_activity results
ansible.builtin.slurp:
src: "/tmp/pg_stat_activity_{{ test_timestamp }}.log"
register: pg_stat_results
changed_when: false
failed_when: false
- name: Collect Gitea logs results
ansible.builtin.slurp:
src: "/tmp/gitea_logs_{{ test_timestamp }}.log"
register: gitea_logs_results
changed_when: false
failed_when: false
- name: Collect Postgres logs results
ansible.builtin.slurp:
src: "/tmp/postgres_logs_{{ test_timestamp }}.log"
register: postgres_logs_results
changed_when: false
failed_when: false
- name: Collect Traefik logs results
ansible.builtin.slurp:
src: "/tmp/traefik_logs_{{ test_timestamp }}.log"
register: traefik_logs_results
changed_when: false
failed_when: false
- name: Get request result
ansible.builtin.slurp:
src: "/tmp/gitea_curl_{{ test_timestamp }}.log"
register: request_result
changed_when: false
failed_when: false
- name: Get localhost test result
ansible.builtin.slurp:
src: "/tmp/gitea_localhost_{{ test_timestamp }}.log"
register: localhost_result
changed_when: false
failed_when: false
- name: Get direct test result
ansible.builtin.slurp:
src: "/tmp/traefik_gitea_direct_{{ test_timestamp }}.log"
register: direct_test_result
changed_when: false
failed_when: false
- name: Get Redis ping result
ansible.builtin.slurp:
src: "/tmp/redis_ping_{{ test_timestamp }}.log"
register: redis_ping_result
changed_when: false
failed_when: false
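# The slurp tasks above return the file contents base64-encoded in the registered .content field,
# which is why the report below renders them through the b64decode filter.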
- name: Analyze pg_stat_activity for connection count
ansible.builtin.shell: |
if [ -f /tmp/pg_stat_activity_{{ test_timestamp }}.log ]; then
echo "=== POSTGRES CONNECTION COUNT ANALYSIS ==="
echo "Initial connections: {{ initial_pg_connections.stdout }}"
echo "Final connections: {{ final_pg_connections.stdout }}"
echo "Max connections: {{ postgres_max_connections }}"
echo ""
echo "=== CONNECTION COUNT TIMELINE ==="
cat /tmp/pg_stat_activity_{{ test_timestamp }}.log | tail -20 || echo "No connection count data"
echo ""
echo "=== CONNECTION COUNT ANALYSIS ==="
MAX_COUNT=$(cat /tmp/pg_stat_activity_{{ test_timestamp }}.log | grep -E "^\[.*\] [0-9]+" | awk -F'] ' '{print $2}' | sort -n | tail -1 || echo "0")
if [ "$MAX_COUNT" != "0" ] && [ "$MAX_COUNT" != "" ]; then
echo "Maximum connections during test: $MAX_COUNT"
WARNING_THRESHOLD=$(({{ postgres_max_connections }} * 80 / 100))
if [ "$MAX_COUNT" -gt "$WARNING_THRESHOLD" ]; then
echo "⚠️ WARNING: Connection count ($MAX_COUNT) is above 80% of max_connections ({{ postgres_max_connections }})"
echo " Consider reducing MAX_OPEN_CONNS or increasing max_connections"
else
echo "✅ Connection count is within safe limits"
fi
fi
else
echo "pg_stat_activity log file not found"
fi
register: pg_stat_analysis
changed_when: false
failed_when: false
- name: Analyze stats for high CPU/Memory/IO
ansible.builtin.shell: |
if [ -f /tmp/gitea_stats_{{ test_timestamp }}.log ]; then
echo "=== STATS SUMMARY ==="
echo "Total samples: $(wc -l < /tmp/gitea_stats_{{ test_timestamp }}.log)"
echo ""
echo "=== HIGH CPU (>80%) ==="
grep -E "gitea|gitea-postgres" /tmp/gitea_stats_{{ test_timestamp }}.log | awk -F',' '{cpu=$2; gsub(/%/, "", cpu); if (cpu+0 > 80) print $0}' | head -10 || echo "No high CPU usage found"
echo ""
echo "=== MEMORY USAGE ==="
grep -E "gitea" /tmp/gitea_stats_{{ test_timestamp }}.log | tail -5 || echo "No memory stats"
else
echo "Stats file not found"
fi
register: stats_analysis
changed_when: false
failed_when: false
- name: Analyze Gitea logs for errors (including SESSION context canceled, panic, git-upload-pack)
ansible.builtin.shell: |
if [ -f /tmp/gitea_logs_{{ test_timestamp }}.log ]; then
echo "=== DB-TIMEOUTS / CONNECTION ERRORS ==="
grep -iE "timeout|deadline exceeded|connection.*failed|database.*error|postgres.*error|context.*deadline" /tmp/gitea_logs_{{ test_timestamp }}.log | tail -20 || echo "No DB-timeouts found"
echo ""
echo "=== SESSION: CONTEXT CANCELED ==="
grep -iE "SESSION.*context canceled|session.*release.*context canceled" /tmp/gitea_logs_{{ test_timestamp }}.log | tail -10 || echo "No SESSION: context canceled found"
echo ""
echo "=== PANICS / FATAL ERRORS ==="
grep -iE "panic|fatal|error.*fatal" /tmp/gitea_logs_{{ test_timestamp }}.log | tail -10 || echo "No panics found"
echo ""
echo "=== GIT-UPLOAD-PACK REQUESTS (can block) ==="
grep -iE "git-upload-pack|ServiceUploadPack" /tmp/gitea_logs_{{ test_timestamp }}.log | tail -10 || echo "No git-upload-pack requests found"
echo ""
echo "=== GIT GC JOBS (can hold connections) ==="
grep -iE "git.*gc|garbage.*collect" /tmp/gitea_logs_{{ test_timestamp }}.log | tail -10 || echo "No git gc jobs found"
echo ""
echo "=== SLOW QUERIES / PERFORMANCE ==="
grep -iE "slow|performance|took.*ms|duration" /tmp/gitea_logs_{{ test_timestamp }}.log | tail -10 || echo "No slow queries found"
else
echo "Gitea logs file not found"
fi
register: gitea_logs_analysis
changed_when: false
failed_when: false
- name: Analyze Postgres logs for errors
ansible.builtin.shell: |
if [ -f /tmp/postgres_logs_{{ test_timestamp }}.log ]; then
echo "=== POSTGRES ERRORS ==="
grep -iE "error|timeout|deadlock|connection.*refused|too many connections|authentication.*timeout" /tmp/postgres_logs_{{ test_timestamp }}.log | tail -20 || echo "No Postgres errors found"
echo ""
echo "=== SLOW QUERIES ==="
grep -iE "slow|duration|statement.*took" /tmp/postgres_logs_{{ test_timestamp }}.log | tail -10 || echo "No slow queries found"
else
echo "Postgres logs file not found"
fi
register: postgres_logs_analysis
changed_when: false
failed_when: false
- name: Analyze Traefik logs for backend errors
ansible.builtin.shell: |
if [ -f /tmp/traefik_logs_{{ test_timestamp }}.log ]; then
echo "=== BACKEND CONNECTION ERRORS ==="
grep -iE "backend.*error|connection.*error|EOF|gitea.*error|git\.michaelschiemer\.de.*error" /tmp/traefik_logs_{{ test_timestamp }}.log | tail -20 || echo "No backend errors found"
echo ""
echo "=== TIMEOUT ERRORS ==="
grep -iE "timeout|504|gateway.*timeout" /tmp/traefik_logs_{{ test_timestamp }}.log | tail -10 || echo "No timeout errors found"
else
echo "Traefik logs file not found"
fi
register: traefik_logs_analysis
changed_when: false
failed_when: false
- name: Display comprehensive diagnosis
ansible.builtin.debug:
msg: |
================================================================================
GITEA TIMEOUT DEEP DIAGNOSIS - RESULTS
================================================================================
BASELINE STATS (before request):
{{ initial_stats.stdout }}
POSTGRES CONNECTION COUNT:
{{ pg_stat_analysis.stdout }}
REQUEST RESULT (Traefik → Gitea):
{% if request_result.content is defined and request_result.content != '' %}
{{ request_result.content | b64decode }}
{% else %}
Request result not available
{% endif %}
BACKPRESSURE TEST - GITEA LOCALHOST:
{% if localhost_result.content is defined and localhost_result.content != '' %}
{{ localhost_result.content | b64decode }}
{% else %}
Localhost test result not available
{% endif %}
DIRECT TEST TRAEFIK → GITEA:
{% if direct_test_result.content is defined and direct_test_result.content != '' %}
{{ direct_test_result.content | b64decode }}
{% else %}
Direct test result not available
{% endif %}
REDIS PING TEST:
{% if redis_ping_result.content is defined and redis_ping_result.content != '' %}
{{ redis_ping_result.content | b64decode }}
{% else %}
Redis ping result not available
{% endif %}
RUNNER STATUS:
- Status: {{ runner_status.stdout }}
================================================================================
STATS ANALYSIS (during request):
================================================================================
{{ stats_analysis.stdout }}
================================================================================
GITEA LOG ANALYSIS:
================================================================================
{{ gitea_logs_analysis.stdout }}
================================================================================
POSTGRES LOG ANALYSIS:
================================================================================
{{ postgres_logs_analysis.stdout }}
================================================================================
TRAEFIK LOG ANALYSIS:
================================================================================
{{ traefik_logs_analysis.stdout }}
================================================================================
INTERPRETATION:
================================================================================
{% set request_content = request_result.content | default('') | b64decode | default('') %}
{% set localhost_content = localhost_result.content | default('') | b64decode | default('') %}
{% set direct_content = direct_test_result.content | default('') | b64decode | default('') %}
{% set redis_content = redis_ping_result.content | default('') | b64decode | default('') %}
{% set traefik_errors = traefik_logs_analysis.stdout | default('') %}
{% set gitea_errors = gitea_logs_analysis.stdout | default('') %}
{% set postgres_errors = postgres_logs_analysis.stdout | default('') %}
{% set stats_content = stats_analysis.stdout | default('') %}
{% if 'timeout' in request_content or '504' in request_content or 'HTTP_CODE:504' in request_content %}
⚠️ REQUEST TIMED OUT / RETURNED 504:
BACKPRESSURE ANALYSIS:
{% if 'LOCALHOST_TEST_FAILED' in localhost_content or localhost_content == '' %}
→ Gitea localhost test fails or blocks
→ The problem is INSIDE Gitea/DB itself, not between Traefik and Gitea
{% elif 'HTTP_CODE:200' in localhost_content or '200 OK' in localhost_content %}
→ Gitea localhost test responds quickly
→ The problem lies BETWEEN Traefik and Gitea (network, firewall, limit)
{% endif %}
{% if 'REDIS_PING_FAILED' in redis_content or redis_content == '' or 'PONG' not in redis_content %}
→ Redis is not reachable
→ Session store is blocking, Gitea runs into "context canceled"
{% else %}
→ Redis is reachable
{% endif %}
{% if 'SESSION.*context canceled' in gitea_errors or 'session.*release.*context canceled' in gitea_errors %}
→ Gitea reports SESSION: context canceled errors
→ The session store (Redis) may be blocking or session locks may be stuck
{% endif %}
{% if 'git-upload-pack' in gitea_errors %}
→ git-upload-pack requests found (can block)
→ Check whether the runner is active and performing many Git operations
{% endif %}
{% if 'git.*gc' in gitea_errors %}
→ git gc jobs found (can hold connections)
→ Check whether git gc jobs are hanging
{% endif %}
{% if 'EOF' in traefik_errors or 'backend' in traefik_errors | lower or 'connection.*error' in traefik_errors | lower %}
→ Traefik reports backend connection errors
→ Gitea does not answer Traefik's connection attempts
{% endif %}
{% if 'timeout' in gitea_errors | lower or 'deadline exceeded' in gitea_errors | lower %}
→ Gitea has DB timeouts or context deadline exceeded
→ Postgres may be blocking or too slow
{% endif %}
{% if 'too many connections' in postgres_errors | lower %}
→ Postgres has too many connections
→ The connection pool may be overloaded
{% endif %}
{% if 'HIGH CPU' in stats_content or '>80' in stats_content %}
→ Gitea or Postgres show high CPU load
→ Performance problem, not a timeout configuration issue
{% endif %}
{% else %}
✅ REQUEST SUCCEEDED:
→ The problem only occurs intermittently
→ Check the logs for sporadic errors
{% endif %}
================================================================================
NEXT STEPS:
================================================================================
1. Check pg_stat_activity: is the connection count close to max_connections? (see the query sketch after this task)
2. Check whether Redis is reachable (session store blockage)
3. Check backpressure: localhost fast but Traefik slow = network problem
4. Check for SESSION: context canceled errors (session locks)
5. Check git-upload-pack requests (runner overload)
6. Check git gc jobs (hanging and holding connections)
================================================================================
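# Follow-up sketch for next step 1 (an ad-hoc query, not part of this playbook): break the connection
# count down by state to spot piled-up idle-in-transaction sessions, e.g.
#   docker compose exec -T postgres psql -U gitea -d gitea \
#     -c "SELECT state, count(*) FROM pg_stat_activity GROUP BY state ORDER BY count DESC;"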
- name: Cleanup temporary files
# All temp files created above end in _{{ test_timestamp }}.log, so remove them by glob
ansible.builtin.shell: rm -f /tmp/*_{{ test_timestamp }}.log
changed_when: false
failed_when: false

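Usage sketch for the deep diagnosis play above. The playbook file name is an assumption (only the play content is visible here); the test window can be widened with -e, matching how the play's vars are defined:
ansible-playbook -i inventory/production.yml playbooks/diagnose-gitea-timeout-deep.yml --vault-password-file secrets/.vault_pass -e test_duration_seconds=120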
View File

@@ -0,0 +1,343 @@
---
# Diagnose Gitea timeout - live during a request
# Runs all checks while an actual request is in flight
- name: Diagnose Gitea Timeout During Request
hosts: production
gather_facts: yes
become: no
vars:
gitea_stack_path: "{{ stacks_base_path }}/gitea"
traefik_stack_path: "{{ stacks_base_path }}/traefik"
gitea_url: "https://{{ gitea_domain }}"
test_duration_seconds: 60 # How long the test window runs
test_timestamp: "{{ ansible_date_time.epoch }}"
tasks:
- name: Display diagnostic plan
ansible.builtin.debug:
msg: |
================================================================================
GITEA TIMEOUT DIAGNOSIS - LIVE DURING REQUEST
================================================================================
This diagnosis runs all checks while an actual request is in flight:
1. Docker stats (CPU/RAM/IO) during the request
2. Gitea logs (DB timeouts, panics, "context deadline exceeded")
3. Postgres logs (connection issues)
4. Traefik logs ("backend connection error", "EOF")
5. Direct test Traefik → Gitea
Test duration: {{ test_duration_seconds }} seconds
Timestamp: {{ test_timestamp }}
================================================================================
- name: Get initial container stats (baseline)
ansible.builtin.shell: |
docker stats --no-stream --format "table {{ '{{' }}.Name{{ '}}' }}\t{{ '{{' }}.CPUPerc{{ '}}' }}\t{{ '{{' }}.MemUsage{{ '}}' }}\t{{ '{{' }}.NetIO{{ '}}' }}\t{{ '{{' }}.BlockIO{{ '}}' }}" gitea gitea-postgres gitea-redis traefik 2>/dev/null || echo "Stats collection failed"
register: initial_stats
changed_when: false
- name: Start collecting Docker stats in background
ansible.builtin.shell: |
timeout {{ test_duration_seconds }} docker stats --format "{{ '{{' }}.Name{{ '}}' }},{{ '{{' }}.CPUPerc{{ '}}' }},{{ '{{' }}.MemUsage{{ '}}' }},{{ '{{' }}.NetIO{{ '}}' }},{{ '{{' }}.BlockIO{{ '}}' }}" gitea gitea-postgres gitea-redis traefik 2>/dev/null | while read line; do
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] $line"
done > /tmp/gitea_stats_{{ test_timestamp }}.log 2>&1 &
STATS_PID=$!
echo $STATS_PID
register: stats_pid
changed_when: false
- name: Start collecting Gitea logs in background
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
timeout {{ test_duration_seconds }} docker compose logs -f gitea 2>&1 | while read line; do
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] $line"
done > /tmp/gitea_logs_{{ test_timestamp }}.log 2>&1 &
echo $!
register: gitea_logs_pid
changed_when: false
- name: Start collecting Postgres logs in background
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
timeout {{ test_duration_seconds }} docker compose logs -f gitea-postgres 2>&1 | while read line; do
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] $line"
done > /tmp/postgres_logs_{{ test_timestamp }}.log 2>&1 &
echo $!
register: postgres_logs_pid
changed_when: false
- name: Start collecting Traefik logs in background
ansible.builtin.shell: |
cd {{ traefik_stack_path }}
timeout {{ test_duration_seconds }} docker compose logs -f traefik 2>&1 | while read line; do
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] $line"
done > /tmp/traefik_logs_{{ test_timestamp }}.log 2>&1 &
echo $!
register: traefik_logs_pid
changed_when: false
- name: Wait a moment for log collection to start
ansible.builtin.pause:
seconds: 2
- name: Trigger Gitea request via Traefik (with timeout)
ansible.builtin.shell: |
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Starting request to {{ gitea_url }}/api/healthz"
timeout 35 curl -k -v -s -o /tmp/gitea_response_{{ test_timestamp }}.log -w "\nHTTP_CODE:%{http_code}\nTIME_TOTAL:%{time_total}\nTIME_CONNECT:%{time_connect}\nTIME_STARTTRANSFER:%{time_starttransfer}\n" "{{ gitea_url }}/api/healthz" 2>&1 | tee /tmp/gitea_curl_{{ test_timestamp }}.log
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Request completed"
register: gitea_request
changed_when: false
failed_when: false
- name: Test direct connection Traefik → Gitea (parallel)
ansible.builtin.shell: |
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Starting direct test Traefik → Gitea"
cd {{ traefik_stack_path }}
timeout 35 docker compose exec -T traefik wget -qO- --timeout=30 http://gitea:3000/api/healthz 2>&1 | tee /tmp/traefik_gitea_direct_{{ test_timestamp }}.log || echo "DIRECT_TEST_FAILED" > /tmp/traefik_gitea_direct_{{ test_timestamp }}.log
echo "[$(date '+%Y-%m-%d %H:%M:%S.%3N')] Direct test completed"
register: traefik_direct_test
changed_when: false
failed_when: false
- name: Wait for log collection to complete
ansible.builtin.pause:
seconds: "{{ test_duration_seconds - 5 }}"
- name: Stop background processes
ansible.builtin.shell: |
pkill -f "docker.*stats.*gitea" || true
pkill -f "docker compose logs.*gitea" || true
pkill -f "docker compose logs.*postgres" || true
pkill -f "docker compose logs.*traefik" || true
sleep 2
changed_when: false
failed_when: false
- name: Collect stats results
ansible.builtin.slurp:
src: "/tmp/gitea_stats_{{ test_timestamp }}.log"
register: stats_results
changed_when: false
failed_when: false
- name: Collect Gitea logs results
ansible.builtin.slurp:
src: "/tmp/gitea_logs_{{ test_timestamp }}.log"
register: gitea_logs_results
changed_when: false
failed_when: false
- name: Collect Postgres logs results
ansible.builtin.slurp:
src: "/tmp/postgres_logs_{{ test_timestamp }}.log"
register: postgres_logs_results
changed_when: false
failed_when: false
- name: Collect Traefik logs results
ansible.builtin.slurp:
src: "/tmp/traefik_logs_{{ test_timestamp }}.log"
register: traefik_logs_results
changed_when: false
failed_when: false
- name: Get request result
ansible.builtin.slurp:
src: "/tmp/gitea_curl_{{ test_timestamp }}.log"
register: request_result
changed_when: false
failed_when: false
- name: Get direct test result
ansible.builtin.slurp:
src: "/tmp/traefik_gitea_direct_{{ test_timestamp }}.log"
register: direct_test_result
changed_when: false
failed_when: false
- name: Analyze stats for high CPU/Memory/IO
ansible.builtin.shell: |
if [ -f /tmp/gitea_stats_{{ test_timestamp }}.log ]; then
echo "=== STATS SUMMARY ==="
echo "Total samples: $(wc -l < /tmp/gitea_stats_{{ test_timestamp }}.log)"
echo ""
echo "=== HIGH CPU (>80%) ==="
grep -E "gitea|gitea-postgres" /tmp/gitea_stats_{{ test_timestamp }}.log | awk -F',' '{cpu=$2; gsub(/%/, "", cpu); if (cpu+0 > 80) print $0}' | head -10 || echo "No high CPU usage found"
echo ""
echo "=== MEMORY USAGE ==="
grep -E "gitea" /tmp/gitea_stats_{{ test_timestamp }}.log | tail -5 || echo "No memory stats"
echo ""
echo "=== NETWORK IO ==="
grep -E "gitea" /tmp/gitea_stats_{{ test_timestamp }}.log | tail -5 || echo "No network activity"
else
echo "Stats file not found"
fi
register: stats_analysis
changed_when: false
failed_when: false
- name: Analyze Gitea logs for errors
ansible.builtin.shell: |
if [ -f /tmp/gitea_logs_{{ test_timestamp }}.log ]; then
echo "=== DB-TIMEOUTS / CONNECTION ERRORS ==="
grep -iE "timeout|deadline exceeded|connection.*failed|database.*error|postgres.*error|context.*deadline" /tmp/gitea_logs_{{ test_timestamp }}.log | tail -20 || echo "No DB-timeouts found"
echo ""
echo "=== PANICS / FATAL ERRORS ==="
grep -iE "panic|fatal|error.*fatal" /tmp/gitea_logs_{{ test_timestamp }}.log | tail -10 || echo "No panics found"
echo ""
echo "=== SLOW QUERIES / PERFORMANCE ==="
grep -iE "slow|performance|took.*ms|duration" /tmp/gitea_logs_{{ test_timestamp }}.log | tail -10 || echo "No slow queries found"
echo ""
echo "=== RECENT LOG ENTRIES (last 10) ==="
tail -10 /tmp/gitea_logs_{{ test_timestamp }}.log || echo "No recent logs"
else
echo "Gitea logs file not found"
fi
register: gitea_logs_analysis
changed_when: false
failed_when: false
- name: Analyze Postgres logs for errors
ansible.builtin.shell: |
if [ -f /tmp/postgres_logs_{{ test_timestamp }}.log ]; then
echo "=== POSTGRES ERRORS ==="
grep -iE "error|timeout|deadlock|connection.*refused|too many connections" /tmp/postgres_logs_{{ test_timestamp }}.log | tail -20 || echo "No Postgres errors found"
echo ""
echo "=== SLOW QUERIES ==="
grep -iE "slow|duration|statement.*took" /tmp/postgres_logs_{{ test_timestamp }}.log | tail -10 || echo "No slow queries found"
echo ""
echo "=== RECENT LOG ENTRIES (last 10) ==="
tail -10 /tmp/postgres_logs_{{ test_timestamp }}.log || echo "No recent logs"
else
echo "Postgres logs file not found"
fi
register: postgres_logs_analysis
changed_when: false
failed_when: false
- name: Analyze Traefik logs for backend errors
ansible.builtin.shell: |
if [ -f /tmp/traefik_logs_{{ test_timestamp }}.log ]; then
echo "=== BACKEND CONNECTION ERRORS ==="
grep -iE "backend.*error|connection.*error|EOF|gitea.*error|git\.michaelschiemer\.de.*error" /tmp/traefik_logs_{{ test_timestamp }}.log | tail -20 || echo "No backend errors found"
echo ""
echo "=== TIMEOUT ERRORS ==="
grep -iE "timeout|504|gateway.*timeout" /tmp/traefik_logs_{{ test_timestamp }}.log | tail -10 || echo "No timeout errors found"
echo ""
echo "=== RECENT LOG ENTRIES (last 10) ==="
tail -10 /tmp/traefik_logs_{{ test_timestamp }}.log || echo "No recent logs"
else
echo "Traefik logs file not found"
fi
register: traefik_logs_analysis
changed_when: false
failed_when: false
- name: Display comprehensive diagnosis
ansible.builtin.debug:
msg: |
================================================================================
GITEA TIMEOUT DIAGNOSIS - RESULTS
================================================================================
BASELINE STATS (before request):
{{ initial_stats.stdout }}
REQUEST RESULT:
{% if request_result.content is defined and request_result.content != '' %}
{{ request_result.content | b64decode }}
{% else %}
Request result not available
{% endif %}
DIRECT TEST TRAEFIK → GITEA:
{% if direct_test_result.content is defined and direct_test_result.content != '' %}
{{ direct_test_result.content | b64decode }}
{% else %}
Direct test result not available
{% endif %}
================================================================================
STATS ANALYSIS (during request):
================================================================================
{{ stats_analysis.stdout }}
================================================================================
GITEA LOG ANALYSIS:
================================================================================
{{ gitea_logs_analysis.stdout }}
================================================================================
POSTGRES LOG ANALYSIS:
================================================================================
{{ postgres_logs_analysis.stdout }}
================================================================================
TRAEFIK LOG ANALYSIS:
================================================================================
{{ traefik_logs_analysis.stdout }}
================================================================================
INTERPRETATION:
================================================================================
{% set request_content = request_result.content | default('') | b64decode | default('') %}
{% set direct_content = direct_test_result.content | default('') | b64decode | default('') %}
{% set traefik_errors = traefik_logs_analysis.stdout | default('') %}
{% set gitea_errors = gitea_logs_analysis.stdout | default('') %}
{% set postgres_errors = postgres_logs_analysis.stdout | default('') %}
{% set stats_content = stats_analysis.stdout | default('') %}
{% if 'timeout' in request_content or '504' in request_content or 'HTTP_CODE:504' in request_content %}
⚠️ REQUEST HAT TIMEOUT/504:
{% if 'EOF' in traefik_errors or 'backend' in traefik_errors | lower or 'connection.*error' in traefik_errors | lower %}
→ Traefik meldet Backend-Connection-Error
→ Gitea antwortet nicht auf Traefik's Verbindungsversuche
{% endif %}
{% if 'timeout' in gitea_errors | lower or 'deadline exceeded' in gitea_errors | lower %}
→ Gitea hat DB-Timeouts oder Context-Deadline-Exceeded
→ Postgres könnte blockieren oder zu langsam sein
{% endif %}
{% if 'too many connections' in postgres_errors | lower %}
→ Postgres hat zu viele Verbindungen
→ Connection Pool könnte überlastet sein
{% endif %}
{% if 'HIGH CPU' in stats_content or '>80' in stats_content %}
→ Gitea oder Postgres haben hohe CPU-Last
→ Performance-Problem, nicht Timeout-Konfiguration
{% endif %}
{% if 'DIRECT_TEST_FAILED' in direct_content or direct_content == '' %}
→ Direkter Test Traefik → Gitea schlägt fehl
→ Problem liegt bei Gitea selbst, nicht bei Traefik-Routing
{% endif %}
{% else %}
✅ REQUEST WAR ERFOLGREICH:
→ Problem tritt nur intermittierend auf
→ Prüfe Logs auf sporadische Fehler
{% endif %}
================================================================================
NÄCHSTE SCHRITTE:
================================================================================
1. Prüfe ob hohe CPU/Memory bei Gitea oder Postgres
2. Prüfe ob DB-Timeouts in Gitea-Logs
3. Prüfe ob Postgres "too many connections" meldet
4. Prüfe ob Traefik "backend connection error" oder "EOF" meldet
5. Prüfe ob direkter Test Traefik → Gitea funktioniert (Beispiel-Befehle siehe Kommentar unten)
================================================================================
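# Illustrative only – the five checks listed above can also be reproduced manually on the host
# (stack paths and container names as used elsewhere in this playbook):
#   docker stats --no-stream gitea postgres                                                                                 # 1. CPU/Memory
#   cd /home/deploy/deployment/stacks/gitea && docker compose logs gitea --tail=200 | grep -iE "timeout|deadline exceeded" # 2. DB-Timeouts
#   cd /home/deploy/deployment/stacks/gitea && docker compose logs postgres --tail=200 | grep -i "too many connections"    # 3. Postgres
#   cd /home/deploy/deployment/stacks/traefik && docker compose logs traefik --tail=200 | grep -iE "EOF|backend"           # 4. Traefik
#   cd /home/deploy/deployment/stacks/traefik && docker compose exec -T traefik wget -qO- --timeout=5 http://gitea:3000/api/healthz  # 5. Direkter Test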
- name: Cleanup temporary files
ansible.builtin.file:
path: "/tmp/gitea_{{ test_timestamp }}.log"
state: absent
failed_when: false

View File

@@ -0,0 +1,325 @@
---
# Diagnose Gitea Timeouts
# Checks Gitea status, Traefik routing, and network connectivity, and recommends fixes
- name: Diagnose Gitea Timeouts
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check Gitea container status
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/gitea
docker compose ps gitea
register: gitea_status
changed_when: false
- name: Display Gitea container status
ansible.builtin.debug:
msg: |
================================================================================
Gitea Container Status:
================================================================================
{{ gitea_status.stdout }}
================================================================================
- name: Check Gitea health endpoint (direct from container)
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/gitea
docker compose exec -T gitea curl -f http://localhost:3000/api/healthz 2>&1 || echo "HEALTH_CHECK_FAILED"
register: gitea_health_direct
changed_when: false
failed_when: false
- name: Display Gitea health (direct)
ansible.builtin.debug:
msg: |
================================================================================
Gitea Health Check (direct from container):
================================================================================
{% if 'HEALTH_CHECK_FAILED' not in gitea_health_direct.stdout %}
✅ Gitea is healthy (direct check)
Response: {{ gitea_health_direct.stdout }}
{% else %}
❌ Gitea health check failed (direct)
Error: {{ gitea_health_direct.stdout }}
{% endif %}
================================================================================
- name: Check Gitea health endpoint (via Traefik)
ansible.builtin.uri:
url: "https://git.michaelschiemer.de/api/healthz"
method: GET
status_code: [200]
validate_certs: false
timeout: 10
register: gitea_health_traefik
failed_when: false
changed_when: false
- name: Display Gitea health (via Traefik)
ansible.builtin.debug:
msg: |
================================================================================
Gitea Health Check (via Traefik):
================================================================================
{% if gitea_health_traefik.status == 200 %}
✅ Gitea is reachable via Traefik
Status: {{ gitea_health_traefik.status }}
{% else %}
❌ Gitea is NOT reachable via Traefik
Status: {{ gitea_health_traefik.status | default('TIMEOUT/ERROR') }}
Message: {{ gitea_health_traefik.msg | default('No response') }}
{% endif %}
================================================================================
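# Manual spot check of the same endpoint (illustrative, mirrors the uri task above):
#   curl -sk -o /dev/null -w '%{http_code}\n' --max-time 10 https://git.michaelschiemer.de/api/healthz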
- name: Check Traefik container status
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/traefik
docker compose ps traefik
register: traefik_status
changed_when: false
- name: Display Traefik container status
ansible.builtin.debug:
msg: |
================================================================================
Traefik Container Status:
================================================================================
{{ traefik_status.stdout }}
================================================================================
- name: Check Redis container status
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/gitea
docker compose ps redis
register: redis_status
changed_when: false
- name: Display Redis container status
ansible.builtin.debug:
msg: |
================================================================================
Redis Container Status:
================================================================================
{{ redis_status.stdout }}
================================================================================
- name: Check PostgreSQL container status
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/gitea
docker compose ps postgres
register: postgres_status
changed_when: false
- name: Display PostgreSQL container status
ansible.builtin.debug:
msg: |
================================================================================
PostgreSQL Container Status:
================================================================================
{{ postgres_status.stdout }}
================================================================================
- name: Check Gitea container IP in traefik-public network
ansible.builtin.shell: |
docker inspect gitea --format '{{ '{{' }}with index .NetworkSettings.Networks "traefik-public"{{ '}}' }}{{ '{{' }}.IPAddress{{ '}}' }}{{ '{{' }}end{{ '}}' }}' 2>/dev/null || echo "NOT_FOUND"
register: gitea_ip
changed_when: false
failed_when: false
- name: Display Gitea IP in traefik-public network
ansible.builtin.debug:
msg: |
================================================================================
Gitea IP in traefik-public Network:
================================================================================
{% if gitea_ip.stdout and gitea_ip.stdout != 'NOT_FOUND' %}
✅ Gitea IP: {{ gitea_ip.stdout }}
{% else %}
❌ Gitea IP not found in traefik-public network
{% endif %}
================================================================================
- name: Test connection from Traefik to Gitea
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/traefik
docker compose exec -T traefik wget -qO- --timeout=5 http://gitea:3000/api/healthz 2>&1 || echo "CONNECTION_FAILED"
register: traefik_gitea_connection
changed_when: false
failed_when: false
- name: Display Traefik-Gitea connection test
ansible.builtin.debug:
msg: |
================================================================================
Traefik → Gitea Connection Test:
================================================================================
{% if 'CONNECTION_FAILED' in traefik_gitea_connection.stdout %}
❌ Traefik cannot reach Gitea
Error: {{ traefik_gitea_connection.stdout }}
{% else %}
✅ Traefik can reach Gitea
Response: {{ traefik_gitea_connection.stdout }}
{% endif %}
================================================================================
- name: Check Traefik routing configuration for Gitea
ansible.builtin.shell: |
docker inspect gitea --format '{{ '{{' }}json .Config.Labels{{ '}}' }}' 2>/dev/null | grep -i "traefik" || echo "NO_TRAEFIK_LABELS"
register: traefik_labels
changed_when: false
failed_when: false
- name: Display Traefik labels for Gitea
ansible.builtin.debug:
msg: |
================================================================================
Traefik Labels for Gitea:
================================================================================
{{ traefik_labels.stdout }}
================================================================================
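# For reference, a router/service label set for this host would typically look like the sketch
# below. This is an assumption for illustration – the real labels live in the Gitea stack's
# docker-compose.yml, and the entrypoint name may differ:
#   labels:
#     - "traefik.enable=true"
#     - "traefik.http.routers.gitea.rule=Host(`git.michaelschiemer.de`)"
#     - "traefik.http.routers.gitea.entrypoints=websecure"
#     - "traefik.http.routers.gitea.tls=true"
#     - "traefik.http.routers.gitea.service=gitea"
#     - "traefik.http.services.gitea.loadbalancer.server.port=3000"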
- name: Check Gitea logs for errors
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/gitea
docker compose logs gitea --tail=50 2>&1 | grep -iE "error|timeout|failed|panic|fatal" | tail -20 || echo "No errors in recent logs"
register: gitea_errors
changed_when: false
failed_when: false
- name: Display Gitea errors
ansible.builtin.debug:
msg: |
================================================================================
Gitea Error Logs (last 50 lines):
================================================================================
{{ gitea_errors.stdout }}
================================================================================
- name: Check Traefik logs for Gitea-related errors
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/traefik
docker compose logs traefik --tail=50 2>&1 | grep -iE "gitea|git\.michaelschiemer\.de|timeout|error" | tail -20 || echo "No Gitea-related errors in Traefik logs"
register: traefik_gitea_errors
changed_when: false
failed_when: false
- name: Display Traefik Gitea errors
ansible.builtin.debug:
msg: |
================================================================================
Traefik Gitea-Related Error Logs (last 50 lines):
================================================================================
{{ traefik_gitea_errors.stdout }}
================================================================================
- name: Check if Gitea is in traefik-public network
ansible.builtin.shell: |
docker network inspect traefik-public --format '{{ '{{' }}range .Containers{{ '}}' }}{{ '{{' }}.Name{{ '}}' }} {{ '{{' }}end{{ '}}' }}' 2>/dev/null | grep -q gitea && echo "YES" || echo "NO"
register: gitea_in_traefik_network
changed_when: false
failed_when: false
- name: Display Gitea network membership
ansible.builtin.debug:
msg: |
================================================================================
Gitea in traefik-public Network:
================================================================================
{% if gitea_in_traefik_network.stdout == 'YES' %}
✅ Gitea is in traefik-public network
{% else %}
❌ Gitea is NOT in traefik-public network
{% endif %}
================================================================================
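# If the container is missing from the network, it can usually be re-attached without a full
# redeploy (illustrative):
#   docker network connect traefik-public gitea
# or recreated so Compose re-applies its network configuration:
#   cd /home/deploy/deployment/stacks/gitea && docker compose up -d --force-recreate gitea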
- name: Check Redis connection from Gitea
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/gitea
docker compose exec -T gitea sh -c "redis-cli -h redis -p 6379 -a gitea_redis_password ping 2>&1" || echo "REDIS_CONNECTION_FAILED"
register: gitea_redis_connection
changed_when: false
failed_when: false
- name: Display Gitea-Redis connection
ansible.builtin.debug:
msg: |
================================================================================
Gitea → Redis Connection:
================================================================================
{% if 'REDIS_CONNECTION_FAILED' in gitea_redis_connection.stdout %}
❌ Gitea cannot connect to Redis
Error: {{ gitea_redis_connection.stdout }}
{% else %}
✅ Gitea can connect to Redis
Response: {{ gitea_redis_connection.stdout }}
{% endif %}
================================================================================
- name: Check PostgreSQL connection from Gitea
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/gitea
docker compose exec -T gitea sh -c "pg_isready -h postgres -p 5432 -U gitea 2>&1" || echo "POSTGRES_CONNECTION_FAILED"
register: gitea_postgres_connection
changed_when: false
failed_when: false
- name: Display Gitea-PostgreSQL connection
ansible.builtin.debug:
msg: |
================================================================================
Gitea → PostgreSQL Connection:
================================================================================
{% if 'POSTGRES_CONNECTION_FAILED' in gitea_postgres_connection.stdout %}
❌ Gitea cannot connect to PostgreSQL
Error: {{ gitea_postgres_connection.stdout }}
{% else %}
✅ Gitea can connect to PostgreSQL
Response: {{ gitea_postgres_connection.stdout }}
{% endif %}
================================================================================
- name: Summary and recommendations
ansible.builtin.debug:
msg: |
================================================================================
ZUSAMMENFASSUNG - Gitea Timeout Diagnose:
================================================================================
Gitea Status: {{ gitea_status.stdout | regex_replace('.*(Up|Down|Restarting).*', '\\1') | default('UNKNOWN') }}
Gitea Health (direct): {% if 'HEALTH_CHECK_FAILED' not in gitea_health_direct.stdout %}✅{% else %}❌{% endif %}
Gitea Health (via Traefik): {% if gitea_health_traefik.status == 200 %}✅{% else %}❌{% endif %}
Traefik Status: {{ traefik_status.stdout | regex_replace('.*(Up|Down|Restarting).*', '\\1') | default('UNKNOWN') }}
Redis Status: {{ redis_status.stdout | regex_replace('.*(Up|Down|Restarting).*', '\\1') | default('UNKNOWN') }}
PostgreSQL Status: {{ postgres_status.stdout | regex_replace('.*(Up|Down|Restarting).*', '\\1') | default('UNKNOWN') }}
Netzwerk:
- Gitea in traefik-public: {% if gitea_in_traefik_network.stdout == 'YES' %}✅{% else %}❌{% endif %}
- Traefik → Gitea: {% if 'CONNECTION_FAILED' not in traefik_gitea_connection.stdout %}✅{% else %}❌{% endif %}
- Gitea → Redis: {% if 'REDIS_CONNECTION_FAILED' not in gitea_redis_connection.stdout %}✅{% else %}❌{% endif %}
- Gitea → PostgreSQL: {% if 'POSTGRES_CONNECTION_FAILED' not in gitea_postgres_connection.stdout %}✅{% else %}❌{% endif %}
Empfohlene Aktionen:
{% if gitea_health_traefik.status != 200 %}
1. ❌ Gitea ist nicht über Traefik erreichbar
→ Führe 'fix-gitea-timeouts.yml' aus um Gitea und Traefik zu restarten
{% endif %}
{% if gitea_in_traefik_network.stdout != 'YES' %}
2. ❌ Gitea ist nicht im traefik-public Netzwerk
→ Gitea Container neu starten um Netzwerk-Verbindung zu aktualisieren
{% endif %}
{% if 'CONNECTION_FAILED' in traefik_gitea_connection.stdout %}
3. ❌ Traefik kann Gitea nicht erreichen
→ Beide Container neu starten
{% endif %}
{% if 'REDIS_CONNECTION_FAILED' in gitea_redis_connection.stdout %}
4. ❌ Gitea kann Redis nicht erreichen
→ Redis Container prüfen und neu starten
{% endif %}
{% if 'POSTGRES_CONNECTION_FAILED' in gitea_postgres_connection.stdout %}
5. ❌ Gitea kann PostgreSQL nicht erreichen
→ PostgreSQL Container prüfen und neu starten
{% endif %}
================================================================================
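# The restarts recommended above can be performed manually as follows (illustrative):
#   cd /home/deploy/deployment/stacks/gitea && docker compose restart gitea
#   cd /home/deploy/deployment/stacks/traefik && docker compose restart traefik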

View File

@@ -1,108 +0,0 @@
---
- name: Diagnose Staging 502 Bad Gateway Error
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check nginx error logs for 502 errors
shell: |
cd ~/deployment/stacks/staging
echo "=== Nginx Error Logs (Last 50 lines) ==="
docker compose logs --tail=50 staging-nginx 2>&1 | grep -iE "(error|502|bad gateway|php|fpm|upstream)" || echo "No obvious errors in logs"
args:
executable: /bin/bash
register: nginx_errors
ignore_errors: yes
failed_when: false
- name: Display nginx errors
debug:
msg: "{{ nginx_errors.stdout_lines }}"
- name: Check PHP-FPM status in staging-app
shell: |
cd ~/deployment/stacks/staging
echo "=== PHP-FPM Status Check ==="
docker compose exec -T staging-app php-fpm-healthcheck 2>&1 || echo "PHP-FPM healthcheck failed"
echo ""
echo "=== Check if PHP-FPM is running ==="
docker compose exec -T staging-app ps aux | grep -E "php-fpm|php" | head -10 || echo "Could not check processes"
args:
executable: /bin/bash
register: php_fpm_status
ignore_errors: yes
failed_when: false
- name: Display PHP-FPM status
debug:
msg: "{{ php_fpm_status.stdout_lines }}"
- name: Test PHP-FPM connection from nginx container
shell: |
cd ~/deployment/stacks/staging
echo "=== Test connection from nginx to PHP-FPM ==="
docker compose exec -T staging-nginx sh -c "nc -zv staging-app 9000 2>&1 || echo 'Connection test failed'" || echo "Connection test failed"
echo ""
echo "=== Try curl from nginx to PHP-FPM ==="
docker compose exec -T staging-nginx sh -c "curl -v http://staging-app:9000 2>&1 | head -20" || echo "Curl test failed"
args:
executable: /bin/bash
register: connection_test
ignore_errors: yes
failed_when: false
- name: Display connection test results
debug:
msg: "{{ connection_test.stdout_lines }}"
- name: Check nginx configuration for PHP-FPM upstream
shell: |
cd ~/deployment/stacks/staging
echo "=== Nginx Configuration ==="
docker compose exec -T staging-nginx cat /etc/nginx/conf.d/default.conf 2>&1 | grep -A 10 -B 5 "staging-app\|9000\|fastcgi_pass" || echo "Could not read nginx config"
args:
executable: /bin/bash
register: nginx_config
ignore_errors: yes
failed_when: false
- name: Display nginx configuration
debug:
msg: "{{ nginx_config.stdout_lines }}"
- name: Check if PHP-FPM is listening on port 9000
shell: |
cd ~/deployment/stacks/staging
echo "=== Check PHP-FPM port 9000 ==="
docker compose exec -T staging-app sh -c "netstat -tlnp 2>/dev/null | grep 9000 || ss -tlnp 2>/dev/null | grep 9000 || echo 'Port 9000 not found'" || echo "Could not check ports"
echo ""
echo "=== PHP-FPM pool config ==="
docker compose exec -T staging-app cat /usr/local/etc/php-fpm.d/www.conf 2>&1 | grep -E "(listen|listen.allowed_clients)" | head -5 || echo "Could not read PHP-FPM config"
args:
executable: /bin/bash
register: php_fpm_port
ignore_errors: yes
failed_when: false
- name: Display PHP-FPM port check
debug:
msg: "{{ php_fpm_port.stdout_lines }}"
- name: Check network connectivity between containers
shell: |
cd ~/deployment/stacks/staging
echo "=== Network connectivity ==="
docker compose exec -T staging-nginx ping -c 2 staging-app 2>&1 || echo "Ping failed"
echo ""
echo "=== Check docker networks ==="
docker network inspect staging-internal 2>&1 | grep -A 5 "staging-app\|staging-nginx" || echo "Could not inspect network"
args:
executable: /bin/bash
register: network_check
ignore_errors: yes
failed_when: false
- name: Display network check
debug:
msg: "{{ network_check.stdout_lines }}"

View File

@@ -1,125 +0,0 @@
---
- name: Diagnose Staging Logs Issue
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check if staging stack directory exists
shell: |
test -d ~/deployment/stacks/staging && echo "✅ Directory exists" || echo "❌ Directory missing"
args:
executable: /bin/bash
register: dir_check
- name: Display directory check result
debug:
msg: "{{ dir_check.stdout }}"
- name: Check if docker-compose.yml exists
shell: |
test -f ~/deployment/stacks/staging/docker-compose.yml && echo "✅ docker-compose.yml exists" || echo "❌ docker-compose.yml missing"
args:
executable: /bin/bash
register: compose_check
- name: Display compose file check result
debug:
msg: "{{ compose_check.stdout }}"
- name: List staging directory contents
shell: |
cd ~/deployment/stacks/staging && ls -la
args:
executable: /bin/bash
register: dir_contents
ignore_errors: yes
- name: Display directory contents
debug:
msg: "{{ dir_contents.stdout_lines }}"
- name: Check Docker Compose version
shell: |
cd ~/deployment/stacks/staging && docker compose version
args:
executable: /bin/bash
register: compose_version
ignore_errors: yes
- name: Display Docker Compose version
debug:
msg: "{{ compose_version.stdout }}"
- name: Check if containers exist
shell: |
docker ps -a | grep staging || echo "No staging containers found"
args:
executable: /bin/bash
register: container_list
ignore_errors: yes
- name: Display container list
debug:
msg: "{{ container_list.stdout_lines }}"
- name: Try docker compose ps (this is where it might fail)
shell: |
cd ~/deployment/stacks/staging && docker compose ps
args:
executable: /bin/bash
register: compose_ps
ignore_errors: yes
- name: Display docker compose ps result
debug:
msg: "{{ compose_ps.stdout_lines }}"
msg_also: "{{ compose_ps.stderr_lines }}"
- name: Try docker compose logs directly (without service name)
shell: |
cd ~/deployment/stacks/staging && docker compose logs --tail=50 2>&1 | head -100
args:
executable: /bin/bash
register: compose_logs_generic
ignore_errors: yes
- name: Display generic compose logs
debug:
msg: "{{ (compose_logs_generic.stdout_lines | default([])) + (compose_logs_generic.stderr_lines | default([])) }}"
- name: Try individual container logs using docker logs
shell: |
docker logs staging-app --tail=50 2>&1 || echo "❌ Could not get staging-app logs"
args:
executable: /bin/bash
register: direct_app_logs
ignore_errors: yes
- name: Display direct staging-app logs
debug:
msg: "{{ direct_app_logs.stdout_lines }}"
msg_also: "{{ direct_app_logs.stderr_lines }}"
- name: Check current working directory permissions
shell: |
ls -ld ~/deployment/stacks/staging && pwd && whoami
args:
executable: /bin/bash
register: perm_check
- name: Display permission check
debug:
msg: "{{ perm_check.stdout_lines }}"
- name: Check if we can read docker-compose.yml
shell: |
cd ~/deployment/stacks/staging && head -20 docker-compose.yml
args:
executable: /bin/bash
register: compose_read
ignore_errors: yes
- name: Display docker-compose.yml content (first 20 lines)
debug:
msg: "{{ (compose_read.stdout_lines | default([])) + (compose_read.stderr_lines | default([])) }}"

View File

@@ -0,0 +1,477 @@
---
# Diagnose: find the root cause of the Traefik restart loop
# Checks all plausible causes of the recurring Traefik restarts
- name: Diagnose Traefik Restart Loop
hosts: production
gather_facts: yes
become: yes
tasks:
- name: Check systemd timers
ansible.builtin.shell: |
systemctl list-timers --all --no-pager
register: systemd_timers
changed_when: false
- name: Display systemd timers
ansible.builtin.debug:
msg: |
================================================================================
Systemd Timers (können Container stoppen):
================================================================================
{{ systemd_timers.stdout }}
================================================================================
- name: Check root crontab
ansible.builtin.shell: |
crontab -l 2>/dev/null || echo "No root crontab"
register: root_crontab
changed_when: false
- name: Display root crontab
ansible.builtin.debug:
msg: |
================================================================================
Root Crontab:
================================================================================
{{ root_crontab.stdout }}
================================================================================
- name: Check deploy user crontab
ansible.builtin.shell: |
crontab -l -u deploy 2>/dev/null || echo "No deploy user crontab"
register: deploy_crontab
changed_when: false
- name: Display deploy user crontab
ansible.builtin.debug:
msg: |
================================================================================
Deploy User Crontab:
================================================================================
{{ deploy_crontab.stdout }}
================================================================================
- name: Check system-wide cron jobs
ansible.builtin.shell: |
echo "=== /etc/cron.d ==="
ls -la /etc/cron.d 2>/dev/null || echo "Directory not found"
grep -r "traefik\|docker.*compose.*traefik\|docker.*stop\|docker.*restart" /etc/cron.d 2>/dev/null || echo "No matches"
echo ""
echo "=== /etc/cron.daily ==="
ls -la /etc/cron.daily 2>/dev/null || echo "Directory not found"
grep -r "traefik\|docker.*compose.*traefik\|docker.*stop\|docker.*restart" /etc/cron.daily 2>/dev/null || echo "No matches"
echo ""
echo "=== /etc/cron.hourly ==="
ls -la /etc/cron.hourly 2>/dev/null || echo "Directory not found"
grep -r "traefik\|docker.*compose.*traefik\|docker.*stop\|docker.*restart" /etc/cron.hourly 2>/dev/null || echo "No matches"
echo ""
echo "=== /etc/cron.weekly ==="
ls -la /etc/cron.weekly 2>/dev/null || echo "Directory not found"
grep -r "traefik\|docker.*compose.*traefik\|docker.*stop\|docker.*restart" /etc/cron.weekly 2>/dev/null || echo "No matches"
echo ""
echo "=== /etc/cron.monthly ==="
ls -la /etc/cron.monthly 2>/dev/null || echo "Directory not found"
grep -r "traefik\|docker.*compose.*traefik\|docker.*stop\|docker.*restart" /etc/cron.monthly 2>/dev/null || echo "No matches"
register: system_cron
changed_when: false
- name: Display system cron jobs
ansible.builtin.debug:
msg: |
================================================================================
System-Wide Cron Jobs:
================================================================================
{{ system_cron.stdout }}
================================================================================
- name: Check for scripts that might restart Traefik
ansible.builtin.shell: |
find /home/deploy -type f -name "*.sh" -exec grep -l "traefik\|docker.*compose.*restart\|docker.*stop.*traefik\|docker.*down.*traefik" {} \; 2>/dev/null | head -20
register: traefik_scripts
changed_when: false
- name: Display scripts that might restart Traefik
ansible.builtin.debug:
msg: |
================================================================================
Scripts die Traefik stoppen/restarten könnten:
================================================================================
{% if traefik_scripts.stdout %}
{{ traefik_scripts.stdout }}
{% else %}
Keine Skripte gefunden
{% endif %}
================================================================================
- name: Check Docker events for Traefik container (last 24h)
ansible.builtin.shell: |
timeout 5 docker events --since 24h --filter container=traefik --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.name{{ '}}' }}" 2>/dev/null | tail -50 || echo "No recent events or docker events not available"
register: docker_events
changed_when: false
- name: Display Docker events
ansible.builtin.debug:
msg: |
================================================================================
Docker Events für Traefik (letzte 24h):
================================================================================
{{ docker_events.stdout }}
================================================================================
- name: Check Traefik container exit history
ansible.builtin.shell: |
docker ps -a --filter "name=traefik" --format "{{ '{{' }}.ID{{ '}}' }} | {{ '{{' }}.Status{{ '}}' }} | {{ '{{' }}.CreatedAt{{ '}}' }}" | head -10
register: traefik_exits
changed_when: false
- name: Display Traefik container exit history
ansible.builtin.debug:
msg: |
================================================================================
Traefik Container Exit-Historie:
================================================================================
{{ traefik_exits.stdout }}
================================================================================
- name: Check Docker daemon logs for Traefik stops
ansible.builtin.shell: |
journalctl -u docker.service --since "24h ago" --no-pager | grep -i "traefik\|stop\|kill" | tail -50 || echo "No relevant logs in journalctl"
register: docker_daemon_logs
changed_when: false
- name: Display Docker daemon logs
ansible.builtin.debug:
msg: |
================================================================================
Docker Daemon Logs (Traefik/Stop/Kill):
================================================================================
{{ docker_daemon_logs.stdout }}
================================================================================
- name: Check if there's a health check script running
ansible.builtin.shell: |
ps aux | grep -E "traefik|health.*check|monitor.*docker|auto.*heal|watchdog" | grep -v grep || echo "No health check processes found"
register: health_check_processes
changed_when: false
- name: Display health check processes
ansible.builtin.debug:
msg: |
================================================================================
Laufende Health-Check/Monitoring-Prozesse:
================================================================================
{{ health_check_processes.stdout }}
================================================================================
- name: Check for monitoring/auto-heal scripts
ansible.builtin.shell: |
find /home/deploy -type f \( -name "*monitor*" -o -name "*health*" -o -name "*auto*heal*" -o -name "*watchdog*" \) 2>/dev/null | head -20
register: monitoring_scripts
changed_when: false
- name: Display monitoring scripts
ansible.builtin.debug:
msg: |
================================================================================
Monitoring/Auto-Heal-Skripte:
================================================================================
{% if monitoring_scripts.stdout %}
{{ monitoring_scripts.stdout }}
{% else %}
Keine Monitoring-Skripte gefunden
{% endif %}
================================================================================
- name: Check Docker Compose file for restart policies
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/traefik && grep -A 5 "restart:" docker-compose.yml || echo "No restart policy found"
register: restart_policy
changed_when: false
- name: Display restart policy
ansible.builtin.debug:
msg: |
================================================================================
Docker Compose Restart Policy:
================================================================================
{{ restart_policy.stdout }}
================================================================================
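# For comparison: a typical long-running policy in a compose file is "restart: unless-stopped",
# which restarts the container after crashes and daemon restarts but leaves it down after an
# explicit "docker compose stop"; "always" would bring it back after a daemon restart even then.
# Sketch only – not taken from this repository's compose file:
#   services:
#     traefik:
#       restart: unless-stopped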
- name: Check if Traefik is managed by systemd
ansible.builtin.shell: |
systemctl list-units --type=service --all | grep -i traefik || echo "No Traefik systemd service found"
register: traefik_systemd
changed_when: false
- name: Display Traefik systemd service
ansible.builtin.debug:
msg: |
================================================================================
Traefik Systemd Service:
================================================================================
{{ traefik_systemd.stdout }}
================================================================================
- name: Check recent Traefik container logs for stop messages
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/traefik && docker compose logs traefik --since 24h 2>&1 | grep -E "I have to go|Stopping server gracefully|SIGTERM|SIGINT|received signal" | tail -20 || echo "No stop messages in logs"
register: traefik_stop_logs
changed_when: false
- name: Display Traefik stop messages
ansible.builtin.debug:
msg: |
================================================================================
Traefik Stop-Meldungen (letzte 24h):
================================================================================
{{ traefik_stop_logs.stdout }}
================================================================================
- name: Check Traefik container uptime and restart count
ansible.builtin.shell: |
docker inspect traefik --format '{{ '{{' }}.State.StartedAt{{ '}}' }} | {{ '{{' }}.State.FinishedAt{{ '}}' }} | Restarts: {{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "Container not found"
register: traefik_uptime
changed_when: false
- name: Display Traefik uptime and restart count
ansible.builtin.debug:
msg: |
================================================================================
Traefik Container Uptime & Restart Count:
================================================================================
{{ traefik_uptime.stdout }}
================================================================================
- name: Check for unattended-upgrades activity
ansible.builtin.shell: |
journalctl -u unattended-upgrades --since "24h ago" --no-pager | tail -20 || echo "No unattended-upgrades logs"
register: unattended_upgrades
changed_when: false
- name: Display unattended-upgrades activity
ansible.builtin.debug:
msg: |
================================================================================
Unattended-Upgrades Aktivität (kann zu Reboots führen):
================================================================================
{{ unattended_upgrades.stdout }}
================================================================================
- name: Check system reboot history
ansible.builtin.shell: |
last reboot | head -10 || echo "No reboot history available"
register: reboot_history
changed_when: false
- name: Display reboot history
ansible.builtin.debug:
msg: |
================================================================================
System Reboot-Historie:
================================================================================
{{ reboot_history.stdout }}
================================================================================
- name: Check Docker Compose processes that might affect Traefik
ansible.builtin.shell: |
ps aux | grep -E "docker.*compose.*traefik|docker-compose.*traefik" | grep -v grep || echo "No docker compose processes for Traefik found"
register: docker_compose_processes
changed_when: false
- name: Display Docker Compose processes
ansible.builtin.debug:
msg: |
================================================================================
Docker Compose Prozesse für Traefik:
================================================================================
{{ docker_compose_processes.stdout }}
================================================================================
- name: Check all user crontabs (not just root/deploy)
ansible.builtin.shell: |
for user in $(cut -f1 -d: /etc/passwd); do
crontab -u "$user" -l 2>/dev/null | grep -q "traefik\|docker.*compose.*traefik\|docker.*restart.*traefik" && echo "=== User: $user ===" && crontab -u "$user" -l 2>/dev/null | grep -E "traefik|docker.*compose.*traefik|docker.*restart.*traefik" || true
done || echo "No user crontabs with Traefik commands found"
register: all_user_crontabs
changed_when: false
- name: Display all user crontabs with Traefik commands
ansible.builtin.debug:
msg: |
================================================================================
Alle User-Crontabs mit Traefik-Befehlen:
================================================================================
{{ all_user_crontabs.stdout }}
================================================================================
- name: Check for Gitea Workflows that might restart Traefik
ansible.builtin.shell: |
find /home/deploy -type f \( -path "*/.gitea/workflows/*.yml" -o -path "*/.github/workflows/*.yml" \) 2>/dev/null | xargs grep -l "traefik\|restart.*traefik\|docker.*compose.*traefik" 2>/dev/null | head -10 || echo "No Gitea/GitHub workflows found that restart Traefik"
register: gitea_workflows
changed_when: false
- name: Display Gitea Workflows that might restart Traefik
ansible.builtin.debug:
msg: |
================================================================================
Gitea/GitHub Workflows die Traefik restarten könnten:
================================================================================
{{ gitea_workflows.stdout }}
================================================================================
- name: Check for custom systemd services in /etc/systemd/system/
ansible.builtin.shell: |
find /etc/systemd/system -type f \( -name "*.service" -o -name "*.timer" \) 2>/dev/null | xargs grep -l "traefik\|docker.*compose.*traefik\|docker.*restart.*traefik" 2>/dev/null | head -10 || echo "No custom systemd services/timers found for Traefik"
register: custom_systemd_services
changed_when: false
- name: Display custom systemd services
ansible.builtin.debug:
msg: |
================================================================================
Custom Systemd Services/Timers für Traefik:
================================================================================
{{ custom_systemd_services.stdout }}
================================================================================
- name: Check for at jobs (scheduled tasks)
ansible.builtin.shell: |
atq 2>/dev/null | while read line; do
job_id=$(echo "$line" | awk '{print $1}')
at -c "$job_id" 2>/dev/null | grep -q "traefik\|docker.*compose.*traefik\|docker.*restart.*traefik" && echo "=== Job ID: $job_id ===" && at -c "$job_id" 2>/dev/null | grep -E "traefik|docker.*compose.*traefik|docker.*restart.*traefik" || true
done || echo "No at jobs found or atq not available"
register: at_jobs
changed_when: false
- name: Display at jobs
ansible.builtin.debug:
msg: |
================================================================================
At Jobs (geplante Tasks) die Traefik betreffen:
================================================================================
{{ at_jobs.stdout }}
================================================================================
- name: Check for Docker Compose watch mode
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/traefik && docker compose ps --format json 2>/dev/null | jq -r '.[] | select(.Service=="traefik") | .State' || echo "Could not check Docker Compose watch mode"
register: docker_compose_watch
changed_when: false
- name: Check if Docker Compose watch is enabled
ansible.builtin.shell: |
cd /home/deploy/deployment/stacks/traefik && docker compose config 2>/dev/null | grep -i "watch\|x-develop" || echo "No watch mode configured"
register: docker_compose_watch_config
changed_when: false
- name: Display Docker Compose watch mode
ansible.builtin.debug:
msg: |
================================================================================
Docker Compose Watch Mode:
================================================================================
Watch Config: {{ docker_compose_watch_config.stdout }}
================================================================================
- name: Check Ansible traefik_auto_restart setting
ansible.builtin.shell: |
grep -r "traefik_auto_restart" /home/deploy/deployment/ansible/roles/traefik/defaults/ /home/deploy/deployment/ansible/inventory/ 2>/dev/null | head -10 || echo "traefik_auto_restart not found in Ansible config"
register: ansible_auto_restart
changed_when: false
- name: Display Ansible traefik_auto_restart setting
ansible.builtin.debug:
msg: |
================================================================================
Ansible traefik_auto_restart Einstellung:
================================================================================
{{ ansible_auto_restart.stdout }}
================================================================================
- name: Check Port 80/443 configuration
ansible.builtin.shell: |
echo "=== Port 80 ==="
netstat -tlnp 2>/dev/null | grep ":80 " || ss -tlnp 2>/dev/null | grep ":80 " || echo "Could not check port 80"
echo ""
echo "=== Port 443 ==="
netstat -tlnp 2>/dev/null | grep ":443 " || ss -tlnp 2>/dev/null | grep ":443 " || echo "Could not check port 443"
echo ""
echo "=== Docker Port Mappings for Traefik ==="
docker inspect traefik --format '{{ '{{' }}json .HostConfig.PortBindings{{ '}}' }}' 2>/dev/null | jq '.' || echo "Could not get Docker port mappings"
register: port_config
changed_when: false
- name: Display Port configuration
ansible.builtin.debug:
msg: |
================================================================================
Port-Konfiguration (80/443):
================================================================================
{{ port_config.stdout }}
================================================================================
- name: Check if other services are blocking ports 80/443
ansible.builtin.shell: |
echo "=== Services listening on port 80 ==="
lsof -i :80 2>/dev/null || fuser 80/tcp 2>/dev/null || echo "Could not check port 80"
echo ""
echo "=== Services listening on port 443 ==="
lsof -i :443 2>/dev/null || fuser 443/tcp 2>/dev/null || echo "Could not check port 443"
register: port_blockers
changed_when: false
- name: Display port blockers
ansible.builtin.debug:
msg: |
================================================================================
Services die Ports 80/443 blockieren könnten:
================================================================================
{{ port_blockers.stdout }}
================================================================================
- name: Check Traefik network configuration
ansible.builtin.shell: |
docker inspect traefik --format '{{ '{{' }}json .NetworkSettings{{ '}}' }}' 2>/dev/null | jq '.Networks' || echo "Could not get Traefik network configuration"
register: traefik_network
changed_when: false
- name: Display Traefik network configuration
ansible.builtin.debug:
msg: |
================================================================================
Traefik Netzwerk-Konfiguration:
================================================================================
{{ traefik_network.stdout }}
================================================================================
- name: Summary - Most likely causes
ansible.builtin.debug:
msg: |
================================================================================
ZUSAMMENFASSUNG - Mögliche Ursachen für Traefik-Restarts:
================================================================================
Prüfe die obigen Ausgaben auf:
1. Systemd-Timer: Können Container stoppen (z.B. unattended-upgrades)
2. Cronjobs: Regelmäßige Skripte die Traefik stoppen (alle User-Crontabs geprüft)
3. Docker-Events: Zeigen wer/was den Container stoppt
4. Monitoring-Skripte: Auto-Heal-Skripte die bei Fehlern restarten
5. Unattended-Upgrades: Können zu Reboots führen
6. Reboot-Historie: System-Reboots stoppen alle Container
7. Gitea Workflows: Können Traefik via Ansible restarten
8. Custom Systemd Services: Eigene Services die Traefik verwalten
9. At Jobs: Geplante Tasks die Traefik stoppen
10. Docker Compose Watch Mode: Automatische Restarts bei Dateiänderungen
11. Ansible traefik_auto_restart: Automatische Restarts nach Config-Deployment
12. Port-Konfiguration: Ports 80/443 müssen auf Traefik zeigen
Nächste Schritte:
- Prüfe die Docker-Events für wiederkehrende Muster (Live-Beispiel siehe Kommentar unten)
- Prüfe alle User-Crontabs auf regelmäßige Traefik-Befehle
- Prüfe ob Monitoring-Skripte zu aggressiv sind
- Prüfe ob unattended-upgrades zu Reboots führt
- Prüfe ob traefik_auto_restart zu häufigen Restarts führt
- Verifiziere Port-Konfiguration (80/443)
================================================================================
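# To catch the trigger in the act, the event stream can be watched live on the host
# (illustrative; exit with Ctrl+C):
#   docker events --filter container=traefik --filter event=die --filter event=stop \
#     --format '{{.Time}} {{.Action}} signal={{.Actor.Attributes.signal}}'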

View File

@@ -1,172 +0,0 @@
---
- name: Diagnose VPN Routing Problem for Grafana
hosts: production
gather_facts: yes
become: yes
become_user: root
tasks:
- name: Check WireGuard interface status
shell: |
echo "=== WireGuard Interface Status ==="
ip addr show wg0 2>&1 || echo "WireGuard interface not found"
echo ""
echo "=== WireGuard Peers ==="
wg show 2>&1 || echo "WireGuard not running"
register: wg_status
ignore_errors: yes
failed_when: false
- name: Display WireGuard status
debug:
msg: "{{ wg_status.stdout_lines }}"
- name: Check routing table for VPN network
shell: |
echo "=== Routing Table for 10.8.0.0/24 ==="
ip route show | grep 10.8.0 || echo "No routes found for 10.8.0.0/24"
echo ""
echo "=== Default Route ==="
ip route show default || echo "No default route"
register: routing_info
ignore_errors: yes
failed_when: false
- name: Display routing information
debug:
msg: "{{ routing_info.stdout_lines }}"
- name: Check Traefik access logs for recent Grafana requests
shell: |
cd ~/deployment/stacks/traefik
echo "=== Recent Grafana Access (Last 10 requests) ==="
tail -50 logs/access.log | grep grafana | tail -10 | jq -r '[.ClientAddr, .ClientHost, .RequestHost, .DownstreamStatus] | @tsv' 2>&1 || tail -50 logs/access.log | grep grafana | tail -10
args:
executable: /bin/bash
register: traefik_access
ignore_errors: yes
failed_when: false
- name: Display Traefik access logs
debug:
msg: "{{ traefik_access.stdout_lines }}"
- name: Test DNS resolution from server
shell: |
echo "=== DNS Resolution Tests ==="
echo "1. Grafana via VPN DNS (10.8.0.1):"
dig +short grafana.michaelschiemer.de @10.8.0.1 2>&1 || echo "Failed"
echo ""
echo "2. Grafana via public DNS (8.8.8.8):"
dig +short grafana.michaelschiemer.de @8.8.8.8 2>&1 || echo "Failed"
echo ""
echo "3. Grafana via system DNS:"
dig +short grafana.michaelschiemer.de 2>&1 || echo "Failed"
args:
executable: /bin/bash
register: dns_tests
ignore_errors: yes
failed_when: false
- name: Display DNS test results
debug:
msg: "{{ dns_tests.stdout_lines }}"
- name: Check firewall rules for WireGuard
shell: |
echo "=== Firewall Rules for WireGuard (port 51820) ==="
sudo ufw status | grep 51820 || sudo iptables -L -n | grep 51820 || echo "No firewall rules found"
echo ""
echo "=== Allowed IPs in WireGuard Config ==="
grep -E "AllowedIPs" /etc/wireguard/wg0.conf 2>&1 || echo "WireGuard config not found"
args:
executable: /bin/bash
register: firewall_info
ignore_errors: yes
failed_when: false
- name: Display firewall information
debug:
msg: "{{ firewall_info.stdout_lines }}"
- name: Check Traefik forwardedHeaders configuration
shell: |
cd ~/deployment/stacks/traefik
echo "=== Traefik forwardedHeaders Config ==="
grep -A 10 "forwardedHeaders:" traefik.yml || echo "Not found"
args:
executable: /bin/bash
register: forwarded_headers
ignore_errors: yes
failed_when: false
- name: Display forwardedHeaders configuration
debug:
msg: "{{ forwarded_headers.stdout_lines }}"
- name: Check Grafana middleware configuration
shell: |
cd ~/deployment/stacks/traefik/dynamic
echo "=== Grafana VPN Only Middleware ==="
grep -A 6 "grafana-vpn-only:" middlewares.yml || echo "Not found"
args:
executable: /bin/bash
register: grafana_middleware
ignore_errors: yes
failed_when: false
- name: Display Grafana middleware configuration
debug:
msg: "{{ grafana_middleware.stdout_lines }}"
- name: Check CoreDNS configuration
shell: |
cd ~/deployment/stacks/dns
echo "=== CoreDNS Corefile ==="
cat Corefile 2>&1 || echo "Not found"
args:
executable: /bin/bash
register: coredns_config
ignore_errors: yes
failed_when: false
- name: Display CoreDNS configuration
debug:
msg: "{{ coredns_config.stdout_lines }}"
- name: Test connection to Grafana from server via VPN IP
shell: |
echo "=== Test Connection to Grafana via VPN IP (10.8.0.1) ==="
curl -k -H "User-Agent: Mozilla/5.0" -s -o /dev/null -w "HTTP %{http_code}\n" https://10.8.0.1:443 -H "Host: grafana.michaelschiemer.de" 2>&1 || echo "Connection failed"
echo ""
echo "=== Test Connection via Domain ==="
curl -k -H "User-Agent: Mozilla/5.0" -s -o /dev/null -w "HTTP %{http_code}\n" https://grafana.michaelschiemer.de/ 2>&1 || echo "Connection failed"
args:
executable: /bin/bash
register: connection_tests
ignore_errors: yes
failed_when: false
- name: Display connection test results
debug:
msg: "{{ connection_tests.stdout_lines }}"
- name: Monitor Traefik access logs in real-time (for next request)
shell: |
echo "=== Instructions ==="
echo "1. Connect to VPN with your WireGuard client"
echo "2. Ensure DNS is set to 10.8.0.1 in WireGuard config"
echo "3. Access https://grafana.michaelschiemer.de in your browser"
echo "4. Check the ClientAddr in the access logs below"
echo ""
echo "=== Last Grafana Access Attempt ==="
tail -1 ~/deployment/stacks/traefik/logs/access.log 2>&1 | jq -r '[.ClientAddr, .ClientHost, .DownstreamStatus] | @tsv' || tail -1 ~/deployment/stacks/traefik/logs/access.log
args:
executable: /bin/bash
register: monitoring_info
ignore_errors: yes
failed_when: false
- name: Display monitoring instructions
debug:
msg: "{{ monitoring_info.stdout_lines }}"

View File

@@ -0,0 +1,136 @@
---
# Disable Traefik Auto-Restarts
# Reports the traefik_auto_restart setting plus any cronjobs/systemd timers that restart Traefik (their removal remains a manual step)
- name: Disable Traefik Auto-Restarts
hosts: production
gather_facts: yes
become: yes
tasks:
- name: Check current traefik_auto_restart setting in Ansible defaults
ansible.builtin.shell: |
grep -r "traefik_auto_restart" /home/deploy/deployment/ansible/roles/traefik/defaults/main.yml 2>/dev/null || echo "Setting not found"
register: current_auto_restart_setting
changed_when: false
- name: Display current traefik_auto_restart setting
ansible.builtin.debug:
msg: |
================================================================================
Aktuelle traefik_auto_restart Einstellung:
================================================================================
{{ current_auto_restart_setting.stdout }}
================================================================================
- name: Check for cronjobs that restart Traefik
ansible.builtin.shell: |
for user in $(cut -f1 -d: /etc/passwd); do
crontab -u "$user" -l 2>/dev/null | grep -q "traefik\|docker.*compose.*traefik.*restart" && echo "=== User: $user ===" && crontab -u "$user" -l 2>/dev/null | grep -E "traefik|docker.*compose.*traefik.*restart" || true
done || echo "No cronjobs found that restart Traefik"
register: traefik_cronjobs
changed_when: false
- name: Display Traefik cronjobs
ansible.builtin.debug:
msg: |
================================================================================
Cronjobs die Traefik restarten:
================================================================================
{{ traefik_cronjobs.stdout }}
================================================================================
- name: Check for systemd timers that restart Traefik
ansible.builtin.shell: |
find /etc/systemd/system -type f -name "*.timer" 2>/dev/null | xargs grep -l "traefik\|docker.*compose.*traefik.*restart" 2>/dev/null | head -10 || echo "No systemd timers found for Traefik"
register: traefik_timers
changed_when: false
- name: Display Traefik systemd timers
ansible.builtin.debug:
msg: |
================================================================================
Systemd Timers die Traefik restarten:
================================================================================
{{ traefik_timers.stdout }}
================================================================================
- name: Check for systemd services that restart Traefik
ansible.builtin.shell: |
find /etc/systemd/system -type f -name "*.service" 2>/dev/null | xargs grep -l "traefik\|docker.*compose.*traefik.*restart" 2>/dev/null | head -10 || echo "No systemd services found for Traefik"
register: traefik_services
changed_when: false
- name: Display Traefik systemd services
ansible.builtin.debug:
msg: |
================================================================================
Systemd Services die Traefik restarten:
================================================================================
{{ traefik_services.stdout }}
================================================================================
- name: Summary - Found auto-restart mechanisms
ansible.builtin.debug:
msg: |
================================================================================
ZUSAMMENFASSUNG - Gefundene Auto-Restart-Mechanismen:
================================================================================
Ansible traefik_auto_restart: {{ current_auto_restart_setting.stdout }}
{% if traefik_cronjobs.stdout and 'No cronjobs' not in traefik_cronjobs.stdout %}
⚠️ Gefundene Cronjobs:
{{ traefik_cronjobs.stdout }}
Manuelle Deaktivierung erforderlich:
- Entferne die Cronjob-Einträge manuell
- Oder verwende: crontab -e
{% endif %}
{% if traefik_timers.stdout and 'No systemd timers' not in traefik_timers.stdout %}
⚠️ Gefundene Systemd Timers:
{{ traefik_timers.stdout }}
Manuelle Deaktivierung erforderlich:
- systemctl stop <timer-name>
- systemctl disable <timer-name>
{% endif %}
{% if traefik_services.stdout and 'No systemd services' not in traefik_services.stdout %}
⚠️ Gefundene Systemd Services:
{{ traefik_services.stdout }}
Manuelle Deaktivierung erforderlich:
- systemctl stop <service-name>
- systemctl disable <service-name>
{% endif %}
{% if 'No cronjobs' in traefik_cronjobs.stdout and 'No systemd timers' in traefik_timers.stdout and 'No systemd services' in traefik_services.stdout %}
✅ Keine automatischen Restart-Mechanismen gefunden (außer Ansible traefik_auto_restart)
{% endif %}
Empfehlung:
- Setze traefik_auto_restart: false in group_vars oder inventory
- Oder überschreibe bei Config-Deployment: -e "traefik_auto_restart=false"
================================================================================
- name: Note - Manual steps required
ansible.builtin.debug:
msg: |
================================================================================
HINWEIS - Manuelle Schritte erforderlich:
================================================================================
Dieses Playbook zeigt nur gefundene Auto-Restart-Mechanismen an.
Um traefik_auto_restart zu deaktivieren:
1. In group_vars/production/vars.yml oder inventory hinzufügen:
traefik_auto_restart: false
2. Oder bei jedem Config-Deployment überschreiben:
ansible-playbook ... -e "traefik_auto_restart=false"
3. Für Cronjobs/Systemd: Siehe oben für manuelle Deaktivierung
================================================================================

View File

@@ -0,0 +1,90 @@
---
# Ensure Gitea is Discovered by Traefik
# This playbook ensures that Traefik properly discovers Gitea after restarts
- name: Ensure Gitea is Discovered by Traefik
hosts: production
gather_facts: no
become: no
vars:
traefik_stack_path: "{{ stacks_base_path }}/traefik"
gitea_stack_path: "{{ stacks_base_path }}/gitea"
max_wait_seconds: 60
check_interval: 5
tasks:
- name: Check if Gitea container is running
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
docker compose ps gitea | grep -q "Up" && echo "RUNNING" || echo "NOT_RUNNING"
register: gitea_status
changed_when: false
- name: Start Gitea if not running
ansible.builtin.shell: |
cd {{ gitea_stack_path }}
docker compose up -d gitea
when: gitea_status.stdout == "NOT_RUNNING"
register: gitea_start
- name: Wait for Gitea to be ready
ansible.builtin.wait_for:
timeout: 30
delay: 2
when: gitea_start.changed | default(false) | bool
- name: Check if Traefik can see Gitea container
ansible.builtin.shell: |
cd {{ traefik_stack_path }}
docker compose exec -T traefik sh -c 'wget -qO- http://localhost:8080/api/http/routers 2>&1 | grep -qi gitea && echo "FOUND" || echo "NOT_FOUND"'
register: traefik_gitea_check
changed_when: false
failed_when: false
retries: "{{ (max_wait_seconds | int) // (check_interval | int) }}"
delay: "{{ check_interval }}"
until: traefik_gitea_check.stdout == "FOUND"
- name: Restart Traefik if Gitea not found
ansible.builtin.shell: |
cd {{ traefik_stack_path }}
docker compose restart traefik
when: traefik_gitea_check.stdout == "NOT_FOUND"
register: traefik_restart
- name: Wait for Traefik to be ready after restart
ansible.builtin.wait_for:
timeout: 30
delay: 2
when: traefik_restart.changed | default(false) | bool
- name: Verify Gitea is reachable via Traefik
ansible.builtin.uri:
url: "https://{{ gitea_domain }}/api/healthz"
method: GET
status_code: [200]
validate_certs: false
timeout: 10
register: gitea_health_check
retries: 5
delay: 2
until: gitea_health_check.status == 200
failed_when: false
- name: Display result
ansible.builtin.debug:
msg: |
================================================================================
GITEA TRAEFIK DISCOVERY - RESULT
================================================================================
Gitea Status: {{ gitea_status.stdout }}
Traefik Discovery: {{ traefik_gitea_check.stdout }}
Gitea Health Check: {{ 'OK' if (gitea_health_check.status | default(0) == 200) else 'FAILED' }}
{% if gitea_health_check.status | default(0) == 200 %}
✅ Gitea is reachable via Traefik
{% else %}
❌ Gitea is not reachable via Traefik
{% endif %}
================================================================================
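# The discovery check above can also be run by hand against Traefik's internal API
# (illustrative; like the task above it assumes the API listens on port 8080 inside the container):
#   cd /home/deploy/deployment/stacks/traefik && \
#     docker compose exec -T traefik wget -qO- http://localhost:8080/api/http/routers | grep -i gitea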

View File

@@ -0,0 +1,79 @@
---
- name: Final Status Check - All Containers
hosts: production
gather_facts: no
become: no
vars:
application_stack_dest: "{{ app_stack_path | default(stacks_base_path + '/production') }}"
application_compose_suffix: "production.yml"
tasks:
- name: Wait for containers to fully start
pause:
seconds: 15
- name: Get all container status
shell: |
docker compose -f {{ application_stack_dest }}/docker-compose.base.yml -f {{ application_stack_dest }}/docker-compose.{{ application_compose_suffix }} ps
register: all_containers
changed_when: false
- name: Display all container status
debug:
msg: |
========================================
Final Container Status
========================================
{{ all_containers.stdout }}
- name: Check web container health
shell: |
docker compose -f {{ application_stack_dest }}/docker-compose.base.yml -f {{ application_stack_dest }}/docker-compose.{{ application_compose_suffix }} exec -T web curl -f http://localhost/health 2>&1 || echo "HEALTH_CHECK_FAILED"
register: web_health_check
changed_when: false
failed_when: false
ignore_errors: yes
- name: Display web health check
debug:
msg: |
Web Container Health Check:
{{ web_health_check.stdout }}
- name: Get web container logs (last 10 lines)
shell: |
docker compose -f {{ application_stack_dest }}/docker-compose.base.yml -f {{ application_stack_dest }}/docker-compose.{{ application_compose_suffix }} logs --tail=10 web 2>&1 | tail -10 || true
register: web_logs
changed_when: false
- name: Display web container logs
debug:
msg: |
Web Container Logs (last 10 lines):
{{ web_logs.stdout }}
- name: Get queue-worker logs (last 3 lines)
shell: |
docker compose -f {{ application_stack_dest }}/docker-compose.base.yml -f {{ application_stack_dest }}/docker-compose.{{ application_compose_suffix }} logs --tail=3 queue-worker 2>&1 | tail -3 || true
register: queue_worker_logs
changed_when: false
- name: Display queue-worker logs
debug:
msg: |
Queue-Worker (last 3 lines):
{{ queue_worker_logs.stdout }}
- name: Get scheduler logs (last 3 lines)
shell: |
docker compose -f {{ application_stack_dest }}/docker-compose.base.yml -f {{ application_stack_dest }}/docker-compose.{{ application_compose_suffix }} logs --tail=3 scheduler 2>&1 | tail -3 || true
register: scheduler_logs
changed_when: false
- name: Display scheduler logs
debug:
msg: |
Scheduler (last 3 lines):
{{ scheduler_logs.stdout }}

View File

@@ -0,0 +1,246 @@
---
# Find Ansible Automation Source
# Finds the source of the external Ansible automation that keeps restarting Traefik
- name: Find Ansible Automation Source
hosts: production
gather_facts: yes
become: yes
tasks:
- name: Check for running Ansible processes
ansible.builtin.shell: |
ps aux | grep -E "ansible|ansible-playbook|ansible-pull" | grep -v grep || echo "No Ansible processes found"
register: ansible_processes
changed_when: false
- name: Check for ansible-pull processes
ansible.builtin.shell: |
ps aux | grep ansible-pull | grep -v grep || echo "No ansible-pull processes found"
register: ansible_pull_processes
changed_when: false
- name: Check systemd timers for ansible-pull
ansible.builtin.shell: |
systemctl list-timers --all --no-pager | grep -i ansible || echo "No ansible timers found"
register: ansible_timers
changed_when: false
- name: Check for ansible-pull cronjobs
ansible.builtin.shell: |
for user in $(cut -f1 -d: /etc/passwd); do
crontab -u "$user" -l 2>/dev/null | grep -q "ansible-pull\|ansible.*playbook" && echo "=== User: $user ===" && crontab -u "$user" -l 2>/dev/null | grep -E "ansible-pull|ansible.*playbook" || true
done || echo "No ansible-pull cronjobs found"
register: ansible_cronjobs
changed_when: false
- name: Check system-wide cron for ansible
ansible.builtin.shell: |
for dir in /etc/cron.d /etc/cron.daily /etc/cron.hourly /etc/cron.weekly /etc/cron.monthly; do
if [ -d "$dir" ]; then
grep -rE "ansible-pull|ansible.*playbook" "$dir" 2>/dev/null && echo "=== Found in $dir ===" || true
fi
done || echo "No ansible in system cron"
register: ansible_system_cron
changed_when: false
- name: Check journalctl for ansible-ansible processes
ansible.builtin.shell: |
journalctl --since "24 hours ago" --no-pager | grep -iE "ansible-ansible|ansible-playbook|ansible-pull" | tail -50 || echo "No ansible processes in journalctl"
register: ansible_journal
changed_when: false
- name: Check for ansible-pull configuration files
ansible.builtin.shell: |
find /home -name "*ansible-pull*" -o -name "*ansible*.yml" -path "*/ansible-pull/*" 2>/dev/null | head -20 || echo "No ansible-pull config files found"
register: ansible_pull_configs
changed_when: false
- name: Check for running docker compose commands related to Traefik
ansible.builtin.shell: |
ps aux | grep -E "docker.*compose.*traefik|docker.*restart.*traefik" | grep -v grep || echo "No docker compose traefik commands running"
register: docker_traefik_commands
changed_when: false
- name: Check Docker events for Traefik kill events (last hour)
ansible.builtin.shell: |
docker events --since 1h --until now --filter container=traefik --filter event=die --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.signal{{ '}}' }}" 2>/dev/null | tail -20 || echo "No Traefik die events in last hour"
register: traefik_kill_events
changed_when: false
failed_when: false
- name: Check journalctl for docker compose traefik commands
ansible.builtin.shell: |
journalctl --since "24 hours ago" --no-pager | grep -iE "docker.*compose.*traefik|docker.*restart.*traefik" | tail -30 || echo "No docker compose traefik commands in journalctl"
register: docker_traefik_journal
changed_when: false
- name: Check for CI/CD scripts that might run Ansible
ansible.builtin.shell: |
find /home/deploy -type f \( -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) -exec grep -lE "ansible.*playbook.*traefik|docker.*compose.*traefik.*restart" {} \; 2>/dev/null | head -20 || echo "No CI/CD scripts found"
register: cicd_scripts
changed_when: false
- name: Check for Gitea Workflows that run Ansible
ansible.builtin.shell: |
find /home/deploy -type f -path "*/.gitea/workflows/*.yml" -o -path "*/.github/workflows/*.yml" 2>/dev/null | xargs grep -lE "ansible.*playbook.*traefik|docker.*compose.*traefik" 2>/dev/null | head -10 || echo "No Gitea workflows found"
register: gitea_workflows
changed_when: false
- name: Check for monitoring/healthcheck scripts
ansible.builtin.shell: |
find /home/deploy -type f -name "*monitor*" -o -name "*health*" 2>/dev/null | xargs grep -lE "traefik.*restart|docker.*compose.*traefik" 2>/dev/null | head -10 || echo "No monitoring scripts found"
register: monitoring_scripts
changed_when: false
- name: Summary
ansible.builtin.debug:
msg: |
================================================================================
ANSIBLE AUTOMATION SOURCE DIAGNOSIS:
================================================================================
Running Ansible processes:
{{ ansible_processes.stdout }}
Ansible-pull processes:
{{ ansible_pull_processes.stdout }}
Systemd timers for Ansible:
{{ ansible_timers.stdout }}
Cron jobs for Ansible:
{{ ansible_cronjobs.stdout }}
System cron for Ansible:
{{ ansible_system_cron.stdout }}
Ansible processes in journalctl (last 24h):
{{ ansible_journal.stdout }}
Ansible-pull configuration files:
{{ ansible_pull_configs.stdout }}
Running Docker Compose Traefik commands:
{{ docker_traefik_commands.stdout }}
Traefik kill events (last hour):
{{ traefik_kill_events.stdout }}
Docker Compose Traefik commands in journalctl:
{{ docker_traefik_journal.stdout }}
CI/CD scripts that restart Traefik:
{{ cicd_scripts.stdout }}
Gitea workflows that restart Traefik:
{{ gitea_workflows.stdout }}
Monitoring scripts that restart Traefik:
{{ monitoring_scripts.stdout }}
================================================================================
ANALYSIS:
================================================================================
{% if 'No Ansible processes found' not in ansible_processes.stdout %}
⚠️ ACTIVE ANSIBLE PROCESSES FOUND:
{{ ansible_processes.stdout }}
→ These processes may be restarting Traefik on a schedule
→ Inspect their command lines to identify the playbook involved
{% endif %}
{% if 'No ansible-pull processes found' not in ansible_pull_processes.stdout %}
❌ ANSIBLE-PULL IS RUNNING:
{{ ansible_pull_processes.stdout }}
→ ansible-pull executes playbooks on a schedule
→ This is the most likely source of the Traefik restarts
{% endif %}
{% if 'No ansible timers found' not in ansible_timers.stdout %}
❌ ANSIBLE TIMER FOUND:
{{ ansible_timers.stdout }}
→ A systemd timer runs Ansible on a schedule
→ Disable it with: systemctl disable <timer-name>
{% endif %}
{% if 'No ansible-pull cronjobs found' not in ansible_cronjobs.stdout %}
❌ ANSIBLE CRON JOB FOUND:
{{ ansible_cronjobs.stdout }}
→ A cron job runs Ansible on a schedule
→ Remove or comment out the cron entry
{% endif %}
{% if cicd_scripts.stdout and 'No CI/CD scripts found' not in cicd_scripts.stdout %}
⚠️ CI/CD SCRIPTS FOUND:
{{ cicd_scripts.stdout }}
→ These scripts may be restarting Traefik on a schedule
→ Review these files and remove or comment out the Traefik restart commands
{% endif %}
{% if gitea_workflows.stdout and 'No Gitea workflows found' not in gitea_workflows.stdout %}
⚠️ GITEA WORKFLOWS FOUND:
{{ gitea_workflows.stdout }}
→ These workflows may be restarting Traefik on a schedule
→ Review these workflows and remove or comment out the Traefik restart steps
{% endif %}
{% if monitoring_scripts.stdout and 'No monitoring scripts found' not in monitoring_scripts.stdout %}
⚠️ MONITORING SCRIPTS FOUND:
{{ monitoring_scripts.stdout }}
→ These scripts may be restarting Traefik on a schedule
→ Review these scripts and remove or comment out the Traefik restart commands
{% endif %}
================================================================================
SOLUTION:
================================================================================
{% if 'No Ansible processes found' in ansible_processes.stdout and 'No ansible-pull processes found' in ansible_pull_processes.stdout and 'No ansible timers found' in ansible_timers.stdout and 'No ansible-pull cronjobs found' in ansible_cronjobs.stdout %}
No active Ansible automation found
Possible causes:
1. Ansible processes only run intermittently
2. An external CI/CD pipeline runs Ansible
3. Manual Ansible runs from outside the host
Next steps:
1. Watch Docker events in real time: docker events --filter container=traefik
2. Watch Ansible processes: watch -n 1 'ps aux | grep ansible'
3. Check whether external CI/CD pipelines run Ansible
{% else %}
IMMEDIATE ACTION:
{% if 'No ansible-pull processes found' not in ansible_pull_processes.stdout %}
1. ❌ Stop ansible-pull:
pkill -f ansible-pull
{% endif %}
{% if 'No ansible timers found' not in ansible_timers.stdout %}
2. ❌ Disable the Ansible timer:
systemctl stop <timer-name>
systemctl disable <timer-name>
{% endif %}
{% if 'No ansible-pull cronjobs found' not in ansible_cronjobs.stdout %}
3. ❌ Remove the Ansible cron jobs:
crontab -u <user> -e
(comment out or delete the Ansible lines)
{% endif %}
LONG-TERM FIX:
1. Review the scripts/workflows found above and remove Traefik restart commands
2. If health checks are needed, use longer intervals (e.g. 5 minutes instead of 30 seconds)
3. Restart Traefik only on real failures, not preventively
{% endif %}
================================================================================
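If the diagnosis above does surface a timer or cron entry, the immediate actions can be captured in a short play as well. This is an illustrative sketch only; the timer name and cron entry label are placeholders, not taken from this repository, and must be replaced by whatever the diagnosis reports.

- name: Disable a discovered Ansible automation source (illustrative sketch)
  hosts: production
  become: yes
  tasks:
    - name: Stop and disable the offending systemd timer
      ansible.builtin.systemd:
        name: my-ansible-pull.timer   # placeholder name
        state: stopped
        enabled: false
    - name: Remove the ansible-pull cron entry for the deploy user
      ansible.builtin.cron:
        name: "ansible-pull"          # placeholder label; only removes entries previously created under this name
        user: deploy
        state: absent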

View File

@@ -0,0 +1,328 @@
---
# Find Source of Traefik Restarts
# Comprehensive diagnosis to find the source of the recurring Traefik restarts
- name: Find Source of Traefik Restarts
hosts: production
gather_facts: yes
become: yes
vars:
traefik_stack_path: "{{ stacks_base_path }}/traefik"
monitor_duration_seconds: 120 # 2 minutes of monitoring (can be increased)
tasks:
- name: Check Traefik container restart count
ansible.builtin.shell: |
docker inspect traefik --format '{{ '{{' }}.RestartCount{{ '}}' }}' 2>/dev/null || echo "0"
register: traefik_restart_count
changed_when: false
- name: Check Traefik container start time
ansible.builtin.shell: |
docker inspect traefik --format '{{ '{{' }}.State.StartedAt{{ '}}' }}' 2>/dev/null || echo "UNKNOWN"
register: traefik_started_at
changed_when: false
- name: Analyze Traefik logs for "Stopping server gracefully" messages
ansible.builtin.shell: |
cd {{ traefik_stack_path }}
docker compose logs traefik 2>&1 | grep -i "stopping server gracefully\|I have to go" | tail -20
register: traefik_stop_messages
changed_when: false
failed_when: false
- name: Extract timestamps from stop messages
ansible.builtin.shell: |
cd {{ traefik_stack_path }}
docker compose logs traefik 2>&1 | grep -i "stopping server gracefully\|I have to go" | tail -20 | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}' | sort | uniq
register: stop_timestamps
changed_when: false
failed_when: false
- name: Check Docker daemon logs for Traefik stop events
ansible.builtin.shell: |
journalctl -u docker.service --since "24 hours ago" --no-pager | grep -iE "traefik.*stop|traefik.*kill|traefik.*die|container.*traefik.*stopped" | tail -30 || echo "No Traefik stop events in Docker daemon logs"
register: docker_daemon_logs
changed_when: false
failed_when: false
- name: Check Docker events for Traefik (last 24 hours)
ansible.builtin.shell: |
docker events --since 24h --until now --filter container=traefik --filter event=die --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.name{{ '}}' }}" 2>/dev/null | tail -20 || echo "No Traefik die events found"
register: docker_events_traefik
changed_when: false
failed_when: false
- name: Check all user crontabs for Traefik/Docker commands
ansible.builtin.shell: |
for user in $(cut -f1 -d: /etc/passwd); do
crontab -u "$user" -l 2>/dev/null | grep -qE "traefik|docker.*compose.*traefik|docker.*stop.*traefik|docker.*restart.*traefik|docker.*down.*traefik" && echo "=== User: $user ===" && crontab -u "$user" -l 2>/dev/null | grep -E "traefik|docker.*compose.*traefik|docker.*stop.*traefik|docker.*restart.*traefik|docker.*down.*traefik" || true
done || echo "No user crontabs with Traefik commands found"
register: all_user_crontabs
changed_when: false
- name: Check system-wide cron directories
ansible.builtin.shell: |
for dir in /etc/cron.d /etc/cron.daily /etc/cron.hourly /etc/cron.weekly /etc/cron.monthly; do
if [ -d "$dir" ]; then
echo "=== $dir ==="
grep -rE "traefik|docker.*compose.*traefik|docker.*stop.*traefik|docker.*restart.*traefik|docker.*down.*traefik" "$dir" 2>/dev/null || echo "No matches"
fi
done
register: system_cron_dirs
changed_when: false
- name: Check systemd timers and services
ansible.builtin.shell: |
echo "=== Active Timers ==="
systemctl list-timers --all --no-pager | grep -E "traefik|docker.*compose" || echo "No Traefik-related timers"
echo ""
echo "=== Custom Services ==="
systemctl list-units --type=service --all | grep -E "traefik|docker.*compose" || echo "No Traefik-related services"
register: systemd_services
changed_when: false
- name: Check for scripts in deployment directory that restart Traefik
ansible.builtin.shell: |
find /home/deploy/deployment -type f \( -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) -exec grep -lE "traefik.*restart|docker.*compose.*traefik.*restart|docker.*compose.*traefik.*down|docker.*compose.*traefik.*stop" {} \; 2>/dev/null | head -30
register: deployment_scripts
changed_when: false
- name: Check Ansible roles for traefik_auto_restart or restart tasks
ansible.builtin.shell: |
grep -rE "traefik_auto_restart|traefik.*restart|docker.*compose.*traefik.*restart" /home/deploy/deployment/ansible/roles/ 2>/dev/null | grep -v ".git" | head -20 || echo "No auto-restart settings found"
register: ansible_auto_restart
changed_when: false
- name: Check Docker Compose watch mode
ansible.builtin.shell: |
cd {{ traefik_stack_path }}
docker compose ps traefik 2>/dev/null | grep -q "traefik" && echo "running" || echo "not_running"
register: docker_compose_watch
changed_when: false
failed_when: false
- name: Check if Docker Compose is running in watch mode
ansible.builtin.shell: |
ps aux | grep -E "docker.*compose.*watch|docker.*compose.*--watch" | grep -v grep || echo "No Docker Compose watch mode detected"
register: watch_mode_process
changed_when: false
- name: Check for monitoring/watchdog scripts
ansible.builtin.shell: |
find /home/deploy -type f -name "*monitor*" -o -name "*watchdog*" -o -name "*health*" 2>/dev/null | xargs grep -lE "traefik|docker.*compose.*traefik" 2>/dev/null | head -10 || echo "No monitoring scripts found"
register: monitoring_scripts
changed_when: false
- name: Check Gitea Workflows for Traefik restarts
ansible.builtin.shell: |
find /home/deploy -type f -path "*/.gitea/workflows/*.yml" -o -path "*/.github/workflows/*.yml" 2>/dev/null | xargs grep -lE "traefik.*restart|docker.*compose.*traefik.*restart" 2>/dev/null | head -10 || echo "No Gitea workflows found that restart Traefik"
register: gitea_workflows
changed_when: false
- name: Monitor Docker events in real-time ({{ monitor_duration_seconds }} seconds)
ansible.builtin.shell: |
timeout {{ monitor_duration_seconds }} docker events --filter container=traefik --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.name{{ '}}' }}" 2>&1 || echo "Monitoring completed or timeout"
register: docker_events_realtime
changed_when: false
failed_when: false
async: "{{ monitor_duration_seconds + 10 }}"
poll: 0
- name: Wait for monitoring to complete
ansible.builtin.async_status:
jid: "{{ docker_events_realtime.ansible_job_id }}"
register: monitoring_result
until: monitoring_result.finished
retries: "{{ (monitor_duration_seconds / 10) | int + 5 }}"
delay: 10
failed_when: false
- name: Check system reboot history
ansible.builtin.shell: |
last reboot --since "24 hours ago" 2>/dev/null | head -10 || echo "No reboots in last 24 hours"
register: reboot_history
changed_when: false
failed_when: false
- name: Check for at jobs
ansible.builtin.shell: |
atq 2>/dev/null | while read line; do
job_id=$(echo "$line" | awk '{print $1}')
at -c "$job_id" 2>/dev/null | grep -qE "traefik|docker.*compose.*traefik" && echo "=== Job ID: $job_id ===" && at -c "$job_id" 2>/dev/null | grep -E "traefik|docker.*compose.*traefik" || true
done || echo "No at jobs found or atq not available"
register: at_jobs
changed_when: false
- name: Check Docker daemon configuration for auto-restart
ansible.builtin.shell: |
cat /etc/docker/daemon.json 2>/dev/null | grep -iE "restart|live-restore" || echo "No restart settings in daemon.json"
register: docker_daemon_config
changed_when: false
failed_when: false
- name: Check if Traefik has restart policy
ansible.builtin.shell: |
cd {{ traefik_stack_path }}
docker compose config | grep -A 5 "traefik:" | grep -E "restart|restart_policy" || echo "No explicit restart policy found"
register: traefik_restart_policy
changed_when: false
failed_when: false
- name: Summary
ansible.builtin.debug:
msg: |
================================================================================
TRAEFIK RESTART SOURCE DIAGNOSIS - SUMMARY:
================================================================================
Traefik Status:
- Restart Count: {{ traefik_restart_count.stdout }}
- Started At: {{ traefik_started_at.stdout }}
- Stop messages found: {{ traefik_stop_messages.stdout_lines | length }} (last 20)
Stop timestamps (last 20):
{% if stop_timestamps.stdout %}
{{ stop_timestamps.stdout }}
{% else %}
No stop timestamps found
{% endif %}
Docker events (last 24h):
{% if docker_events_traefik.stdout and 'No Traefik die events' not in docker_events_traefik.stdout %}
{{ docker_events_traefik.stdout }}
{% else %}
No Traefik die events in the last 24 hours
{% endif %}
Docker daemon logs:
{% if docker_daemon_logs.stdout and 'No Traefik stop events' not in docker_daemon_logs.stdout %}
{{ docker_daemon_logs.stdout }}
{% else %}
No Traefik stop events in the Docker daemon logs
{% endif %}
Sources found:
{% if all_user_crontabs.stdout and 'No user crontabs' not in all_user_crontabs.stdout %}
1. ❌ CRON JOBS (user):
{{ all_user_crontabs.stdout }}
{% endif %}
{% if system_cron_dirs.stdout and 'No matches' not in system_cron_dirs.stdout %}
2. ❌ SYSTEM CRON:
{{ system_cron_dirs.stdout }}
{% endif %}
{% if systemd_services.stdout and 'No Traefik-related' not in systemd_services.stdout %}
3. ❌ SYSTEMD TIMERS/SERVICES:
{{ systemd_services.stdout }}
{% endif %}
{% if deployment_scripts.stdout and 'No' not in deployment_scripts.stdout %}
4. ⚠️ DEPLOYMENT SCRIPTS:
{{ deployment_scripts.stdout }}
{% endif %}
{% if ansible_auto_restart.stdout and 'No auto-restart' not in ansible_auto_restart.stdout %}
5. ⚠️ ANSIBLE AUTO-RESTART:
{{ ansible_auto_restart.stdout }}
{% endif %}
{% if gitea_workflows.stdout and 'No Gitea workflows' not in gitea_workflows.stdout %}
6. ⚠️ GITEA WORKFLOWS:
{{ gitea_workflows.stdout }}
{% endif %}
{% if monitoring_scripts.stdout and 'No monitoring scripts' not in monitoring_scripts.stdout %}
7. ⚠️ MONITORING SCRIPTS:
{{ monitoring_scripts.stdout }}
{% endif %}
{% if at_jobs.stdout and 'No at jobs' not in at_jobs.stdout %}
8. ❌ AT JOBS:
{{ at_jobs.stdout }}
{% endif %}
{% if docker_compose_watch.stdout and 'Could not check' not in docker_compose_watch.stdout %}
9. ⚠️ DOCKER COMPOSE WATCH:
{{ docker_compose_watch.stdout }}
{% endif %}
{% if watch_mode_process.stdout and 'No Docker Compose watch' not in watch_mode_process.stdout %}
10. ❌ DOCKER COMPOSE WATCH MODE (PROCESS):
{{ watch_mode_process.stdout }}
{% endif %}
{% if reboot_history.stdout and 'No reboots' not in reboot_history.stdout %}
11. ⚠️ SYSTEM REBOOTS:
{{ reboot_history.stdout }}
{% endif %}
Real-time monitoring ({{ monitor_duration_seconds }} seconds):
{% if monitoring_result.finished and monitoring_result.ansible_job_id %}
{{ monitoring_result.stdout | default('No events during monitoring') }}
{% else %}
Monitoring is still running or was interrupted
{% endif %}
================================================================================
NEXT STEPS:
================================================================================
{% if all_user_crontabs.stdout and 'No user crontabs' not in all_user_crontabs.stdout %}
1. ❌ DISABLE CRON JOBS:
- Review the cron jobs found: {{ all_user_crontabs.stdout }}
- Remove or comment out the corresponding entries
{% endif %}
{% if system_cron_dirs.stdout and 'No matches' not in system_cron_dirs.stdout %}
2. ❌ DISABLE SYSTEM CRON:
- Review the system cron jobs found: {{ system_cron_dirs.stdout }}
- Remove or rename the files
{% endif %}
{% if systemd_services.stdout and 'No Traefik-related' not in systemd_services.stdout %}
3. ❌ DISABLE SYSTEMD TIMERS/SERVICES:
- Review the services/timers found: {{ systemd_services.stdout }}
- Disable with: systemctl disable <service>
{% endif %}
{% if deployment_scripts.stdout and 'No' not in deployment_scripts.stdout %}
4. ⚠️ REVIEW DEPLOYMENT SCRIPTS:
- Review the scripts found: {{ deployment_scripts.stdout }}
- Remove or comment out Traefik restart commands
{% endif %}
{% if ansible_auto_restart.stdout and 'No auto-restart' not in ansible_auto_restart.stdout %}
5. ⚠️ REVIEW ANSIBLE AUTO-RESTART:
- Review the settings found: {{ ansible_auto_restart.stdout }}
- Set traefik_auto_restart: false in group_vars
{% endif %}
{% if not all_user_crontabs.stdout or 'No user crontabs' in all_user_crontabs.stdout %}
{% if not system_cron_dirs.stdout or 'No matches' in system_cron_dirs.stdout %}
{% if not systemd_services.stdout or 'No Traefik-related' in systemd_services.stdout %}
{% if not deployment_scripts.stdout or 'No' in deployment_scripts.stdout %}
{% if not ansible_auto_restart.stdout or 'No auto-restart' in ansible_auto_restart.stdout %}
⚠️ NO AUTOMATIC RESTART MECHANISMS FOUND!
Possible causes:
1. An external process (not via cron/systemd)
2. Docker service restarts (systemctl restart docker)
3. Host reboots
4. Manual restarts (from outside)
5. A monitoring service (Portainer, Watchtower, etc.)
Next steps:
1. Run 'docker events --filter container=traefik' manually and watch the output
2. Check journalctl -u docker.service for Docker service restarts
3. Check whether Portainer or other monitoring tools are running
4. Check whether Watchtower or other auto-update tools are installed
{% endif %}
{% endif %}
{% endif %}
{% endif %}
{% endif %}
================================================================================
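For a quick interactive check outside of this playbook, the same event stream can be watched directly. A minimal sketch that mirrors the filters used above; the ten-minute timeout is an arbitrary choice.

- name: Watch Traefik lifecycle events interactively (illustrative sketch)
  hosts: production
  gather_facts: no
  tasks:
    - name: Stream die/stop events for up to ten minutes
      ansible.builtin.shell: |
        timeout 600 docker events \
          --filter container=traefik \
          --filter event=die \
          --filter event=stop \
          --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.name{{ '}}' }}"
      changed_when: false
      failed_when: false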

View File

@@ -0,0 +1,17 @@
---
# Fix Container Issues (Composer Dependencies and Permissions)
# Wrapper Playbook for application role containers tasks (fix action)
- hosts: production
gather_facts: no
become: no
vars:
application_container_action: fix
tasks:
- name: Include application containers tasks (fix)
ansible.builtin.include_role:
name: application
tasks_from: containers
tags:
- application
- containers
- fix

View File

@@ -1,81 +0,0 @@
---
- name: Fix Gitea Actions Configuration (non-destructive)
hosts: production
become: no
gather_facts: yes
tasks:
- name: Check current Gitea Actions configuration
shell: |
docker exec gitea cat /data/gitea/conf/app.ini 2>/dev/null | grep -A 5 "\[actions\]" || echo "No actions section found"
register: current_config
changed_when: false
ignore_errors: yes
- name: Backup existing app.ini
shell: |
docker exec gitea cp /data/gitea/conf/app.ini /data/gitea/conf/app.ini.backup.$(date +%Y%m%d_%H%M%S)
changed_when: false
ignore_errors: yes
- name: Copy app.ini from container for editing
shell: |
docker cp gitea:/data/gitea/conf/app.ini /tmp/gitea_app_ini_$$
register: copy_result
- name: Update app.ini Actions section
shell: |
# Remove DEFAULT_ACTIONS_URL line if it exists in [actions] section
sed -i '/^\[actions\]/,/^\[/{ /^DEFAULT_ACTIONS_URL/d; }' /tmp/gitea_app_ini_$$
# Ensure ENABLED = true in [actions] section
if grep -q "^\[actions\]" /tmp/gitea_app_ini_$$; then
# Section exists - ensure ENABLED = true
sed -i '/^\[actions\]/,/^\[/{ s/^ENABLED.*/ENABLED = true/; }' /tmp/gitea_app_ini_$$
# If ENABLED line doesn't exist, add it
if ! grep -A 10 "^\[actions\]" /tmp/gitea_app_ini_$$ | grep -q "^ENABLED"; then
sed -i '/^\[actions\]/a ENABLED = true' /tmp/gitea_app_ini_$$
fi
else
# Section doesn't exist - add it
echo "" >> /tmp/gitea_app_ini_$$
echo "[actions]" >> /tmp/gitea_app_ini_$$
echo "ENABLED = true" >> /tmp/gitea_app_ini_$$
fi
args:
executable: /bin/bash
register: config_updated
- name: Copy updated app.ini back to container
shell: |
docker cp /tmp/gitea_app_ini_$$ gitea:/data/gitea/conf/app.ini
rm -f /tmp/gitea_app_ini_$$
when: config_updated.changed | default(false)
- name: Verify Actions configuration after update
shell: |
docker exec gitea cat /data/gitea/conf/app.ini | grep -A 5 "\[actions\]"
register: updated_config
changed_when: false
- name: Restart Gitea to apply configuration
shell: |
cd {{ stacks_base_path }}/gitea
docker compose restart gitea
when: config_updated.changed | default(false)
- name: Wait for Gitea to be ready
wait_for:
timeout: 60
when: config_updated.changed | default(false)
- name: Display configuration result
debug:
msg:
- "=== Gitea Actions Configuration Fixed ==="
- ""
- "Current [actions] configuration:"
- "{{ updated_config.stdout }}"
- ""
- "Configuration updated: {{ 'Yes' if config_updated.changed else 'No changes needed' }}"
- "Gitea restarted: {{ 'Yes' if config_updated.changed else 'No' }}"

View File

@@ -1,49 +0,0 @@
---
- name: Remove DEFAULT_ACTIONS_URL from Gitea configuration
hosts: production
become: no
gather_facts: yes
tasks:
- name: Check if DEFAULT_ACTIONS_URL exists in app.ini
shell: |
docker exec gitea cat /data/gitea/conf/app.ini 2>/dev/null | grep -q "DEFAULT_ACTIONS_URL" && echo "exists" || echo "not_found"
register: url_check
changed_when: false
ignore_errors: yes
- name: Remove DEFAULT_ACTIONS_URL from app.ini
shell: |
docker exec gitea sh -c 'sed -i "/^DEFAULT_ACTIONS_URL/d" /data/gitea/conf/app.ini'
when: url_check.stdout == "exists"
register: url_removed
- name: Restart Gitea to apply configuration changes
shell: |
cd {{ stacks_base_path }}/gitea
docker compose restart gitea
when: url_removed.changed | default(false)
- name: Wait for Gitea to be ready
wait_for:
timeout: 60
when: url_removed.changed | default(false)
- name: Verify Gitea Actions configuration
shell: |
docker exec gitea cat /data/gitea/conf/app.ini 2>/dev/null | grep -A 3 "\[actions\]" || echo "Config not accessible"
register: gitea_config
changed_when: false
ignore_errors: yes
- name: Display Gitea Actions configuration
debug:
msg:
- "=== Gitea Configuration Fix Complete ==="
- "DEFAULT_ACTIONS_URL removed: {{ 'Yes' if url_removed.changed else 'No (not found or already removed)' }}"
- "Container restarted: {{ 'Yes' if url_removed.changed else 'No' }}"
- ""
- "Current Actions configuration:"
- "{{ gitea_config.stdout if gitea_config.stdout else 'Could not read config' }}"
- ""
- "Gitea will now use its own instance for actions by default (no GitHub fallback)."

View File

@@ -1,200 +1,17 @@
---
# Fix Gitea Runner Configuration
# Wrapper Playbook for gitea role runner tasks (fix action)
- hosts: production
gather_facts: yes
become: no
vars:
gitea_runner_action: fix
tasks:
- name: Include gitea runner tasks (fix)
ansible.builtin.include_role:
name: gitea
tasks_from: runner
tags:
- gitea
- runner
- fix
---
- name: Fix Gitea Runner Configuration
hosts: production
gather_facts: yes
become: no
vars:
gitea_runner_path: "{{ runner_path | default('/home/deploy/deployment/gitea-runner') }}"
gitea_instance_url: "https://git.michaelschiemer.de"
runner_container_name: "gitea-runner"
tasks:
- name: Check if Gitea runner directory exists
stat:
path: "{{ gitea_runner_path }}"
register: runner_dir_exists
- name: Fail if runner directory does not exist
fail:
msg: "Gitea runner directory not found at {{ gitea_runner_path }}"
when: not runner_dir_exists.stat.exists
- name: Check if runner container is running
shell: |
docker ps --format json | jq -r 'select(.Names == "{{ runner_container_name }}") | .State'
register: runner_container_state
changed_when: false
failed_when: false
- name: Display runner container status
debug:
msg: |
Runner Container Status: {{ runner_container_state.stdout | default('NOT RUNNING') }}
- name: Check if .runner file exists
stat:
path: "{{ gitea_runner_path }}/data/.runner"
register: runner_file_exists
- name: Read .runner file content (if exists)
slurp:
src: "{{ gitea_runner_path }}/data/.runner"
register: runner_file_content
when: runner_file_exists.stat.exists
changed_when: false
- name: Display .runner file content
debug:
msg: |
Runner Registration File Content:
{{ runner_file_content.content | b64decode | default('File not found') }}
when: runner_file_exists.stat.exists
- name: Check for GitHub URLs in .runner file
shell: |
grep -i "github.com" "{{ gitea_runner_path }}/data/.runner" 2>/dev/null || echo "NO_GITHUB_URLS"
register: github_urls_check
when: runner_file_exists.stat.exists
changed_when: false
failed_when: false
- name: Display GitHub URLs check result
debug:
msg: |
GitHub URLs in .runner file: {{ github_urls_check.stdout }}
- name: Check runner logs for incorrect URLs
shell: |
docker logs {{ runner_container_name }} --tail=100 2>&1 | grep -E "(github.com|instance|repo)" || echo "NO_MATCHES"
register: runner_logs_check
changed_when: false
failed_when: false
- name: Display runner logs analysis
debug:
msg: |
Runner Logs Analysis (last 100 lines):
{{ runner_logs_check.stdout }}
- name: Check .env file for GITEA_INSTANCE_URL
shell: |
grep "^GITEA_INSTANCE_URL=" "{{ gitea_runner_path }}/.env" 2>/dev/null || echo "NOT_FOUND"
register: env_instance_url
changed_when: false
failed_when: false
- name: Display GITEA_INSTANCE_URL from .env
debug:
msg: |
GITEA_INSTANCE_URL in .env: {{ env_instance_url.stdout }}
- name: Check if .env has correct Gitea URL
set_fact:
env_has_correct_url: "{{ env_instance_url.stdout is defined and gitea_instance_url in env_instance_url.stdout }}"
- name: Check if runner needs re-registration
set_fact:
runner_needs_reregistration: >-
{%- if not runner_file_exists.stat.exists -%}
true
{%- elif 'github.com' in (github_urls_check.stdout | default('')) -%}
true
{%- elif not env_has_correct_url -%}
true
{%- else -%}
false
{%- endif -%}
- name: Display re-registration decision
debug:
msg: |
Runner Re-registration Needed: {{ runner_needs_reregistration | bool }}
Reasons:
- Runner file exists: {{ runner_file_exists.stat.exists }}
- Contains GitHub URLs: {{ 'github.com' in (github_urls_check.stdout | default('')) }}
- .env has correct URL: {{ env_has_correct_url | bool }}
- name: Stop runner container before re-registration
shell: |
cd {{ gitea_runner_path }}
docker compose stop {{ runner_container_name }}
when: runner_needs_reregistration | bool
register: stop_runner
changed_when: stop_runner.rc == 0
- name: Backup existing .runner file
copy:
src: "{{ gitea_runner_path }}/data/.runner"
dest: "{{ gitea_runner_path }}/data/.runner.backup.{{ ansible_date_time.epoch }}"
remote_src: yes
when:
- runner_file_exists.stat.exists
- runner_needs_reregistration | bool
- name: Remove existing .runner file
file:
path: "{{ gitea_runner_path }}/data/.runner"
state: absent
when: runner_needs_reregistration | bool
- name: Update .env file with correct GITEA_INSTANCE_URL
lineinfile:
path: "{{ gitea_runner_path }}/.env"
regexp: '^GITEA_INSTANCE_URL='
line: "GITEA_INSTANCE_URL={{ gitea_instance_url }}"
create: yes
when: runner_needs_reregistration | bool
register: env_updated
- name: Display instructions for manual re-registration
debug:
msg: |
========================================
Runner Re-registration Required
========================================
The runner needs to be re-registered with the correct Gitea instance URL.
Steps to re-register:
1. Get a new registration token from Gitea:
{{ gitea_instance_url }}/admin/actions/runners
Click "Create New Runner" and copy the token
2. Update .env file with the token:
GITEA_RUNNER_REGISTRATION_TOKEN=<your-token>
3. Re-register the runner:
cd {{ gitea_runner_path }}
./register.sh
Or use Ansible to set the token and register:
ansible-playbook -i inventory/production.yml \
playbooks/register-gitea-runner.yml \
-e "runner_registration_token=<your-token>"
========================================
when: runner_needs_reregistration | bool
- name: Display final status
debug:
msg: |
========================================
Gitea Runner Configuration Status
========================================
Runner Directory: {{ gitea_runner_path }}
Container Running: {{ 'YES' if runner_container_state.stdout == 'running' else 'NO' }}
Runner File Exists: {{ 'YES' if runner_file_exists.stat.exists else 'NO' }}
Contains GitHub URLs: {{ 'YES' if 'github.com' in (github_urls_check.stdout | default('')) else 'NO' }}
.env has correct URL: {{ 'YES' if env_has_correct_url else 'NO' }}
Re-registration Needed: {{ 'YES' if runner_needs_reregistration | bool else 'NO' }}
========================================
{% if not runner_needs_reregistration | bool %}
✅ Runner configuration looks correct!
{% else %}
⚠️ Runner needs to be re-registered with correct Gitea URL
{% endif %}
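The manual re-registration steps listed in this playbook can also be scripted once a fresh token has been created in the Gitea admin UI. A minimal sketch, assuming the same .env keys referenced above (GITEA_INSTANCE_URL, GITEA_RUNNER_REGISTRATION_TOKEN) and a token passed in via -e runner_registration_token=...:

- name: Prepare Gitea runner re-registration (illustrative sketch)
  hosts: production
  gather_facts: no
  vars:
    gitea_runner_path: "{{ runner_path | default('/home/deploy/deployment/gitea-runner') }}"
  tasks:
    - name: Point the runner at the correct Gitea instance
      ansible.builtin.lineinfile:
        path: "{{ gitea_runner_path }}/.env"
        regexp: '^GITEA_INSTANCE_URL='
        line: "GITEA_INSTANCE_URL=https://git.michaelschiemer.de"
    - name: Write the registration token into .env
      ansible.builtin.lineinfile:
        path: "{{ gitea_runner_path }}/.env"
        regexp: '^GITEA_RUNNER_REGISTRATION_TOKEN='
        line: "GITEA_RUNNER_REGISTRATION_TOKEN={{ runner_registration_token }}"
      no_log: true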

View File

@@ -1,143 +0,0 @@
---
- name: Fix Gitea Traefik Configuration - Remove Dynamic Config and Use Labels
hosts: production
gather_facts: yes
become: no
vars:
traefik_stack_path: "{{ stacks_base_path }}/traefik"
gitea_stack_path: "{{ stacks_base_path }}/gitea"
tasks:
- name: Backup dynamic Gitea configuration
shell: |
cd {{ traefik_stack_path }}/dynamic
if [ -f gitea.yml ]; then
cp gitea.yml gitea.yml.backup-$(date +%Y%m%d-%H%M%S)
echo "Backed up to gitea.yml.backup-$(date +%Y%m%d-%H%M%S)"
else
echo "File not found, nothing to backup"
fi
args:
executable: /bin/bash
register: backup_result
ignore_errors: yes
failed_when: false
- name: Display backup result
debug:
msg: "{{ backup_result.stdout_lines }}"
- name: Remove dynamic Gitea configuration
file:
path: "{{ traefik_stack_path }}/dynamic/gitea.yml"
state: absent
register: remove_config
- name: Restart Traefik to reload configuration
community.docker.docker_compose_v2:
project_src: "{{ traefik_stack_path }}"
state: present
pull: never
recreate: always
services:
- traefik
register: traefik_restart
when: remove_config.changed
- name: Wait for Traefik to be ready
wait_for:
port: 443
host: localhost
timeout: 30
delegate_to: localhost
when: traefik_restart.changed
ignore_errors: yes
- name: Check if Gitea docker-compose.yml already has Traefik labels
shell: |
grep -q "traefik.enable=true" {{ gitea_stack_path }}/docker-compose.yml && echo "Labels already present" || echo "Labels missing"
register: labels_check
ignore_errors: yes
failed_when: false
- name: Copy docker-compose.yml from local to ensure labels are present
copy:
src: "{{ playbook_dir }}/../../stacks/gitea/docker-compose.yml"
dest: "{{ gitea_stack_path }}/docker-compose.yml"
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0644'
register: labels_added
when: "'Labels missing' in labels_check.stdout"
- name: Recreate Gitea container with labels
community.docker.docker_compose_v2:
project_src: "{{ gitea_stack_path }}"
state: present
pull: never
recreate: always
remove_orphans: no
register: gitea_recreate
when: labels_added.changed
- name: Wait for Gitea to be healthy
shell: |
for i in {1..30}; do
if docker exec gitea curl -f http://localhost:3000/api/healthz >/dev/null 2>&1; then
echo "Gitea is healthy"
exit 0
fi
echo "Waiting for Gitea... ($i/30)"
sleep 2
done
echo "Health check timeout"
exit 1
args:
executable: /bin/bash
register: health_wait
ignore_errors: yes
failed_when: false
when: gitea_recreate.changed
- name: Display health wait result
debug:
msg: "{{ health_wait.stdout_lines }}"
when: gitea_recreate.changed
- name: Check Traefik service registration
shell: |
sleep 5 # Give Traefik time to discover
docker exec traefik wget -qO- http://localhost:8080/api/http/services 2>&1 | grep -i gitea || echo "Service not found (may take a few seconds)"
register: traefik_service
ignore_errors: yes
failed_when: false
- name: Display Traefik service registration
debug:
msg: "{{ traefik_service.stdout_lines }}"
- name: Test external Gitea access
shell: |
sleep 3 # Give Traefik time to update routing
curl -k -H "User-Agent: Mozilla/5.0" -s -o /dev/null -w "HTTP Status: %{http_code}\n" https://git.michaelschiemer.de/ 2>&1 || echo "Connection failed"
args:
executable: /bin/bash
register: external_test
ignore_errors: yes
failed_when: false
- name: Display external test result
debug:
msg: "{{ external_test.stdout_lines }}"
- name: Summary
debug:
msg:
- "=== FIX SUMMARY ==="
- "Dynamic config removed: {{ 'Yes' if remove_config.changed else 'Already removed' }}"
- "Labels added to docker-compose.yml: {{ 'Yes' if labels_added.changed else 'Already present' }}"
- "Gitea container recreated: {{ 'Yes' if gitea_recreate.changed else 'No' }}"
- ""
- "Gitea should now be accessible via https://git.michaelschiemer.de"
- "If issue persists, check Traefik logs for errors"

View File

@@ -1,139 +0,0 @@
---
- name: Fix Gitea Traefik Labels
hosts: production
gather_facts: yes
become: no
vars:
gitea_stack_path: "{{ stacks_base_path }}/gitea"
tasks:
- name: Check current Gitea container status
shell: |
cd {{ gitea_stack_path }}
docker compose ps gitea
args:
executable: /bin/bash
register: gitea_status_before
ignore_errors: yes
failed_when: false
- name: Display current status
debug:
msg: "{{ gitea_status_before.stdout_lines }}"
- name: Check current Traefik labels
shell: |
docker inspect gitea --format '{{ '{{' }}json .Config.Labels{{ '}}' }}' 2>&1 | tr ',' '\n' | grep -i traefik || echo "No Traefik labels found"
register: current_labels
ignore_errors: yes
failed_when: false
- name: Display current labels
debug:
msg: "{{ current_labels.stdout_lines }}"
- name: Recreate Gitea container with Traefik labels
community.docker.docker_compose_v2:
project_src: "{{ gitea_stack_path }}"
state: present
pull: never
recreate: always
remove_orphans: no
register: gitea_recreate
- name: Wait for Gitea to be ready
wait_for:
port: 3000
host: localhost
timeout: 60
delegate_to: localhost
when: gitea_recreate.changed
ignore_errors: yes
- name: Wait for Gitea health check
shell: |
for i in {1..30}; do
if docker exec gitea curl -f http://localhost:3000/api/healthz >/dev/null 2>&1; then
echo "Gitea is healthy"
exit 0
fi
echo "Waiting for Gitea to be healthy... ($i/30)"
sleep 2
done
echo "Gitea health check timeout"
exit 1
args:
executable: /bin/bash
register: health_wait
ignore_errors: yes
failed_when: false
when: gitea_recreate.changed
- name: Display health wait result
debug:
msg: "{{ health_wait.stdout_lines }}"
when: gitea_recreate.changed
- name: Check new Gitea container status
shell: |
cd {{ gitea_stack_path }}
docker compose ps gitea
args:
executable: /bin/bash
register: gitea_status_after
ignore_errors: yes
failed_when: false
- name: Display new status
debug:
msg: "{{ gitea_status_after.stdout_lines }}"
- name: Check new Traefik labels
shell: |
docker inspect gitea --format '{{ '{{' }}json .Config.Labels{{ '}}' }}' 2>&1 | tr ',' '\n' | grep -i traefik || echo "No Traefik labels found"
register: new_labels
ignore_errors: yes
failed_when: false
- name: Display new labels
debug:
msg: "{{ new_labels.stdout_lines }}"
- name: Check Traefik service registration
shell: |
docker exec traefik wget -qO- http://localhost:8080/api/http/services 2>&1 | grep -i gitea || echo "Gitea service not found (may take a few seconds to register)"
register: traefik_service
ignore_errors: yes
failed_when: false
- name: Display Traefik service registration
debug:
msg: "{{ traefik_service.stdout_lines }}"
- name: Test external Gitea access
shell: |
echo "Testing external access..."
sleep 5 # Give Traefik time to update
curl -k -H "User-Agent: Mozilla/5.0" -s -o /dev/null -w "HTTP Status: %{http_code}\n" https://git.michaelschiemer.de/ 2>&1 || echo "Connection failed"
args:
executable: /bin/bash
register: external_test
ignore_errors: yes
failed_when: false
- name: Display external test result
debug:
msg: "{{ external_test.stdout_lines }}"
- name: Summary
debug:
msg:
- "=== FIX SUMMARY ==="
- "Container recreated: {{ 'Yes' if gitea_recreate.changed else 'No' }}"
- "Traefik labels: {{ 'Fixed' if 'traefik' in new_labels.stdout|lower else 'Still missing' }}"
- ""
- "If the issue persists:"
- "1. Check Traefik logs: cd {{ stacks_base_path }}/traefik && docker compose logs traefik"
- "2. Verify Traefik can reach Gitea: docker exec traefik ping -c 2 gitea"
- "3. Check Gitea logs for errors: cd {{ gitea_stack_path }} && docker compose logs gitea"

View File

@@ -1,67 +0,0 @@
---
- name: Fix Grafana VPN Access - Update Middleware to ipAllowList
hosts: production
gather_facts: no
become: no
tasks:
- name: Backup current middlewares.yml
shell: |
cd ~/deployment/stacks/traefik/dynamic
cp middlewares.yml middlewares.yml.backup.$(date +%Y%m%d_%H%M%S)
args:
executable: /bin/bash
- name: Update middlewares.yml - Change ipWhiteList to ipAllowList
shell: |
cd ~/deployment/stacks/traefik/dynamic
sed -i 's/ipWhiteList:/ipAllowList:/g' middlewares.yml
sed -i 's/ipWhitelist/ipAllowList/g' middlewares.yml
# Validate YAML syntax
python3 -c "import yaml; yaml.safe_load(open('middlewares.yml')); print('YAML valid')"
args:
executable: /bin/bash
- name: Display updated grafana-vpn-only middleware
shell: |
cd ~/deployment/stacks/traefik/dynamic
grep -A 6 'grafana-vpn-only:' middlewares.yml
args:
executable: /bin/bash
register: grafana_middleware
- name: Show updated middleware
debug:
msg: "{{ grafana_middleware.stdout_lines }}"
- name: Restart Traefik to apply changes
command: docker compose restart traefik
args:
chdir: ~/deployment/stacks/traefik
register: traefik_restart
- name: Wait for Traefik to restart
pause:
seconds: 5
- name: Check Traefik logs for deprecation warnings
shell: |
cd ~/deployment/stacks/traefik
docker compose logs traefik --tail=20 2>&1 | grep -i 'allowlist\|whitelist\|deprecated' || echo "No warnings found"
args:
executable: /bin/bash
register: traefik_warnings
- name: Display Traefik warnings
debug:
msg: "{{ traefik_warnings.stdout_lines }}"
- name: Verify Traefik status
command: docker compose ps traefik
args:
chdir: ~/deployment/stacks/traefik
register: traefik_status
- name: Display Traefik status
debug:
msg: "{{ traefik_status.stdout_lines }}"

View File

@@ -1,80 +0,0 @@
---
- name: Fix Grafana VPN Routing and Remove Temporary IP Allow
hosts: production
gather_facts: no
become: no
tasks:
- name: Check recent Grafana access attempts
shell: |
cd ~/deployment/stacks/traefik
echo "=== Recent Grafana Access (Last 10 attempts) ==="
tail -50 logs/access.log | grep grafana | tail -10 | while read line; do
echo "$line" | grep -oP '"ClientHost":"[^"]*"' || echo "Could not parse"
done
args:
executable: /bin/bash
register: recent_access
ignore_errors: yes
failed_when: false
- name: Display recent access attempts
debug:
msg: "{{ recent_access.stdout_lines }}"
- name: Check if traffic comes from VPN
shell: |
cd ~/deployment/stacks/traefik
echo "=== Checking if recent traffic comes from VPN (10.8.0.0/24) ==="
tail -20 logs/access.log | grep grafana | tail -5 | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//' | while read ip; do
if [[ "$ip" =~ ^10\.8\.0\.[0-9]+$ ]]; then
echo "? $ip -> VPN traffic (10.8.0.0/24)"
else
echo "? $ip -> Public IP (not VPN)"
fi
done
args:
executable: /bin/bash
register: vpn_check
ignore_errors: yes
failed_when: false
- name: Display VPN check results
debug:
msg: "{{ vpn_check.stdout_lines }}"
- name: Backup current middlewares.yml
shell: |
cd ~/deployment/stacks/traefik/dynamic
cp middlewares.yml middlewares.yml.backup.$(date +%Y%m%d_%H%M%S)
args:
executable: /bin/bash
when: false # Skip for now - we'll do this manually
- name: Check current middleware configuration
shell: |
cd ~/deployment/stacks/traefik/dynamic
echo "=== Current grafana-vpn-only Middleware ==="
grep -A 8 "grafana-vpn-only:" middlewares.yml
args:
executable: /bin/bash
register: middleware_config
ignore_errors: yes
failed_when: false
- name: Display middleware configuration
debug:
msg: "{{ middleware_config.stdout_lines }}"
- name: Instructions for removing temporary IP
debug:
msg:
- "=== TO REMOVE TEMPORARY IP ALLOWLIST ==="
- "1. Make sure VPN routing works (DNS = 10.8.0.1 or use hosts file)"
- "2. Test that traffic comes from VPN (ClientHost: 10.8.0.7)"
- "3. Remove temporary IP from middlewares.yml:"
- " cd ~/deployment/stacks/traefik/dynamic"
- " sed -i '/89.246.96.244\/32/d' middlewares.yml"
- "4. Restart Traefik:"
- " cd ~/deployment/stacks/traefik && docker compose restart traefik"
- "5. Test: With VPN = OK, Without VPN = 403"

View File

@@ -1,79 +0,0 @@
---
- name: Fix Nginx Upstream Configuration
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check which nginx config files exist
shell: |
cd ~/deployment/stacks/staging
echo "=== Check nginx config files ==="
docker compose exec -T staging-nginx find /etc/nginx -name "*.conf" -type f 2>&1 | head -20
echo ""
echo "=== Check sites-enabled ==="
docker compose exec -T staging-nginx ls -la /etc/nginx/sites-enabled/ 2>&1 || echo "sites-enabled not found"
echo ""
echo "=== Check sites-available ==="
docker compose exec -T staging-nginx ls -la /etc/nginx/sites-available/ 2>&1 || echo "sites-available not found"
echo ""
echo "=== Check nginx.conf includes ==="
docker compose exec -T staging-nginx grep -E "include|conf.d|sites" /etc/nginx/nginx.conf 2>&1 | head -10
args:
executable: /bin/bash
register: nginx_config_check
ignore_errors: yes
failed_when: false
- name: Display nginx config check
debug:
msg: "{{ nginx_config_check.stdout_lines }}"
- name: Find all fastcgi_pass directives
shell: |
cd ~/deployment/stacks/staging
echo "=== Search for fastcgi_pass in all config files ==="
docker compose exec -T staging-nginx grep -r "fastcgi_pass" /etc/nginx/ 2>&1 || echo "Could not search"
args:
executable: /bin/bash
register: fastcgi_pass_search
ignore_errors: yes
failed_when: false
- name: Display fastcgi_pass search
debug:
msg: "{{ fastcgi_pass_search.stdout_lines }}"
- name: Fix all fastcgi_pass to use staging-app:9000
shell: |
cd ~/deployment/stacks/staging
echo "=== Fix fastcgi_pass in all config files ==="
docker compose exec -T staging-nginx sh -c "find /etc/nginx -name '*.conf' -type f -exec sed -i 's|fastcgi_pass 127.0.0.1:9000;|fastcgi_pass staging-app:9000;|g' {} \;" || echo "Fix failed"
docker compose exec -T staging-nginx sh -c "find /etc/nginx -name '*.conf' -type f -exec sed -i 's|fastcgi_pass localhost:9000;|fastcgi_pass staging-app:9000;|g' {} \;" || echo "Fix failed"
docker compose exec -T staging-nginx sh -c "find /etc/nginx -name '*.conf' -type f -exec sed -i 's|fastcgi_pass php-upstream;|fastcgi_pass staging-app:9000;|g' {} \;" || echo "Note: php-upstream should stay as is"
echo "=== Verify fix ==="
docker compose exec -T staging-nginx grep -r "fastcgi_pass" /etc/nginx/ 2>&1 | grep -v "staging-app" || echo "All fastcgi_pass now use staging-app"
args:
executable: /bin/bash
register: fix_result
ignore_errors: yes
failed_when: false
- name: Display fix result
debug:
msg: "{{ fix_result.stdout_lines }}"
- name: Reload nginx to apply changes
shell: |
cd ~/deployment/stacks/staging
docker compose exec -T staging-nginx nginx -t 2>&1 || echo "Config test failed"
docker compose restart staging-nginx || echo "Restart failed"
args:
executable: /bin/bash
register: nginx_reload
ignore_errors: yes
failed_when: false
- name: Display nginx reload result
debug:
msg: "{{ nginx_reload.stdout_lines }}"

View File

@@ -1,36 +0,0 @@
---
- name: Check and Fix PHP-FPM Configuration
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check PHP-FPM www.conf configuration for allowed_clients
shell: |
cd ~/deployment/stacks/staging
echo "=== PHP-FPM www.conf listen.allowed_clients ==="
docker compose exec -T staging-app cat /usr/local/etc/php-fpm.d/www.conf 2>&1 | grep -E "(listen|allowed_clients|listen\.owner|listen\.group|listen\.mode)" | head -15
args:
executable: /bin/bash
register: fpm_config
ignore_errors: yes
failed_when: false
- name: Display PHP-FPM config
debug:
msg: "{{ fpm_config.stdout_lines }}"
- name: Check nginx error log for specific PHP-FPM errors
shell: |
cd ~/deployment/stacks/staging
echo "=== Nginx Error Log (all lines) ==="
docker compose logs --tail=200 staging-nginx 2>&1 | grep -iE "(502|bad gateway|upstream|php|fpm|connection)" || echo "No specific errors found"
args:
executable: /bin/bash
register: nginx_error_log
ignore_errors: yes
failed_when: false
- name: Display nginx error log
debug:
msg: "{{ nginx_error_log.stdout_lines }}"

View File

@@ -1,57 +0,0 @@
---
- name: Fix sites-available/default upstream configuration
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check php-upstream definition in sites-available/default
shell: |
cd ~/deployment/stacks/staging
echo "=== Check upstream definition ==="
docker compose exec -T staging-nginx grep -A 3 "upstream php-upstream" /etc/nginx/sites-available/default 2>&1 || echo "No upstream found"
echo ""
echo "=== Full sites-available/default file ==="
docker compose exec -T staging-nginx cat /etc/nginx/sites-available/default 2>&1
args:
executable: /bin/bash
register: upstream_check
ignore_errors: yes
failed_when: false
- name: Display upstream check
debug:
msg: "{{ upstream_check.stdout_lines }}"
- name: Fix php-upstream in sites-available/default
shell: |
cd ~/deployment/stacks/staging
echo "=== Fix php-upstream definition ==="
docker compose exec -T staging-nginx sed -i 's|server 127.0.0.1:9000;|server staging-app:9000;|g' /etc/nginx/sites-available/default || echo "Fix 127.0.0.1 failed"
docker compose exec -T staging-nginx sed -i 's|server localhost:9000;|server staging-app:9000;|g' /etc/nginx/sites-available/default || echo "Fix localhost failed"
echo ""
echo "=== Verify fix ==="
docker compose exec -T staging-nginx grep -A 3 "upstream php-upstream" /etc/nginx/sites-available/default 2>&1 || echo "No upstream found"
args:
executable: /bin/bash
register: fix_upstream
ignore_errors: yes
failed_when: false
- name: Display fix result
debug:
msg: "{{ fix_upstream.stdout_lines }}"
- name: Reload nginx
shell: |
cd ~/deployment/stacks/staging
docker compose exec -T staging-nginx nginx -t && docker compose restart staging-nginx || echo "Reload failed"
args:
executable: /bin/bash
register: reload_nginx
ignore_errors: yes
failed_when: false
- name: Display reload result
debug:
msg: "{{ reload_nginx.stdout_lines }}"

View File

@@ -1,52 +0,0 @@
---
- name: Quick Fix Staging 502 Bad Gateway
hosts: production
gather_facts: yes
become: no
tasks:
- name: Fix php-upstream in sites-available/default
shell: |
cd ~/deployment/stacks/staging
echo "=== Fixing nginx upstream configuration ==="
docker compose exec -T staging-nginx sed -i '/upstream php-upstream {/,/}/s|server 127.0.0.1:9000;|server staging-app:9000;|g' /etc/nginx/sites-available/default || echo "Fix 127.0.0.1 failed"
docker compose exec -T staging-nginx sed -i '/upstream php-upstream {/,/}/s|server localhost:9000;|server staging-app:9000;|g' /etc/nginx/sites-available/default || echo "Fix localhost failed"
echo "=== Verifying fix ==="
docker compose exec -T staging-nginx grep -A 3 "upstream php-upstream" /etc/nginx/sites-available/default
args:
executable: /bin/bash
register: fix_result
ignore_errors: yes
failed_when: false
- name: Display fix result
debug:
msg: "{{ fix_result.stdout_lines }}"
- name: Reload nginx
shell: |
cd ~/deployment/stacks/staging
docker compose exec -T staging-nginx nginx -t && docker compose restart staging-nginx
args:
executable: /bin/bash
register: reload_result
ignore_errors: yes
failed_when: false
- name: Display reload result
debug:
msg: "{{ reload_result.stdout_lines }}"
- name: Test if fix worked
shell: |
sleep 3
curl -H "User-Agent: Mozilla/5.0" -s -o /dev/null -w "%{http_code}" https://staging.michaelschiemer.de/ || echo "502"
args:
executable: /bin/bash
register: test_result
ignore_errors: yes
failed_when: false
- name: Display test result
debug:
msg: "HTTP Status: {{ test_result.stdout }} (200 = OK, 502 = Still broken)"

View File

@@ -1,88 +0,0 @@
---
- name: Fix and Verify Staging 502 - Complete Fix
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check current upstream configuration
shell: |
cd ~/deployment/stacks/staging
echo "=== Current upstream config in sites-available/default ==="
docker compose exec -T staging-nginx grep -A 3 "upstream php-upstream" /etc/nginx/sites-available/default 2>&1 || echo "Could not read config"
args:
executable: /bin/bash
register: current_config
ignore_errors: yes
failed_when: false
- name: Display current config
debug:
msg: "{{ current_config.stdout_lines }}"
- name: Fix upstream configuration (multiple methods)
shell: |
cd ~/deployment/stacks/staging
echo "=== Fixing nginx upstream configuration ==="
# Method 1: Fix in upstream block
docker compose exec -T staging-nginx sed -i '/upstream php-upstream {/,/}/s|server 127.0.0.1:9000;|server staging-app:9000;|g' /etc/nginx/sites-available/default || echo "Method 1 failed"
docker compose exec -T staging-nginx sed -i '/upstream php-upstream {/,/}/s|server localhost:9000;|server staging-app:9000;|g' /etc/nginx/sites-available/default || echo "Method 2 failed"
# Method 2: Fix any fastcgi_pass
docker compose exec -T staging-nginx sed -i 's|fastcgi_pass 127.0.0.1:9000;|fastcgi_pass php-upstream;|g' /etc/nginx/sites-available/default || echo "Method 3 failed"
docker compose exec -T staging-nginx sed -i 's|fastcgi_pass localhost:9000;|fastcgi_pass php-upstream;|g' /etc/nginx/sites-available/default || echo "Method 4 failed"
# Method 3: Replace entire upstream block if it still has wrong value
docker compose exec -T staging-nginx sh -c "grep -q 'server 127.0.0.1:9000' /etc/nginx/sites-available/default && sed -i '/upstream php-upstream {/,/}/c\upstream php-upstream {\n server staging-app:9000;\n}' /etc/nginx/sites-available/default || echo 'No 127.0.0.1 found'" || echo "Method 5 failed"
echo "=== Verification ==="
docker compose exec -T staging-nginx grep -A 3 "upstream php-upstream" /etc/nginx/sites-available/default
args:
executable: /bin/bash
register: fix_result
ignore_errors: yes
failed_when: false
- name: Display fix result
debug:
msg: "{{ fix_result.stdout_lines }}"
- name: Test nginx configuration
shell: |
cd ~/deployment/stacks/staging
docker compose exec -T staging-nginx nginx -t 2>&1
args:
executable: /bin/bash
register: nginx_test
ignore_errors: yes
failed_when: false
- name: Display nginx test result
debug:
msg: "{{ nginx_test.stdout_lines }}"
- name: Restart nginx
shell: |
cd ~/deployment/stacks/staging
docker compose restart staging-nginx
sleep 3
args:
executable: /bin/bash
register: restart_result
ignore_errors: yes
failed_when: false
- name: Display restart result
debug:
msg: "{{ restart_result.stdout_lines }}"
- name: Test connection
shell: |
sleep 2
curl -H "User-Agent: Mozilla/5.0" -s -o /dev/null -w "HTTP Status: %{http_code}" https://staging.michaelschiemer.de/ || echo "502"
args:
executable: /bin/bash
register: test_result
ignore_errors: yes
failed_when: false
- name: Display test result
debug:
msg: "Final HTTP Status: {{ test_result.stdout }} (200/404 = OK, 502 = Still broken)"

View File

@@ -1,71 +0,0 @@
---
- name: Complete Fix for Staging (502 + Git)
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check if index.php exists in staging-app
shell: |
cd ~/deployment/stacks/staging && docker compose exec -T staging-app sh -c "ls -la /var/www/html/public/index.php 2>&1" || echo "index.php NOT FOUND"
args:
executable: /bin/bash
register: index_php_app
changed_when: false
ignore_errors: yes
- name: Check if index.php exists in staging-nginx
shell: |
cd ~/deployment/stacks/staging && docker compose exec -T staging-nginx sh -c "ls -la /var/www/html/public/index.php 2>&1" || echo "index.php NOT FOUND"
args:
executable: /bin/bash
register: index_php_nginx
changed_when: false
ignore_errors: yes
- name: Check PHP-FPM listen configuration
shell: |
cd ~/deployment/stacks/staging && docker compose exec -T staging-app sh -c "cat /usr/local/etc/php-fpm.d/www.conf | grep -E '(^listen|^listen.allowed_clients|^listen.owner|^listen.group|^user|^group)' | head -20" || echo "Could not read config"
args:
executable: /bin/bash
register: php_fpm_full_config
changed_when: false
ignore_errors: yes
- name: Test actual HTTP request to staging-app
shell: |
cd ~/deployment/stacks/staging && docker compose exec -T staging-nginx sh -c "curl -v http://staging-app:9000/index.php 2>&1 | head -30" || echo "HTTP test failed"
args:
executable: /bin/bash
register: http_test
changed_when: false
ignore_errors: yes
- name: Fix Git ownership permanently in staging-app entrypoint
shell: |
cd ~/deployment/stacks/staging && docker compose exec -T staging-app sh -c "cd /var/www/html && git config --global --add safe.directory /var/www/html && git config --global --get-all safe.directory" || echo "Git config failed"
args:
executable: /bin/bash
register: git_config_check
changed_when: false
ignore_errors: yes
- name: Display index.php check in app
debug:
msg: "{{ index_php_app.stdout_lines }}"
- name: Display index.php check in nginx
debug:
msg: "{{ index_php_nginx.stdout_lines }}"
- name: Display PHP-FPM config
debug:
msg: "{{ php_fpm_full_config.stdout_lines }}"
- name: Display HTTP test
debug:
msg: "{{ http_test.stdout_lines }}"
- name: Display git config
debug:
msg: "{{ git_config_check.stdout_lines }}"

View File

@@ -1,138 +0,0 @@
---
- name: Fix Staging docker-compose.staging.yml with Redis Secrets
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check current docker-compose.staging.yml on server
shell: |
cd ~/deployment/stacks/staging
echo "=== Current staging-app environment (REDIS-related) ==="
grep -A 50 "staging-app:" docker-compose.staging.yml | grep -A 30 "environment:" | grep -E "(REDIS_|CACHE_|SESSION_)" || echo "No Redis env vars found"
echo ""
echo "=== Current secrets section for staging-app ==="
grep -A 10 "staging-app:" docker-compose.staging.yml | grep -A 15 "secrets:" || echo "Secrets section not found"
echo ""
echo "=== Secrets definitions at bottom ==="
tail -30 docker-compose.staging.yml | grep -A 10 "secrets:" || echo "Secrets definitions not found"
register: current_config
ignore_errors: yes
- name: Display current configuration
debug:
msg: "{{ current_config.stdout_lines }}"
- name: Get repository root path
shell: |
cd "{{ playbook_dir }}/../../.."
pwd
register: repo_root
changed_when: false
delegate_to: localhost
become: no
- name: Display repository root
debug:
msg: "Repository root: {{ repo_root.stdout }}"
- name: Check if docker-compose.staging.yml exists in repository
stat:
path: "{{ repo_root.stdout }}/docker-compose.staging.yml"
register: compose_file_stat
delegate_to: localhost
become: no
- name: Read docker-compose.staging.yml from repository
slurp:
src: "{{ repo_root.stdout }}/docker-compose.staging.yml"
register: compose_file_content
when: compose_file_stat.stat.exists
delegate_to: localhost
become: no
- name: Write docker-compose.staging.yml to server
copy:
content: "{{ compose_file_content.content | b64decode }}"
dest: "~/deployment/stacks/staging/docker-compose.staging.yml"
mode: '0644'
when: compose_file_stat.stat.exists
- name: Fail if docker-compose.staging.yml not found
fail:
msg: "Could not find docker-compose.staging.yml at {{ repo_root.stdout }}/docker-compose.staging.yml. Please ensure the file exists in the repository root."
when: not compose_file_stat.stat.exists
- name: Verify updated docker-compose.staging.yml on server
shell: |
cd ~/deployment/stacks/staging
echo "=== Updated staging-app environment (REDIS-related) ==="
grep -A 50 "staging-app:" docker-compose.staging.yml | grep -A 30 "environment:" | grep -E "(REDIS_|CACHE_|SESSION_|_FILE)" || echo "No Redis env vars found"
echo ""
echo "=== Updated secrets section for staging-app ==="
grep -A 10 "staging-app:" docker-compose.staging.yml | grep -A 15 "secrets:" || echo "Secrets section not found"
echo ""
echo "=== Secrets definitions at bottom ==="
tail -30 docker-compose.staging.yml | grep -A 10 "redis_password:" || echo "Secrets definitions not found"
register: updated_config
ignore_errors: yes
- name: Display updated configuration
debug:
msg: "{{ updated_config.stdout_lines }}"
- name: Restart staging containers to apply changes
shell: |
cd ~/deployment/stacks/staging
docker compose -f docker-compose.base.yml -f docker-compose.staging.yml up -d --force-recreate
register: restart_result
ignore_errors: yes
- name: Display restart result
debug:
msg: "{{ restart_result.stdout_lines }}"
- name: Wait for containers to start
pause:
seconds: 10
- name: Check container status after fix
shell: |
cd ~/deployment/stacks/staging
docker compose -f docker-compose.base.yml -f docker-compose.staging.yml ps
register: container_status
ignore_errors: yes
- name: Display container status
debug:
msg: "{{ container_status.stdout_lines }}"
- name: Verify REDIS_PASSWORD_FILE in container
shell: |
echo "=== Checking REDIS_PASSWORD_FILE in staging-app container ==="
docker exec staging-app env | grep REDIS_PASSWORD || echo "REDIS_PASSWORD variables not found"
docker exec staging-app env | grep "_FILE" | grep REDIS || echo "REDIS_PASSWORD_FILE not found"
echo ""
echo "=== Checking /run/secrets/redis_password ==="
docker exec staging-app ls -la /run/secrets/redis_password 2>&1 || echo "Secret file not found"
register: container_check
ignore_errors: yes
- name: Display container verification
debug:
msg: "{{ container_check.stdout_lines }}"
- name: Summary
debug:
msg:
- "========================================"
- "FIX SUMMARY"
- "========================================"
- "1. Updated docker-compose.staging.yml on server"
- "2. Restarted staging containers"
- "3. Verified REDIS_PASSWORD_FILE configuration"
- ""
- "Next steps:"
- "- Check staging-app logs: docker logs staging-app"
- "- Test Redis connection from staging-app container"
- "- Verify no more NOAUTH errors in logs"

View File

@@ -1,83 +0,0 @@
---
- name: Fix Staging Issues (502 Error + Git Ownership)
hosts: production
gather_facts: yes
become: no
tasks:
- name: Get recent nginx error logs
shell: |
cd ~/deployment/stacks/staging && docker compose logs --tail=100 staging-nginx 2>&1
args:
executable: /bin/bash
register: nginx_all_logs
changed_when: false
- name: Test PHP-FPM connection with curl
shell: |
cd ~/deployment/stacks/staging && docker compose exec -T staging-nginx sh -c "curl -v http://staging-app:9000 2>&1 | head -20" || echo "Connection test completed"
args:
executable: /bin/bash
register: curl_test
changed_when: false
ignore_errors: yes
- name: Check PHP-FPM configuration
shell: |
cd ~/deployment/stacks/staging && docker compose exec -T staging-app sh -c "cat /usr/local/etc/php-fpm.d/www.conf | grep -E '(listen|listen.allowed_clients)' | head -10" || echo "Could not read PHP-FPM config"
args:
executable: /bin/bash
register: php_fpm_config
changed_when: false
ignore_errors: yes
- name: Fix Git ownership issue in staging-app
shell: |
cd ~/deployment/stacks/staging && docker compose exec -T staging-app sh -c "cd /var/www/html && git config --global --add safe.directory /var/www/html && echo 'Git safe.directory configured'" || echo "Git config failed"
args:
executable: /bin/bash
register: git_fix
changed_when: false
ignore_errors: yes
- name: Test if nginx can reach PHP-FPM
shell: |
cd ~/deployment/stacks/staging && docker compose exec -T staging-nginx sh -c "echo 'GET /index.php HTTP/1.0' | nc staging-app 9000 2>&1 | head -10" || docker compose exec -T staging-nginx sh -c "timeout 2 bash -c '</dev/tcp/staging-app/9000' && echo 'Port 9000 is reachable' || echo 'Port 9000 not reachable'" || echo "Could not test connection"
args:
executable: /bin/bash
register: port_test
changed_when: false
ignore_errors: yes
- name: Check if nginx can read public directory
shell: |
cd ~/deployment/stacks/staging && docker compose exec -T staging-nginx sh -c "ls -la /var/www/html/public/ | head -10" || echo "Could not list public directory"
args:
executable: /bin/bash
register: public_dir_check
changed_when: false
ignore_errors: yes
- name: Display all nginx logs
debug:
msg: "{{ nginx_all_logs.stdout_lines[-30:] }}"
- name: Display curl test
debug:
msg: "{{ curl_test.stdout_lines }}"
- name: Display PHP-FPM config
debug:
msg: "{{ php_fpm_config.stdout_lines }}"
- name: Display git fix result
debug:
msg: "{{ git_fix.stdout_lines }}"
- name: Display port test
debug:
msg: "{{ port_test.stdout_lines }}"
- name: Display public directory check
debug:
msg: "{{ public_dir_check.stdout_lines }}"

View File

@@ -1,82 +0,0 @@
---
- name: Fix Staging Secrets Permissions
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check secrets file permissions in staging-app container
shell: |
echo "=== Checking /run/secrets/redis_password permissions ==="
docker exec staging-app ls -la /run/secrets/redis_password 2>&1 || echo "File not found"
echo ""
echo "=== Checking /run/secrets directory permissions ==="
docker exec staging-app ls -la /run/secrets/ | head -10
echo ""
echo "=== Current user ==="
docker exec staging-app whoami
echo ""
echo "=== Testing file read access ==="
docker exec staging-app cat /run/secrets/redis_password 2>&1 | head -c 20 || echo "Cannot read file"
echo "..."
register: permissions_check
ignore_errors: yes
- name: Display permissions check
debug:
msg: "{{ permissions_check.stdout_lines }}"
- name: Try to fix permissions via entrypoint modification
shell: |
cd ~/deployment/stacks/staging
# Check if staging-app has an entrypoint that can be modified
grep -A 5 "staging-app:" docker-compose.staging.yml | grep -A 10 "entrypoint:" | head -5
register: entrypoint_check
ignore_errors: yes
- name: Display entrypoint check
debug:
msg: "{{ entrypoint_check.stdout_lines }}"
- name: Check if we can read secrets as root in container
shell: |
echo "=== Reading secret as root ==="
docker exec -u root staging-app cat /run/secrets/redis_password 2>&1 | head -c 20 || echo "Cannot read even as root"
echo "..."
echo ""
echo "=== Checking file owner ==="
docker exec -u root staging-app stat -c "%U:%G %a" /run/secrets/redis_password 2>&1 || echo "Cannot stat"
register: root_check
ignore_errors: yes
- name: Display root check
debug: "{{ root_check.stdout_lines }}"
debug:
msg: "{{ root_check.stdout_lines }}"
- name: Check container user configuration
shell: |
cd ~/deployment/stacks/staging
echo "=== staging-app user configuration ==="
grep -A 20 "staging-app:" docker-compose.staging.yml | grep -E "(user:|USER)" || echo "No user specified (defaults to www-data)"
register: user_config
ignore_errors: yes
- name: Display user configuration
debug:
msg: "{{ user_config.stdout_lines }}"
- name: Summary and Recommendations
debug:
msg:
- "========================================"
- "PERMISSIONS ISSUE ANALYSIS"
- "========================================"
- "The secret file exists but is not readable by the PHP process."
- ""
- "Possible solutions:"
- "1. Run PHP-FPM as root (NOT RECOMMENDED for security)"
- "2. Create a wrapper script that reads secrets as root and exports them"
- "3. Modify entrypoint to chmod/chown secrets (may not work on /run/secrets)"
- "4. Use environment variables instead of file-based secrets"
- "5. Modify docker-compose to use a different secrets mount path with proper permissions"

View File

@@ -0,0 +1,138 @@
---
# Fix Traefik ACME JSON Permissions
# Checks and corrects permissions for the acme.json file
- name: Fix Traefik ACME JSON Permissions
hosts: production
gather_facts: yes
become: no
tasks:
- name: Check if Traefik stack directory exists
ansible.builtin.stat:
path: "{{ traefik_stack_path | default('/home/deploy/deployment/stacks/traefik') }}"
register: traefik_stack_exists
- name: Fail if Traefik stack directory does not exist
ansible.builtin.fail:
msg: "Traefik stack directory not found at {{ traefik_stack_path | default('/home/deploy/deployment/stacks/traefik') }}"
when: not traefik_stack_exists.stat.exists
- name: Check if acme.json exists
ansible.builtin.stat:
path: "{{ traefik_stack_path | default('/home/deploy/deployment/stacks/traefik') }}/acme.json"
register: acme_json_exists
- name: Create acme.json if it doesn't exist
ansible.builtin.file:
path: "{{ traefik_stack_path | default('/home/deploy/deployment/stacks/traefik') }}/acme.json"
state: touch
mode: '0600'
owner: "{{ ansible_user | default('deploy') }}"
group: "{{ ansible_user | default('deploy') }}"
when: not acme_json_exists.stat.exists
- name: Get current acme.json permissions
ansible.builtin.stat:
path: "{{ traefik_stack_path | default('/home/deploy/deployment/stacks/traefik') }}/acme.json"
register: acme_json_stat
- name: Display current acme.json permissions
ansible.builtin.debug:
msg: |
================================================================================
Current acme.json permissions:
================================================================================
Path: {{ acme_json_stat.stat.path }}
Owner: {{ acme_json_stat.stat.pw_name }} (UID: {{ acme_json_stat.stat.uid }})
Group: {{ acme_json_stat.stat.gr_name }} (GID: {{ acme_json_stat.stat.gid }})
Mode: {{ acme_json_stat.stat.mode | string | regex_replace('^0o?', '') }}
Size: {{ acme_json_stat.stat.size }} bytes
================================================================================
- name: Fix acme.json permissions (chmod 600)
ansible.builtin.file:
path: "{{ traefik_stack_path | default('/home/deploy/deployment/stacks/traefik') }}/acme.json"
mode: '0600'
owner: "{{ ansible_user | default('deploy') }}"
group: "{{ ansible_user | default('deploy') }}"
register: acme_json_permissions_fixed
- name: Verify acme.json permissions after fix
ansible.builtin.stat:
path: "{{ traefik_stack_path | default('/home/deploy/deployment/stacks/traefik') }}/acme.json"
register: acme_json_stat_after
- name: Display fixed acme.json permissions
ansible.builtin.debug:
msg: |
================================================================================
Corrected acme.json permissions:
================================================================================
Path: {{ acme_json_stat_after.stat.path }}
Owner: {{ acme_json_stat_after.stat.pw_name }} (UID: {{ acme_json_stat_after.stat.uid }})
Group: {{ acme_json_stat_after.stat.gr_name }} (GID: {{ acme_json_stat_after.stat.gid }})
Mode: {{ acme_json_stat_after.stat.mode | string | regex_replace('^0o?', '') }}
Size: {{ acme_json_stat_after.stat.size }} bytes
================================================================================
✅ acme.json now has chmod 600 (only the owner can read/write)
================================================================================
- name: Check Traefik container can write to acme.json
ansible.builtin.shell: |
cd {{ traefik_stack_path | default('/home/deploy/deployment/stacks/traefik') }}
docker compose exec -T traefik sh -c "test -w /acme.json && echo 'WRITABLE' || echo 'NOT_WRITABLE'" 2>&1 || echo "CONTAINER_CHECK_FAILED"
register: acme_json_writable_check
changed_when: false
failed_when: false
- name: Display acme.json writable check
ansible.builtin.debug:
msg: |
================================================================================
Traefik container write access to acme.json:
================================================================================
{% if 'WRITABLE' in acme_json_writable_check.stdout %}
✅ Traefik container can write to acme.json
{% elif 'NOT_WRITABLE' in acme_json_writable_check.stdout %}
⚠️ Traefik container CANNOT write to acme.json
{% else %}
⚠️ Could not check container access: {{ acme_json_writable_check.stdout }}
{% endif %}
================================================================================
- name: Check Docker volume mount for acme.json
ansible.builtin.shell: |
docker inspect traefik --format '{{ '{{' }}json .Mounts{{ '}}' }}' 2>/dev/null | jq '.[] | select(.Destination=="/acme.json")' || echo "Could not check volume mount"
register: acme_json_mount
changed_when: false
failed_when: false
- name: Display acme.json volume mount
ansible.builtin.debug:
msg: |
================================================================================
Docker volume mount for acme.json:
================================================================================
{{ acme_json_mount.stdout }}
================================================================================
- name: Summary
ansible.builtin.debug:
msg: |
================================================================================
SUMMARY - acme.json permissions:
================================================================================
✅ acme.json permissions set to chmod 600
✅ Owner/group set to {{ ansible_user | default('deploy') }}
Important:
- acme.json must be writable by the Traefik container
- Ports 80/443 must point from the host to Traefik
- Traefik must run stably (no frequent restarts)
Next steps:
- Make sure Traefik is running stably
- Wait 5-10 minutes for the ACME challenge to complete
- Check the Traefik logs for ACME errors (see the commands after this playbook)
================================================================================
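
After fixing the permissions, certificate issuance can be confirmed from the Traefik stack directory. A small sketch (jq is assumed to be installed on the host; the acme.json layout shown is the standard Traefik v2 format with one top-level key per certificate resolver):

cd ~/deployment/stacks/traefik
docker compose logs --tail=200 traefik | grep -i acme
# Once issuance succeeded, the resolver section in acme.json lists the domains:
cat acme.json | jq '.letsencrypt.Certificates[].domain'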

View File

@@ -1,142 +0,0 @@
---
- name: Fix Traefik Configuration
hosts: production
gather_facts: no
become: no
tasks:
- name: Backup current traefik.yml
shell: |
cd ~/deployment/stacks/traefik
cp traefik.yml traefik.yml.backup.$(date +%Y%m%d_%H%M%S)
args:
executable: /bin/bash
- name: Create correct traefik.yml
copy:
content: |
# Static Configuration for Traefik
# Global Configuration
global:
checkNewVersion: true
sendAnonymousUsage: false
# API and Dashboard
# Note: insecure: false means API is only accessible via HTTPS (through Traefik itself)
# No port 8080 needed - dashboard accessible via HTTPS at traefik.michaelschiemer.de
api:
dashboard: true
insecure: false
# Dashboard accessible via HTTPS router (no separate HTTP listener needed)
# Entry Points
entryPoints:
web:
address: ":80"
# No global redirect - ACME challenges need HTTP access
# Redirects are handled per-router via middleware
websecure:
address: ":443"
http:
tls:
certResolver: letsencrypt
domains:
- main: michaelschiemer.de
sans:
- "*.michaelschiemer.de"
# Certificate Resolvers
certificatesResolvers:
letsencrypt:
acme:
email: kontakt@michaelschiemer.de
storage: /acme.json
caServer: https://acme-v02.api.letsencrypt.org/directory
# Use HTTP-01 challenge (requires port 80 accessible)
httpChallenge:
entryPoint: web
# Uncomment for DNS challenge (requires DNS provider)
# dnsChallenge:
# provider: cloudflare
# delayBeforeCheck: 30
# Providers
providers:
docker:
endpoint: "unix:///var/run/docker.sock"
exposedByDefault: false
# Network mode is 'host', so we don't specify a network here
# Traefik can reach containers directly via their IPs in host network mode
watch: true
file:
directory: /dynamic
watch: true
# Forwarded Headers Configuration
# This ensures Traefik correctly identifies the real client IP
# Important for VPN access where requests come from WireGuard interface
forwardedHeaders:
trustedIPs:
- "10.8.0.0/24" # WireGuard VPN network
- "127.0.0.1/32" # Localhost
- "172.17.0.0/16" # Docker bridge network
- "172.18.0.0/16" # Docker user-defined networks
insecure: false
# Logging
log:
level: INFO
filePath: /logs/traefik.log
format: json
# Access Logs
accessLog:
filePath: /logs/access.log
format: json
bufferingSize: 100
filters:
statusCodes:
- "400-499"
- "500-599"
# Metrics
metrics:
prometheus:
addEntryPointsLabels: true
addRoutersLabels: true
addServicesLabels: true
# Ping
ping:
entryPoint: web
dest: ~/deployment/stacks/traefik/traefik.yml
mode: '0644'
- name: Validate YAML syntax
command: python3 -c "import yaml; yaml.safe_load(open('traefik.yml')); print('YAML valid')"
args:
chdir: ~/deployment/stacks/traefik
changed_when: false
- name: Restart Traefik
command: docker compose up -d traefik
args:
chdir: ~/deployment/stacks/traefik
register: traefik_restart
- name: Wait for Traefik to start
pause:
seconds: 5
- name: Check Traefik status
command: docker compose ps traefik
args:
chdir: ~/deployment/stacks/traefik
register: traefik_status
- name: Display Traefik status
debug:
msg: "{{ traefik_status.stdout_lines }}"

View File

@@ -0,0 +1,18 @@
---
# Fix Web Container Permissions
# Wrapper Playbook for application role containers tasks (fix-web action)
- hosts: production
gather_facts: no
become: no
vars:
application_container_action: fix-web
application_container_stabilize_wait: 10
tasks:
- name: Include application containers tasks (fix-web)
ansible.builtin.include_role:
name: application
tasks_from: containers
tags:
- application
- containers
- web

View File

@@ -0,0 +1,350 @@
---
- name: Initial Server Setup - Debian 13 (Trixie)
hosts: production
become: yes
gather_facts: yes
vars:
# User configuration
deploy_user: "{{ ansible_user | default('deploy') }}"
deploy_user_groups: ['sudo'] # docker group added after Docker installation
# SSH configuration
ssh_key_only_auth: false # Set to true AFTER SSH keys are properly configured
ssh_disable_root_login: false # Set to true after deploy user is configured
# Firewall configuration
firewall_enable: false # Set to true after initial setup is complete
firewall_ports:
- { port: 22, proto: 'tcp', comment: 'SSH' }
- { port: 80, proto: 'tcp', comment: 'HTTP' }
- { port: 443, proto: 'tcp', comment: 'HTTPS' }
- { port: 51820, proto: 'udp', comment: 'WireGuard' }
# System packages
system_base_packages:
- curl
- wget
- git
- vim
- sudo
- ufw
- fail2ban
- rsync
tasks:
- name: Display system information
ansible.builtin.debug:
msg:
- "Distribution: {{ ansible_distribution }} {{ ansible_distribution_version }}"
- "Hostname: {{ ansible_hostname }}"
- "Deploy User: {{ deploy_user }}"
# ========================================
# 1. System Updates
# ========================================
- name: Check and wait for apt locks to be released
ansible.builtin.shell:
cmd: |
for lock in /var/lib/dpkg/lock /var/lib/apt/lists/lock /var/cache/apt/archives/lock; do
if [ -f "$lock" ]; then
echo "Waiting for lock: $lock"
count=0
while [ -f "$lock" ] && [ $count -lt 60 ]; do
sleep 1
count=$((count + 1))
done
if [ -f "$lock" ]; then
echo "Warning: Lock still exists after 60s: $lock"
else
echo "Lock released: $lock"
fi
fi
done
changed_when: false
failed_when: false
timeout: 70
- name: Update apt cache
ansible.builtin.shell:
cmd: timeout 300 apt-get update -qq
environment:
DEBIAN_FRONTEND: noninteractive
APT_LISTCHANGES_FRONTEND: none
register: apt_update_result
changed_when: apt_update_result.rc == 0
failed_when: apt_update_result.rc != 0
timeout: 300
- name: Display apt update result
ansible.builtin.debug:
msg: "apt update completed successfully"
when: apt_update_result.rc == 0
- name: Show packages to be upgraded
ansible.builtin.shell:
cmd: apt list --upgradable 2>/dev/null | tail -n +2 | wc -l
register: packages_to_upgrade
changed_when: false
failed_when: false
- name: Display upgrade information
ansible.builtin.debug:
msg: "Packages to upgrade: {{ packages_to_upgrade.stdout | default('0') | trim }}"
- name: Upgrade system packages
ansible.builtin.shell:
cmd: timeout 600 apt-get upgrade -y -qq && apt-get autoremove -y -qq
environment:
DEBIAN_FRONTEND: noninteractive
APT_LISTCHANGES_FRONTEND: none
register: apt_upgrade_result
changed_when: apt_upgrade_result.rc == 0
failed_when: apt_upgrade_result.rc != 0
timeout: 600
- name: Display apt upgrade result
ansible.builtin.debug:
msg: "apt upgrade completed: {{ 'Packages upgraded' if apt_upgrade_result.rc == 0 else 'Failed' }}"
when: apt_upgrade_result.rc is defined
# ========================================
# 2. Install Base Packages
# ========================================
- name: Install base packages
ansible.builtin.shell:
cmd: timeout 300 apt-get install -y -qq {{ system_base_packages | join(' ') }}
environment:
DEBIAN_FRONTEND: noninteractive
APT_LISTCHANGES_FRONTEND: none
register: apt_install_result
changed_when: apt_install_result.rc == 0
failed_when: apt_install_result.rc != 0
timeout: 300
- name: Display apt install result
ansible.builtin.debug:
msg: "apt install completed: {{ 'Packages installed/updated' if apt_install_result.rc == 0 else 'Failed' }}"
when: apt_install_result.rc is defined
# ========================================
# 3. Create Deploy User
# ========================================
- name: Check if deploy user exists
ansible.builtin.shell:
cmd: timeout 5 getent passwd {{ deploy_user }} >/dev/null 2>&1 && echo "exists" || echo "not_found"
register: deploy_user_check
changed_when: false
failed_when: false
timeout: 10
- name: Create deploy user
ansible.builtin.user:
name: "{{ deploy_user }}"
groups: "{{ deploy_user_groups }}"
append: yes
shell: /bin/bash
create_home: yes
when:
- "'not_found' in deploy_user_check.stdout"
- deploy_user != 'root'
- name: Ensure deploy user has sudo access
ansible.builtin.lineinfile:
path: /etc/sudoers.d/deploy
line: "{{ deploy_user }} ALL=(ALL) NOPASSWD: ALL"
create: yes
validate: 'visudo -cf %s'
mode: '0440'
when: deploy_user != 'root'
# ========================================
# 4. SSH Configuration
# ========================================
- name: Get deploy user home directory
ansible.builtin.getent:
database: passwd
key: "{{ deploy_user }}"
register: deploy_user_info
when: deploy_user != 'root'
ignore_errors: yes
- name: Set deploy user home directory (root)
ansible.builtin.set_fact:
deploy_user_home: "/root"
when: deploy_user == 'root'
- name: Set deploy user home directory (from getent)
ansible.builtin.set_fact:
deploy_user_home: "{{ deploy_user_info.ansible_facts.getent_passwd[deploy_user][4] }}"
when:
- deploy_user != 'root'
- deploy_user_info.ansible_facts.getent_passwd[deploy_user] is defined
- name: Set deploy user home directory (fallback)
ansible.builtin.set_fact:
deploy_user_home: "/home/{{ deploy_user }}"
when: deploy_user_home is not defined
- name: Ensure .ssh directory exists
ansible.builtin.file:
path: "{{ deploy_user_home }}/.ssh"
state: directory
owner: "{{ deploy_user }}"
group: "{{ deploy_user }}"
mode: '0700'
- name: Add SSH public key from control node
ansible.builtin.authorized_key:
user: "{{ deploy_user }}"
state: present
key: "{{ lookup('file', ansible_ssh_private_key_file | default('~/.ssh/production') + '.pub') }}"
when: ansible_ssh_private_key_file is defined
- name: Verify SSH key is configured before disabling password auth
ansible.builtin.stat:
path: "{{ deploy_user_home }}/.ssh/authorized_keys"
register: ssh_key_file
when: ssh_key_only_auth | bool
- name: Configure SSH key-only authentication
ansible.builtin.lineinfile:
path: /etc/ssh/sshd_config
regexp: "{{ item.regexp }}"
line: "{{ item.line }}"
backup: yes
loop:
- { regexp: '^#?PasswordAuthentication', line: 'PasswordAuthentication no' }
- { regexp: '^#?PubkeyAuthentication', line: 'PubkeyAuthentication yes' }
- { regexp: '^#?AuthorizedKeysFile', line: 'AuthorizedKeysFile .ssh/authorized_keys' }
when:
- ssh_key_only_auth | bool
- ssh_key_file.stat.exists | default(false)
notify: restart sshd
- name: Disable root login (optional)
ansible.builtin.lineinfile:
path: /etc/ssh/sshd_config
regexp: '^#?PermitRootLogin'
line: 'PermitRootLogin no'
backup: yes
when: ssh_disable_root_login | bool
notify: restart sshd
# ========================================
# 5. Firewall Configuration
# ========================================
# IMPORTANT: The firewall is configured only at the end so the SSH connection is not interrupted
- name: Check current UFW status
ansible.builtin.shell:
cmd: ufw status | head -1
register: ufw_current_status
changed_when: false
failed_when: false
when: firewall_enable | bool
- name: Display current firewall status
ansible.builtin.debug:
msg: "Current firewall status: {{ ufw_current_status.stdout | default('Unknown') }}"
when: firewall_enable | bool and ufw_current_status is defined
- name: Ensure SSH port is allowed before configuring firewall
ansible.builtin.command:
cmd: ufw allow 22/tcp comment 'SSH - Allow before enabling firewall'
when:
- firewall_enable | bool
- "'inactive' in (ufw_current_status.stdout | default(''))"
ignore_errors: yes
- name: Reset UFW to defaults (only if inactive)
ansible.builtin.command:
cmd: ufw --force reset
when:
- firewall_enable | bool
- "'inactive' in (ufw_current_status.stdout | default(''))"
changed_when: false
- name: Set UFW default policies
ansible.builtin.command:
cmd: "ufw default {{ item.policy }} {{ item.direction }}"
loop:
- { policy: 'deny', direction: 'incoming' }
- { policy: 'allow', direction: 'outgoing' }
when:
- firewall_enable | bool
- "'inactive' in (ufw_current_status.stdout | default(''))"
- name: Allow firewall ports (ensure SSH is first)
ansible.builtin.command:
cmd: "ufw allow {{ item.port }}/{{ item.proto }} comment '{{ item.comment }}'"
loop: "{{ firewall_ports }}"
when:
- firewall_enable | bool
- "'inactive' in (ufw_current_status.stdout | default(''))"
register: ufw_rules
changed_when: ufw_rules.rc == 0
- name: Enable UFW (only if inactive)
ansible.builtin.command:
cmd: ufw --force enable
when:
- firewall_enable | bool
- "'inactive' in (ufw_current_status.stdout | default(''))"
- name: Display UFW status
ansible.builtin.command:
cmd: ufw status verbose
register: ufw_status
changed_when: false
- name: Show UFW status
ansible.builtin.debug:
msg: "{{ ufw_status.stdout_lines }}"
# ========================================
# 6. Fail2ban Configuration
# ========================================
- name: Ensure fail2ban is enabled and started
ansible.builtin.systemd:
name: fail2ban
enabled: yes
state: started
when: "'fail2ban' in system_base_packages"
# ========================================
# 7. System Configuration
# ========================================
- name: Configure timezone
ansible.builtin.timezone:
name: Europe/Berlin
- name: Display setup summary
ansible.builtin.debug:
msg:
- "=========================================="
- "Initial Server Setup Complete"
- "=========================================="
- "Deploy User: {{ deploy_user }}"
- "SSH Key-only Auth: {{ ssh_key_only_auth }}"
- "Firewall: {{ 'Enabled' if firewall_enable else 'Disabled' }}"
- "Fail2ban: {{ 'Enabled' if 'fail2ban' in system_base_packages else 'Disabled' }}"
- "=========================================="
- "Next Steps:"
- "1. Test SSH connection: ssh {{ deploy_user }}@{{ ansible_host }}"
- "2. Install Docker: ansible-playbook playbooks/install-docker.yml"
- "3. Deploy Infrastructure: ansible-playbook playbooks/setup-infrastructure.yml"
- "=========================================="
handlers:
- name: restart sshd
ansible.builtin.systemd:
name: sshd
state: restarted
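
The hardening switches are intentionally off for the first run. Once key-based login for the deploy user has been verified, the same playbook can be re-run with them enabled. The playbook file name below is a placeholder; use whatever this playbook is actually saved as.

# 1. Verify key-based SSH access works (key path matches the playbook default)
ssh -i ~/.ssh/production deploy@<server-ip> 'echo ok'
# 2. Re-run with hardening enabled
ansible-playbook playbooks/initial-server-setup.yml \
  -e ssh_key_only_auth=true \
  -e ssh_disable_root_login=true \
  -e firewall_enable=true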

View File

@@ -1,108 +1,16 @@
----
-- name: Install Composer Dependencies in Application Container
-  hosts: "{{ deployment_hosts | default('production') }}"
-  gather_facts: no
-  become: no
-  vars:
-    # Application code directory (where docker-compose files are located)
-    application_code_dest: "/home/deploy/michaelschiemer/current"
-    application_compose_suffix: >-
-      {%- if deployment_environment == 'staging' -%}
-      staging.yml
-      {%- else -%}
-      production.yml
-      {%- endif -%}
-    # Deployment environment (staging or production)
-    deployment_environment: "{{ deployment_environment | default('production') }}"
-    # Service name (php for production, staging-app for staging)
-    php_service_name: >-
-      {%- if deployment_environment == 'staging' -%}
-      staging-app
-      {%- else -%}
-      php
-      {%- endif -%}
-  tasks:
-    - name: Check if composer.json exists
-      stat:
-        path: /home/deploy/michaelschiemer/current/composer.json
-      delegate_to: "{{ inventory_hostname }}"
-      register: composer_json_exists
-
-    - name: Fail if composer.json is missing
-      fail:
-        msg: "composer.json not found at /home/deploy/michaelschiemer/current/composer.json"
-      when: not composer_json_exists.stat.exists
-    - name: Check if container is running
-      shell: |
-        cd {{ application_code_dest }}
-        docker compose -f docker-compose.base.yml -f docker-compose.{{ application_compose_suffix }} ps {{ php_service_name }} --format json
-      register: container_status
-      changed_when: false
-      failed_when: false
-    - name: Display container status
-      debug:
-        msg: "Container status: {{ container_status.stdout }}"
-    - name: Fail if container is not running
-      fail:
-        msg: |
-          Container '{{ php_service_name }}' is not running!
-          The container must be started before installing composer dependencies.
-          This is typically done by the 'deploy-image.yml' playbook which should run before this.
-          To start the container manually:
-            cd {{ application_code_dest }}
-            docker compose -f docker-compose.base.yml -f docker-compose.{{ application_compose_suffix }} up -d {{ php_service_name }}
-          Note: The container requires environment variables (DB_USERNAME, DB_PASSWORD, etc.)
-          which should be set in a .env file or via docker-compose environment configuration.
-      when: container_status.rc != 0 or '"State":"running"' not in container_status.stdout
-    - name: Install composer dependencies in PHP container
-      shell: |
-        cd {{ application_code_dest }}
-        docker compose -f docker-compose.base.yml -f docker-compose.{{ application_compose_suffix }} exec -T {{ php_service_name }} composer install --no-dev --optimize-autoloader --no-interaction
-      register: composer_install
-      changed_when: true
-      failed_when: composer_install.rc != 0
-    - name: Display composer install output
-      debug:
-        msg: |
-          Composer Install Output:
-          stdout: {{ composer_install.stdout }}
-          stderr: {{ composer_install.stderr }}
-          rc: {{ composer_install.rc }}
-      when: composer_install.rc != 0
-    - name: Restart queue-worker and scheduler to pick up vendor directory (production only)
-      shell: |
-        cd {{ application_code_dest }}
-        docker compose -f docker-compose.base.yml -f docker-compose.{{ application_compose_suffix }} restart queue-worker scheduler
-      register: restart_workers
-      changed_when: true
-      failed_when: false
-      when: deployment_environment == 'production'
-    - name: Verify vendor/autoload.php exists
-      shell: |
-        cd {{ application_code_dest }}
-        docker compose -f docker-compose.base.yml -f docker-compose.{{ application_compose_suffix }} exec -T {{ php_service_name }} test -f /var/www/html/vendor/autoload.php && echo "EXISTS" || echo "MISSING"
-      register: autoload_check
-      changed_when: false
-    - name: Display autoload verification
-      debug:
-        msg: "vendor/autoload.php: {{ autoload_check.stdout.strip() }}"
-    - name: Fail if autoload.php is missing
-      fail:
-        msg: "vendor/autoload.php was not created after composer install"
-      when: "autoload_check.stdout.strip() != 'EXISTS'"
+---
+# Install Composer Dependencies in Application Container
+# Wrapper Playbook for application role composer tasks
+- hosts: "{{ deployment_hosts | default('production') }}"
+  gather_facts: no
+  become: no
+  vars:
+    deployment_environment: "{{ deployment_environment | default('production') }}"
+  tasks:
+    - name: Include application composer tasks
+      ansible.builtin.include_role:
+        name: application
+        tasks_from: composer
+      tags:
+        - application
+        - composer

View File

@@ -0,0 +1,92 @@
---
- name: Install Docker on Production Server
hosts: production
become: yes
gather_facts: yes
tasks:
- name: Install prerequisites
ansible.builtin.apt:
name:
- ca-certificates
- curl
state: present
update_cache: yes
- name: Create keyrings directory
ansible.builtin.file:
path: /etc/apt/keyrings
state: directory
mode: '0755'
- name: Detect distribution (Debian or Ubuntu)
ansible.builtin.set_fact:
docker_distribution: "{{ 'debian' if ansible_distribution == 'Debian' else 'ubuntu' }}"
changed_when: false
- name: Add Docker GPG key
ansible.builtin.shell:
cmd: |
curl -fsSL https://download.docker.com/linux/{{ docker_distribution }}/gpg -o /etc/apt/keyrings/docker.asc
chmod a+r /etc/apt/keyrings/docker.asc
creates: /etc/apt/keyrings/docker.asc
- name: Add Docker repository
ansible.builtin.shell:
cmd: |
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/{{ docker_distribution }} $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
creates: /etc/apt/sources.list.d/docker.list
- name: Update apt cache after adding Docker repo
ansible.builtin.apt:
update_cache: yes
- name: Install Docker packages
ansible.builtin.apt:
name:
- docker-ce
- docker-ce-cli
- containerd.io
- docker-buildx-plugin
- docker-compose-plugin
state: present
- name: Start and enable Docker service
ansible.builtin.systemd:
name: docker
state: started
enabled: yes
- name: Add deploy user to docker group
ansible.builtin.user:
name: "{{ ansible_user | default('deploy') }}"
groups: docker
append: yes
- name: Verify Docker installation
ansible.builtin.command: docker --version
register: docker_version
changed_when: false
- name: Display Docker version
ansible.builtin.debug:
msg: "Docker installed successfully: {{ docker_version.stdout }}"
- name: Verify Docker Compose installation
ansible.builtin.command: docker compose version
register: compose_version
changed_when: false
- name: Display Docker Compose version
ansible.builtin.debug:
msg: "Docker Compose installed successfully: {{ compose_version.stdout }}"
- name: Run Docker hello-world test
ansible.builtin.command: docker run --rm hello-world
register: docker_test
changed_when: false
- name: Display Docker test result
ansible.builtin.debug:
msg: "Docker is working correctly!"
when: "'Hello from Docker!' in docker_test.stdout"

View File

@@ -1,95 +0,0 @@
---
- name: Live Monitor Grafana Access - Watch Traefik Logs in Real-Time
hosts: production
gather_facts: no
become: no
tasks:
- name: Clear previous Grafana access attempts count
shell: |
echo "Starting live monitoring. Make a request to https://grafana.michaelschiemer.de now!"
echo "Waiting 10 seconds for you to make a request..."
sleep 10
args:
executable: /bin/bash
- name: Show recent Grafana access attempts
shell: |
cd ~/deployment/stacks/traefik
echo "=== Last 5 Grafana Access Attempts ==="
tail -100 logs/access.log | grep -i grafana | tail -5
args:
executable: /bin/bash
register: recent_access
ignore_errors: yes
failed_when: false
- name: Display recent access attempts
debug:
msg: "{{ recent_access.stdout_lines }}"
- name: Check current client IP pattern
shell: |
cd ~/deployment/stacks/traefik
echo "=== Client IPs in recent Grafana requests ==="
tail -50 logs/access.log | grep -i grafana | tail -10 | grep -oP '"ClientHost":"[^"]*"' | head -5
args:
executable: /bin/bash
register: client_ips
ignore_errors: yes
failed_when: false
- name: Display client IPs
debug:
msg: "{{ client_ips.stdout_lines }}"
- name: Extract and check client IPs
shell: |
cd ~/deployment/stacks/traefik
echo "=== Checking if client IPs are in VPN range (10.8.0.0/24) ==="
tail -20 logs/access.log | grep -i grafana | tail -3 | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//' | while read ip; do
if [[ "$ip" =~ ^10\.8\.0\.[0-9]+$ ]]; then
echo "$ip -> In VPN range (10.8.0.0/24): YES"
else
echo "$ip -> In VPN range (10.8.0.0/24): NO (this is the problem!)"
fi
done
args:
executable: /bin/bash
register: vpn_check
ignore_errors: yes
failed_when: false
- name: Display VPN range check
debug:
msg: "{{ vpn_check.stdout_lines }}"
- name: Show Traefik middleware errors
shell: |
cd ~/deployment/stacks/traefik
echo "=== Traefik Middleware Errors (if any) ==="
tail -50 logs/traefik.log | grep -iE "(grafana|ipallowlist|403|middleware)" | tail -10 || echo "No middleware errors found"
args:
executable: /bin/bash
register: middleware_errors
ignore_errors: yes
failed_when: false
- name: Display middleware errors
debug:
msg: "{{ middleware_errors.stdout_lines }}"
- name: Verify middleware configuration
shell: |
cd ~/deployment/stacks/traefik/dynamic
echo "=== Current grafana-vpn-only Middleware ==="
grep -A 6 "grafana-vpn-only:" middlewares.yml
args:
executable: /bin/bash
register: middleware_config
ignore_errors: yes
failed_when: false
- name: Display middleware configuration
debug:
msg: "{{ middleware_config.stdout_lines }}"

View File

@@ -1,75 +0,0 @@
---
- name: Monitor Grafana Client IP - Wait for Next Request
hosts: production
gather_facts: no
become: no
tasks:
- name: Instructions
debug:
msg:
- "=== LIVE MONITORING ==="
- "Bitte mache JETZT einen Zugriff auf https://grafana.michaelschiemer.de im Browser"
- "Ich warte 30 Sekunden und pr?fe dann die Logs..."
- ""
- name: Wait for access attempt
pause:
seconds: 30
- name: Check recent Grafana access attempts
shell: |
cd ~/deployment/stacks/traefik
echo "=== Last 10 Grafana Access Attempts ==="
tail -100 logs/access.log | grep -i grafana | tail -10
args:
executable: /bin/bash
register: recent_access
ignore_errors: yes
failed_when: false
- name: Extract client IPs
shell: |
cd ~/deployment/stacks/traefik
echo "=== Client IPs in recent requests ==="
tail -20 logs/access.log | grep -i grafana | tail -10 | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//' | sort -u
args:
executable: /bin/bash
register: client_ips
ignore_errors: yes
failed_when: false
- name: Display client IPs
debug:
msg: "{{ client_ips.stdout_lines }}"
- name: Check if IP is VPN IP
shell: |
cd ~/deployment/stacks/traefik
tail -10 logs/access.log | grep -i grafana | tail -5 | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//' | while read ip; do
if [[ "$ip" =~ ^10\.8\.0\.[0-9]+$ ]]; then
echo "? $ip -> VPN IP (10.8.0.0/24) - Traffic kommt ?ber VPN!"
else
echo "? $ip -> Public IP (nicht VPN) - Traffic kommt NICHT ?ber VPN"
fi
done
args:
executable: /bin/bash
register: vpn_check
ignore_errors: yes
failed_when: false
- name: Display VPN check
debug:
msg: "{{ vpn_check.stdout_lines }}"
- name: Recommendations
debug:
msg:
- ""
- "=== ERGEBNIS ==="
- "Wenn ClientHost: 10.8.0.7 (VPN-IP) ? Traffic kommt ?ber VPN ?"
- "Dann k?nnen wir die tempor?re IP-Erlaubnis entfernen!"
- ""
- "Wenn ClientHost: 89.246.96.244 (?ffentliche IP) ? Traffic kommt NICHT ?ber VPN ?"
- "Dann m?ssen wir VPN-Routing noch weiter fixen"

View File

@@ -1,80 +0,0 @@
---
- name: Monitor Grafana Access Live - Check Latest Request
hosts: production
gather_facts: no
become: no
tasks:
- name: Get timestamp of last log entry
shell: |
cd ~/deployment/stacks/traefik
tail -1 logs/access.log | grep -oP '"time":"[^"]*"'
args:
executable: /bin/bash
register: last_log_time
ignore_errors: yes
failed_when: false
- name: Display last log time
debug:
msg: "{{ last_log_time.stdout }}"
- name: Get last 30 Grafana access attempts
shell: |
cd ~/deployment/stacks/traefik
tail -1000 logs/access.log | grep -i grafana | tail -30
args:
executable: /bin/bash
register: grafana_logs
ignore_errors: yes
failed_when: false
- name: Extract client IPs from last 10 Grafana requests
shell: |
cd ~/deployment/stacks/traefik
tail -200 logs/access.log | grep -i grafana | tail -10 | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//' | sort -u
args:
executable: /bin/bash
register: client_ips
ignore_errors: yes
failed_when: false
- name: Display client IPs found
debug:
msg: "{{ client_ips.stdout_lines }}"
- name: Analyze last 5 Grafana requests
shell: |
cd ~/deployment/stacks/traefik
tail -100 logs/access.log | grep -i grafana | tail -5 | while IFS= read -r line; do
time=$(echo "$line" | grep -oP '"time":"[^"]*"' | sed 's/"time":"//;s/"//' | cut -d'T' -f2 | cut -d'+' -f1)
client=$(echo "$line" | grep -oP '"ClientHost":"[^"]*"' | sed 's/"ClientHost":"//;s/"//')
status=$(echo "$line" | grep -oP '"DownstreamStatus":[0-9]+' | sed 's/"DownstreamStatus"://')
if [[ "$client" =~ ^10\.8\.0\.[0-9]+$ ]]; then
echo "$time | ClientHost: $client | Status: $status ? VPN-IP"
elif [[ "$client" == "89.246.96.244" ]]; then
echo "$time | ClientHost: $client | Status: $status ? Public IP"
else
echo "$time | ClientHost: $client | Status: $status ? Unknown"
fi
done
args:
executable: /bin/bash
register: analysis
ignore_errors: yes
failed_when: false
- name: Display analysis
debug:
msg: "{{ analysis.stdout_lines }}"
- name: Recommendations
debug:
msg:
- ""
- "=== ERGEBNIS ==="
- "Wenn ClientHost: 10.8.0.7 ? Traffic kommt ?ber VPN ?"
- "? Dann k?nnen wir die tempor?re IP-Erlaubnis entfernen!"
- ""
- "Wenn ClientHost: 89.246.96.244 ? Traffic kommt NICHT ?ber VPN ?"
- "? Dann m?ssen wir weiter debuggen (Route, AllowedIPs, etc.)"

Some files were not shown because too many files have changed in this diff