diff --git a/.gitea/workflows/system-maintenance.yml b/.gitea/workflows/system-maintenance.yml
new file mode 100644
index 00000000..f4763ba2
--- /dev/null
+++ b/.gitea/workflows/system-maintenance.yml
@@ -0,0 +1,92 @@
+# Runs the Ansible system-maintenance playbook with the production inventory,
+# nightly on a schedule and on manual dispatch.
+name: System Maintenance
+
+on:
+  schedule:
+    # Daily at 02:30 UTC (≈ 03:30 CET / 04:30 CEST)
+    - cron: '30 2 * * *'
+  workflow_dispatch:
+
+env:
+  DEPLOYMENT_HOST: '94.16.110.151'
+
+jobs:
+  run-maintenance:
+    name: Run Ansible System Maintenance
+    runs-on: php-ci
+    environment:
+      name: production
+      url: https://michaelschiemer.de
+
+    steps:
+      - name: Checkout deployment repository
+        # Contexts and secrets are passed through env instead of being
+        # expanded into the script text, so their content is never parsed
+        # as shell code.
+        env:
+          REF_NAME: ${{ github.ref_name }}
+          REPO: ${{ github.repository }}
+          CI_TOKEN: ${{ secrets.CI_TOKEN }}
+        run: |
+          set -eu
+          REF_NAME="${REF_NAME:-main}"
+          REPO_HOST_PATH="git.michaelschiemer.de/${REPO}.git"
+
+          echo "📋 Cloning branch: $REF_NAME"
+
+          # git clone refuses to write into an existing non-empty directory,
+          # so drop any checkout left behind by an earlier run.
+          rm -rf /workspace/repo
+
+          if [ -n "${CI_TOKEN:-}" ]; then
+            git clone --depth 1 --branch "$REF_NAME" \
+              "https://${CI_TOKEN}@${REPO_HOST_PATH}" \
+              /workspace/repo
+            # Do not leave the token behind in .git/config.
+            git -C /workspace/repo remote set-url origin \
+              "https://${REPO_HOST_PATH}"
+          else
+            git clone --depth 1 --branch "$REF_NAME" \
+              "https://${REPO_HOST_PATH}" \
+              /workspace/repo || \
+            git clone --depth 1 \
+              "https://${REPO_HOST_PATH}" \
+              /workspace/repo
+          fi
+
+      - name: Prepare SSH access
+        env:
+          SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
+        run: |
+          set -eu
+          # Create the key with owner-only permissions from the start
+          # instead of tightening them after the content was written.
+          umask 077
+          mkdir -p ~/.ssh
+          printf '%s\n' "$SSH_PRIVATE_KEY" > ~/.ssh/production
+          chmod 600 ~/.ssh/production
+          # NOTE(review): ssh-keyscan trusts whatever key the host presents
+          # at run time; consider pinning the host key in a secret instead.
+          ssh-keyscan -H "$DEPLOYMENT_HOST" >> ~/.ssh/known_hosts
+
+      - name: Verify Ansible availability
+        run: ansible --version
+
+      - name: Run system maintenance playbook
+        run: |
+          cd /workspace/repo/deployment/ansible
+          ansible-playbook -i inventory/production.yml \
+            playbooks/system-maintenance.yml
+
+      - name: Cleanup SSH key
+        if: always()
+        run: rm -f ~/.ssh/production
+
+      - name: Report success
+        if: success()
+        run: echo "✅ System maintenance completed"
+
+      - name: Report failure
+        if: failure()
+        run: echo "❌ System maintenance failed – review Ansible logs"
diff --git a/deployment/ansible/README.md b/deployment/ansible/README.md
index 70154221..75c09c4d 100644 --- a/deployment/ansible/README.md +++ b/deployment/ansible/README.md @@ -12,6 +12,7 @@ deployment/ansible/ │ └── local.yml # Local testing inventory ├── playbooks/ │ ├── deploy-update.yml # Deploy application updates +│ ├── system-maintenance.yml # OS updates & maintenance │ ├── rollback.yml # Rollback deployments │ ├── setup-infrastructure.yml # Provision core stacks │ ├── setup-production-secrets.yml # Deploy secrets @@ -34,7 +35,7 @@ deployment/ansible/ ## Roles -Stack-spezifische Aufgaben liegen in `roles/` (z. B. `application`, `traefik`, `registry`). Playbooks wie `setup-infrastructure.yml` importieren diese Rollen direkt. Die Application-Rolle kann mit Variablen wie `application_sync_files=false` oder `application_compose_recreate="always"` konfiguriert werden (siehe `playbooks/deploy-update.yml` als Beispiel). +Stack-spezifische Aufgaben liegen in `roles/` (z. B. `application`, `traefik`, `registry`). Playbooks wie `setup-infrastructure.yml` importieren diese Rollen direkt. Die Application-Rolle kann mit Variablen wie `application_sync_files=false` oder `application_compose_recreate="always"` konfiguriert werden (siehe `playbooks/deploy-update.yml` als Beispiel). Die neue `system`-Rolle hält Betriebssystem-Pakete aktuell und konfiguriert optionale Unattended-Upgrades, bevor Docker-Stacks neu gestartet werden. ## Prerequisites @@ -149,6 +150,17 @@ ansible-playbook -i inventory/production.yml playbooks/add-wireguard-client.yml Siehe [playbooks/README-WIREGUARD.md](playbooks/README-WIREGUARD.md) für detaillierte Anleitung. +### System Maintenance ausführen + +Führt die `system`-Rolle aus, aktualisiert Paketquellen, führt OS-Upgrades durch und aktiviert optional Unattended-Upgrades. + +```bash +ansible-playbook -i inventory/production.yml \ + playbooks/system-maintenance.yml +``` + +Tipp: Mit `--check` lässt sich zunächst ein Dry-Run starten, um anstehende Updates zu prüfen. 
+ ## Ansible Vault Operations ### View Encrypted File @@ -207,6 +219,14 @@ ansible production -m ping ansible-inventory --list -y ``` +### System Maintenance Dry Run + +```bash +ansible-playbook -i inventory/production.yml \ + playbooks/system-maintenance.yml \ + --check +``` + ## Security Best Practices 1. **Never commit unencrypted secrets** @@ -296,6 +316,7 @@ These playbooks are automatically executed by Gitea Actions workflows: - **`.gitea/workflows/production-deploy.yml`** - Calls `deploy-update.yml` on push to main - **`.gitea/workflows/update-production-secrets.yml`** - Calls `setup-production-secrets.yml` on manual trigger +- **`.gitea/workflows/system-maintenance.yml`** - Führt `system-maintenance.yml` geplant oder manuell aus, um Pakete aktuell zu halten Vault password is stored as Gitea Actions secret: `ANSIBLE_VAULT_PASSWORD` @@ -314,6 +335,13 @@ All zentralen Variablen werden in `group_vars/production.yml` gepflegt und könn | `app_domain` | Produktions-Domain | `michaelschiemer.de` | | `health_check_url` | Health-Check Endpoint | `https://michaelschiemer.de/health` | | `max_rollback_versions` | Anzahl vorgehaltener Backups | `5` | +| `system_update_packages` | Aktiviert OS-Paketupdates via `system`-Rolle | `true` | +| `system_apt_upgrade` | Wert für `apt upgrade` (z. B. 
`dist`) | `dist` | +| `system_enable_unattended_upgrades` | Aktiviert `unattended-upgrades` | `true` | +| `system_enable_unattended_reboot` | Steuert automatische Reboots nach Updates | `false` | +| `system_unattended_reboot_time` | Reboot-Zeitfenster (wenn aktiviert) | `02:00` | +| `system_enable_unattended_timer` | Aktiviert Systemd-Timer für apt | `true` | +| `system_enable_docker_prune` | Führt nach Updates `docker system prune` aus | `false` | ## Backup Management diff --git a/deployment/ansible/inventory/production.yml b/deployment/ansible/inventory/production.yml index fce262d6..efed92f4 100644 --- a/deployment/ansible/inventory/production.yml +++ b/deployment/ansible/inventory/production.yml @@ -11,6 +11,7 @@ all: vars: # Note: Centralized variables are defined in group_vars/production.yml # Only override-specific variables should be here - + # Override system_* defaults here when maintenance windows differ + # Legacy compose_file reference (deprecated - stacks now use deployment/stacks/) compose_file: "{{ stacks_base_path }}/application/docker-compose.yml" diff --git a/deployment/ansible/roles/registry/defaults/main.yml b/deployment/ansible/roles/registry/defaults/main.yml index c8d253e3..7185efbd 100644 --- a/deployment/ansible/roles/registry/defaults/main.yml +++ b/deployment/ansible/roles/registry/defaults/main.yml @@ -4,4 +4,5 @@ registry_wait_timeout: "{{ wait_timeout | default(60) }}" registry_wait_interval: 5 registry_vault_file: "{{ role_path }}/../../secrets/production.vault.yml" registry_healthcheck_enabled: true -registry_healthcheck_url: "http://127.0.0.1:5000/v2/_catalog" +registry_healthcheck_url: "https://registry.michaelschiemer.de/v2/_catalog" +registry_healthcheck_validate_certs: false diff --git a/deployment/ansible/roles/registry/tasks/main.yml b/deployment/ansible/roles/registry/tasks/main.yml index da6d9350..b512d1db 100644 --- a/deployment/ansible/roles/registry/tasks/main.yml +++ b/deployment/ansible/roles/registry/tasks/main.yml @@ 
-98,6 +98,7 @@ password: "{{ registry_password }}" status_code: 200 timeout: 5 + validate_certs: "{{ registry_healthcheck_validate_certs | bool }}" register: registry_check ignore_errors: yes changed_when: false diff --git a/deployment/docs/README.md b/deployment/docs/README.md index 98afba00..2e63b02b 100644 --- a/deployment/docs/README.md +++ b/deployment/docs/README.md @@ -113,6 +113,7 @@ - **[gitea-runner/README.md](../gitea-runner/README.md)** - Gitea Runner Setup - **[.gitea/workflows/production-deploy.yml](../../.gitea/workflows/production-deploy.yml)** - Haupt-Deployment-Pipeline +- **[.gitea/workflows/system-maintenance.yml](../../.gitea/workflows/system-maintenance.yml)** - Geplante System-Wartung & Paketupdates --- diff --git a/deployment/docs/guides/deployment-commands.md b/deployment/docs/guides/deployment-commands.md index 342b6b26..38bca025 100644 --- a/deployment/docs/guides/deployment-commands.md +++ b/deployment/docs/guides/deployment-commands.md @@ -41,6 +41,14 @@ ansible-playbook -i inventory/production.yml \ playbooks/setup-infrastructure.yml ``` +### System Maintenance (regelmäßig) + +```bash +cd deployment/ansible +ansible-playbook -i inventory/production.yml \ + playbooks/system-maintenance.yml +``` + --- ## 📋 Alle verfügbaren Playbooks @@ -77,6 +85,7 @@ ansible-playbook -i inventory/production.yml \ # Alles ausführen ansible-playbook ... 
troubleshoot.yml --tags all ``` +- **`playbooks/system-maintenance.yml`** - Führt Paket-Updates, Unattended-Upgrades und optional Docker-Pruning aus ### VPN diff --git a/deployment/stacks/staging/docker-compose.yml b/deployment/stacks/staging/docker-compose.yml index 95c872c4..bf45e21b 100644 --- a/deployment/stacks/staging/docker-compose.yml +++ b/deployment/stacks/staging/docker-compose.yml @@ -181,7 +181,12 @@ services: # Fix nginx upstream configuration - sites-enabled/default overrides conf.d/default.conf if [ -f "/etc/nginx/sites-available/default" ]; then echo "🔧 [staging-nginx] Fixing PHP-FPM upstream configuration..." - sed -i "s|server 127.0.0.1:9000;|server staging-app:9000;|g" /etc/nginx/sites-available/default || true + sed -i 's|server 127.0.0.1:9000;|server staging-app:9000;|g' /etc/nginx/sites-available/default || true + sed -i 's|server localhost:9000;|server staging-app:9000;|g' /etc/nginx/sites-available/default || true + # Also check and fix upstream definition directly + if grep -q "server 127.0.0.1:9000" /etc/nginx/sites-available/default; then + sed -i '/upstream php-upstream {/,/}/s/server 127.0.0.1:9000;/server staging-app:9000;/' /etc/nginx/sites-available/default || true + fi fi # Start nginx only (no PHP-FPM, no Git clone - staging-app container handles that)