feat: Integrate Ansible playbooks into CI/CD workflows

- Add deploy-application-code.yml for Git-based code deployment
- Add install-composer-dependencies.yml for dependency installation
- Add deploy-image.yml for Docker image deployment
- Update build-image.yml to use Ansible playbooks
- Update manual-deploy.yml to use Ansible playbooks
- Add ANSIBLE_VAULT_PASSWORD secret handling
2025-11-07 18:14:11 +01:00
parent cf903f2582
commit 1963b10749
10 changed files with 636 additions and 590 deletions
--- a/.gitea/workflows/build-image.yml
+++ b/.gitea/workflows/build-image.yml
@@ -963,169 +963,57 @@ jobs:
          chmod 600 ~/.ssh/production
          ssh-keyscan -H ${{ env.DEPLOYMENT_HOST }} >> ~/.ssh/known_hosts

-      - name: Deploy to Staging Server
+      # Installs Ansible on the runner: the distro `ansible` package and pip
+      # via apt, then `ansible-core` and the `docker` pip package for the
+      # current user.
+      # NOTE(review): Ansible is installed twice (apt and pip --user); confirm
+      # which copy ends up first on PATH and whether both are needed.
+      - name: Install Ansible
        run: |
-          set -e
-          
-          DEPLOYMENT_HOST="${{ env.DEPLOYMENT_HOST }}"
-          REGISTRY_HOST="${{ env.REGISTRY }}"
-          IMAGE_NAME="${{ env.IMAGE_NAME }}"
-          
-          DEFAULT_IMAGE="${REGISTRY_HOST}/${IMAGE_NAME}:latest"
-          
-          # Always use latest image - if a build happened, it would have pushed to latest anyway
-          # Using latest ensures we always get the most recent image, whether it was just built or not
-          SELECTED_IMAGE="$DEFAULT_IMAGE"
+          sudo apt-get update
+          sudo apt-get install -y ansible python3-pip
+          pip3 install --user ansible-core docker

-          STACK_PATH_DISPLAY="~/deployment/stacks/staging"
-          
-          SELECTED_TAG="${SELECTED_IMAGE##*:}"
-          SELECTED_REPO="${SELECTED_IMAGE%:*}"
-          
-          if [ -z "$SELECTED_REPO" ] || [ "$SELECTED_REPO" = "$SELECTED_IMAGE" ]; then
-            FALLBACK_IMAGE="$DEFAULT_IMAGE"
+      # Writes the Ansible Vault password to /tmp/vault_pass so the playbook
+      # steps can pass it via --vault-password-file. The secret reaches the
+      # script through the environment instead of being templated into the
+      # script text, so quotes, `$` or backticks in the password cannot break
+      # or inject into the shell command.
+      - name: Create Ansible Vault password file
+        env:
+          ANSIBLE_VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT_PASSWORD }}
+        run: |
+          # Restrictive umask: a newly created file is never readable by other
+          # users, not even in the window between creation and chmod.
+          umask 077
+          if [ -n "${ANSIBLE_VAULT_PASSWORD}" ]; then
+            printf '%s\n' "${ANSIBLE_VAULT_PASSWORD}" > /tmp/vault_pass
+            chmod 600 /tmp/vault_pass
+            echo "✅ Vault password file created"
          else
-            FALLBACK_IMAGE="${SELECTED_REPO}:latest"
+            echo "⚠️  ANSIBLE_VAULT_PASSWORD secret not set, using empty password file"
+            touch /tmp/vault_pass
+            chmod 600 /tmp/vault_pass
          fi
-          
-          echo "🚀 Starting staging deployment..."
-          echo "   Image: ${SELECTED_IMAGE}"
-          echo "   Tag: ${SELECTED_TAG}"
-          echo "   Host: ${DEPLOYMENT_HOST}"
-          echo "   Stack: ${STACK_PATH_DISPLAY}"
-          
-          FULL_IMAGE_ARG=$(printf '%q' "$SELECTED_IMAGE")
-          FALLBACK_IMAGE_ARG=$(printf '%q' "$FALLBACK_IMAGE")
-          IMAGE_NAME_ARG=$(printf '%q' "$IMAGE_NAME")
-          REGISTRY_ARG=$(printf '%q' "$REGISTRY_HOST")
-          
-          ssh -i ~/.ssh/production \
-              -o StrictHostKeyChecking=no \
-              -o UserKnownHostsFile=/dev/null \
-              deploy@${DEPLOYMENT_HOST} "bash -s -- $FULL_IMAGE_ARG $FALLBACK_IMAGE_ARG $IMAGE_NAME_ARG $REGISTRY_ARG" <<'EOF'
-            set -e
-            
-            FULL_IMAGE="$1"
-            FALLBACK_IMAGE="$2"
-            IMAGE_NAME="$3"
-            REGISTRY="$4"
-            shift 4
-            
-            CURRENT_USER="$(whoami)"
-            USER_HOME="$(getent passwd "$CURRENT_USER" | cut -d: -f6 2>/dev/null)"
-            [ -z "$USER_HOME" ] && USER_HOME="$HOME"
-            [ -z "$USER_HOME" ] && USER_HOME="/home/$CURRENT_USER"
-            
-            STACK_TARGET="${USER_HOME}/deployment/stacks/staging"

-            # Ensure staging stack directory exists
-            mkdir -p "${STACK_TARGET}"
-            cd "${STACK_TARGET}"
-            
-            declare -a REGISTRY_TARGETS=()
-            if [ -n "${REGISTRY}" ]; then
-              REGISTRY_TARGETS+=("${REGISTRY}")
-            fi
-            for IMAGE_REF in "${FULL_IMAGE}" "${FALLBACK_IMAGE}"; do
-              if [ -n "${IMAGE_REF}" ]; then
-                HOST_PART="${IMAGE_REF%%/*}"
-                if [ -n "${HOST_PART}" ]; then
-                  if ! printf '%s\n' "${REGISTRY_TARGETS[@]}" | grep -qx "${HOST_PART}"; then
-                    REGISTRY_TARGETS+=("${HOST_PART}")
-                  fi
-                fi
-              fi
-            done
-            
-            for TARGET in "${REGISTRY_TARGETS[@]}"; do
-              [ -z "${TARGET}" ] && continue
-              echo "🔐 Logging in to Docker registry ${TARGET}..."
-              echo "${{ secrets.REGISTRY_PASSWORD }}" | docker login "${TARGET}" \
-                -u "${{ secrets.REGISTRY_USER }}" \
-                --password-stdin || echo "⚠️  Registry login failed for ${TARGET}, continuing..."
-            done
-            
-            DEPLOY_IMAGE="$FULL_IMAGE"
-            echo "📥 Pulling image ${DEPLOY_IMAGE}..."
-            if ! docker pull "${DEPLOY_IMAGE}"; then
-              if [ -n "${FALLBACK_IMAGE}" ] && [ "${DEPLOY_IMAGE}" != "${FALLBACK_IMAGE}" ]; then
-                echo "⚠️  Failed to pull ${DEPLOY_IMAGE}, attempting fallback ${FALLBACK_IMAGE}"
-                if docker pull "${FALLBACK_IMAGE}"; then
-                  DEPLOY_IMAGE="${FALLBACK_IMAGE}"
-                  echo "ℹ️ Using fallback image ${DEPLOY_IMAGE}"
-                else
-                  echo "❌ Failed to pull fallback image ${FALLBACK_IMAGE}"
-                  exit 1
-                fi
-              else
-                echo "❌ Failed to pull image ${DEPLOY_IMAGE}"
-                exit 1
-              fi
-            fi
-            
-            # Copy base and staging docker-compose files if they don't exist
-            if [ ! -f docker-compose.base.yml ]; then
-              echo "⚠️  docker-compose.base.yml not found, copying from repo..."
-              cp /workspace/repo/docker-compose.base.yml . || {
-                echo "❌ Failed to copy docker-compose.base.yml"
-                exit 1
-              }
-            fi
-            
-            if [ ! -f docker-compose.staging.yml ]; then
-              echo "⚠️  docker-compose.staging.yml not found, copying from repo..."
-              cp /workspace/repo/docker-compose.staging.yml . || {
-                echo "❌ Failed to copy docker-compose.staging.yml"
-                exit 1
-              }
-            fi
-            
-            # Update docker-compose.staging.yml with new image tag
-            echo "📝 Updating docker-compose.staging.yml with new image tag..."
-            sed -i "s|image:.*/${IMAGE_NAME}:.*|image: ${DEPLOY_IMAGE}|g" docker-compose.staging.yml
-            
-            echo "✅ Updated docker-compose.staging.yml:"
-            grep "image:" docker-compose.staging.yml | head -5
-            
-            # Ensure networks exist
-            echo "🔗 Ensuring Docker networks exist..."
-            docker network create traefik-public 2>/dev/null || true
-            docker network create staging-internal 2>/dev/null || true
-            
-            echo "🔄 Starting/updating services..."
-            # Use --pull missing instead of --pull always since we already pulled the specific image
-            docker compose -f docker-compose.base.yml -f docker-compose.staging.yml up -d --pull missing --force-recreate || {
-              echo "❌ Failed to start services"
-              exit 1
-            }
-            
-            echo "⏳ Waiting for services to start..."
-            sleep 15
-            
-            # Pull latest code from Git repository - always sync code when deploying
-            echo "🔄 Pulling latest code from Git repository in staging-app container..."
-            docker compose -f docker-compose.base.yml -f docker-compose.staging.yml exec -T staging-app bash -c "cd /var/www/html && git -c safe.directory=/var/www/html fetch origin staging && git -c safe.directory=/var/www/html reset --hard origin/staging && git -c safe.directory=/var/www/html clean -fd" || echo "⚠️  Git pull failed, container will sync on next restart"
-            
-            # Also trigger a restart to ensure entrypoint script runs
-            echo "🔄 Restarting staging-app to ensure all services are up-to-date..."
-            docker compose -f docker-compose.base.yml -f docker-compose.staging.yml restart staging-app || echo "⚠️  Failed to restart staging-app"
-            
-            # Fix nginx upstream configuration - critical fix for 502 errors
-            # sites-available/default uses 127.0.0.1:9000 but PHP-FPM runs in staging-app container
-            echo "🔧 Fixing nginx PHP-FPM upstream configuration (post-deploy fix)..."
-            sleep 5
-            docker compose -f docker-compose.base.yml -f docker-compose.staging.yml exec -T staging-nginx sed -i '/upstream php-upstream {/,/}/s|server 127.0.0.1:9000;|server staging-app:9000;|g' /etc/nginx/sites-available/default || echo "⚠️  Upstream fix (127.0.0.1) failed"
-            docker compose -f docker-compose.base.yml -f docker-compose.staging.yml exec -T staging-nginx sed -i '/upstream php-upstream {/,/}/s|server localhost:9000;|server staging-app:9000;|g' /etc/nginx/sites-available/default || echo "⚠️  Upstream fix (localhost) failed"
-            docker compose -f docker-compose.base.yml -f docker-compose.staging.yml exec -T staging-nginx nginx -t && docker compose -f docker-compose.base.yml -f docker-compose.staging.yml restart staging-nginx || echo "⚠️  Nginx config test or restart failed"
-            echo "✅ Nginx configuration fixed and reloaded"
-            
-            echo "⏳ Waiting for services to stabilize..."
-            sleep 10
-            echo "📊 Container status:"
-            docker compose -f docker-compose.base.yml -f docker-compose.staging.yml ps
-            
-            echo "✅ Staging deployment completed!"
-          EOF
+      # Runs the deploy-application-code playbook against the production
+      # inventory, selecting the staging environment and the staging branch.
+      - name: Deploy Application Code to Staging
+        run: |
+          cd /workspace/repo/deployment/ansible
+          ansible-playbook \
+            --inventory inventory/production.yml \
+            --vault-password-file /tmp/vault_pass \
+            --private-key ~/.ssh/production \
+            --extra-vars "deployment_environment=staging" \
+            --extra-vars "deployment_hosts=production" \
+            --extra-vars "git_branch=staging" \
+            playbooks/deploy-application-code.yml
+
+      # Runs the install-composer-dependencies playbook for the staging
+      # environment using the production inventory.
+      # NOTE(review): no deployment_hosts variable is passed here, unlike the
+      # "Deploy Application Code" and "Deploy Docker Image" steps; confirm the
+      # playbook's default host pattern is the intended target.
+      - name: Install Composer Dependencies
+        run: |
+          cd /workspace/repo/deployment/ansible
+          ansible-playbook \
+            --inventory inventory/production.yml \
+            --vault-password-file /tmp/vault_pass \
+            --private-key ~/.ssh/production \
+            --extra-vars "deployment_environment=staging" \
+            playbooks/install-composer-dependencies.yml
+
+      # Runs the deploy-image playbook for staging with the `latest` tag.
+      # Registry credentials are exposed to the script as environment
+      # variables rather than templated into the script text, so special
+      # characters in a secret cannot break or inject into the command line.
+      # NOTE(review): the values still end up in the ansible-playbook process
+      # arguments on the runner; consider having the playbook read them from
+      # the environment or from the vault instead.
+      - name: Deploy Docker Image to Staging
+        env:
+          REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
+          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
+        run: |
+          cd /workspace/repo/deployment/ansible
+          ansible-playbook -i inventory/production.yml \
+            playbooks/deploy-image.yml \
+            -e "deployment_environment=staging" \
+            -e "deployment_hosts=production" \
+            -e "image_tag=latest" \
+            -e "docker_registry=${{ env.REGISTRY }}" \
+            -e "docker_registry_username=${REGISTRY_USER}" \
+            -e "docker_registry_password=${REGISTRY_PASSWORD}" \
+            --vault-password-file /tmp/vault_pass \
+            --private-key ~/.ssh/production

      - name: Wait for deployment to stabilize
        run: sleep 30
@@ -1133,16 +1021,42 @@ jobs:
      - name: Health check
        id: health
        run: |
+          echo "🔍 Performing health checks..."
+          
+          # Basic health check: poll /health up to 10 times, 10 seconds apart.
+          # curl -f turns HTTP error statuses into a failure; -k skips TLS
+          # certificate verification.
+          BASIC_HEALTH_OK=false
          for i in {1..10}; do
-            if curl -f -k https://staging.michaelschiemer.de/health; then
-              echo "✅ Health check passed"
-              exit 0
+            if curl -f -k -s https://staging.michaelschiemer.de/health > /dev/null 2>&1; then
+              echo "✅ Basic health check passed"
+              BASIC_HEALTH_OK=true
+              break
            fi
            echo "⏳ Waiting for staging service... (attempt $i/10)"
            sleep 10
          done
-          echo "❌ Health check failed"
-          exit 1
+          
+          if [ "$BASIC_HEALTH_OK" != "true" ]; then
+            echo "❌ Basic health check failed"
+            exit 1
+          fi
+          
+          # Extended health check (if available). Informational only: an
+          # unhealthy or unparseable summary is reported but does not fail
+          # the step.
+          echo "🔍 Checking extended health status..."
+          HEALTH_SUMMARY=$(curl -f -k -s https://staging.michaelschiemer.de/admin/health/api/summary 2>/dev/null || echo "")
+          if [ -n "$HEALTH_SUMMARY" ]; then
+            # The pipeline's exit status is that of `cut`, so a non-matching
+            # grep must be detected through the empty result; `|| true` keeps
+            # a failed grep from aborting the step when pipefail is active.
+            # Whitespace around the colon is tolerated for pretty-printed JSON.
+            OVERALL_STATUS=$(echo "$HEALTH_SUMMARY" | grep -o '"overall_status"[[:space:]]*:[[:space:]]*"[^"]*"' | cut -d'"' -f4 || true)
+            OVERALL_STATUS="${OVERALL_STATUS:-unknown}"
+            echo "📊 Overall health status: $OVERALL_STATUS"
+            if [ "$OVERALL_STATUS" = "unhealthy" ]; then
+              echo "⚠️  Extended health check shows unhealthy status"
+              echo "   Health summary: $HEALTH_SUMMARY"
+            elif [ "$OVERALL_STATUS" = "unknown" ]; then
+              echo "⚠️  Could not determine overall status from health summary"
+            else
+              echo "✅ Extended health check passed"
+            fi
+          else
+            echo "ℹ️  Extended health check endpoint not available (this is OK)"
+          fi
+          
+          echo "✅ All health checks completed"

      - name: Notify deployment success
        if: success()
@@ -1204,14 +1118,27 @@ jobs:
          chmod 600 ~/.ssh/production
          ssh-keyscan -H ${{ env.DEPLOYMENT_HOST }} >> ~/.ssh/known_hosts

-      - name: Deploy to Production Server
+      # Installs Ansible on the runner: the distro `ansible` package and pip
+      # via apt, then `ansible-core` and the `docker` pip package for the
+      # current user.
+      # NOTE(review): Ansible is installed twice (apt and pip --user); confirm
+      # which copy ends up first on PATH and whether both are needed.
+      - name: Install Ansible
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ansible python3-pip
+          pip3 install --user ansible-core docker
+
+      # Writes the Ansible Vault password to /tmp/vault_pass so the playbook
+      # steps can pass it via --vault-password-file. The secret reaches the
+      # script through the environment instead of being templated into the
+      # script text, so quotes, `$` or backticks in the password cannot break
+      # or inject into the shell command.
+      - name: Create Ansible Vault password file
+        env:
+          ANSIBLE_VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT_PASSWORD }}
+        run: |
+          # Restrictive umask: a newly created file is never readable by other
+          # users, not even in the window between creation and chmod.
+          umask 077
+          if [ -n "${ANSIBLE_VAULT_PASSWORD}" ]; then
+            printf '%s\n' "${ANSIBLE_VAULT_PASSWORD}" > /tmp/vault_pass
+            chmod 600 /tmp/vault_pass
+            echo "✅ Vault password file created"
+          else
+            echo "⚠️  ANSIBLE_VAULT_PASSWORD secret not set, using empty password file"
+            touch /tmp/vault_pass
+            chmod 600 /tmp/vault_pass
+          fi
+
+      - name: Determine image tag
+        id: image_tag
        run: |
-          set -e
-          
-          DEPLOYMENT_HOST="${{ env.DEPLOYMENT_HOST }}"
-          REGISTRY="${{ env.REGISTRY }}"
-          IMAGE_NAME="${{ env.IMAGE_NAME }}"
-          
          # Get image tag from build job output with fallback
          IMAGE_TAG="${{ needs.build.outputs.image_tag }}"
          
@@ -1229,76 +1156,42 @@ jobs:
            fi
          fi
          
-          FULL_IMAGE="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}"
-          STACK_PATH="~/deployment/stacks/application"
-          
-          echo "🚀 Starting production deployment..."
-          echo "   Image: ${FULL_IMAGE}"
-          echo "   Tag: ${IMAGE_TAG}"
-          echo "   Host: ${DEPLOYMENT_HOST}"
-          echo "   Stack: ${STACK_PATH}"
+          echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_OUTPUT
+          echo "📦 Image Tag: ${IMAGE_TAG}"

-          echo "📋 Deploying docker-compose configuration files..."
-          cd /workspace/repo
-          scp -i ~/.ssh/production \
-              -o StrictHostKeyChecking=no \
-              -o UserKnownHostsFile=/dev/null \
-              docker-compose.base.yml docker-compose.production.yml \
-              deploy@${DEPLOYMENT_HOST}:${STACK_PATH}/ || {
-            echo "❌ Failed to deploy docker-compose files"
-            exit 1
-          }
-          echo "✅ Docker Compose files deployed successfully"
+      # Runs the deploy-application-code playbook against the production
+      # inventory, selecting the production environment and the main branch.
+      - name: Deploy Application Code to Production
+        run: |
+          cd /workspace/repo/deployment/ansible
+          ansible-playbook \
+            --inventory inventory/production.yml \
+            --vault-password-file /tmp/vault_pass \
+            --private-key ~/.ssh/production \
+            --extra-vars "deployment_environment=production" \
+            --extra-vars "deployment_hosts=production" \
+            --extra-vars "git_branch=main" \
+            playbooks/deploy-application-code.yml

-          ssh -i ~/.ssh/production \
-              -o StrictHostKeyChecking=no \
-              -o UserKnownHostsFile=/dev/null \
-              deploy@${DEPLOYMENT_HOST} <<EOF
-            set -e
-            
-            cd ${STACK_PATH}
-            
-            echo "🔐 Logging in to Docker registry..."
-            echo "${{ secrets.REGISTRY_PASSWORD }}" | docker login ${REGISTRY} \
-              -u "${{ secrets.REGISTRY_USER }}" \
-              --password-stdin || echo "⚠️  Registry login failed, continuing..."
-            
-            echo "📥 Pulling image ${FULL_IMAGE}..."
-            docker pull ${FULL_IMAGE} || {
-              echo "❌ Failed to pull image ${FULL_IMAGE}"
-              exit 1
-            }
-            
-            # Ensure docker-compose files exist (rsync deployment handles this)
-            if [ ! -f docker-compose.base.yml ] || [ ! -f docker-compose.production.yml ]; then
-              echo "❌ Docker Compose files not found in ${STACK_PATH}"
-              echo "   Expected files are deployed via rsync in deployment scripts"
-              exit 1
-            fi
-            echo "✅ Docker Compose files present in deployment directory"
-            
-            echo "📝 Updating docker-compose.production.yml with new image tag..."
-            sed -i "s|image:.*/${IMAGE_NAME}:.*|image: ${FULL_IMAGE}|g" docker-compose.production.yml
-            sed -i "s|image:.*/${IMAGE_NAME}@.*|image: ${FULL_IMAGE}|g" docker-compose.production.yml
-            
-            echo "✅ Updated docker-compose.production.yml:"
-            grep "image:" docker-compose.production.yml | head -5
-            
-            echo "🔄 Restarting services..."
-            # Use --pull missing instead of --pull always since we already pulled the specific image
-            docker compose -f docker-compose.base.yml -f docker-compose.production.yml up -d --pull missing --force-recreate || {
-              echo "❌ Failed to restart services"
-              exit 1
-            }
-            
-            echo "⏳ Waiting for services to start..."
-            sleep 10
-            
-            echo "📊 Container status:"
-            docker compose -f docker-compose.base.yml -f docker-compose.production.yml ps
-            
-            echo "✅ Production deployment completed!"
-          EOF
+      # Runs the install-composer-dependencies playbook for the production
+      # environment using the production inventory.
+      # NOTE(review): no deployment_hosts variable is passed here, unlike the
+      # "Deploy Application Code" and "Deploy Docker Image" steps; confirm the
+      # playbook's default host pattern is the intended target.
+      - name: Install Composer Dependencies
+        run: |
+          cd /workspace/repo/deployment/ansible
+          ansible-playbook \
+            --inventory inventory/production.yml \
+            --vault-password-file /tmp/vault_pass \
+            --private-key ~/.ssh/production \
+            --extra-vars "deployment_environment=production" \
+            playbooks/install-composer-dependencies.yml
+
+      # Runs the deploy-image playbook for production with the tag produced by
+      # the `image_tag` step. The tag and the registry credentials are exposed
+      # to the script as environment variables rather than templated into the
+      # script text, so special characters in a value cannot break or inject
+      # into the command line.
+      # NOTE(review): the values still end up in the ansible-playbook process
+      # arguments on the runner; consider having the playbook read them from
+      # the environment or from the vault instead.
+      - name: Deploy Docker Image to Production
+        env:
+          IMAGE_TAG: ${{ steps.image_tag.outputs.IMAGE_TAG }}
+          REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
+          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
+        run: |
+          cd /workspace/repo/deployment/ansible
+          ansible-playbook -i inventory/production.yml \
+            playbooks/deploy-image.yml \
+            -e "deployment_environment=production" \
+            -e "deployment_hosts=production" \
+            -e "image_tag=${IMAGE_TAG}" \
+            -e "docker_registry=${{ env.REGISTRY }}" \
+            -e "docker_registry_username=${REGISTRY_USER}" \
+            -e "docker_registry_password=${REGISTRY_PASSWORD}" \
+            --vault-password-file /tmp/vault_pass \
+            --private-key ~/.ssh/production

      - name: Wait for deployment to stabilize
        run: sleep 30
@@ -1306,16 +1199,42 @@ jobs:
      - name: Health check
        id: health
        run: |
+          echo "🔍 Performing health checks..."
+          
+          # Basic health check: poll /health up to 10 times, 10 seconds apart.
+          # curl -f turns HTTP error statuses into a failure; -k skips TLS
+          # certificate verification.
+          BASIC_HEALTH_OK=false
          for i in {1..10}; do
-            if curl -f -k https://michaelschiemer.de/health; then
-              echo "✅ Health check passed"
-              exit 0
+            if curl -f -k -s https://michaelschiemer.de/health > /dev/null 2>&1; then
+              echo "✅ Basic health check passed"
+              BASIC_HEALTH_OK=true
+              break
            fi
            echo "⏳ Waiting for production service... (attempt $i/10)"
            sleep 10
          done
-          echo "❌ Health check failed"
-          exit 1
+          
+          if [ "$BASIC_HEALTH_OK" != "true" ]; then
+            echo "❌ Basic health check failed"
+            exit 1
+          fi
+          
+          # Extended health check (if available). Informational only: an
+          # unhealthy or unparseable summary is reported but does not fail
+          # the step.
+          echo "🔍 Checking extended health status..."
+          HEALTH_SUMMARY=$(curl -f -k -s https://michaelschiemer.de/admin/health/api/summary 2>/dev/null || echo "")
+          if [ -n "$HEALTH_SUMMARY" ]; then
+            # The pipeline's exit status is that of `cut`, so a non-matching
+            # grep must be detected through the empty result; `|| true` keeps
+            # a failed grep from aborting the step when pipefail is active.
+            # Whitespace around the colon is tolerated for pretty-printed JSON.
+            OVERALL_STATUS=$(echo "$HEALTH_SUMMARY" | grep -o '"overall_status"[[:space:]]*:[[:space:]]*"[^"]*"' | cut -d'"' -f4 || true)
+            OVERALL_STATUS="${OVERALL_STATUS:-unknown}"
+            echo "📊 Overall health status: $OVERALL_STATUS"
+            if [ "$OVERALL_STATUS" = "unhealthy" ]; then
+              echo "⚠️  Extended health check shows unhealthy status"
+              echo "   Health summary: $HEALTH_SUMMARY"
+            elif [ "$OVERALL_STATUS" = "unknown" ]; then
+              echo "⚠️  Could not determine overall status from health summary"
+            else
+              echo "✅ Extended health check passed"
+            fi
+          else
+            echo "ℹ️  Extended health check endpoint not available (this is OK)"
+          fi
+          
+          echo "✅ All health checks completed"

      - name: Notify deployment success
        if: success()