---
# Find Ansible Automation Source
# Locates the source of the external Ansible automation that regularly
# restarts Traefik. Every task below only runs diagnostic commands and
# registers their output; the final "Summary" task prints the combined report.
- name: Find Ansible Automation Source
  hosts: production
  # Canonical YAML booleans (true/false) instead of the YAML 1.1 yes/no forms.
  gather_facts: true
  become: true

  tasks:
    # Lists every process whose `ps aux` line mentions ansible and registers
    # the result as `ansible_processes`.
    #
    # The task's own execution chain (this shell, the module interpreter and
    # the become/ssh wrappers above it) is excluded by PID. With default,
    # non-pipelined execution those command lines contain ".ansible/tmp" and
    # "AnsiballZ", so without the exclusion the check would always match itself
    # and the "No Ansible processes found" marker could never be emitted.
    # Module processes started by OTHER Ansible runs are intentionally kept.
    - name: Check for running Ansible processes
      ansible.builtin.shell: |
        # Walk up the parent chain starting at this shell and remember each PID.
        own_pids=""
        pid=$$
        while [ -n "$pid" ] && [ "$pid" -gt 1 ]; do
          own_pids="$own_pids $pid"
          pid=$(ps -o ppid= -p "$pid" 2>/dev/null | tr -d ' ')
        done
        # Column 2 of `ps aux` is the PID; drop rows that belong to our chain.
        found=$(
          ps aux \
            | grep -E "ansible|ansible-playbook|ansible-pull" \
            | grep -v grep \
            | awk -v own="$own_pids" '
                BEGIN { n = split(own, ids, " "); for (i = 1; i <= n; i++) skip[ids[i]] = 1 }
                !($2 in skip)
              '
        ) || true
        if [ -n "$found" ]; then
          printf '%s\n' "$found"
        else
          echo "No Ansible processes found"
        fi
      register: ansible_processes
      changed_when: false

    # Registers the `ps aux` rows that mention ansible-pull (minus the grep
    # itself) as `ansible_pull_processes`; prints a fixed marker line when the
    # filter chain produces no match. Always reported as unchanged.
    - name: Check for ansible-pull processes
      register: ansible_pull_processes
      changed_when: false
      ansible.builtin.shell: |
        if ! ps aux | grep ansible-pull | grep -v grep; then
          echo "No ansible-pull processes found"
        fi

    # Registers all systemd timer rows (active and inactive) whose text
    # contains "ansible", case-insensitively, as `ansible_timers`; prints a
    # fixed marker line when no row matches. Always reported as unchanged.
    - name: Check systemd timers for ansible-pull
      register: ansible_timers
      changed_when: false
      ansible.builtin.shell: |
        if ! systemctl list-timers --all --no-pager | grep -i ansible; then
          echo "No ansible timers found"
        fi

    # Scans the personal crontab of every account listed in /etc/passwd for
    # ansible-pull / ansible playbook entries and registers the hits as
    # `ansible_cronjobs`.
    #
    # The "No ansible-pull cronjobs found" marker is emitted explicitly when no
    # user had a matching entry. Hanging the fallback on the loop's exit status
    # does not work: a loop body ending in `|| true` always exits 0.
    - name: Check for ansible-pull cronjobs
      ansible.builtin.shell: |
        found=""
        for user in $(cut -f1 -d: /etc/passwd); do
          # Read each crontab once; users without a crontab yield empty output.
          entries=$(crontab -u "$user" -l 2>/dev/null | grep -E "ansible-pull|ansible.*playbook" || true)
          if [ -n "$entries" ]; then
            echo "=== User: $user ==="
            printf '%s\n' "$entries"
            found=yes
          fi
        done
        if [ -z "$found" ]; then
          echo "No ansible-pull cronjobs found"
        fi
      register: ansible_cronjobs
      changed_when: false

    # Recursively greps the system cron directories for ansible-pull / ansible
    # playbook references and registers the hits as `ansible_system_cron`.
    #
    # Matching lines are printed first, followed by a "=== Found in <dir> ==="
    # trailer per directory. The "No ansible in system cron" marker is printed
    # explicitly when no directory had a match, because a loop body ending in
    # `|| true` always exits 0 and can never trigger a `|| echo` fallback.
    - name: Check system-wide cron for ansible
      ansible.builtin.shell: |
        found=""
        for dir in /etc/cron.d /etc/cron.daily /etc/cron.hourly /etc/cron.weekly /etc/cron.monthly; do
          if [ -d "$dir" ]; then
            matches=$(grep -rE "ansible-pull|ansible.*playbook" "$dir" 2>/dev/null || true)
            if [ -n "$matches" ]; then
              printf '%s\n' "$matches"
              echo "=== Found in $dir ==="
              found=yes
            fi
          fi
        done
        if [ -z "$found" ]; then
          echo "No ansible in system cron"
        fi
      register: ansible_system_cron
      changed_when: false

    # Registers the last 50 journal lines of the past 24 hours that mention
    # ansible-ansible, ansible-playbook or ansible-pull (case-insensitive) as
    # `ansible_journal`.
    #
    # The output is captured first and the marker is printed when it is empty.
    # A `... | tail -50 || echo` fallback never fires, because the pipeline's
    # exit status is that of `tail`, which succeeds on empty input.
    - name: Check journalctl for ansible-ansible processes
      ansible.builtin.shell: |
        matches=$(journalctl --since "24 hours ago" --no-pager | grep -iE "ansible-ansible|ansible-playbook|ansible-pull" | tail -50)
        if [ -n "$matches" ]; then
          printf '%s\n' "$matches"
        else
          echo "No ansible processes in journalctl"
        fi
      register: ansible_journal
      changed_when: false

    # Searches /home for ansible-pull related files and registers up to 20
    # paths as `ansible_pull_configs`. A path matches when its name contains
    # "ansible-pull", OR when it is an "*ansible*.yml" file located below an
    # "ansible-pull" directory. The parentheses only spell out find's implicit
    # precedence (-a binds tighter than -o); the selection is unchanged.
    #
    # The output is captured and the marker printed when it is empty, because
    # a pipeline ending in `head` exits 0 and never triggers `|| echo`.
    - name: Check for ansible-pull configuration files
      ansible.builtin.shell: |
        matches=$(find /home -name "*ansible-pull*" -o \( -name "*ansible*.yml" -path "*/ansible-pull/*" \) 2>/dev/null | head -20)
        if [ -n "$matches" ]; then
          printf '%s\n' "$matches"
        else
          echo "No ansible-pull config files found"
        fi
      register: ansible_pull_configs
      changed_when: false

    # Registers the `ps aux` rows that look like a docker compose / docker
    # restart command targeting traefik (minus the grep itself) as
    # `docker_traefik_commands`; prints a fixed marker line when nothing
    # matches. Always reported as unchanged.
    - name: Check for running docker compose commands related to Traefik
      register: docker_traefik_commands
      changed_when: false
      ansible.builtin.shell: |
        if ! ps aux | grep -E "docker.*compose.*traefik|docker.*restart.*traefik" | grep -v grep; then
          echo "No docker compose traefik commands running"
        fi

    # Registers the last 20 kill/die events of the traefik container from the
    # past hour as `traefik_kill_events` (time, action, signal).
    #
    # - `--until 0s` ends the query at the current time. `docker events` takes
    #   timestamps or Go duration strings for --since/--until; the word "now"
    #   is neither, and the resulting error would be hidden by `2>/dev/null`
    #   and `failed_when: false`, leaving the result permanently empty.
    # - `event=kill` is queried in addition to `event=die` because only kill
    #   events carry the `signal` attribute that the format string prints.
    # - The output is captured and the marker printed when it is empty; a
    #   pipeline ending in `tail` exits 0 and never triggers `|| echo`.
    # - The {{ '{{' }} ... {{ '}}' }} sequences escape Go-template braces so
    #   that Ansible's Jinja2 pass leaves them for docker.
    - name: Check Docker events for Traefik kill events (last hour)
      ansible.builtin.shell: |
        events=$(docker events --since 1h --until 0s --filter container=traefik --filter event=die --filter event=kill --format "{{ '{{' }}.Time{{ '}}' }} {{ '{{' }}.Action{{ '}}' }} {{ '{{' }}.Actor.Attributes.signal{{ '}}' }}" 2>/dev/null | tail -20)
        if [ -n "$events" ]; then
          printf '%s\n' "$events"
        else
          echo "No Traefik die events in last hour"
        fi
      register: traefik_kill_events
      changed_when: false
      # Docker may be missing or unreachable; this diagnostic must not abort the play.
      failed_when: false

    # Registers the last 30 journal lines of the past 24 hours that look like
    # a docker compose / docker restart command targeting traefik
    # (case-insensitive) as `docker_traefik_journal`.
    #
    # The output is captured and the marker printed when it is empty, because
    # a pipeline ending in `tail` exits 0 and never triggers `|| echo`.
    - name: Check journalctl for docker compose traefik commands
      ansible.builtin.shell: |
        matches=$(journalctl --since "24 hours ago" --no-pager | grep -iE "docker.*compose.*traefik|docker.*restart.*traefik" | tail -30)
        if [ -n "$matches" ]; then
          printf '%s\n' "$matches"
        else
          echo "No docker compose traefik commands in journalctl"
        fi
      register: docker_traefik_journal
      changed_when: false

    # Lists up to 20 shell/YAML files below /home/deploy whose content
    # references an Ansible playbook run or a docker compose restart involving
    # traefik, and registers the paths as `cicd_scripts`.
    #
    # `-exec ... {} +` batches files into few grep invocations instead of one
    # process per file; with `grep -l` the printed paths are the same.
    # The output is captured and the marker printed when it is empty, because
    # a pipeline ending in `head` exits 0 and never triggers `|| echo`.
    - name: Check for CI/CD scripts that might run Ansible
      ansible.builtin.shell: |
        matches=$(find /home/deploy -type f \( -name "*.sh" -o -name "*.yml" -o -name "*.yaml" \) -exec grep -lE "ansible.*playbook.*traefik|docker.*compose.*traefik.*restart" {} + 2>/dev/null | head -20)
        if [ -n "$matches" ]; then
          printf '%s\n' "$matches"
        else
          echo "No CI/CD scripts found"
        fi
      register: cicd_scripts
      changed_when: false

    # Lists up to 10 Gitea/GitHub workflow files below /home/deploy whose
    # content references an Ansible playbook run or docker compose command
    # involving traefik, and registers the paths as `gitea_workflows`.
    #
    # - The -path alternatives are grouped in parentheses so that `-type f`
    #   applies to both; ungrouped, -o splits the expression and the second
    #   pattern is tested without the file-type restriction.
    # - `-print0 | xargs -0 -r` keeps paths containing whitespace intact and
    #   skips grep entirely when find returns nothing.
    # - The output is captured and the marker printed when it is empty; a
    #   pipeline ending in `head` exits 0 and never triggers `|| echo`.
    - name: Check for Gitea Workflows that run Ansible
      ansible.builtin.shell: |
        matches=$(find /home/deploy -type f \( -path "*/.gitea/workflows/*.yml" -o -path "*/.github/workflows/*.yml" \) -print0 2>/dev/null | xargs -0 -r grep -lE "ansible.*playbook.*traefik|docker.*compose.*traefik" 2>/dev/null | head -10)
        if [ -n "$matches" ]; then
          printf '%s\n' "$matches"
        else
          echo "No Gitea workflows found"
        fi
      register: gitea_workflows
      changed_when: false

    # Lists up to 10 files below /home/deploy whose name contains "monitor" or
    # "health" and whose content references a traefik restart or a docker
    # compose command involving traefik; registers the paths as
    # `monitoring_scripts`.
    #
    # - The -name alternatives are grouped in parentheses so that `-type f`
    #   applies to both; ungrouped, directories named "*health*" would also be
    #   passed to grep.
    # - `-print0 | xargs -0 -r` keeps paths containing whitespace intact and
    #   skips grep entirely when find returns nothing.
    # - The output is captured and the marker printed when it is empty; a
    #   pipeline ending in `head` exits 0 and never triggers `|| echo`.
    - name: Check for monitoring/healthcheck scripts
      ansible.builtin.shell: |
        matches=$(find /home/deploy -type f \( -name "*monitor*" -o -name "*health*" \) -print0 2>/dev/null | xargs -0 -r grep -lE "traefik.*restart|docker.*compose.*traefik" 2>/dev/null | head -10)
        if [ -n "$matches" ]; then
          printf '%s\n' "$matches"
        else
          echo "No monitoring scripts found"
        fi
      register: monitoring_scripts
      changed_when: false

    # Prints one consolidated report built from the registered results of the
    # preceding tasks: raw output per check, an analysis section that flags
    # each hit, and a recommendation section.
    #
    # The `*_found` flags computed at the top of the template treat a check as
    # a hit only when its output is non-empty AND does not contain that check's
    # "nothing found" marker. Empty output therefore never counts as a finding,
    # regardless of whether the producing task emitted its marker.
    # System-wide cron findings are included in the analysis and in the
    # "nothing found" decision, so the report cannot claim that no automation
    # exists while a system cron entry was listed.
    # The {% ... %} lines leave no blank lines in the output because Ansible
    # renders templates with Jinja2's trim_blocks enabled.
    - name: Summary
      ansible.builtin.debug:
        msg: |
          {% set processes_found = (ansible_processes.stdout | trim | length > 0)
             and 'No Ansible processes found' not in ansible_processes.stdout %}
          {% set pull_found = (ansible_pull_processes.stdout | trim | length > 0)
             and 'No ansible-pull processes found' not in ansible_pull_processes.stdout %}
          {% set timers_found = (ansible_timers.stdout | trim | length > 0)
             and 'No ansible timers found' not in ansible_timers.stdout %}
          {% set cronjobs_found = (ansible_cronjobs.stdout | trim | length > 0)
             and 'No ansible-pull cronjobs found' not in ansible_cronjobs.stdout %}
          {% set system_cron_found = (ansible_system_cron.stdout | trim | length > 0)
             and 'No ansible in system cron' not in ansible_system_cron.stdout %}
          ================================================================================
          ANSIBLE AUTOMATION SOURCE DIAGNOSE:
          ================================================================================

          Laufende Ansible-Prozesse:
          {{ ansible_processes.stdout }}

          Ansible-Pull Prozesse:
          {{ ansible_pull_processes.stdout }}

          Systemd Timers für Ansible:
          {{ ansible_timers.stdout }}

          Cronjobs für Ansible:
          {{ ansible_cronjobs.stdout }}

          System-Cron für Ansible:
          {{ ansible_system_cron.stdout }}

          Ansible-Prozesse in Journalctl (letzte 24h):
          {{ ansible_journal.stdout }}

          Ansible-Pull Konfigurationsdateien:
          {{ ansible_pull_configs.stdout }}

          Laufende Docker Compose Traefik-Befehle:
          {{ docker_traefik_commands.stdout }}

          Traefik Kill-Events (letzte Stunde):
          {{ traefik_kill_events.stdout }}

          Docker Compose Traefik-Befehle in Journalctl:
          {{ docker_traefik_journal.stdout }}

          CI/CD Scripts die Traefik restarten:
          {{ cicd_scripts.stdout }}

          Gitea Workflows die Traefik restarten:
          {{ gitea_workflows.stdout }}

          Monitoring-Scripts die Traefik restarten:
          {{ monitoring_scripts.stdout }}

          ================================================================================
          ANALYSE:
          ================================================================================

          {% if processes_found %}
          ⚠️  AKTIVE ANSIBLE-PROZESSE GEFUNDEN:
          {{ ansible_processes.stdout }}

          → Diese Prozesse könnten Traefik regelmäßig neu starten
          → Prüfe die Kommandozeile dieser Prozesse um das Playbook zu identifizieren
          {% endif %}

          {% if pull_found %}
          ❌ ANSIBLE-PULL LÄUFT:
          {{ ansible_pull_processes.stdout }}

          → ansible-pull führt regelmäßig Playbooks aus
          → Dies ist wahrscheinlich die Quelle der Traefik-Restarts
          {% endif %}

          {% if timers_found %}
          ❌ ANSIBLE TIMER GEFUNDEN:
          {{ ansible_timers.stdout }}

          → Ein Systemd-Timer führt regelmäßig Ansible aus
          → Deaktiviere mit: systemctl disable <timer-name>
          {% endif %}

          {% if cronjobs_found %}
          ❌ ANSIBLE CRONJOB GEFUNDEN:
          {{ ansible_cronjobs.stdout }}

          → Ein Cronjob führt regelmäßig Ansible aus
          → Entferne oder kommentiere den Cronjob-Eintrag
          {% endif %}

          {% if system_cron_found %}
          ❌ ANSIBLE IN SYSTEM-CRON GEFUNDEN:
          {{ ansible_system_cron.stdout }}

          → Ein systemweiter Cron-Eintrag führt regelmäßig Ansible aus
          → Entferne oder kommentiere den Eintrag in der gefundenen Datei
          {% endif %}

          {% if cicd_scripts.stdout and 'No CI/CD scripts found' not in cicd_scripts.stdout %}
          ⚠️  CI/CD SCRIPTS GEFUNDEN:
          {{ cicd_scripts.stdout }}

          → Diese Scripts könnten Traefik regelmäßig neu starten
          → Prüfe diese Dateien und entferne/kommentiere Traefik-Restart-Befehle
          {% endif %}

          {% if gitea_workflows.stdout and 'No Gitea workflows found' not in gitea_workflows.stdout %}
          ⚠️  GITEA WORKFLOWS GEFUNDEN:
          {{ gitea_workflows.stdout }}

          → Diese Workflows könnten Traefik regelmäßig neu starten
          → Prüfe diese Workflows und entferne/kommentiere Traefik-Restart-Schritte
          {% endif %}

          {% if monitoring_scripts.stdout and 'No monitoring scripts found' not in monitoring_scripts.stdout %}
          ⚠️  MONITORING SCRIPTS GEFUNDEN:
          {{ monitoring_scripts.stdout }}

          → Diese Scripts könnten Traefik regelmäßig neu starten
          → Prüfe diese Scripts und entferne/kommentiere Traefik-Restart-Befehle
          {% endif %}

          ================================================================================
          LÖSUNG:
          ================================================================================

          {% if not (processes_found or pull_found or timers_found or cronjobs_found or system_cron_found) %}
          ℹ️  Keine aktiven Ansible-Automatisierungen gefunden

          Mögliche Ursachen:
          1. Ansible-Prozesse laufen nur zeitweise (intermittierend)
          2. Externe CI/CD-Pipeline führt Ansible aus
          3. Manuelle Ansible-Aufrufe von außen

          Nächste Schritte:
          1. Beobachte Docker Events in Echtzeit: docker events --filter container=traefik
          2. Beobachte Ansible-Prozesse: watch -n 1 'ps aux | grep ansible'
          3. Prüfe ob externe CI/CD-Pipelines Ansible ausführen
          {% else %}

          SOFORTMASSNAHME:

          {% if pull_found %}
          1. ❌ Stoppe ansible-pull:
             pkill -f ansible-pull
          {% endif %}

          {% if timers_found %}
          2. ❌ Deaktiviere Ansible-Timer:
             systemctl stop <timer-name>
             systemctl disable <timer-name>
          {% endif %}

          {% if cronjobs_found %}
          3. ❌ Entferne Ansible-Cronjobs:
             crontab -u <user> -e
             (Kommentiere oder entferne die Ansible-Zeilen)
          {% endif %}

          {% if system_cron_found %}
          4. ❌ Entferne Ansible-Einträge aus dem System-Cron:
             (Kommentiere oder entferne die Ansible-Zeilen in den oben gelisteten Dateien unter /etc/cron.*)
          {% endif %}

          LANGZEITLÖSUNG:

          1. Prüfe gefundene Scripts/Workflows und entferne Traefik-Restart-Befehle
          2. Falls Healthchecks nötig sind, setze größere Intervalle (z.B. 5 Minuten statt 30 Sekunden)
          3. Restarte Traefik nur bei echten Fehlern, nicht präventiv
          {% endif %}

          ================================================================================