michaelschiemer/deployment/infrastructure/roles/monitoring/templates/system-monitor.sh.j2

#!/bin/bash
# System Resource Monitoring Script
# Custom PHP Framework - {{ environment | upper }}
# Generated by Ansible

# Strict mode: stop on a failing command, on use of an unset variable,
# and on a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Paths (the {{ '{{ ... }}' }} placeholders are filled in when the template is rendered)
CONFIG_FILE="{{ monitoring_config_dir }}/monitoring.conf"
ALERT_SCRIPT="{{ monitoring_scripts_dir }}/send-alert.sh"
LOG_DIR="{{ monitoring_logs_dir }}"
LOG_FILE="${LOG_DIR}/system-monitor.log"

# Read the monitoring configuration into this shell
. "${CONFIG_FILE}"

# Ensure the log directory is present before anything is logged
mkdir -p "${LOG_DIR}"

# Append one line to ${LOG_FILE}, prefixed with the current local time
# as "[YYYY-MM-DD HH:MM:SS]". All arguments are joined into a single
# message via "$*".
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >> "${LOG_FILE}"
}

# Function to check CPU usage
#
# Computes CPU usage as 100 minus the idle figure on the "Cpu(s)" summary
# line of `top -bn1`, logs it, and calls ${ALERT_SCRIPT} when the value
# exceeds the warning or critical threshold.
# Returns 1 (after logging an error) when the idle figure cannot be parsed,
# instead of reporting a bogus value.
check_cpu() {
    local cpu_usage

    # The pipeline runs in the C locale so the decimal separator is always
    # "." regardless of the host locale. `sed -n ... p` prints only when the
    # idle field was really found; a line passed through unchanged would make
    # awk compute 100 - 0 and trigger a false 100% alert.
    # printf "%d" drops the fractional part of the result.
    cpu_usage=$(
        export LC_ALL=C
        top -bn1 \
            | grep "Cpu(s)" \
            | sed -n 's/.*, *\([0-9][0-9.]*\)%* id.*/\1/p' \
            | awk 'NR == 1 { printf "%d", 100 - $1 }'
    ) || cpu_usage=""

    # Never feed a non-numeric value into the arithmetic comparisons below.
    if [[ ! "${cpu_usage}" =~ ^[0-9]+$ ]]; then
        log "ERROR: could not determine CPU usage from top output"
        return 1
    fi

    log "CPU Usage: ${cpu_usage}%"

    if (( cpu_usage > {{ monitoring_thresholds.cpu_usage_critical }} )); then
        "${ALERT_SCRIPT}" "CRITICAL" "CPU Usage Critical" "CPU usage is ${cpu_usage}% (Critical threshold: {{ monitoring_thresholds.cpu_usage_critical }}%)"
    elif (( cpu_usage > {{ monitoring_thresholds.cpu_usage_warning }} )); then
        "${ALERT_SCRIPT}" "WARNING" "CPU Usage High" "CPU usage is ${cpu_usage}% (Warning threshold: {{ monitoring_thresholds.cpu_usage_warning }}%)"
    fi
}

# Function to check memory usage
#
# Computes used/total memory (columns 3 and 2 of the "Mem:" row printed by
# `free`) as a rounded percentage, logs it, and calls ${ALERT_SCRIPT} when
# the value exceeds the warning or critical threshold.
# Returns 1 (after logging an error) when the value cannot be determined.
check_memory() {
    local mem_usage

    # LC_ALL=C keeps the row label as "Mem:" on hosts with a translated
    # locale; the $2 > 0 guard avoids a division by zero.
    mem_usage=$(LC_ALL=C free \
        | awk '/^Mem:/ && $2 > 0 { printf "%.0f", $3 / $2 * 100.0 }') || mem_usage=""

    # Never feed a non-numeric value into the arithmetic comparisons below.
    if [[ ! "${mem_usage}" =~ ^[0-9]+$ ]]; then
        log "ERROR: could not determine memory usage from free output"
        return 1
    fi

    log "Memory Usage: ${mem_usage}%"

    if (( mem_usage > {{ monitoring_thresholds.memory_usage_critical }} )); then
        "${ALERT_SCRIPT}" "CRITICAL" "Memory Usage Critical" "Memory usage is ${mem_usage}% (Critical threshold: {{ monitoring_thresholds.memory_usage_critical }}%)"
    elif (( mem_usage > {{ monitoring_thresholds.memory_usage_warning }} )); then
        "${ALERT_SCRIPT}" "WARNING" "Memory Usage High" "Memory usage is ${mem_usage}% (Warning threshold: {{ monitoring_thresholds.memory_usage_warning }}%)"
    fi
}

# Function to check disk usage
#
# Reads the usage percentage of the root filesystem ("/") from `df`, logs
# it, and calls ${ALERT_SCRIPT} when the value exceeds the warning or
# critical threshold.
# Returns 1 (after logging an error) when the value cannot be determined.
check_disk() {
    local disk_usage

    # -P selects the POSIX output format: one line per filesystem, so a long
    # device name cannot push the figures onto a separate line and shift the
    # columns. Line 2 is the data row; column 5 is the capacity percentage.
    disk_usage=$(df -P / \
        | awk 'NR == 2 { sub(/%$/, "", $5); print $5 }') || disk_usage=""

    # Never feed a non-numeric value into the arithmetic comparisons below.
    if [[ ! "${disk_usage}" =~ ^[0-9]+$ ]]; then
        log "ERROR: could not determine disk usage from df output"
        return 1
    fi

    log "Disk Usage: ${disk_usage}%"

    if (( disk_usage > {{ monitoring_thresholds.disk_usage_critical }} )); then
        "${ALERT_SCRIPT}" "CRITICAL" "Disk Usage Critical" "Disk usage is ${disk_usage}% (Critical threshold: {{ monitoring_thresholds.disk_usage_critical }}%)"
    elif (( disk_usage > {{ monitoring_thresholds.disk_usage_warning }} )); then
        "${ALERT_SCRIPT}" "WARNING" "Disk Usage High" "Disk usage is ${disk_usage}% (Warning threshold: {{ monitoring_thresholds.disk_usage_warning }}%)"
    fi
}

# Function to check load average
#
# Reads the 1-minute load average, logs it, and calls ${ALERT_SCRIPT} when
# the value exceeds the warning or critical threshold.
# Returns 1 (after logging an error) when the value cannot be determined.
check_load() {
    local load_avg=""

    # The first field of /proc/loadavg is the 1-minute load average, always
    # written with a "." decimal separator. Reading it directly avoids
    # parsing the locale-dependent text printed by `uptime`.
    if [[ -r /proc/loadavg ]]; then
        read -r load_avg _ < /proc/loadavg || true
    fi

    if [[ ! "${load_avg}" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
        log "ERROR: could not determine load average from /proc/loadavg"
        return 1
    fi

    log "Load Average: ${load_avg}"

    # Floating-point comparison is done in awk (already used by this script)
    # rather than bc, so a host without bc does not silently lose alerts.
    # "+ 0" forces a numeric comparison; awk exits 0 when the load is above
    # the limit.
    if awk -v load="${load_avg}" -v limit="{{ monitoring_thresholds.load_average_critical }}" 'BEGIN { exit !(load + 0 > limit + 0) }'; then
        "${ALERT_SCRIPT}" "CRITICAL" "Load Average Critical" "Load average is ${load_avg} (Critical threshold: {{ monitoring_thresholds.load_average_critical }})"
    elif awk -v load="${load_avg}" -v limit="{{ monitoring_thresholds.load_average_warning }}" 'BEGIN { exit !(load + 0 > limit + 0) }'; then
        "${ALERT_SCRIPT}" "WARNING" "Load Average High" "Load average is ${load_avg} (Warning threshold: {{ monitoring_thresholds.load_average_warning }})"
    fi
}

# Run every resource check once, in a fixed order (CPU, memory, disk,
# load), with a log line before the first and after the last check.
main() {
    local check

    log "Starting system monitoring check"

    for check in check_cpu check_memory check_disk check_load; do
        "${check}"
    done

    log "System monitoring check completed"
}

# Entry point: run main, forwarding the script's command-line arguments
# (main itself does not read them).
main "$@"