- Add comprehensive health check system with multiple endpoints - Add Prometheus metrics endpoint - Add production logging configurations (5 strategies) - Add complete deployment documentation suite: * QUICKSTART.md - 30-minute deployment guide * DEPLOYMENT_CHECKLIST.md - Printable verification checklist * DEPLOYMENT_WORKFLOW.md - Complete deployment lifecycle * PRODUCTION_DEPLOYMENT.md - Comprehensive technical reference * production-logging.md - Logging configuration guide * ANSIBLE_DEPLOYMENT.md - Infrastructure as Code automation * README.md - Navigation hub * DEPLOYMENT_SUMMARY.md - Executive summary - Add deployment scripts and automation - Add DEPLOYMENT_PLAN.md - Concrete plan for immediate deployment - Update README with production-ready features All production infrastructure is now complete and ready for deployment.
418 lines
16 KiB
PHP
418 lines
16 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
/**
|
|
* Queue Job Anomaly Detection - Usage Example
|
|
*
|
|
* Demonstrates ML-based anomaly detection for background queue jobs
|
|
* using the Job Anomaly Detection system.
|
|
*
|
|
* Features Demonstrated:
|
|
* - JobHistoryAnalyzer for job pattern analysis
|
|
* - JobFeatureExtractor with 8 behavioral features
|
|
* - JobAnomalyDetector using Core Score for confidence
|
|
* - Job-level, Queue-level, and Failed Jobs analysis
|
|
* - Health monitoring and recommendations
|
|
*/
|
|
|
|
require_once __DIR__ . '/../vendor/autoload.php';
|
|
|
|
use App\Framework\Queue\MachineLearning\JobHistoryAnalyzer;
|
|
use App\Framework\Queue\MachineLearning\JobFeatureExtractor;
|
|
use App\Framework\Queue\MachineLearning\JobAnomalyDetector;
|
|
use App\Framework\Queue\MachineLearning\ValueObjects\JobExecutionContext;
|
|
use App\Framework\Queue\MachineLearning\ValueObjects\JobExecutionSequence;
|
|
use App\Framework\Queue\MachineLearning\ValueObjects\JobFeatures;
|
|
use App\Framework\Queue\Services\JobMetricsManagerInterface;
|
|
use App\Framework\Core\ValueObjects\Duration;
|
|
use App\Framework\Core\ValueObjects\Score;
|
|
use App\Framework\Core\ValueObjects\Timestamp;
|
|
|
|
echo "=== Queue Job Anomaly Detection Demo ===\n\n";

// ========================================
// 1. Setup Components (Mock for Demo)
// ========================================

echo "1. Initializing Job Anomaly Detection components...\n";

/*
 * Anonymous stand-in for JobMetricsManagerInterface.
 *
 * Every method ignores the state of any real queue and answers with the
 * hard-coded sample values below, so the demo runs without a backing store.
 */
$metricsManager = new class implements JobMetricsManagerInterface {
    /**
     * Builds a sample "completed" job record for the requested id.
     *
     * The record always belongs to 'email-queue', used 2 of 3 attempts and
     * carries timestamps relative to the moment of the call.
     */
    public function getJobMetrics(string $jobId): ?\App\Framework\Queue\ValueObjects\JobMetrics
    {
        $format = 'Y-m-d H:i:s';

        // Evaluated in the same order the constructor arguments are listed.
        $createdAt = date($format, strtotime('-1 hour'));
        $startedAt = date($format, strtotime('-59 minutes'));
        $completedAt = date($format, strtotime('-58 minutes'));

        return new \App\Framework\Queue\ValueObjects\JobMetrics(
            jobId: $jobId,
            queueName: 'email-queue',
            status: 'completed',
            attempts: 2,
            maxAttempts: 3,
            executionTimeMs: 1250.5,
            memoryUsageBytes: 52428800, // 50 MB
            errorMessage: null,
            createdAt: $createdAt,
            startedAt: $startedAt,
            completedAt: $completedAt,
            failedAt: null,
            metadata: [],
        );
    }

    /**
     * Returns fixed aggregate figures for the given queue name.
     *
     * $timeWindow is accepted to satisfy the interface but is not used.
     */
    public function getQueueMetrics(string $queueName, string $timeWindow): \App\Framework\Queue\ValueObjects\QueueMetrics
    {
        $sampleMetrics = new \App\Framework\Queue\ValueObjects\QueueMetrics(
            queueName: $queueName,
            totalJobs: 1000,
            completedJobs: 850,
            failedJobs: 150,
            averageExecutionTime: 1200.0,
            peakMemoryUsage: 104857600, // 100 MB
        );

        return $sampleMetrics;
    }

    /**
     * Always reports an empty list of failed jobs; both arguments are ignored.
     *
     * NOTE(review): this does not line up with the 150 failed jobs reported by
     * getQueueMetrics() — acceptable for a mock, but worth knowing when reading
     * the demo output.
     */
    public function getFailedJobs(?string $queueName, string $timeWindow): array
    {
        return [];
    }

    /**
     * Returns two fixed performance figures; both arguments are ignored.
     */
    public function getPerformanceStats(?string $queueName, string $timeWindow): array
    {
        $sampleStats = [
            'queue_depth' => 50,
            'avg_payload_size' => 10240, // 10 KB
        ];

        return $sampleStats;
    }
};
|
|
|
|
// Extractor that turns an execution sequence into behavioural features;
// 0.6 is handed to its minConfidence setting.
$featureExtractor = new JobFeatureExtractor(minConfidence: 0.6);

// Detector configuration. The original author annotated Score::medium() as a
// "50% threshold" — TODO confirm against the Score value object.
$anomalyThreshold = Score::medium();
$anomalyDetector = new JobAnomalyDetector(
    anomalyThreshold: $anomalyThreshold,
    zScoreThreshold: 3.0,
    iqrMultiplier: 1.5,
);

// The analyzer is wired with the mock metrics source plus the two components above.
$historyAnalyzer = new JobHistoryAnalyzer($metricsManager, $featureExtractor, $anomalyDetector);

echo "✓ Components initialized\n\n";
|
|
|
|
// ========================================
// 2. Simulate Normal Job Execution
// ========================================

echo "2. Simulating normal job execution pattern...\n";

// Ten first-attempt successes on 'email-queue'. Only the execution time
// (+50 ms per job) and the queue depth (+1 per job) differ between jobs.
$normalExecutions = array_map(
    static fn (int $i): JobExecutionContext => new JobExecutionContext(
        jobId: "job-{$i}",
        queueName: 'email-queue',
        status: 'completed',
        attempts: 1,
        maxAttempts: 3,
        executionTimeMs: 1000.0 + ($i * 50), // Slight variance
        memoryUsageBytes: 50 * 1024 * 1024, // 50 MB
        errorMessage: null,
        createdAt: Timestamp::fromString("-{$i} minutes"),
        startedAt: Timestamp::fromString("-{$i} minutes"),
        completedAt: Timestamp::fromString("-{$i} minutes"),
        failedAt: null,
        queueDepth: 10 + $i,
        payloadSizeBytes: 10240, // 10 KB
        metadata: [],
    ),
    range(1, 10),
);

$normalSequence = JobExecutionSequence::fromExecutions($normalExecutions);
echo "✓ Created {$normalSequence->count()} normal job executions\n";

// Print a selection of the extracted features, rounded to three decimals.
$normalFeatures = $featureExtractor->extract($normalSequence);
echo " Features extracted:\n";
echo " - Execution Time Variance: ", round($normalFeatures->executionTimeVariance, 3), "\n";
echo " - Memory Usage Pattern: ", round($normalFeatures->memoryUsagePattern, 3), "\n";
echo " - Failure Rate: ", round($normalFeatures->failureRate, 3), "\n";
echo " - Queue Depth Correlation: ", round($normalFeatures->queueDepthCorrelation, 3), "\n\n";

// Run the detector on those features and report its verdict.
$normalResult = $anomalyDetector->detect($normalFeatures);
$normalVerdict = $normalResult->isAnomalous ? '❌ ANOMALOUS' : '✓ NORMAL';
echo " Anomaly Detection: ", $normalVerdict, "\n";
echo " Score: {$normalResult->anomalyScore->toString()}\n";
echo " Severity: {$normalResult->getSeverity()}\n\n";
|
|
|
|
// ========================================
// 3. Simulate High Failure Rate Pattern
// ========================================

echo "3. Simulating high failure rate pattern...\n";

$failedExecutions = [];

foreach (range(1, 10) as $i) {
    // Jobs 1-7 fail, jobs 8-10 complete: a 70% failure rate.
    $isFailure = $i <= 7;
    $status = $isFailure ? 'failed' : 'completed';

    $failedExecutions[] = new JobExecutionContext(
        jobId: "failing-job-{$i}",
        queueName: 'data-processing',
        status: $status,
        attempts: $isFailure ? 3 : 1, // Max retries on failures
        maxAttempts: 3,
        executionTimeMs: 2500.0,
        memoryUsageBytes: 100 * 1024 * 1024, // 100 MB
        errorMessage: $isFailure ? 'Database connection timeout' : null,
        createdAt: Timestamp::fromString("-{$i} minutes"),
        startedAt: Timestamp::fromString("-{$i} minutes"),
        // Exactly one of the two terminal timestamps is set, depending on the outcome.
        completedAt: $isFailure ? null : Timestamp::fromString("-{$i} minutes"),
        failedAt: $isFailure ? Timestamp::fromString("-{$i} minutes") : null,
        queueDepth: 50,
        payloadSizeBytes: 20480, // 20 KB
        metadata: [],
    );
}

$failedSequence = JobExecutionSequence::fromExecutions($failedExecutions);
echo "✓ Created {$failedSequence->count()} job executions with high failure rate\n";

// Print the features most relevant to this scenario, rounded to three decimals.
$failedFeatures = $featureExtractor->extract($failedSequence);
echo " Features extracted:\n";
echo " - Failure Rate: ", round($failedFeatures->failureRate, 3), " 🚨\n";
echo " - Retry Frequency: ", round($failedFeatures->retryFrequency, 3), " 🚨\n";
echo " - Memory Usage Pattern: ", round($failedFeatures->memoryUsagePattern, 3), "\n\n";

// Report the detector's verdict together with its indicator and suggested action.
$failedResult = $anomalyDetector->detect($failedFeatures);
$failedVerdict = $failedResult->isAnomalous ? '❌ ANOMALOUS' : '✓ NORMAL';
echo " Anomaly Detection: ", $failedVerdict, "\n";
echo " Score: {$failedResult->anomalyScore->toString()} 🚨\n";
echo " Severity: {$failedResult->getSeverity()}\n";
echo " Primary Indicator: {$failedResult->primaryIndicator}\n";
echo " Recommended Action: {$failedResult->getRecommendedAction()}\n";

// The pattern list is only printed when the detector reported at least one pattern.
if (!empty($failedResult->detectedPatterns)) {
    echo " Detected Patterns:\n";
    foreach ($failedResult->detectedPatterns as $pattern) {
        echo " - {$pattern['type']}: {$pattern['description']}\n";
    }
}

echo "\n";
|
|
|
|
// ========================================
// 4. Simulate Performance Degradation
// ========================================

echo "4. Simulating performance degradation pattern...\n";

$degradedExecutions = [];

for ($i = 1; $i <= 10; $i++) {
    // Execution time grows linearly: +500 ms per job (1500 ms → 6000 ms).
    $executionTime = 1000.0 + ($i * 500);

    // Memory grows linearly as well, 10 MB per job (60 MB → 150 MB),
    // which is what a leak would look like.
    $memoryUsage = (50 + ($i * 10)) * 1024 * 1024;

    // Later jobs must also be the more recent ones; otherwise the data set
    // would describe a queue that gets faster over time. Job 1 ran
    // 10 minutes ago, job 10 ran 1 minute ago.
    $minutesAgo = 11 - $i;

    $degradedExecutions[] = new JobExecutionContext(
        jobId: "slow-job-{$i}",
        queueName: 'image-processing',
        status: 'completed',
        attempts: 1,
        maxAttempts: 3,
        executionTimeMs: $executionTime,
        memoryUsageBytes: $memoryUsage,
        errorMessage: null,
        createdAt: Timestamp::fromString("-{$minutesAgo} minutes"),
        startedAt: Timestamp::fromString("-{$minutesAgo} minutes"),
        completedAt: Timestamp::fromString("-{$minutesAgo} minutes"),
        failedAt: null,
        queueDepth: 100 + ($i * 5), // Queue depth increases with time (105 → 150)
        payloadSizeBytes: 102400, // 100 KB
        metadata: [],
    );
}

$degradedSequence = JobExecutionSequence::fromExecutions($degradedExecutions);
echo "✓ Created {$degradedSequence->count()} job executions with performance degradation\n";

// Print the features most relevant to this scenario, rounded to three decimals.
$degradedFeatures = $featureExtractor->extract($degradedSequence);
echo " Features extracted:\n";
echo " - Execution Time Variance: " . round($degradedFeatures->executionTimeVariance, 3) . " 🚨\n";
echo " - Memory Usage Pattern: " . round($degradedFeatures->memoryUsagePattern, 3) . " 🚨\n";
echo " - Queue Depth Correlation: " . round($degradedFeatures->queueDepthCorrelation, 3) . "\n\n";

// Report the detector's verdict and its suggested action.
$degradedResult = $anomalyDetector->detect($degradedFeatures);
echo " Anomaly Detection: " . ($degradedResult->isAnomalous ? '❌ ANOMALOUS' : '✓ NORMAL') . "\n";
echo " Score: {$degradedResult->anomalyScore->toString()} 🚨\n";
echo " Severity: {$degradedResult->getSeverity()}\n";
echo " Recommended Action: {$degradedResult->getRecommendedAction()}\n\n";
|
|
|
|
// ========================================
// 5. Simulate Automated/Bot Execution
// ========================================

echo "5. Simulating automated/bot execution pattern...\n";

// Ten jobs that are identical in every measured respect and spaced exactly
// 60 seconds apart.
$botExecutions = array_map(
    static function (int $i): JobExecutionContext {
        // Perfect timing regularity: one job per 60-second step.
        $createdAt = Timestamp::fromString("-" . ($i * 60) . " seconds");

        return new JobExecutionContext(
            jobId: "bot-job-{$i}",
            queueName: 'api-requests',
            status: 'completed',
            attempts: 1,
            maxAttempts: 3,
            executionTimeMs: 500.0, // Exactly 500ms every time
            memoryUsageBytes: 20 * 1024 * 1024, // Exactly 20 MB
            errorMessage: null,
            // The same timestamp instance is used for all three lifecycle fields.
            createdAt: $createdAt,
            startedAt: $createdAt,
            completedAt: $createdAt,
            failedAt: null,
            queueDepth: 10,
            payloadSizeBytes: 5120, // Exactly 5 KB
            metadata: [],
        );
    },
    range(1, 10),
);

$botSequence = JobExecutionSequence::fromExecutions($botExecutions);
echo "✓ Created {$botSequence->count()} bot-like job executions\n";

// Print the features most relevant to this scenario, rounded to three decimals.
$botFeatures = $featureExtractor->extract($botSequence);
echo " Features extracted:\n";
echo " - Execution Timing Regularity: ", round($botFeatures->executionTimingRegularity, 3), " 🚨\n";
echo " - Execution Time Variance: ", round($botFeatures->executionTimeVariance, 3), "\n";
echo " - Payload Size Anomaly: ", round($botFeatures->payloadSizeAnomaly, 3), "\n\n";

$botResult = $anomalyDetector->detect($botFeatures);
$botVerdict = $botResult->isAnomalous ? '❌ ANOMALOUS' : '✓ NORMAL';
echo " Anomaly Detection: ", $botVerdict, "\n";
echo " Score: {$botResult->anomalyScore->toString()}\n";
echo " Severity: {$botResult->getSeverity()}\n";

// The pattern details are fetched only after hasPattern() confirms the entry exists.
if ($botResult->hasPattern('automated_execution')) {
    echo " ⚠️ Automated execution pattern detected\n";
    $pattern = $botResult->getPattern('automated_execution');
    echo " Description: {$pattern['description']}\n";
}

echo "\n";
|
|
|
|
// ========================================
// 6. JobHistoryAnalyzer Integration
// ========================================

echo "6. JobHistoryAnalyzer integration...\n\n";

// --- Single job -------------------------------------------------------
// Analyzes job 'job-123' with a one-hour Duration; the data comes from the
// mock metrics manager wired into the analyzer.
echo " Analyzing specific job:\n";
$jobResult = $historyAnalyzer->analyzeJob('job-123', Duration::fromHours(1));
$jobStatus = $jobResult->isAnomalous ? 'ANOMALOUS' : 'NORMAL';
echo " Status: ", $jobStatus, "\n";
echo " Score: {$jobResult->anomalyScore->toString()}\n";
echo " Action: {$jobResult->getRecommendedAction()}\n\n";

// --- Whole queue ------------------------------------------------------
echo " Analyzing queue:\n";
$queueResult = $historyAnalyzer->analyzeQueue('email-queue', Duration::fromHours(1));
$queueStatus = $queueResult->isAnomalous ? 'ANOMALOUS' : 'NORMAL';
echo " Status: ", $queueStatus, "\n";
echo " Score: {$queueResult->anomalyScore->toString()}\n\n";

// --- Health summary ---------------------------------------------------
// The summary is consumed as an array with the keys 'queue_name',
// 'health_status', 'metrics_summary' and 'recommendations'.
echo " Queue Health Summary:\n";
$healthSummary = $historyAnalyzer->getQueueHealthSummary('email-queue', Duration::fromHours(1));
echo " Queue: {$healthSummary['queue_name']}\n";
echo " Health Status: {$healthSummary['health_status']}\n";
echo " Total Jobs: {$healthSummary['metrics_summary']['total_jobs']}\n";
echo " Completion Rate: {$healthSummary['metrics_summary']['completion_rate']}%\n";

echo " Recommendations:\n";
foreach ($healthSummary['recommendations'] as $recommendation) {
    echo " - {$recommendation}\n";
}

echo "\n";
|
|
|
|
// ========================================
// 7. Feature Analysis
// ========================================

echo "7. Detailed feature analysis comparison:\n\n";

/*
 * Prints one feature table: the heading, then every entry of
 * $features->toArray() as a padded name and a value rounded to three
 * decimals, then a blank line.
 *
 * When $flagHighValues is true, values above 0.5 get a trailing siren marker;
 * when false, no marker is ever printed.
 */
$printFeatureTable = static function (string $heading, object $features, bool $flagHighValues): void {
    echo $heading;

    foreach ($features->toArray() as $feature => $value) {
        $indicator = ($flagHighValues && $value > 0.5) ? " 🚨" : "";
        echo " - " . str_pad($feature, 35) . ": " . round($value, 3) . $indicator . "\n";
    }

    echo "\n";
};

// The baseline table is printed without markers; the two anomaly scenarios with them.
$printFeatureTable(" Normal Traffic Features:\n", $normalFeatures, false);
$printFeatureTable(" High Failure Rate Features:\n", $failedFeatures, true);
$printFeatureTable(" Performance Degradation Features:\n", $degradedFeatures, true);
|
|
|
|
// ========================================
// 8. Top Contributors Analysis
// ========================================

echo "8. Top contributing features to anomalies:\n\n";

// Heading => detection result, listed in the order they are reported.
$resultsByHeading = [
    " Failed Jobs Anomaly - Top Contributors:\n" => $failedResult,
    " Performance Degradation - Top Contributors:\n" => $degradedResult,
];

foreach ($resultsByHeading as $heading => $result) {
    // Results that were not flagged as anomalous produce no output at all.
    if (!$result->isAnomalous) {
        continue;
    }

    echo $heading;

    // Each of the (up to) three entries is read as an array with the keys
    // 'feature', 'score' and 'contribution_percentage'.
    foreach ($result->getTopContributors(3) as $contributor) {
        echo " - {$contributor['feature']}: {$contributor['score']->toString()} ";
        echo "({$contributor['contribution_percentage']}% contribution)\n";
    }

    echo "\n";
}
|
|
|
|
// ========================================
// 9. Statistics Summary
// ========================================

echo "9. Execution sequence statistics:\n\n";

// Heading => execution sequence, listed in the order they are reported.
$sequencesByHeading = [
    " Normal Traffic:\n" => $normalSequence,
    " Failed Jobs:\n" => $failedSequence,
];

foreach ($sequencesByHeading as $heading => $sequence) {
    echo $heading;

    // Statistics are fetched after the heading is printed and dumped as
    // padded "key: value" lines.
    foreach ($sequence->getStatistics() as $key => $value) {
        echo " - " . str_pad($key, 30) . ": {$value}\n";
    }

    echo "\n";
}
|
|
|
|
// Closing banner and summary, emitted line by line in the listed order.
$closingLines = [
    "=== Demo Complete ===\n\n",
    "Summary:\n",
    "✓ Job Anomaly Detection successfully detects:\n",
    " - High failure rates with excessive retries\n",
    " - Performance degradation (execution time variance + memory growth)\n",
    " - Resource exhaustion (queue depth correlation)\n",
    " - Automated/bot execution patterns (timing regularity)\n",
    " - Data processing anomalies (payload size variations)\n\n",
    "✓ Uses Core Score for confidence levels (0-100)\n",
    "✓ Provides actionable recommendations per severity\n",
    "✓ Integrates with existing Queue JobMetricsManager\n",
    "✓ Supports job-level, queue-level, and batch analysis\n",
];

foreach ($closingLines as $closingLine) {
    echo $closingLine;
}
|