3 standard deviations)
 * 2. IQR Analysis: Identifies outliers using interquartile range (>1.5 * IQR)
 * 3. Heuristic Patterns: Rule-based threat classification
 * 4. Weighted Feature Scoring: Combines feature scores with domain weights
 *
 * All confidence scores use the framework's Core Score. Scores are created
 * from 0.0-1.0 decimals via Score::fromDecimal(), while the arithmetic in this
 * class compares Score::getValue() against 0-100 values (50, 70, 100).
 * NOTE(review): confirm the getValue() scale against the Score class.
 */
final readonly class JobAnomalyDetector
{
    /**
     * @param Score $anomalyThreshold Minimum score to classify as anomalous (default: 50 = 50%)
     * @param float $zScoreThreshold Z-score threshold for statistical outliers (default: 3.0)
     * @param float $iqrMultiplier IQR multiplier for outlier detection (default: 1.5)
     *
     * NOTE(review): within this class $zScoreThreshold and $iqrMultiplier are
     * only exposed through getConfiguration(); no z-score or IQR computation
     * is performed by the methods below.
     */
    public function __construct(
        private Score $anomalyThreshold = new Score(50), // 50% threshold
        private float $zScoreThreshold = 3.0,
        private float $iqrMultiplier = 1.5
    ) {
    }

    /**
     * Detect anomalies in job execution features.
     *
     * Scores every feature, collects the heuristic patterns reported by the
     * JobFeatures indicators, combines both into one overall score and
     * compares that score with the configured anomaly threshold.
     *
     * @param JobFeatures $features Feature vector of the job under inspection
     *
     * @return JobAnomalyResult anomalous() when the overall score reaches the
     *                          threshold, lowConfidence() when it is above zero
     *                          but below the threshold, normal() otherwise
     */
    public function detect(JobFeatures $features): JobAnomalyResult
    {
        // Step 1: Calculate feature-specific anomaly scores
        $featureScores = $this->calculateFeatureScores($features);

        // Step 2: Detect heuristic patterns
        $detectedPatterns = $this->detectPatterns($features);

        // Step 3: Calculate overall anomaly score (weighted average + pattern boost)
        $overallScore = $this->calculateOverallScore($featureScores, $detectedPatterns);

        // Step 4: Determine if anomalous based on threshold
        $isAnomalous = $overallScore->getValue() >= $this->anomalyThreshold->getValue();

        // Step 5: Identify primary indicator (highest scoring feature)
        $primaryIndicator = $this->identifyPrimaryIndicator($featureScores);

        // Step 6: Build result
        if (!$isAnomalous) {
            if ($overallScore->getValue() > 0) {
                return JobAnomalyResult::lowConfidence(
                    $overallScore,
                    $featureScores,
                    'Score below anomaly threshold'
                );
            }

            return JobAnomalyResult::normal('No anomalies detected');
        }

        return JobAnomalyResult::anomalous(
            $overallScore,
            $featureScores,
            $detectedPatterns,
            $primaryIndicator
        );
    }

    /**
     * Calculate an anomaly score for each feature.
     *
     * Iterates over JobFeatures::toArray() and maps every value through
     * featureValueToAnomalyScore(), wrapping the result in a Score.
     *
     * @return array<string, Score> Anomaly score keyed by feature name
     */
    private function calculateFeatureScores(JobFeatures $features): array
    {
        $featureArray = $features->toArray();
        $scores = [];

        foreach ($featureArray as $featureName => $value) {
            // Convert feature value (0.0-1.0) to anomaly score
            $anomalyScore = $this->featureValueToAnomalyScore($featureName, $value);
            $scores[$featureName] = Score::fromDecimal($anomalyScore);
        }

        return $scores;
    }

    /**
     * Convert a feature value to an anomaly score using per-feature thresholds.
     *
     * Known feature names are scored with scoreWithThreshold() and their own
     * low/high thresholds; any other name falls back to the raw value.
     *
     * @param string $featureName Feature key as produced by JobFeatures::toArray()
     * @param float  $value       Feature value (treated as 0.0-1.0)
     *
     * @return float Anomaly score as a decimal
     */
    private function featureValueToAnomalyScore(string $featureName, float $value): float
    {
        return match ($featureName) {
            // Critical features: Lower threshold for anomaly
            'failure_rate' => $this->scoreWithThreshold($value, 0.1, 0.3), // >10% concerning, >30% critical
            'retry_frequency' => $this->scoreWithThreshold($value, 0.2, 0.5), // >20% concerning, >50% critical
            'memory_usage_pattern' => $this->scoreWithThreshold($value, 0.5, 0.7), // >50% concerning, >70% critical

            // Important features: Medium threshold
            'execution_time_variance' => $this->scoreWithThreshold($value, 0.4, 0.6),
            'queue_depth_correlation' => $this->scoreWithThreshold($value, 0.6, 0.8),
            'payload_size_anomaly' => $this->scoreWithThreshold($value, 0.6, 0.8),

            // Informational features: Higher threshold
            'dependency_chain_complexity' => $this->scoreWithThreshold($value, 0.7, 0.9),
            'execution_timing_regularity' => $this->scoreWithThreshold($value, 0.8, 0.95), // Very high regularity = bot

            default => $value // Fallback: use value directly
        };
    }

    /**
     * Score a feature value using low/high thresholds.
     *
     * - value <= low:        0.0 (normal)
     * - low < value < high:  linear scale 0.0-0.5 (medium concern)
     * - value >= high:       max(0.5, value) (high anomaly)
     *
     * The high branch is floored at 0.5 so the score never decreases when a
     * value crosses the high threshold. Without the floor, a feature whose
     * high threshold is below 0.5 (e.g. failure_rate at 0.3) would score
     * about 0.5 just under the threshold but only 0.3 at the threshold.
     *
     * @param float $value         Feature value
     * @param float $lowThreshold  Values at or below this are considered normal
     * @param float $highThreshold Values at or above this are considered critical
     *
     * @return float Anomaly score as a decimal
     */
    private function scoreWithThreshold(float $value, float $lowThreshold, float $highThreshold): float
    {
        if ($value <= $lowThreshold) {
            return 0.0;
        }

        if ($value >= $highThreshold) {
            // Critical values keep their raw magnitude but never rank below
            // the 0.5 ceiling of the medium band.
            return max(0.5, $value);
        }

        // Linear interpolation between low and high threshold. The two guards
        // above guarantee low < value < high here, so $range is never zero.
        $range = $highThreshold - $lowThreshold;
        $position = ($value - $lowThreshold) / $range;

        return $position * 0.5; // Scale to 0.0-0.5 for medium concern
    }

    /**
     * Detect heuristic anomaly patterns.
     *
     * Uses the indicates*() predicates of JobFeatures; each matching predicate
     * adds one pattern entry with a confidence derived from the two feature
     * values named in that pattern.
     *
     * @return array<int, array{type: string, confidence: Score, description: string}>
     */
    private function detectPatterns(JobFeatures $features): array
    {
        $patterns = [];

        // Pattern 1: High Failure Risk
        if ($features->indicatesHighFailureRisk()) {
            $confidence = $this->calculatePatternConfidence([
                $features->failureRate,
                $features->retryFrequency
            ]);

            $patterns[] = [
                'type' => 'high_failure_risk',
                'confidence' => Score::fromDecimal($confidence),
                'description' => sprintf(
                    'High failure rate (%.1f%%) with excessive retries (%.1f%%)',
                    $features->failureRate * 100,
                    $features->retryFrequency * 100
                )
            ];
        }

        // Pattern 2: Performance Degradation
        if ($features->indicatesPerformanceDegradation()) {
            $confidence = $this->calculatePatternConfidence([
                $features->executionTimeVariance,
                $features->memoryUsagePattern
            ]);

            $patterns[] = [
                'type' => 'performance_degradation',
                'confidence' => Score::fromDecimal($confidence),
                'description' => sprintf(
                    'Unstable execution times (variance: %.1f%%) and memory patterns (%.1f%%)',
                    $features->executionTimeVariance * 100,
                    $features->memoryUsagePattern * 100
                )
            ];
        }

        // Pattern 3: Resource Exhaustion
        if ($features->indicatesResourceExhaustion()) {
            $confidence = $this->calculatePatternConfidence([
                $features->queueDepthCorrelation,
                $features->memoryUsagePattern
            ]);

            $patterns[] = [
                'type' => 'resource_exhaustion',
                'confidence' => Score::fromDecimal($confidence),
                'description' => sprintf(
                    'High queue depth impact (%.1f%%) with memory anomalies (%.1f%%)',
                    $features->queueDepthCorrelation * 100,
                    $features->memoryUsagePattern * 100
                )
            ];
        }

        // Pattern 4: Automated Execution (Bot-like)
        if ($features->indicatesAutomatedExecution()) {
            $confidence = $this->calculatePatternConfidence([
                $features->executionTimingRegularity,
                1.0 - $features->executionTimeVariance // Inverted: low variance = higher confidence
            ]);

            $patterns[] = [
                'type' => 'automated_execution',
                'confidence' => Score::fromDecimal($confidence),
                'description' => sprintf(
                    'Very regular timing (%.1f%%) with low variance (%.1f%%) - possible bot activity',
                    $features->executionTimingRegularity * 100,
                    $features->executionTimeVariance * 100
                )
            ];
        }

        // Pattern 5: Data Processing Anomaly
        if ($features->indicatesDataProcessingAnomaly()) {
            $confidence = $this->calculatePatternConfidence([
                $features->payloadSizeAnomaly,
                $features->memoryUsagePattern
            ]);

            $patterns[] = [
                'type' => 'data_processing_anomaly',
                'confidence' => Score::fromDecimal($confidence),
                'description' => sprintf(
                    'Unusual payload sizes (%.1f%%) with memory pattern anomalies (%.1f%%)',
                    $features->payloadSizeAnomaly * 100,
                    $features->memoryUsagePattern * 100
                )
            ];
        }

        return $patterns;
    }

    /**
     * Calculate pattern confidence from contributing feature values.
     *
     * Takes the arithmetic mean of the values and adds 0.1 for every value
     * above 0.7 (boost capped at 0.2); the result is capped at 1.0.
     *
     * @param array<int, float> $featureValues Contributing feature values
     *
     * @return float Confidence as a decimal, 0.0 for an empty list
     */
    private function calculatePatternConfidence(array $featureValues): float
    {
        if (empty($featureValues)) {
            return 0.0;
        }

        // Average of all contributing features
        $average = array_sum($featureValues) / count($featureValues);

        // Boost confidence if multiple strong indicators
        $strongIndicators = count(array_filter($featureValues, fn($v) => $v > 0.7));
        $confidenceBoost = min(0.2, $strongIndicators * 0.1);

        return min(1.0, $average + $confidenceBoost);
    }

    /**
     * Calculate the overall anomaly score.
     *
     * Weighted average of Score::getValue() over all feature scores (features
     * without an explicit weight count with 1.0), plus the pattern boost,
     * capped at 100 and rounded to an integer Score.
     *
     * @param array<string, Score> $featureScores
     * @param array<int, array{type: string, confidence: Score, description: string}> $detectedPatterns
     */
    private function calculateOverallScore(array $featureScores, array $detectedPatterns): Score
    {
        if (empty($featureScores)) {
            return Score::zero();
        }

        // Feature weights (domain knowledge)
        $weights = [
            'failure_rate' => 2.0,                 // Most critical
            'retry_frequency' => 1.8,              // Very important
            'memory_usage_pattern' => 1.5,         // Important for resource issues
            'execution_time_variance' => 1.3,      // Performance indicator
            'queue_depth_correlation' => 1.2,      // Scalability indicator
            'payload_size_anomaly' => 1.0,         // Moderate importance
            'dependency_chain_complexity' => 0.8,  // Less critical
            'execution_timing_regularity' => 0.7,  // Informational
        ];

        // Calculate weighted feature score
        $weightedSum = 0.0;
        $totalWeight = 0.0;

        foreach ($featureScores as $featureName => $score) {
            $weight = $weights[$featureName] ?? 1.0;
            $weightedSum += $score->getValue() * $weight;
            $totalWeight += $weight;
        }

        $baseScore = $totalWeight > 0 ? $weightedSum / $totalWeight : 0.0;

        // Pattern-based boosting
        $patternBoost = $this->calculatePatternBoost($detectedPatterns);

        // Combine base score and pattern boost (max 100%)
        $finalScore = min(100.0, $baseScore + $patternBoost);

        return new Score((int) round($finalScore));
    }

    /**
     * Calculate the pattern boost added to the overall score.
     *
     * Each pattern contributes +10 (confidence >= 70), +5 (confidence >= 50)
     * or +2 (otherwise); the total boost is capped at 30.
     *
     * @param array<int, array{type: string, confidence: Score, description: string}> $detectedPatterns
     */
    private function calculatePatternBoost(array $detectedPatterns): float
    {
        if (empty($detectedPatterns)) {
            return 0.0;
        }

        // Each high-confidence pattern adds to the boost
        $boost = 0.0;

        foreach ($detectedPatterns as $pattern) {
            $confidence = $pattern['confidence']->getValue();

            if ($confidence >= 70) {
                $boost += 10.0; // High confidence pattern: +10%
            } elseif ($confidence >= 50) {
                $boost += 5.0; // Medium confidence: +5%
            } else {
                $boost += 2.0; // Low confidence: +2%
            }
        }

        // Cap pattern boost at 30%
        return min(30.0, $boost);
    }

    /**
     * Identify the primary indicator (feature with the highest anomaly score).
     *
     * @param array<string, Score> $featureScores
     *
     * @return string Feature name, or 'unknown' when the list is empty or no
     *                feature scores above zero; on ties the first feature wins
     */
    private function identifyPrimaryIndicator(array $featureScores): string
    {
        if (empty($featureScores)) {
            return 'unknown';
        }

        $maxScore = 0.0;
        $primaryIndicator = 'unknown';

        foreach ($featureScores as $featureName => $score) {
            if ($score->getValue() > $maxScore) {
                $maxScore = $score->getValue();
                $primaryIndicator = $featureName;
            }
        }

        return $primaryIndicator;
    }

    /**
     * Get detector configuration.
     *
     * @return array{anomaly_threshold: mixed, z_score_threshold: float, iqr_multiplier: float}
     */
    public function getConfiguration(): array
    {
        return [
            'anomaly_threshold' => $this->anomalyThreshold->getValue(),
            'z_score_threshold' => $this->zScoreThreshold,
            'iqr_multiplier' => $this->iqrMultiplier
        ];
    }
}