- Add comprehensive health check system with multiple endpoints - Add Prometheus metrics endpoint - Add production logging configurations (5 strategies) - Add complete deployment documentation suite: * QUICKSTART.md - 30-minute deployment guide * DEPLOYMENT_CHECKLIST.md - Printable verification checklist * DEPLOYMENT_WORKFLOW.md - Complete deployment lifecycle * PRODUCTION_DEPLOYMENT.md - Comprehensive technical reference * production-logging.md - Logging configuration guide * ANSIBLE_DEPLOYMENT.md - Infrastructure as Code automation * README.md - Navigation hub * DEPLOYMENT_SUMMARY.md - Executive summary - Add deployment scripts and automation - Add DEPLOYMENT_PLAN.md - Concrete plan for immediate deployment - Update README with production-ready features All production infrastructure is now complete and ready for deployment.
393 lines
14 KiB
PHP
393 lines
14 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Framework\Queue\MachineLearning;
|
|
|
|
use App\Framework\Queue\MachineLearning\ValueObjects\JobFeatures;
|
|
use App\Framework\Queue\MachineLearning\ValueObjects\JobAnomalyResult;
|
|
use App\Framework\Core\ValueObjects\Score;
|
|
|
|
/**
 * Job Anomaly Detector - Heuristic Job Behavior Analysis
 *
 * Scores a JobFeatures snapshot and classifies it as normal, low-confidence
 * or anomalous by combining:
 * - Per-feature threshold scoring (each feature has its own "concerning" and
 *   "critical" thresholds, see featureValueToAnomalyScore())
 * - Heuristic pattern matching based on the indicator methods of JobFeatures
 *   (high failure risk, performance degradation, resource exhaustion,
 *   automated execution, data processing anomaly)
 * - A weighted average of the feature scores plus a capped pattern boost
 *
 * Score scale: this class builds scores from 0.0-1.0 decimals via
 * Score::fromDecimal() and from integers via new Score(int), and compares
 * Score::getValue() against percentage constants (50, 70, 100). It therefore
 * treats getValue() as a 0-100 percentage.
 *
 * NOTE(review): $zScoreThreshold and $iqrMultiplier are accepted and reported
 * by getConfiguration(), but no Z-score or IQR computation is performed in
 * this class.
 */
final readonly class JobAnomalyDetector
{
    /**
     * Score reached at the top of the "concerning" band; also the minimum
     * score handed out once a value reaches its "critical" threshold.
     */
    private const MEDIUM_CONCERN_CEILING = 0.5;

    /**
     * Relative importance of each feature in the overall weighted average.
     * Features not listed here are weighted 1.0.
     */
    private const FEATURE_WEIGHTS = [
        'failure_rate' => 2.0,                // Most critical
        'retry_frequency' => 1.8,             // Very important
        'memory_usage_pattern' => 1.5,        // Important for resource issues
        'execution_time_variance' => 1.3,     // Performance indicator
        'queue_depth_correlation' => 1.2,     // Scalability indicator
        'payload_size_anomaly' => 1.0,        // Moderate importance
        'dependency_chain_complexity' => 0.8, // Less critical
        'execution_timing_regularity' => 0.7, // Informational
    ];

    /**
     * @param Score $anomalyThreshold Minimum overall score to classify as anomalous (default: Score(50), i.e. 50%)
     * @param float $zScoreThreshold  Z-score threshold setting (default: 3.0); only exposed via getConfiguration()
     * @param float $iqrMultiplier    IQR multiplier setting (default: 1.5); only exposed via getConfiguration()
     */
    public function __construct(
        private Score $anomalyThreshold = new Score(50), // 50% threshold
        private float $zScoreThreshold = 3.0,
        private float $iqrMultiplier = 1.5,
    ) {}

    /**
     * Detect anomalies in job execution features.
     *
     * Flow: score every feature, collect heuristic patterns, combine both into
     * an overall score, then compare that score with the configured threshold.
     *
     * @return JobAnomalyResult anomalous() when the overall score reaches the
     *                          threshold, lowConfidence() when it is above zero
     *                          but below the threshold, normal() otherwise
     */
    public function detect(JobFeatures $features): JobAnomalyResult
    {
        $featureScores = $this->calculateFeatureScores($features);
        $detectedPatterns = $this->detectPatterns($features);
        $overallScore = $this->calculateOverallScore($featureScores, $detectedPatterns);

        if ($overallScore->getValue() >= $this->anomalyThreshold->getValue()) {
            // The primary indicator is only reported for anomalous results,
            // so it is only computed on this path.
            return JobAnomalyResult::anomalous(
                $overallScore,
                $featureScores,
                $detectedPatterns,
                $this->identifyPrimaryIndicator($featureScores),
            );
        }

        if ($overallScore->getValue() > 0) {
            return JobAnomalyResult::lowConfidence(
                $overallScore,
                $featureScores,
                'Score below anomaly threshold',
            );
        }

        return JobAnomalyResult::normal('No anomalies detected');
    }

    /**
     * Calculate an anomaly score for each feature.
     *
     * Every entry of JobFeatures::toArray() is mapped through
     * featureValueToAnomalyScore() and wrapped with Score::fromDecimal().
     *
     * @return array<string, Score> Anomaly score keyed by feature name
     */
    private function calculateFeatureScores(JobFeatures $features): array
    {
        $scores = [];

        foreach ($features->toArray() as $featureName => $value) {
            $scores[$featureName] = Score::fromDecimal(
                $this->featureValueToAnomalyScore($featureName, $value),
            );
        }

        return $scores;
    }

    /**
     * Convert a feature value to an anomaly score using per-feature thresholds.
     *
     * Each known feature has a (concerning, critical) threshold pair; unknown
     * feature names fall back to the raw value.
     */
    private function featureValueToAnomalyScore(string $featureName, float $value): float
    {
        return match ($featureName) {
            // Critical features: lower thresholds
            'failure_rate' => $this->scoreWithThreshold($value, 0.1, 0.3),         // >10% concerning, >30% critical
            'retry_frequency' => $this->scoreWithThreshold($value, 0.2, 0.5),      // >20% concerning, >50% critical
            'memory_usage_pattern' => $this->scoreWithThreshold($value, 0.5, 0.7), // >50% concerning, >70% critical

            // Important features: medium thresholds
            'execution_time_variance' => $this->scoreWithThreshold($value, 0.4, 0.6),
            'queue_depth_correlation' => $this->scoreWithThreshold($value, 0.6, 0.8),
            'payload_size_anomaly' => $this->scoreWithThreshold($value, 0.6, 0.8),

            // Informational features: higher thresholds
            'dependency_chain_complexity' => $this->scoreWithThreshold($value, 0.7, 0.9),
            'execution_timing_regularity' => $this->scoreWithThreshold($value, 0.8, 0.95), // Very high regularity = bot

            default => $value, // Fallback: use value directly
        };
    }

    /**
     * Score a feature value using low/high thresholds.
     *
     * - value <= low:        0.0 (normal)
     * - low < value < high:  linear scale from 0.0 up to 0.5
     * - value >= high:       the value itself, but never less than 0.5
     *
     * The 0.5 floor in the last band keeps the score monotonic. Without it a
     * value at the high threshold could score lower than a value just below
     * it whenever the high threshold is under 0.5 (e.g. failure_rate: 0.29
     * scored ~0.475 while 0.30 scored 0.30).
     */
    private function scoreWithThreshold(float $value, float $lowThreshold, float $highThreshold): float
    {
        if ($value <= $lowThreshold) {
            return 0.0;
        }

        if ($value >= $highThreshold) {
            return max(self::MEDIUM_CONCERN_CEILING, $value);
        }

        // Reaching this point implies low < value < high, so the range is > 0.
        $position = ($value - $lowThreshold) / ($highThreshold - $lowThreshold);

        return $position * self::MEDIUM_CONCERN_CEILING;
    }

    /**
     * Detect heuristic anomaly patterns.
     *
     * Each pattern is gated by the corresponding indicates*() method of
     * JobFeatures; its confidence is derived from the two features involved.
     *
     * @return array<array{type: string, confidence: Score, description: string}>
     */
    private function detectPatterns(JobFeatures $features): array
    {
        $patterns = [];

        // Pattern 1: High Failure Risk
        if ($features->indicatesHighFailureRisk()) {
            $patterns[] = $this->buildPattern(
                'high_failure_risk',
                [$features->failureRate, $features->retryFrequency],
                'High failure rate (%.1f%%) with excessive retries (%.1f%%)',
                $features->failureRate,
                $features->retryFrequency,
            );
        }

        // Pattern 2: Performance Degradation
        if ($features->indicatesPerformanceDegradation()) {
            $patterns[] = $this->buildPattern(
                'performance_degradation',
                [$features->executionTimeVariance, $features->memoryUsagePattern],
                'Unstable execution times (variance: %.1f%%) and memory patterns (%.1f%%)',
                $features->executionTimeVariance,
                $features->memoryUsagePattern,
            );
        }

        // Pattern 3: Resource Exhaustion
        if ($features->indicatesResourceExhaustion()) {
            $patterns[] = $this->buildPattern(
                'resource_exhaustion',
                [$features->queueDepthCorrelation, $features->memoryUsagePattern],
                'High queue depth impact (%.1f%%) with memory anomalies (%.1f%%)',
                $features->queueDepthCorrelation,
                $features->memoryUsagePattern,
            );
        }

        // Pattern 4: Automated Execution (Bot-like)
        if ($features->indicatesAutomatedExecution()) {
            $patterns[] = $this->buildPattern(
                'automated_execution',
                // Variance is inverted for confidence: low variance = higher confidence
                [$features->executionTimingRegularity, 1.0 - $features->executionTimeVariance],
                'Very regular timing (%.1f%%) with low variance (%.1f%%) - possible bot activity',
                $features->executionTimingRegularity,
                $features->executionTimeVariance,
            );
        }

        // Pattern 5: Data Processing Anomaly
        if ($features->indicatesDataProcessingAnomaly()) {
            $patterns[] = $this->buildPattern(
                'data_processing_anomaly',
                [$features->payloadSizeAnomaly, $features->memoryUsagePattern],
                'Unusual payload sizes (%.1f%%) with memory pattern anomalies (%.1f%%)',
                $features->payloadSizeAnomaly,
                $features->memoryUsagePattern,
            );
        }

        return $patterns;
    }

    /**
     * Assemble one detected-pattern entry.
     *
     * @param string       $type              Pattern identifier
     * @param array<float> $confidenceInputs  Values fed into calculatePatternConfidence()
     * @param string       $descriptionFormat sprintf() format with one %-placeholder per reported value
     * @param float        ...$reportedValues Decimal values shown in the description as percentages
     *
     * @return array{type: string, confidence: Score, description: string}
     */
    private function buildPattern(
        string $type,
        array $confidenceInputs,
        string $descriptionFormat,
        float ...$reportedValues,
    ): array {
        $confidence = $this->calculatePatternConfidence($confidenceInputs);

        $percentages = array_map(
            static fn (float $decimal): float => $decimal * 100,
            $reportedValues,
        );

        return [
            'type' => $type,
            'confidence' => Score::fromDecimal($confidence),
            'description' => sprintf($descriptionFormat, ...$percentages),
        ];
    }

    /**
     * Calculate pattern confidence from contributing feature values.
     *
     * Confidence is the plain average of the values, plus 0.1 for every value
     * above 0.7 (boost capped at 0.2), with the total capped at 1.0.
     *
     * @param array<float> $featureValues
     */
    private function calculatePatternConfidence(array $featureValues): float
    {
        if ($featureValues === []) {
            return 0.0;
        }

        $average = array_sum($featureValues) / count($featureValues);

        // Boost confidence if multiple strong indicators agree
        $strongIndicators = count(array_filter(
            $featureValues,
            static fn ($value): bool => $value > 0.7,
        ));
        $confidenceBoost = min(0.2, $strongIndicators * 0.1);

        return min(1.0, $average + $confidenceBoost);
    }

    /**
     * Calculate the overall anomaly score.
     *
     * Weighted average of the feature scores (weights from FEATURE_WEIGHTS,
     * 1.0 for unknown features) plus the pattern boost, capped at 100 and
     * rounded to an integer before being wrapped in a Score.
     *
     * @param array<string, Score> $featureScores
     * @param array<array{type: string, confidence: Score}> $detectedPatterns
     */
    private function calculateOverallScore(array $featureScores, array $detectedPatterns): Score
    {
        if ($featureScores === []) {
            return Score::zero();
        }

        $weightedSum = 0.0;
        $totalWeight = 0.0;

        foreach ($featureScores as $featureName => $score) {
            $weight = self::FEATURE_WEIGHTS[$featureName] ?? 1.0;
            $weightedSum += $score->getValue() * $weight;
            $totalWeight += $weight;
        }

        $baseScore = $totalWeight > 0 ? $weightedSum / $totalWeight : 0.0;

        // Combine base score and pattern boost (max 100%)
        $finalScore = min(100.0, $baseScore + $this->calculatePatternBoost($detectedPatterns));

        return new Score((int) round($finalScore));
    }

    /**
     * Calculate the pattern boost added to the overall score.
     *
     * Each pattern contributes +10 (confidence >= 70), +5 (confidence >= 50)
     * or +2 (otherwise); the total boost is capped at 30.
     *
     * @param array<array{type: string, confidence: Score}> $detectedPatterns
     */
    private function calculatePatternBoost(array $detectedPatterns): float
    {
        $boost = 0.0;

        foreach ($detectedPatterns as $pattern) {
            $confidence = $pattern['confidence']->getValue();

            $boost += match (true) {
                $confidence >= 70 => 10.0, // High confidence pattern: +10%
                $confidence >= 50 => 5.0,  // Medium confidence: +5%
                default => 2.0,            // Low confidence: +2%
            };
        }

        // Cap pattern boost at 30%
        return min(30.0, $boost);
    }

    /**
     * Identify the primary indicator (feature with the highest anomaly score).
     *
     * On ties the first feature encountered wins. Returns 'unknown' when the
     * list is empty or no feature scores above zero.
     *
     * @param array<string, Score> $featureScores
     */
    private function identifyPrimaryIndicator(array $featureScores): string
    {
        $maxScore = 0.0;
        $primaryIndicator = 'unknown';

        foreach ($featureScores as $featureName => $score) {
            $current = $score->getValue();

            if ($current > $maxScore) {
                $maxScore = $current;
                $primaryIndicator = $featureName;
            }
        }

        return $primaryIndicator;
    }

    /**
     * Get detector configuration.
     *
     * @return array{anomaly_threshold: mixed, z_score_threshold: float, iqr_multiplier: float}
     *         anomaly_threshold is whatever Score::getValue() returns for the configured threshold
     */
    public function getConfiguration(): array
    {
        return [
            'anomaly_threshold' => $this->anomalyThreshold->getValue(),
            'z_score_threshold' => $this->zScoreThreshold,
            'iqr_multiplier' => $this->iqrMultiplier,
        ];
    }
}
|