Files
michaelschiemer/src/Framework/Waf/MachineLearning/MachineLearningEngine.php
Michael Schiemer fc3d7e6357 feat(Production): Complete production deployment infrastructure
- Add comprehensive health check system with multiple endpoints
- Add Prometheus metrics endpoint
- Add production logging configurations (5 strategies)
- Add complete deployment documentation suite:
  * QUICKSTART.md - 30-minute deployment guide
  * DEPLOYMENT_CHECKLIST.md - Printable verification checklist
  * DEPLOYMENT_WORKFLOW.md - Complete deployment lifecycle
  * PRODUCTION_DEPLOYMENT.md - Comprehensive technical reference
  * production-logging.md - Logging configuration guide
  * ANSIBLE_DEPLOYMENT.md - Infrastructure as Code automation
  * README.md - Navigation hub
  * DEPLOYMENT_SUMMARY.md - Executive summary
- Add deployment scripts and automation
- Add DEPLOYMENT_PLAN.md - Concrete plan for immediate deployment
- Update README with production-ready features

All production infrastructure is now complete and ready for deployment.
2025-10-25 19:18:37 +02:00

731 lines
25 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Framework\Waf\MachineLearning;
use App\Framework\Core\ValueObjects\Duration;
use App\Framework\Core\ValueObjects\Percentage;
use App\Framework\DateTime\Clock;
use App\Framework\MachineLearning\Core\AnomalyDetectorInterface;
use App\Framework\MachineLearning\Core\FeatureExtractorMetadata;
use App\Framework\MachineLearning\ValueObjects\AnomalyDetection;
use App\Framework\MachineLearning\ValueObjects\Baseline;
use App\Framework\MachineLearning\ValueObjects\Feature;
use App\Framework\MachineLearning\ValueObjects\FeatureType;
use App\Framework\Waf\Analysis\ValueObjects\RequestAnalysisData;
use App\Framework\Waf\MachineLearning\WafFeatureExtractor;
/**
* Main machine learning engine for behavioral analysis and anomaly detection
*/
final class MachineLearningEngine
{
/**
 * Wires up the engine; all state lives in constructor-promoted properties.
 *
 * @param bool $enabled Master switch; when false, analyzeRequest() returns an empty result flagged as disabled
 * @param WafFeatureExtractor[] $extractors Feature extractors, run in descending getPriority() order
 * @param AnomalyDetectorInterface[] $detectors Detectors run against the extracted features
 * @param Clock $clock Time source for all duration measurements and timestamps
 * @param Duration $analysisTimeout Time budget consulted during feature extraction
 * @param Percentage $confidenceThreshold Minimum confidence an anomaly needs to be kept
 * @param bool $enableParallelProcessing Within this class only reported by getConfiguration()
 * @param bool $enableFeatureCaching Whether extracted features are cached per request fingerprint
 * @param int $maxFeaturesPerRequest Extraction stops once this many features have been collected
 * @param array $featureCache Initial feature cache (cache key => extracted features)
 * @param array $baselineCache Initial baseline cache ("baseline:{type}" => Baseline)
 * @param array $performanceMetrics Initial list of per-request metric records
 */
public function __construct(
private readonly bool $enabled,
private readonly array $extractors,
private readonly array $detectors,
private readonly Clock $clock,
private readonly Duration $analysisTimeout,
private readonly Percentage $confidenceThreshold,
private readonly bool $enableParallelProcessing = false,
private readonly bool $enableFeatureCaching = true,
private readonly int $maxFeaturesPerRequest = 100,
private array $featureCache = [],
private array $baselineCache = [],
private array $performanceMetrics = []
) {
}
/**
 * Run the complete analysis pipeline for a single request.
 *
 * Pipeline: extract features, resolve baselines, detect anomalies, derive an
 * overall confidence, feed the features back into the detectors and record
 * timing metrics. Any Throwable raised inside the pipeline is turned into an
 * empty result carrying the error message instead of being rethrown.
 *
 * @param RequestAnalysisData $requestData Request under analysis
 * @param array $context Extra context handed through to the feature extractors
 * @return MachineLearningResult Analysis outcome; flagged `enabled: false` when the engine is switched off
 */
public function analyzeRequest(RequestAnalysisData $requestData, array $context = []): MachineLearningResult
{
    $startedAt = $this->clock->time();

    // Disabled engine: short-circuit with an empty, zero-duration result.
    if ($this->enabled === false) {
        return new MachineLearningResult(
            features: [],
            anomalies: [],
            confidence: Percentage::from(0.0),
            processingTime: Duration::zero(),
            enabled: false
        );
    }

    try {
        $extracted = $this->extractFeatures($requestData, $context);
        $knownBaselines = $this->getBaselines($extracted);
        $detected = $this->detectAnomalies($extracted, $knownBaselines);
        $overall = $this->calculateOverallConfidence($detected);

        // Let the detectors learn from this request before timing is taken.
        $this->updateModels($extracted);

        $elapsed = $startedAt->diff($this->clock->time());
        $this->recordPerformanceMetrics($elapsed, count($extracted), count($detected));

        return new MachineLearningResult(
            features: $extracted,
            anomalies: $detected,
            confidence: $overall,
            processingTime: $elapsed,
            enabled: true,
            extractorResults: $this->getExtractorResults($extracted),
            detectorResults: $this->getDetectorResults($detected),
            baselineStats: $this->getBaselineStats($knownBaselines)
        );
    } catch (\Throwable $failure) {
        $elapsed = $startedAt->diff($this->clock->time());

        return new MachineLearningResult(
            features: [],
            anomalies: [],
            confidence: Percentage::from(0.0),
            processingTime: $elapsed,
            enabled: true,
            error: $failure->getMessage()
        );
    }
}
/**
 * Extract behavioral features from request data.
 *
 * Extractors run in descending priority order; disabled extractors and those
 * that cannot handle the request are skipped. Extraction stops when the
 * analysis timeout (measured from the start of this method) has elapsed or
 * when the per-request feature limit is reached. An extractor that throws is
 * skipped so the remaining extractors still run.
 *
 * With feature caching enabled the result is stored under a key derived from
 * the request and returned directly for later requests with the same key.
 * NOTE(review): the key is built from $requestData only, so calls that differ
 * only in $context share a cache entry - confirm this is intended.
 *
 * @param RequestAnalysisData $requestData Request to extract features from
 * @param array $context Additional context forwarded to every extractor
 * @return Feature[] Validated features, never more than $maxFeaturesPerRequest
 */
private function extractFeatures(RequestAnalysisData $requestData, array $context): array
{
    // Only pay for serialising and hashing the request when the cache is in use.
    $cacheKey = $this->enableFeatureCaching ? $this->generateFeatureCacheKey($requestData) : null;
    if ($cacheKey !== null && isset($this->featureCache[$cacheKey])) {
        return $this->featureCache[$cacheKey];
    }

    // Highest priority first; usort works on a copy so the configured order stays untouched.
    $sortedExtractors = $this->extractors;
    usort($sortedExtractors, static fn ($a, $b) => $b->getPriority() <=> $a->getPriority());

    $featureLimit = max(0, $this->maxFeaturesPerRequest);
    $timeoutMs = $this->analysisTimeout->toMilliseconds();
    $extractionStart = $this->clock->time();
    $allFeatures = [];

    foreach ($sortedExtractors as $extractor) {
        // The budget is measured from the start of extraction so that it
        // accumulates across extractors instead of restarting for each one.
        if ($extractionStart->diff($this->clock->time())->toMilliseconds() > $timeoutMs) {
            break;
        }

        if (! $extractor->isEnabled() || ! $extractor->canExtract($requestData)) {
            continue;
        }

        try {
            $extracted = $extractor->extractFeatures($requestData, $context);
            $allFeatures = array_merge($allFeatures, $this->validateFeatures($extracted));
        } catch (\Throwable) {
            // Best effort: one failing extractor must not abort the whole analysis.
            continue;
        }

        if (count($allFeatures) >= $featureLimit) {
            // The last extractor may have overshot the limit; enforce it exactly.
            $allFeatures = array_slice($allFeatures, 0, $featureLimit);
            break;
        }
    }

    if ($cacheKey !== null) {
        $this->featureCache[$cacheKey] = $allFeatures;

        // Keep the cache bounded by dropping the oldest entry.
        if (count($this->featureCache) > 100) {
            array_shift($this->featureCache);
        }
    }

    return $allFeatures;
}
/**
 * Detect anomalies in extracted features.
 *
 * Every enabled detector that accepts the feature set is run against the
 * baseline matching one of its supported feature types (or null when none is
 * available). Anomalies below the configured confidence threshold are
 * dropped. A detector that throws is skipped so the remaining detectors
 * still run.
 *
 * @param array $features Features extracted for the current request
 * @param array $baselines Baselines keyed by feature type value
 * @return AnomalyDetection[] Deduplicated anomalies, highest anomaly score first
 */
private function detectAnomalies(array $features, array $baselines): array
{
    // Loop-invariant: resolve the threshold once instead of per anomaly.
    $minimumConfidence = $this->confidenceThreshold->getValue();
    $allAnomalies = [];

    foreach ($this->detectors as $detector) {
        if (! $detector->isEnabled() || ! $detector->canAnalyze($features)) {
            continue;
        }

        try {
            $relevantBaseline = $this->getRelevantBaseline($detector, $baselines);
            $anomalies = $detector->detectAnomalies($features, $relevantBaseline);

            $validAnomalies = array_filter(
                $anomalies,
                static fn (AnomalyDetection $anomaly): bool => $anomaly->confidence->getValue() >= $minimumConfidence
            );

            $allAnomalies = array_merge($allAnomalies, $validAnomalies);
        } catch (\Throwable) {
            // Best effort: a failing detector contributes no anomalies. The
            // handler calls nothing on the detector, so it cannot throw again.
            continue;
        }
    }

    return $this->deduplicateAndRankAnomalies($allAnomalies);
}
/**
 * Resolve one baseline per feature type present in the given features.
 *
 * A cached baseline is reused when available; otherwise a baseline is
 * derived from the features of that type and stored in the cache. Entries
 * that are not Feature instances are ignored.
 *
 * @param array $features Features extracted for the current request
 * @return array Baselines keyed by feature type value
 */
private function getBaselines(array $features): array
{
    // Bucket the features by the backing value of their type.
    $byType = [];
    foreach ($features as $candidate) {
        if (! $candidate instanceof Feature) {
            continue;
        }

        $byType[$candidate->type->value][] = $candidate;
    }

    $resolved = [];
    foreach ($byType as $typeValue => $members) {
        $key = "baseline:{$typeValue}";

        if (isset($this->baselineCache[$key])) {
            $resolved[$typeValue] = $this->baselineCache[$key];
            continue;
        }

        // Nothing cached yet: bootstrap a baseline from the current features.
        $fresh = $this->createBaselineFromFeatures($members, FeatureType::from($typeValue));
        if ($fresh === null) {
            continue;
        }

        $resolved[$typeValue] = $fresh;
        $this->baselineCache[$key] = $fresh;
    }

    return $resolved;
}
/**
 * Create a behavioral baseline from a set of features of one type.
 *
 * Computes mean, population standard deviation, minimum, maximum and the
 * 50th/95th/99th percentiles of the feature values. Percentiles use linear
 * interpolation between the two closest ranks, so the 50th percentile is the
 * true median (the average of the two middle values for an even sample
 * count) and high percentiles do not collapse onto the maximum for small
 * samples.
 *
 * @param Feature[] $features Features to summarise
 * @param FeatureType $featureType Type recorded on the baseline
 * @return Baseline|null Baseline for the features, or null when $features is empty
 */
private function createBaselineFromFeatures(array $features, FeatureType $featureType): ?Baseline
{
    if ($features === []) {
        return null;
    }

    $values = array_map(static fn (Feature $f) => $f->value, $features);
    $count = count($values);

    $mean = array_sum($values) / $count;
    $variance = array_sum(array_map(static fn ($v) => ($v - $mean) ** 2, $values)) / $count;

    sort($values);

    // Percentile by linear interpolation: the fractional rank p * (n - 1)
    // lies between two neighbouring sorted values, which are blended by the
    // fractional part. The rank never exceeds n - 1, so indexing stays in bounds.
    $percentile = static function (float $fraction) use ($values, $count): float {
        $rank = $fraction * ($count - 1);
        $lower = (int) floor($rank);
        $upper = (int) ceil($rank);

        return $values[$lower] + ($values[$upper] - $values[$lower]) * ($rank - $lower);
    };

    $median = $percentile(0.50);

    return new Baseline(
        type: $featureType,
        identifier: 'dynamic-baseline',
        mean: $mean,
        standardDeviation: sqrt($variance),
        median: $median,
        minimum: $values[0],
        maximum: $values[$count - 1],
        percentiles: [
            50 => $median,
            95 => $percentile(0.95),
            99 => $percentile(0.99),
        ],
        sampleCount: $count,
        createdAt: $this->clock->time(),
        lastUpdated: $this->clock->time(),
        windowSize: Duration::fromMinutes(30),
        confidence: min(1.0, $count * 0.05) // 5% per sample, capped at 100%
    );
}
/**
 * Pick the baseline a detector should be evaluated against.
 *
 * Walks the detector's supported feature types in the order the detector
 * reports them and returns the first baseline found for one of them.
 *
 * @param AnomalyDetectorInterface $detector Detector about to be run
 * @param array $baselines Baselines keyed by feature type value
 * @return Baseline|null Matching baseline, or null when none of the supported types has one
 */
private function getRelevantBaseline(AnomalyDetectorInterface $detector, array $baselines): ?Baseline
{
    foreach ($detector->getSupportedFeatureTypes() as $supported) {
        $candidate = $baselines[$supported->value] ?? null;

        if ($candidate !== null) {
            return $candidate;
        }
    }

    return null;
}
/**
 * Keep only usable features.
 *
 * An entry is kept when it is a Feature instance whose value is numeric and
 * neither NaN nor infinite. The result is re-indexed as a list.
 *
 * @param array $features Raw extractor output
 * @return Feature[] Features that passed validation, in their original order
 */
private function validateFeatures(array $features): array
{
    $accepted = array_filter(
        $features,
        static fn ($item): bool => $item instanceof Feature
            && is_numeric($item->value)
            && ! (is_nan($item->value) || is_infinite($item->value))
    );

    return array_values($accepted);
}
/**
 * Combine the confidences of all anomalies into one overall value.
 *
 * The result is the average confidence weighted by each anomaly's score,
 * capped at 100. It is 0 when there are no anomalies or the weights do not
 * sum to a positive number. Entries that are not AnomalyDetection instances
 * are ignored.
 *
 * @param array $anomalies Detected anomalies
 * @return Percentage Overall confidence
 */
private function calculateOverallConfidence(array $anomalies): Percentage
{
    if ($anomalies === []) {
        return Percentage::from(0.0);
    }

    $weightedTotal = 0.0;
    $totalWeight = 0.0;

    foreach ($anomalies as $entry) {
        if (! $entry instanceof AnomalyDetection) {
            continue;
        }

        // The anomaly score doubles as the weight of this anomaly's confidence.
        $score = $entry->anomalyScore;
        $weightedTotal += $entry->confidence->getValue() * $score;
        $totalWeight += $score;
    }

    if ($totalWeight > 0) {
        $average = $weightedTotal / $totalWeight;
    } else {
        $average = 0.0;
    }

    return Percentage::from(min(100.0, $average));
}
/**
 * Feed the extracted features to every enabled detector's model.
 *
 * Updates are best effort: a detector whose update throws is skipped and the
 * remaining detectors are still updated.
 *
 * @param array $features Features extracted for the current request
 */
private function updateModels(array $features): void
{
    foreach ($this->detectors as $model) {
        if (! $model->isEnabled()) {
            continue;
        }

        try {
            $model->updateModel($features);
        } catch (\Throwable $ignored) {
            // Intentionally discarded so one failing detector cannot block the
            // others. Note that nothing is logged here.
        }
    }
}
/**
 * Apply feedback-based adjustments to machine learning models.
 *
 * Each adjustment is routed to the detectors matching its category and
 * processed independently: a failing or invalid adjustment is recorded in
 * the per-adjustment results and does not stop the remaining ones.
 *
 * The returned array always contains `success`, `applied_count`,
 * `failed_count` and `results`; for empty input it additionally carries a
 * `message`.
 *
 * @param array<string, ValueObjects\ModelAdjustment> $adjustments Adjustments to apply, keyed by id
 * @return array<string, mixed> Summary plus per-adjustment results keyed by the same ids
 */
public function applyFeedbackAdjustments(array $adjustments): array
{
    if ($adjustments === []) {
        // Same shape as the regular result so callers can read the counters unconditionally.
        return [
            'success' => true,
            'applied_count' => 0,
            'failed_count' => 0,
            'results' => [],
            'message' => 'No adjustments to apply',
        ];
    }

    $appliedCount = 0;
    $failedCount = 0;
    $results = [];

    foreach ($adjustments as $id => $adjustment) {
        // Reject foreign entries up front: the error handler below calls
        // toArray() on the adjustment and would itself fail on a non-object,
        // aborting the whole batch.
        if (! $adjustment instanceof ValueObjects\ModelAdjustment) {
            $results[$id] = [
                'success' => false,
                'message' => 'Invalid adjustment: expected a ModelAdjustment instance',
            ];
            $failedCount++;

            continue;
        }

        try {
            $applicableDetectors = $this->findDetectorsForCategory($adjustment->category);

            if (empty($applicableDetectors)) {
                $results[$id] = [
                    'success' => false,
                    'message' => 'No applicable detectors found for category: ' . $adjustment->category->value,
                ];
                $failedCount++;

                continue;
            }

            // Keyed by detector class name; detectors sharing a class overwrite each other here.
            $detectorResults = [];
            foreach ($applicableDetectors as $detector) {
                $detectorResults[$detector::class] = $this->applyAdjustmentToDetector($detector, $adjustment);
            }

            $results[$id] = [
                'success' => true,
                'detector_results' => $detectorResults,
                'adjustment' => $adjustment->toArray(),
            ];
            $appliedCount++;
        } catch (\Throwable $e) {
            $results[$id] = [
                'success' => false,
                'message' => 'Error applying adjustment: ' . $e->getMessage(),
                'adjustment' => $adjustment->toArray(),
            ];
            $failedCount++;
        }
    }

    return [
        'success' => $failedCount === 0,
        'applied_count' => $appliedCount,
        'failed_count' => $failedCount,
        'results' => $results,
    ];
}
/**
 * Select the detectors considered responsible for a detection category.
 *
 * This is a name-based heuristic: a detector matches when its class name
 * contains (case-insensitively) the category's backing value or one of the
 * generic markers "Generic", "Statistical" or "Clustering".
 *
 * NOTE(review): DetectionCategory is not among the visible `use` imports, so
 * it resolves relative to this file's namespace - confirm the class lives there.
 *
 * @param DetectionCategory $category The category to find detectors for
 * @return array<AnomalyDetectorInterface> Matching detectors, in configured order
 */
private function findDetectorsForCategory(DetectionCategory $category): array
{
    $needle = $category->value;
    $matches = [];

    foreach ($this->detectors as $candidate) {
        $className = get_class($candidate);

        // The first marker that matches is enough; the rest are not checked.
        foreach ([$needle, 'Generic', 'Statistical', 'Clustering'] as $marker) {
            if (stripos($className, $marker) !== false) {
                $matches[] = $candidate;
                break;
            }
        }
    }

    return $matches;
}
/**
 * Apply one model adjustment to one detector.
 *
 * Each part of the adjustment (threshold, confidence, feature weights) is
 * applied only when the detector implements the matching adjustable
 * interface and the adjustment actually carries a change of that kind.
 *
 * @param AnomalyDetectorInterface $detector The detector to adjust
 * @param ValueObjects\ModelAdjustment $adjustment The adjustment to apply
 * @return array<string, mixed> Flags for the threshold and confidence parts, the number of
 *                              adjusted features and, when weights changed, the adjusted features
 */
private function applyAdjustmentToDetector(
    AnomalyDetectorInterface $detector,
    ValueObjects\ModelAdjustment $adjustment
): array {
    $thresholdApplied = false;
    if ($detector instanceof ThresholdAdjustableInterface && ! $adjustment->thresholdAdjustment->isZero()) {
        $detector->adjustThreshold($adjustment->thresholdAdjustment);
        $thresholdApplied = true;
    }

    $confidenceApplied = false;
    if ($detector instanceof ConfidenceAdjustableInterface && ! $adjustment->confidenceAdjustment->isZero()) {
        $detector->adjustConfidence($adjustment->confidenceAdjustment);
        $confidenceApplied = true;
    }

    $outcome = [
        'threshold_adjusted' => $thresholdApplied,
        'confidence_adjusted' => $confidenceApplied,
        'features_adjusted' => 0,
    ];

    if ($detector instanceof FeatureWeightAdjustableInterface && $adjustment->hasFeatureWeightAdjustments()) {
        $changed = $detector->adjustFeatureWeights($adjustment->featureWeightAdjustments);
        $outcome['features_adjusted'] = count($changed);
        $outcome['adjusted_features'] = $changed;
    }

    return $outcome;
}
/**
 * Collapse duplicate anomalies and order the remainder by anomaly score.
 *
 * Two anomalies are duplicates when both their type and feature type match;
 * of a group of duplicates the one with the highest confidence is kept (the
 * earliest one on a tie). Entries that are not AnomalyDetection instances
 * are dropped.
 *
 * @param array $anomalies Anomalies collected from all detectors
 * @return AnomalyDetection[] Unique anomalies, highest anomaly score first
 */
private function deduplicateAndRankAnomalies(array $anomalies): array
{
    $bestPerKey = [];

    foreach ($anomalies as $candidate) {
        if (! $candidate instanceof AnomalyDetection) {
            continue;
        }

        $key = $candidate->type->value . ':' . $candidate->featureType->value;
        $current = $bestPerKey[$key] ?? null;

        // Strictly greater: an equally confident later duplicate does not replace the first.
        if ($current === null || $candidate->confidence->getValue() > $current->confidence->getValue()) {
            $bestPerKey[$key] = $candidate;
        }
    }

    $ranked = array_values($bestPerKey);
    usort($ranked, static fn ($left, $right) => $right->anomalyScore <=> $left->anomalyScore);

    return $ranked;
}
/**
 * Build the feature cache key for a request.
 *
 * The key is the MD5 hash of the serialized path, method, parameters, user
 * agent and client IP. It serves purely as a lookup key.
 *
 * @param RequestAnalysisData $requestData Request to fingerprint
 * @return string MD5 hash as returned by md5()
 */
private function generateFeatureCacheKey(RequestAnalysisData $requestData): string
{
    $fingerprint = [
        'path' => $requestData->path,
        'method' => $requestData->method,
        'params' => $requestData->getAllParameters(),
        'user_agent' => (string) $requestData->userAgent,
        'ip' => (string) $requestData->clientIp,
    ];

    return md5(serialize($fingerprint));
}
/**
 * Summarise the extracted features per feature type.
 *
 * For each type the summary holds the number of features, the average
 * feature value and the distinct feature names. Entries that are not
 * Feature instances are ignored.
 *
 * @param array $features Features extracted for the current request
 * @return array List of per-type summaries
 */
private function getExtractorResults(array $features): array
{
    $grouped = [];
    foreach ($features as $item) {
        if ($item instanceof Feature) {
            $grouped[$item->type->value][] = $item;
        }
    }

    $summary = [];
    foreach ($grouped as $type => $members) {
        $memberCount = count($members);
        $values = array_map(fn ($f) => $f->value, $members);
        $names = array_map(fn ($f) => $f->name, $members);

        $summary[] = [
            'feature_type' => $type,
            'feature_count' => $memberCount,
            'avg_value' => array_sum($values) / $memberCount,
            'feature_names' => array_unique($names),
        ];
    }

    return $summary;
}
/**
 * Summarise the detected anomalies per anomaly type.
 *
 * For each anomaly type the summary holds the number of anomalies, their
 * average confidence and the highest anomaly score. Entries that are not
 * AnomalyDetection instances are ignored.
 *
 * @param array $anomalies Anomalies detected for the current request
 * @return array List of per-type summaries
 */
private function getDetectorResults(array $anomalies): array
{
    $grouped = [];
    foreach ($anomalies as $item) {
        if ($item instanceof AnomalyDetection) {
            $grouped[$item->type->value][] = $item;
        }
    }

    $summary = [];
    foreach ($grouped as $type => $members) {
        $memberCount = count($members);
        $confidences = array_map(fn ($a) => $a->confidence->getValue(), $members);
        $scores = array_map(fn ($a) => $a->anomalyScore, $members);

        $summary[] = [
            'detector_type' => $type,
            'anomaly_count' => $memberCount,
            'avg_confidence' => array_sum($confidences) / $memberCount,
            'max_score' => max($scores),
        ];
    }

    return $summary;
}
/**
 * Summarise the baselines used for the current analysis.
 *
 * Emits one record per Baseline instance with its sample size, mean,
 * standard deviation, confidence and last update time in ISO 8601 form.
 * Other entries are ignored.
 *
 * @param array $baselines Baselines keyed by feature type value
 * @return array List of baseline summaries
 */
private function getBaselineStats(array $baselines): array
{
    $summary = [];

    foreach ($baselines as $typeValue => $entry) {
        if (! $entry instanceof Baseline) {
            continue;
        }

        $summary[] = [
            'feature_type' => $typeValue,
            'sample_size' => $entry->sampleCount,
            'mean' => $entry->mean,
            'std_dev' => $entry->standardDeviation,
            'confidence' => $entry->confidence,
            'last_updated' => $entry->lastUpdated->toIso8601String(),
        ];
    }

    return $summary;
}
/**
 * Append one record to the performance history.
 *
 * The history is bounded: once it holds more than 1000 records the oldest
 * one is dropped.
 *
 * @param Duration $processingTime Time spent analysing the request
 * @param int $featureCount Number of features extracted
 * @param int $anomalyCount Number of anomalies detected
 */
private function recordPerformanceMetrics(Duration $processingTime, int $featureCount, int $anomalyCount): void
{
    $sample = [
        'timestamp' => $this->clock->time()->toTimestamp(),
        'processing_time_ms' => $processingTime->toMilliseconds(),
        'feature_count' => $featureCount,
        'anomaly_count' => $anomalyCount,
    ];

    $this->performanceMetrics[] = $sample;

    if (count($this->performanceMetrics) > 1000) {
        array_shift($this->performanceMetrics);
    }
}
/**
 * Aggregate the recorded performance history.
 *
 * NOTE(review): 'cache_hit_ratio' is computed as the number of cached
 * feature sets divided by the number of recorded requests; no cache hit
 * counter is visible in this class - confirm the key name matches the intent.
 *
 * @return array Aggregated statistics, or an empty array when nothing has been recorded yet
 */
public function getPerformanceStats(): array
{
    $sampleCount = count($this->performanceMetrics);
    if ($sampleCount === 0) {
        return [];
    }

    $average = static fn (array $series) => array_sum($series) / count($series);

    $durations = array_column($this->performanceMetrics, 'processing_time_ms');
    $features = array_column($this->performanceMetrics, 'feature_count');
    $anomalies = array_column($this->performanceMetrics, 'anomaly_count');

    if ($this->enableFeatureCaching) {
        $cacheRatio = count($this->featureCache) / max($sampleCount, 1);
    } else {
        $cacheRatio = 0.0;
    }

    return [
        'total_requests' => $sampleCount,
        'avg_processing_time_ms' => $average($durations),
        'max_processing_time_ms' => max($durations),
        'avg_feature_count' => $average($features),
        'avg_anomaly_count' => $average($anomalies),
        'cache_hit_ratio' => $cacheRatio,
    ];
}
/**
 * Report the engine's settings together with the current sizes of its
 * collaborator lists and caches.
 *
 * @return array Static settings followed by runtime counts
 */
public function getConfiguration(): array
{
    $settings = [
        'enabled' => $this->enabled,
        'analysis_timeout_ms' => $this->analysisTimeout->toMilliseconds(),
        'confidence_threshold' => $this->confidenceThreshold->getValue(),
        'enable_parallel_processing' => $this->enableParallelProcessing,
        'enable_feature_caching' => $this->enableFeatureCaching,
        'max_features_per_request' => $this->maxFeaturesPerRequest,
    ];

    $counts = [
        'extractor_count' => count($this->extractors),
        'detector_count' => count($this->detectors),
        'cache_size' => count($this->featureCache),
        'baseline_count' => count($this->baselineCache),
    ];

    // The key sets are disjoint, so the union simply appends the counts.
    return $settings + $counts;
}
/**
 * Check if the engine is enabled.
 *
 * @return bool The `enabled` flag passed to the constructor
 */
public function isEnabled(): bool
{
return $this->enabled;
}
}