Enable Discovery debug logging for production troubleshooting
- Add DISCOVERY_LOG_LEVEL=debug
- Add DISCOVERY_SHOW_PROGRESS=true
- Temporary changes for debugging InitializerProcessor fixes on production
This commit is contained in:
529
src/Framework/Waf/MachineLearning/BaselineManager.php
Normal file
529
src/Framework/Waf/MachineLearning/BaselineManager.php
Normal file
@@ -0,0 +1,529 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Waf\MachineLearning;
|
||||
|
||||
use App\Framework\Core\ValueObjects\Duration;
|
||||
use App\Framework\Core\ValueObjects\Percentage;
|
||||
use App\Framework\DateTime\Clock;
|
||||
use App\Framework\Waf\MachineLearning\ValueObjects\BehaviorBaseline;
|
||||
use App\Framework\Waf\MachineLearning\ValueObjects\BehaviorFeature;
|
||||
|
||||
/**
 * Manages behavioral baselines for anomaly detection.
 *
 * All state (baselines, per-key feature history, last-update timestamps and
 * update metrics) is held in the instance's own arrays. Entries are keyed by
 * "<behavior type value>:<feature name>" (see generateBaselineKey()).
 *
 * A baseline is first built from the recorded feature history once at least
 * $minSamplesForBaseline samples exist, and is afterwards refreshed either in
 * batch (updateBaseline()) or one sample at a time (updateBaselineIncremental()).
 */
final class BaselineManager
{
    /**
     * @param Clock    $clock                    Time source used for timestamps, ageing and seasonal factors.
     * @param Duration $baselineUpdateInterval   Minimum time between two batch updates of the same key.
     * @param Duration $baselineMaxAge           Age after which a baseline is treated as expired.
     * @param int      $minSamplesForBaseline    History size required before a baseline is created or batch-updated.
     * @param int      $maxSamplesPerBaseline    Upper bound for the per-key history and for a baseline's sample size.
     * @param float    $learningRate             Base weight of new data in moving-average updates.
     * @param bool     $enableAdaptiveBaselines  When false, existing baselines are never changed by batch updates.
     * @param bool     $enableSeasonalAdjustment When true, newly created baselines get a time-of-day/week adjustment.
     * @param array<string, BehaviorBaseline>      $baselines          Baselines by key.
     * @param array<string, list<BehaviorFeature>> $featureHistory     Recorded features by key, oldest first.
     * @param array<string, mixed>                 $updateTimestamps   Clock value of the last update by key.
     * @param list<array<string, mixed>>           $performanceMetrics Log of batch updates, oldest first.
     */
    public function __construct(
        private readonly Clock $clock,
        private readonly Duration $baselineUpdateInterval,
        private readonly Duration $baselineMaxAge,
        private readonly int $minSamplesForBaseline = 50,
        private readonly int $maxSamplesPerBaseline = 10000,
        private readonly float $learningRate = 0.1,
        private readonly bool $enableAdaptiveBaselines = true,
        private readonly bool $enableSeasonalAdjustment = true,
        private array $baselines = [],
        private array $featureHistory = [],
        private array $updateTimestamps = [],
        private array $performanceMetrics = []
    ) {
    }

    /**
     * Get baseline for a specific behavior type and feature.
     *
     * An expired baseline is removed from the internal map as a side effect.
     *
     * @return BehaviorBaseline|null The baseline, or null when none exists or it has expired.
     */
    public function getBaseline(BehaviorType $behaviorType, string $featureName = 'default'): ?BehaviorBaseline
    {
        $key = $this->generateBaselineKey($behaviorType, $featureName);
        $baseline = $this->baselines[$key] ?? null;

        if ($baseline === null) {
            return null;
        }

        if ($this->isBaselineExpired($baseline)) {
            // Only the baseline itself is dropped here; history and timestamps are kept.
            unset($this->baselines[$key]);

            return null;
        }

        return $baseline;
    }

    /**
     * Record a feature and, when due, rebuild the baseline from the recorded history.
     *
     * The feature is always appended to the history. The baseline is only
     * touched when the update interval has elapsed (or no update happened yet)
     * and the history holds at least $minSamplesForBaseline entries.
     */
    public function updateBaseline(BehaviorFeature $feature): void
    {
        $key = $this->generateBaselineKey($feature->type, $feature->name);

        $this->recordFeature($key, $feature);

        if (! $this->shouldUpdateBaseline($key)) {
            return;
        }

        $history = $this->featureHistory[$key] ?? [];
        $historySize = count($history);

        if ($historySize < $this->minSamplesForBaseline) {
            return;
        }

        $existingBaseline = $this->baselines[$key] ?? null;

        $this->baselines[$key] = $existingBaseline === null
            ? $this->createInitialBaseline($feature->type, $feature->name, $history)
            : $this->updateExistingBaseline($existingBaseline, $history);

        $this->updateTimestamps[$key] = $this->clock->time();

        $this->recordBaselineUpdate($key, $historySize);
    }

    /**
     * Update baseline incrementally with a single new feature.
     *
     * Without an existing baseline this falls back to updateBaseline(), which
     * collects samples until an initial baseline can be created. Otherwise the
     * baseline is adjusted via a moving average and the feature is appended to
     * the history afterwards.
     */
    public function updateBaselineIncremental(BehaviorFeature $feature): void
    {
        $key = $this->generateBaselineKey($feature->type, $feature->name);
        $existingBaseline = $this->baselines[$key] ?? null;

        if ($existingBaseline === null) {
            // Need enough samples for initial baseline
            $this->updateBaseline($feature);

            return;
        }

        $this->baselines[$key] = $this->incrementalUpdate($existingBaseline, $feature);
        $this->updateTimestamps[$key] = $this->clock->time();

        $this->recordFeature($key, $feature);
    }

    /**
     * Get all non-expired baselines for a behavior type.
     *
     * @return array<string, BehaviorBaseline> Matching baselines, indexed by baseline key.
     */
    public function getBaselinesForBehaviorType(BehaviorType $behaviorType): array
    {
        $matching = [];

        foreach ($this->baselines as $key => $baseline) {
            if ($baseline->type !== $behaviorType) {
                continue;
            }

            if ($this->isBaselineExpired($baseline)) {
                continue;
            }

            $matching[$key] = $baseline;
        }

        return $matching;
    }

    /**
     * Remove expired baselines together with their history and update timestamp.
     *
     * @return int Number of baselines removed.
     */
    public function cleanExpiredBaselines(): int
    {
        $removedCount = 0;

        foreach ($this->baselines as $key => $baseline) {
            if (! $this->isBaselineExpired($baseline)) {
                continue;
            }

            unset(
                $this->baselines[$key],
                $this->featureHistory[$key],
                $this->updateTimestamps[$key],
            );
            $removedCount++;
        }

        return $removedCount;
    }

    /**
     * Get aggregate statistics over all stored baselines (expired ones included).
     *
     * @return array<string, mixed> Counts, average sample size, average age in
     *                              seconds, distinct behavior type values and
     *                              the total number of recorded features.
     */
    public function getBaselineStats(): array
    {
        $totalBaselines = count($this->baselines);
        $expiredBaselines = 0;
        $avgSampleSize = 0;
        $avgAge = 0;

        $now = $this->clock->time();

        foreach ($this->baselines as $baseline) {
            if ($this->isBaselineExpired($baseline)) {
                $expiredBaselines++;
            }

            $avgSampleSize += $baseline->sampleSize;
            $avgAge += $baseline->lastUpdated->diff($now)->toSeconds();
        }

        // Sums become averages; skipped when empty to avoid dividing by zero.
        if ($totalBaselines > 0) {
            $avgSampleSize /= $totalBaselines;
            $avgAge /= $totalBaselines;
        }

        return [
            'total_baselines' => $totalBaselines,
            'expired_baselines' => $expiredBaselines,
            'active_baselines' => $totalBaselines - $expiredBaselines,
            'avg_sample_size' => $avgSampleSize,
            'avg_age_seconds' => $avgAge,
            'behavior_types' => $this->getBaselineBehaviorTypes(),
            'feature_history_size' => $this->countRecordedFeatures(),
        ];
    }

    /**
     * Create initial baseline from feature history.
     *
     * Confidence is derived from the sample size. When seasonal adjustment is
     * enabled, mean and standard deviation are adjusted; percentiles are not.
     *
     * @param list<BehaviorFeature> $featureHistory
     */
    private function createInitialBaseline(BehaviorType $behaviorType, string $featureName, array $featureHistory): BehaviorBaseline
    {
        $values = array_map(static fn (BehaviorFeature $f) => $f->value, $featureHistory);

        $stats = $this->calculateStatistics($values);
        $confidence = $this->calculateBaselineConfidence($stats['sample_size']);

        if ($this->enableSeasonalAdjustment) {
            $stats = $this->applySeasonalAdjustment($stats, $featureHistory);
        }

        return new BehaviorBaseline(
            type: $behaviorType,
            mean: $stats['mean'],
            standardDeviation: $stats['std_dev'],
            sampleSize: $stats['sample_size'],
            p50: $stats['p50'],
            p95: $stats['p95'],
            p99: $stats['p99'],
            confidence: $confidence,
            lastUpdated: $this->clock->time(),
            metadata: [
                'feature_name' => $featureName,
                'creation_method' => 'initial',
                'seasonal_adjusted' => $this->enableSeasonalAdjustment,
            ]
        );
    }

    /**
     * Blend an existing baseline with statistics of the current feature history.
     *
     * Mean and standard deviation are moved towards the history's values by
     * the adaptive learning rate; percentiles are recomputed from the history.
     * Returns the baseline unchanged when adaptive baselines are disabled.
     *
     * @param list<BehaviorFeature> $featureHistory
     */
    private function updateExistingBaseline(BehaviorBaseline $existingBaseline, array $featureHistory): BehaviorBaseline
    {
        if (! $this->enableAdaptiveBaselines) {
            return $existingBaseline;
        }

        $values = array_map(static fn (BehaviorFeature $f) => $f->value, $featureHistory);
        $newStats = $this->calculateStatistics($values);

        $rate = $this->calculateAdaptiveLearningRate($existingBaseline, $newStats['sample_size']);
        $keep = 1 - $rate;

        // Exponential moving average update
        $updatedMean = $existingBaseline->mean * $keep + $newStats['mean'] * $rate;
        $updatedStdDev = $existingBaseline->standardDeviation * $keep + $newStats['std_dev'] * $rate;

        // Percentiles come from the most recent history values only.
        $recentValues = array_slice($values, -$this->maxSamplesPerBaseline);
        sort($recentValues);

        // NOTE(review): the history is a rolling window that may contain samples
        // already counted in the existing baseline, so this sum can count them
        // twice before the cap applies — confirm whether that is intended.
        $newSampleSize = min(
            $existingBaseline->sampleSize + $newStats['sample_size'],
            $this->maxSamplesPerBaseline
        );

        return new BehaviorBaseline(
            type: $existingBaseline->type,
            mean: $updatedMean,
            standardDeviation: $updatedStdDev,
            sampleSize: $newSampleSize,
            p50: $this->calculatePercentile($recentValues, 50),
            p95: $this->calculatePercentile($recentValues, 95),
            p99: $this->calculatePercentile($recentValues, 99),
            confidence: $this->calculateBaselineConfidence($newSampleSize),
            lastUpdated: $this->clock->time(),
            metadata: array_merge($existingBaseline->metadata, [
                'update_method' => 'adaptive',
                'learning_rate' => $rate,
                'updates_count' => ($existingBaseline->metadata['updates_count'] ?? 0) + 1,
            ])
        );
    }

    /**
     * Fold a single new feature into a baseline using exponential moving averages.
     *
     * Percentiles and confidence are carried over unchanged. The sample size
     * grows by one but never beyond $maxSamplesPerBaseline, matching the cap
     * applied by updateExistingBaseline().
     */
    private function incrementalUpdate(BehaviorBaseline $baseline, BehaviorFeature $newFeature): BehaviorBaseline
    {
        $rate = $this->calculateAdaptiveLearningRate($baseline, 1);
        $value = $newFeature->value;

        $updatedMean = $baseline->mean * (1 - $rate) + $value * $rate;

        // Exponentially weighted variance (not Welford's cumulative form): the
        // deviation from the old mean times the deviation from the new mean,
        // weighted by the learning rate, is blended into the decayed variance.
        $deviationFromOldMean = $value - $baseline->mean;
        $deviationFromNewMean = $value - $updatedMean;
        $previousVariance = $baseline->standardDeviation ** 2;
        $updatedVariance = $previousVariance * (1 - $rate)
            + $deviationFromOldMean * $deviationFromNewMean * $rate;

        // Guard against a tiny negative value from floating point rounding.
        $updatedStdDev = sqrt(max(0, $updatedVariance));

        return new BehaviorBaseline(
            type: $baseline->type,
            mean: $updatedMean,
            standardDeviation: $updatedStdDev,
            sampleSize: min($baseline->sampleSize + 1, $this->maxSamplesPerBaseline),
            p50: $baseline->p50, // Keep existing percentiles for incremental updates
            p95: $baseline->p95,
            p99: $baseline->p99,
            confidence: $baseline->confidence,
            lastUpdated: $this->clock->time(),
            metadata: array_merge($baseline->metadata, [
                'update_method' => 'incremental',
                'last_value' => $value,
            ])
        );
    }

    /**
     * Calculate mean, population standard deviation and percentiles of values.
     *
     * @param array<int|float> $values Unsorted numeric values.
     *
     * @return array{mean: float|int, std_dev: float, sample_size: int, p50: float, p95: float, p99: float}
     *         All zeros for an empty input.
     */
    private function calculateStatistics(array $values): array
    {
        $sampleSize = count($values);

        if ($sampleSize === 0) {
            return [
                'mean' => 0.0,
                'std_dev' => 0.0,
                'sample_size' => 0,
                'p50' => 0.0,
                'p95' => 0.0,
                'p99' => 0.0,
            ];
        }

        $mean = array_sum($values) / $sampleSize;

        $squaredDeviations = array_map(
            static fn ($v) => ($v - $mean) ** 2,
            $values
        );
        // Population variance: divides by N, not N - 1.
        $variance = array_sum($squaredDeviations) / $sampleSize;

        // calculatePercentile() expects ascending order.
        sort($values);

        return [
            'mean' => $mean,
            'std_dev' => sqrt($variance),
            'sample_size' => $sampleSize,
            'p50' => $this->calculatePercentile($values, 50),
            'p95' => $this->calculatePercentile($values, 95),
            'p99' => $this->calculatePercentile($values, 99),
        ];
    }

    /**
     * Calculate a percentile from ascending-sorted values using linear interpolation.
     *
     * @param list<int|float> $sortedValues Values sorted ascending with keys 0..n-1.
     * @param float           $percentile   Percentile in the range 0-100.
     *
     * @return float The interpolated value, or 0.0 for an empty list.
     */
    private function calculatePercentile(array $sortedValues, float $percentile): float
    {
        if ($sortedValues === []) {
            return 0.0;
        }

        // Fractional position of the percentile within the index range 0..n-1.
        $position = ($percentile / 100) * (count($sortedValues) - 1);
        $lower = (int) floor($position);
        $upper = (int) ceil($position);

        if ($lower === $upper) {
            return $sortedValues[$lower];
        }

        $weight = $position - $lower;

        return $sortedValues[$lower] * (1 - $weight) + $sortedValues[$upper] * $weight;
    }

    /**
     * Calculate adaptive learning rate.
     *
     * The base learning rate is scaled down as the baseline's confidence rises
     * and scaled up with the number of new samples, saturating at 10 samples.
     */
    private function calculateAdaptiveLearningRate(BehaviorBaseline $baseline, int $newSamples): float
    {
        // Decrease learning rate as confidence increases
        $confidenceFactor = 1.0 - ($baseline->confidence->getValue() / 100.0);

        // Increase learning rate for more new samples
        $sampleFactor = min(1.0, $newSamples / 10.0);

        return $this->learningRate * $confidenceFactor * $sampleFactor;
    }

    /**
     * Calculate baseline confidence based on sample size.
     *
     * Confidence is 75% at exactly $minSamplesForBaseline samples, grows
     * linearly with the sample size and plateaus at 95%.
     */
    private function calculateBaselineConfidence(int $sampleSize): Percentage
    {
        // A configured minimum of 0 (or less) would otherwise divide by zero.
        $requiredSamples = max(1, $this->minSamplesForBaseline);

        $confidence = min(95.0, ($sampleSize / $requiredSamples) * 75.0);

        return new Percentage(max(0.0, $confidence));
    }

    /**
     * Apply a simple time-of-day / day-of-week adjustment to mean and std_dev.
     *
     * Features are counted per hour and per weekday; the share of the current
     * hour and weekday relative to the average bucket yields a seasonal factor
     * whose influence is limited to 10%.
     *
     * @param array<string, mixed>  $stats          Statistics as produced by calculateStatistics().
     * @param list<BehaviorFeature> $featureHistory Features; entries without timestamp are ignored.
     *
     * @return array<string, mixed> $stats with adjusted 'mean' and 'std_dev'.
     */
    private function applySeasonalAdjustment(array $stats, array $featureHistory): array
    {
        $hourCounts = array_fill(0, 24, 0);
        $dayOfWeekCounts = array_fill(0, 7, 0);

        foreach ($featureHistory as $feature) {
            if ($feature->timestamp === null) {
                continue;
            }

            $hourCounts[(int) $feature->timestamp->format('H')]++;
            $dayOfWeekCounts[(int) $feature->timestamp->format('w')]++;
        }

        // Read the clock once so hour and weekday describe the same instant.
        $now = $this->clock->time();
        $currentHour = (int) $now->format('H');
        $currentDayOfWeek = (int) $now->format('w');

        $avgHourlyCount = array_sum($hourCounts) / 24;
        $avgDailyCount = array_sum($dayOfWeekCounts) / 7;

        // With no timestamped features the factors stay neutral (1.0).
        $hourlyFactor = $avgHourlyCount > 0 ? $hourCounts[$currentHour] / $avgHourlyCount : 1.0;
        $dailyFactor = $avgDailyCount > 0 ? $dayOfWeekCounts[$currentDayOfWeek] / $avgDailyCount : 1.0;

        $seasonalFactor = ($hourlyFactor + $dailyFactor) / 2;
        $adjustmentWeight = 0.1; // Limit seasonal impact

        // The mean may move either way; the spread is only ever widened.
        $stats['mean'] *= (1 + ($seasonalFactor - 1) * $adjustmentWeight);
        $stats['std_dev'] *= (1 + abs($seasonalFactor - 1) * $adjustmentWeight);

        return $stats;
    }

    /**
     * Append a feature to the history of a key, dropping the oldest entry
     * once the history exceeds $maxSamplesPerBaseline.
     */
    private function recordFeature(string $key, BehaviorFeature $feature): void
    {
        $this->featureHistory[$key] ??= [];
        $this->featureHistory[$key][] = $feature;

        if (count($this->featureHistory[$key]) > $this->maxSamplesPerBaseline) {
            array_shift($this->featureHistory[$key]);
        }
    }

    /**
     * Check if baseline should be updated.
     *
     * @return bool True when the key was never updated or the update interval has elapsed.
     */
    private function shouldUpdateBaseline(string $key): bool
    {
        $lastUpdate = $this->updateTimestamps[$key] ?? null;

        if ($lastUpdate === null) {
            return true;
        }

        $elapsed = $lastUpdate->diff($this->clock->time());

        return $elapsed->toMilliseconds() >= $this->baselineUpdateInterval->toMilliseconds();
    }

    /**
     * Check if baseline is expired, i.e. its last update is older than $baselineMaxAge.
     */
    private function isBaselineExpired(BehaviorBaseline $baseline): bool
    {
        $age = $baseline->lastUpdated->diff($this->clock->time());

        return $age->toMilliseconds() > $this->baselineMaxAge->toMilliseconds();
    }

    /**
     * Generate baseline key in the form "<behavior type value>:<feature name>".
     */
    private function generateBaselineKey(BehaviorType $behaviorType, string $featureName): string
    {
        return $behaviorType->value . ':' . $featureName;
    }

    /**
     * Get the distinct behavior type values that currently have a baseline.
     *
     * @return list<int|string> Distinct values in first-seen order, re-indexed from 0.
     */
    private function getBaselineBehaviorTypes(): array
    {
        $types = [];

        foreach ($this->baselines as $baseline) {
            $types[] = $baseline->type->value;
        }

        // array_unique() keeps the original keys; re-index so the result is a
        // proper list (and serialises as a JSON array rather than an object).
        return array_values(array_unique($types));
    }

    /**
     * Count all recorded features across every baseline key.
     */
    private function countRecordedFeatures(): int
    {
        return array_sum(array_map(count(...), $this->featureHistory));
    }

    /**
     * Record baseline update metrics, keeping at most the 1000 latest entries.
     */
    private function recordBaselineUpdate(string $key, int $sampleCount): void
    {
        $this->performanceMetrics[] = [
            'timestamp' => $this->clock->time()->toUnixTimestamp(),
            'baseline_key' => $key,
            'sample_count' => $sampleCount,
            'operation' => 'update',
        ];

        // Limit metrics history
        if (count($this->performanceMetrics) > 1000) {
            array_shift($this->performanceMetrics);
        }
    }

    /**
     * Get performance metrics.
     *
     * @return list<array<string, mixed>> Recorded batch updates, oldest first.
     */
    public function getPerformanceMetrics(): array
    {
        return $this->performanceMetrics;
    }

    /**
     * Get the effective configuration together with current size counters.
     *
     * @return array<string, mixed>
     */
    public function getConfiguration(): array
    {
        return [
            'baseline_update_interval_ms' => $this->baselineUpdateInterval->toMilliseconds(),
            'baseline_max_age_ms' => $this->baselineMaxAge->toMilliseconds(),
            'min_samples_for_baseline' => $this->minSamplesForBaseline,
            'max_samples_per_baseline' => $this->maxSamplesPerBaseline,
            'learning_rate' => $this->learningRate,
            'enable_adaptive_baselines' => $this->enableAdaptiveBaselines,
            'enable_seasonal_adjustment' => $this->enableSeasonalAdjustment,
            'active_baselines' => count($this->baselines),
            'total_feature_history' => $this->countRecordedFeatures(),
        ];
    }
}
|
||||
Reference in New Issue
Block a user