Enable Discovery debug logging for production troubleshooting

- Add DISCOVERY_LOG_LEVEL=debug
- Add DISCOVERY_SHOW_PROGRESS=true
- Temporary changes for debugging InitializerProcessor fixes on production
This commit is contained in:
2025-08-11 20:13:26 +02:00
parent 59fd3dd3b1
commit 55a330b223
3683 changed files with 2956207 additions and 16948 deletions

View File

@@ -0,0 +1,570 @@
<?php
declare(strict_types=1);
namespace App\Framework\Waf\MachineLearning\Extractors;
use App\Framework\Core\ValueObjects\Timestamp;
use App\Framework\Waf\Analysis\ValueObjects\RequestAnalysisData;
use App\Framework\Waf\MachineLearning\BehaviorType;
use App\Framework\Waf\MachineLearning\FeatureExtractorInterface;
use App\Framework\Waf\MachineLearning\ValueObjects\BehaviorFeature;
/**
 * Extracts request-frequency and rate-based behavioral features for a client.
 *
 * Every call to {@see extractFeatures()} records the request's timestamp in a
 * per-client history kept on this instance, drops entries older than the
 * analysis window, and derives a fixed set of features from what remains
 * (see {@see getFeatureNames()}).
 *
 * The history is plain in-memory state, so the extractor is stateful.
 * NOTE(review): only the history of the client currently being analysed is
 * pruned; entries of clients that never return are never removed here.
 * NOTE(review): the history is pruned to `$timeWindowSeconds`, so the
 * sustained-rate window only sees more than that if the analysis window is
 * configured at least as large as SUSTAINED_WINDOW_SECONDS.
 */
final class FrequencyFeatureExtractor implements FeatureExtractorInterface
{
    /** Window (seconds) over which the peak burst rate is measured. */
    private const BURST_WINDOW_SECONDS = 60;

    /** Window (seconds) over which the sustained rate is measured. */
    private const SUSTAINED_WINDOW_SECONDS = 1800;

    /** Lags (seconds) probed when looking for periodic request patterns. */
    private const PERIODICITY_LAGS = [10, 30, 60];

    /** Bucket width (seconds) of the histogram behind the frequency entropy. */
    private const ENTROPY_BUCKET_SECONDS = 10;

    /**
     * @param bool  $enabled           Whether this extractor reports itself as enabled.
     * @param int   $timeWindowSeconds Length of the analysis window; older requests are dropped.
     * @param int   $maxStoredRequests Upper bound of stored timestamps per client.
     * @param float $burstThreshold    Requests per second; only exposed via getConfiguration() in this class.
     * @param array<string, array<int, int|float>> $requestHistory Client id => recorded unix timestamps.
     */
    public function __construct(
        private readonly bool $enabled = true,
        private readonly int $timeWindowSeconds = 300, // 5 minutes
        private readonly int $maxStoredRequests = 1000,
        private readonly float $burstThreshold = 10.0, // requests per second
        private array $requestHistory = [] // In-memory storage (would be Redis in production)
    ) {
    }

    public function getBehaviorType(): BehaviorType
    {
        return BehaviorType::REQUEST_FREQUENCY;
    }

    /**
     * Features can only be extracted when the request carries a client IP.
     */
    public function canExtract(RequestAnalysisData $requestData): bool
    {
        return $requestData->clientIp !== null;
    }

    /**
     * Records the request and returns all frequency features for its client.
     *
     * Side effect: appends the request timestamp to the client's history and
     * prunes entries that fell out of the analysis window.
     *
     * @param array $context Not used by this extractor.
     *
     * @return array List of BehaviorFeature objects, in the order of getFeatureNames();
     *               empty when no request lies inside the analysis window.
     */
    public function extractFeatures(RequestAnalysisData $requestData, array $context = []): array
    {
        $clientId = $this->getClientId($requestData);
        $currentTime = $requestData->timestamp ?? Timestamp::now();

        $this->recordRequest($clientId, $currentTime);
        $this->cleanOldRequests($clientId, $currentTime);

        // Ascending, zero-indexed list of timestamps inside the analysis window.
        $requests = $this->getRequestHistory($clientId, $currentTime);
        if ($requests === []) {
            return [];
        }

        return [
            // Basic frequency features
            $this->extractRequestRate($requests, $this->timeWindowSeconds),
            $this->extractBurstRate($requests, self::BURST_WINDOW_SECONDS),
            $this->extractSustainedRate(
                $requests,
                self::SUSTAINED_WINDOW_SECONDS,
                $currentTime->toUnixTimestamp(),
            ),
            // Pattern-based features
            $this->extractInterArrivalVariance($requests),
            $this->extractRequestSpacing($requests),
            $this->extractPeriodicityScore($requests),
            // Time-based features
            $this->extractTimeOfDayPattern($requests),
            $this->extractWeekdayPattern($requests),
            // Advanced statistical features
            $this->extractFrequencyEntropy($requests),
            $this->extractBurstiness($requests),
        ];
    }

    /**
     * Request count over the analysis window, wrapped as a frequency feature.
     *
     * @param array $requests Timestamps inside the window.
     */
    private function extractRequestRate(array $requests, int $windowSeconds): BehaviorFeature
    {
        return BehaviorFeature::frequency(
            name: "request_rate_{$windowSeconds}s",
            count: count($requests),
            timeWindow: $windowSeconds,
        );
    }

    /**
     * Peak request rate: the largest number of requests that fall into any
     * window of `$windowSeconds` starting at a recorded request, divided by
     * the window length.
     *
     * @param array $requests Ascending, zero-indexed timestamps.
     */
    private function extractBurstRate(array $requests, int $windowSeconds): BehaviorFeature
    {
        $total = count($requests);
        $maxRate = 0.0;

        // Fewer than two requests cannot form a burst; a non-positive window
        // would divide by zero.
        if ($total >= 2 && $windowSeconds > 0) {
            $maxInWindow = 0;
            $end = 0;

            // Two-pointer sweep: because the list is ascending, the window end
            // never moves backwards when the window start advances.
            for ($start = 0; $start < $total; $start++) {
                while ($end < $total && $requests[$end] - $requests[$start] <= $windowSeconds) {
                    $end++;
                }
                $maxInWindow = max($maxInWindow, $end - $start);
            }

            $maxRate = $maxInWindow / $windowSeconds;
        }

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: "burst_rate_{$windowSeconds}s",
            value: $maxRate,
            unit: 'requests/second',
        );
    }

    /**
     * Average request rate over a longer window that ends at `$referenceTime`.
     *
     * The reference is the timestamp of the request being analysed rather
     * than the wall clock, so replayed or back-dated requests are measured
     * on the same time axis as every other feature.
     *
     * @param array     $requests      Timestamps inside the analysis window.
     * @param int|float $referenceTime Unix timestamp of the current request.
     */
    private function extractSustainedRate(
        array $requests,
        int $windowSeconds,
        int|float $referenceTime,
    ): BehaviorFeature {
        $rate = 0.0;

        if ($windowSeconds > 0) {
            $recent = array_filter(
                $requests,
                static fn ($timestamp): bool => ($referenceTime - $timestamp) <= $windowSeconds,
            );
            $rate = count($recent) / $windowSeconds;
        }

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: "sustained_rate_{$windowSeconds}s",
            value: $rate,
            unit: 'requests/second',
        );
    }

    /**
     * Variance of the gaps between consecutive requests.
     *
     * Reports 0.0 when fewer than three requests (two gaps) are available.
     *
     * @param array $requests Ascending, zero-indexed timestamps.
     */
    private function extractInterArrivalVariance(array $requests): BehaviorFeature
    {
        if (count($requests) < 3) {
            return BehaviorFeature::create(
                type: $this->getBehaviorType(),
                name: 'inter_arrival_variance',
                value: 0.0,
                unit: 'seconds²',
            );
        }

        return BehaviorFeature::statistical(
            type: $this->getBehaviorType(),
            name: 'inter_arrival_variance',
            values: $this->interArrivalTimes($requests),
            statistic: 'variance',
        );
    }

    /**
     * Regularity of request spacing, derived from the coefficient of
     * variation (stddev / mean) of the inter-arrival gaps and mapped to
     * (0, 1]: 1 = perfectly regular, towards 0 = very irregular.
     *
     * Reports 0.0 when fewer than three requests are available.
     *
     * @param array $requests Ascending, zero-indexed timestamps.
     */
    private function extractRequestSpacing(array $requests): BehaviorFeature
    {
        $score = 0.0;

        if (count($requests) >= 3) {
            [$mean, $stdDev] = $this->meanAndStdDev($this->interArrivalTimes($requests));

            // A zero mean means all requests share one timestamp; the
            // coefficient of variation is undefined there and is taken as 1.0.
            $variation = $mean > 0 ? $stdDev / $mean : 1.0;
            $score = 1.0 / (1.0 + $variation);
        }

        // Same unit in both branches so consumers see one unit per feature name.
        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'request_spacing_regularity',
            value: $score,
            unit: 'regularity_score',
        );
    }

    /**
     * Periodicity score: the strongest lagged self-similarity of the
     * per-second request histogram over the lags in PERIODICITY_LAGS.
     *
     * Reports 0.0 for fewer than ten requests or when all requests share
     * the same timestamp.
     *
     * @param array $requests Timestamps inside the analysis window.
     */
    private function extractPeriodicityScore(array $requests): BehaviorFeature
    {
        $score = 0.0;

        if (count($requests) >= 10) {
            $minTime = min($requests);
            $duration = max($requests) - $minTime;

            if ($duration > 0) {
                // Histogram with one-second buckets, keyed by offset from the first request.
                $buckets = [];
                foreach ($requests as $timestamp) {
                    $offset = (int) ($timestamp - $minTime);
                    $buckets[$offset] = ($buckets[$offset] ?? 0) + 1;
                }

                foreach (self::PERIODICITY_LAGS as $lag) {
                    if ($lag >= $duration) {
                        // The observed span is too short to contain this period.
                        continue;
                    }

                    $correlation = $this->calculateAutocorrelation($buckets, $lag, (int) $duration);
                    $score = max($score, abs($correlation));
                }
            }
        }

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'periodicity_score',
            value: $score,
            unit: 'correlation',
        );
    }

    /**
     * Entropy of the distribution of requests over the 24 hours of the day.
     *
     * Hours are taken from date('H'), i.e. in the process's default timezone.
     *
     * @param array $requests Timestamps inside the analysis window.
     */
    private function extractTimeOfDayPattern(array $requests): BehaviorFeature
    {
        if ($requests === []) {
            return BehaviorFeature::create(
                type: $this->getBehaviorType(),
                name: 'time_of_day_entropy',
                value: 0.0,
                unit: 'bits',
            );
        }

        $hourDistribution = array_fill(0, 24, 0);
        foreach ($requests as $timestamp) {
            // The cast keeps date() happy under strict_types should the stored timestamp be a float.
            $hourDistribution[(int) date('H', (int) $timestamp)]++;
        }

        return BehaviorFeature::entropy(
            type: $this->getBehaviorType(),
            name: 'time_of_day_entropy',
            distribution: $hourDistribution,
        );
    }

    /**
     * Entropy of the distribution of requests over the days of the week
     * (date('w'): 0 = Sunday ... 6 = Saturday, process default timezone).
     *
     * @param array $requests Timestamps inside the analysis window.
     */
    private function extractWeekdayPattern(array $requests): BehaviorFeature
    {
        if ($requests === []) {
            return BehaviorFeature::create(
                type: $this->getBehaviorType(),
                name: 'weekday_entropy',
                value: 0.0,
                unit: 'bits',
            );
        }

        $dayDistribution = array_fill(0, 7, 0);
        foreach ($requests as $timestamp) {
            // The cast keeps date() happy under strict_types should the stored timestamp be a float.
            $dayDistribution[(int) date('w', (int) $timestamp)]++;
        }

        return BehaviorFeature::entropy(
            type: $this->getBehaviorType(),
            name: 'weekday_entropy',
            distribution: $dayDistribution,
        );
    }

    /**
     * Entropy of the request counts per ENTROPY_BUCKET_SECONDS-wide bucket,
     * counted from the earliest request. Buckets without requests are not
     * part of the distribution.
     *
     * Reports 0.0 when fewer than five requests are available.
     *
     * @param array $requests Timestamps inside the analysis window.
     */
    private function extractFrequencyEntropy(array $requests): BehaviorFeature
    {
        if (count($requests) < 5) {
            return BehaviorFeature::create(
                type: $this->getBehaviorType(),
                name: 'frequency_entropy',
                value: 0.0,
                unit: 'bits',
            );
        }

        $minTime = min($requests);
        $buckets = [];
        foreach ($requests as $timestamp) {
            $bucket = (int) (($timestamp - $minTime) / self::ENTROPY_BUCKET_SECONDS);
            $buckets[$bucket] = ($buckets[$bucket] ?? 0) + 1;
        }

        return BehaviorFeature::entropy(
            type: $this->getBehaviorType(),
            name: 'frequency_entropy',
            distribution: array_values($buckets),
        );
    }

    /**
     * Burstiness coefficient (σ - μ) / (σ + μ) of the inter-arrival gaps:
     * -1 = perfectly regular, +1 = extremely bursty.
     *
     * Reports 0.0 when fewer than five requests are available or when
     * σ + μ is not positive.
     *
     * @param array $requests Ascending, zero-indexed timestamps.
     */
    private function extractBurstiness(array $requests): BehaviorFeature
    {
        $burstiness = 0.0;

        if (count($requests) >= 5) {
            [$mean, $stdDev] = $this->meanAndStdDev($this->interArrivalTimes($requests));

            if (($stdDev + $mean) > 0) {
                $burstiness = ($stdDev - $mean) / ($stdDev + $mean);
            }
        }

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'burstiness',
            value: $burstiness,
            unit: 'burstiness_coefficient',
        );
    }

    /**
     * Gaps between consecutive timestamps.
     *
     * @param array $requests Ascending, zero-indexed timestamps.
     *
     * @return array One gap per adjacent pair (count($requests) - 1 entries).
     */
    private function interArrivalTimes(array $requests): array
    {
        $gaps = [];
        for ($i = 1, $total = count($requests); $i < $total; $i++) {
            $gaps[] = $requests[$i] - $requests[$i - 1];
        }

        return $gaps;
    }

    /**
     * Mean and standard deviation of a non-empty list of numbers.
     *
     * The variance is delegated to BehaviorFeature::statistical() so it is
     * computed the same way as the published inter_arrival_variance feature.
     *
     * @param array $values Non-empty list of numbers.
     *
     * @return array Two-element list: [mean, standard deviation].
     */
    private function meanAndStdDev(array $values): array
    {
        $mean = array_sum($values) / count($values);
        $variance = BehaviorFeature::statistical(
            type: $this->getBehaviorType(),
            name: 'temp_variance',
            values: $values,
            statistic: 'variance',
        )->value;

        return [$mean, sqrt($variance)];
    }

    /**
     * Appends the request's unix timestamp to the client's history and
     * evicts the oldest entry once more than `$maxStoredRequests` are stored.
     */
    private function recordRequest(string $clientId, Timestamp $timestamp): void
    {
        $this->requestHistory[$clientId] ??= [];
        $this->requestHistory[$clientId][] = $timestamp->toUnixTimestamp();

        // Limit memory usage
        if (count($this->requestHistory[$clientId]) > $this->maxStoredRequests) {
            array_shift($this->requestHistory[$clientId]);
        }
    }

    /**
     * Removes the client's stored timestamps that lie before the start of
     * the analysis window ending at `$currentTime`.
     */
    private function cleanOldRequests(string $clientId, Timestamp $currentTime): void
    {
        if (! isset($this->requestHistory[$clientId])) {
            return;
        }

        $this->requestHistory[$clientId] = $this->timestampsSince(
            $this->requestHistory[$clientId],
            $currentTime->toUnixTimestamp() - $this->timeWindowSeconds,
        );
    }

    /**
     * Returns the client's timestamps inside the analysis window as an
     * ascending, zero-indexed list, which is what the index-based extractors
     * rely on.
     */
    private function getRequestHistory(string $clientId, Timestamp $currentTime): array
    {
        if (! isset($this->requestHistory[$clientId])) {
            return [];
        }

        $requests = $this->timestampsSince(
            $this->requestHistory[$clientId],
            $currentTime->toUnixTimestamp() - $this->timeWindowSeconds,
        );
        // Timestamps come from the request data and are not guaranteed to arrive in order.
        sort($requests);

        return $requests;
    }

    /**
     * Keeps the timestamps at or after `$cutoffTime`.
     *
     * array_filter() preserves keys, so the result is re-indexed to remain
     * a gap-free list.
     *
     * @param array     $timestamps Unix timestamps.
     * @param int|float $cutoffTime Oldest timestamp to keep.
     */
    private function timestampsSince(array $timestamps, int|float $cutoffTime): array
    {
        return array_values(array_filter(
            $timestamps,
            static fn ($timestamp): bool => $timestamp >= $cutoffTime,
        ));
    }

    /**
     * Builds the history key for a request: the session id when present,
     * otherwise the client IP, otherwise the literal 'unknown'.
     */
    private function getClientId(RequestAnalysisData $requestData): string
    {
        if (! empty($requestData->sessionId)) {
            return 'session:' . $requestData->sessionId;
        }

        if ($requestData->clientIp !== null) {
            return 'ip:' . $requestData->clientIp->toString();
        }

        return 'unknown';
    }

    /**
     * Normalised lagged self-similarity of a per-second histogram.
     *
     * Computes 2·Σ(x·y) / Σ(x² + y²) over all bucket pairs (i, i + lag) with
     * 0 <= i and i + lag <= $duration. For non-negative counts the result
     * lies in [0, 1] and equals 1 only when the series matches its shifted
     * copy exactly.
     *
     * @param array $buckets  Offset in seconds => request count; missing offsets count as 0.
     * @param int   $lag      Shift in seconds.
     * @param int   $duration Offset of the last bucket (span of the series in seconds).
     *
     * @return float 0.0 when the lag is not inside (0, $duration) or the series carries no energy.
     */
    private function calculateAutocorrelation(array $buckets, int $lag, int $duration): float
    {
        if ($lag <= 0 || $lag >= $duration) {
            return 0.0;
        }

        $crossSum = 0.0;
        $energy = 0.0;

        // Inclusive upper bound: bucket $duration holds the newest request and must take part.
        for ($i = 0; $i + $lag <= $duration; $i++) {
            $x = $buckets[$i] ?? 0;
            $y = $buckets[$i + $lag] ?? 0;
            $crossSum += $x * $y;
            $energy += $x * $x + $y * $y;
        }

        // Float comparison: a strict === against int 0 would never match a float sum.
        if ($energy <= 0.0) {
            return 0.0;
        }

        return 2.0 * $crossSum / $energy;
    }

    /**
     * Names of the features produced by extractFeatures(), in output order.
     */
    public function getFeatureNames(): array
    {
        return [
            "request_rate_{$this->timeWindowSeconds}s",
            sprintf('burst_rate_%ds', self::BURST_WINDOW_SECONDS),
            sprintf('sustained_rate_%ds', self::SUSTAINED_WINDOW_SECONDS),
            'inter_arrival_variance',
            'request_spacing_regularity',
            'periodicity_score',
            'time_of_day_entropy',
            'weekday_entropy',
            'frequency_entropy',
            'burstiness',
        ];
    }

    /**
     * Constructor settings plus the number of features this extractor emits.
     */
    public function getConfiguration(): array
    {
        return [
            'enabled' => $this->enabled,
            'time_window_seconds' => $this->timeWindowSeconds,
            'max_stored_requests' => $this->maxStoredRequests,
            'burst_threshold' => $this->burstThreshold,
            'feature_count' => count($this->getFeatureNames()),
        ];
    }

    public function isEnabled(): bool
    {
        return $this->enabled;
    }

    public function getPriority(): int
    {
        return 100; // High priority for frequency analysis
    }

    public function getExpectedProcessingTime(): int
    {
        return 50; // milliseconds
    }

    public function supportsParallelExecution(): bool
    {
        return false; // Needs sequential access for request history
    }

    public function getDependencies(): array
    {
        return []; // No dependencies
    }
}