Enable Discovery debug logging for production troubleshooting
- Add DISCOVERY_LOG_LEVEL=debug
- Add DISCOVERY_SHOW_PROGRESS=true
- Temporary changes for debugging InitializerProcessor fixes on production
This commit is contained in:
@@ -0,0 +1,570 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Waf\MachineLearning\Extractors;
|
||||
|
||||
use App\Framework\Core\ValueObjects\Timestamp;
|
||||
use App\Framework\Waf\Analysis\ValueObjects\RequestAnalysisData;
|
||||
use App\Framework\Waf\MachineLearning\BehaviorType;
|
||||
use App\Framework\Waf\MachineLearning\FeatureExtractorInterface;
|
||||
use App\Framework\Waf\MachineLearning\ValueObjects\BehaviorFeature;
|
||||
|
||||
/**
|
||||
* Extracts request frequency and rate-based behavioral features
|
||||
*/
|
||||
final class FrequencyFeatureExtractor implements FeatureExtractorInterface
|
||||
{
|
||||
/**
 * Configure the extractor.
 *
 * @param bool  $enabled           Value reported by isEnabled().
 * @param int   $timeWindowSeconds Length in seconds of the per-client history window (default: 5 minutes).
 * @param int   $maxStoredRequests Per-client cap on the number of stored timestamps.
 * @param float $burstThreshold    Requests-per-second threshold; within this class it is only
 *                                 exposed through getConfiguration().
 * @param array $requestHistory    Initial history keyed by client id, each entry holding the
 *                                 recorded timestamps. Held in process memory only.
 */
public function __construct(
    private readonly bool $enabled = true,
    private readonly int $timeWindowSeconds = 300,
    private readonly int $maxStoredRequests = 1000,
    private readonly float $burstThreshold = 10.0,
    private array $requestHistory = [],
) {
}
|
||||
|
||||
/**
 * Behavior category every feature produced by this extractor is tagged with.
 */
public function getBehaviorType(): BehaviorType
{
    return BehaviorType::REQUEST_FREQUENCY;
}
|
||||
|
||||
/**
 * Report whether the request carries a client IP.
 *
 * @param RequestAnalysisData $requestData Request under analysis.
 *
 * @return bool True when $requestData->clientIp is not null.
 */
public function canExtract(RequestAnalysisData $requestData): bool
{
    return null !== $requestData->clientIp;
}
|
||||
|
||||
/**
 * Record the current request for its client and derive all frequency features
 * from that client's retained request history.
 *
 * @param RequestAnalysisData $requestData Request under analysis; its timestamp is used when
 *                                         set, otherwise Timestamp::now().
 * @param array               $context     Not read by this extractor.
 *
 * @return array The ten features in a fixed order, passed through array_filter();
 *               an empty array when no history is available for the client.
 */
public function extractFeatures(RequestAnalysisData $requestData, array $context = []): array
{
    $client = $this->getClientId($requestData);
    $now = $requestData->timestamp ?? Timestamp::now();

    // Store first, then prune, then read: the current request is always part
    // of the history that gets analysed.
    $this->recordRequest($client, $now);
    $this->cleanOldRequests($client, $now);
    $history = $this->getRequestHistory($client, $now);

    if (empty($history)) {
        return [];
    }

    // NOTE(review): the history is pruned to $timeWindowSeconds, so the 1800s
    // sustained window only ever sees that much data unless the configured
    // window is at least 1800 seconds — confirm this is intended.
    return array_filter([
        // Basic frequency features
        $this->extractRequestRate($history, $this->timeWindowSeconds),
        $this->extractBurstRate($history, 60),
        $this->extractSustainedRate($history, 1800),

        // Pattern-based features
        $this->extractInterArrivalVariance($history),
        $this->extractRequestSpacing($history),
        $this->extractPeriodicityScore($history),

        // Time-based features
        $this->extractTimeOfDayPattern($history),
        $this->extractWeekdayPattern($history),

        // Advanced statistical features
        $this->extractFrequencyEntropy($history),
        $this->extractBurstiness($history),
    ]);
}
|
||||
|
||||
/**
 * Build the request-count feature for the analysis window.
 *
 * No rate is computed here: the raw count and the window length are handed to
 * BehaviorFeature::frequency(). The previous implementation also computed a
 * local rate that was never used; it has been removed.
 *
 * @param array $requests      Request timestamps retained for the client.
 * @param int   $windowSeconds Window length in seconds; also embedded in the feature name.
 *
 * @return BehaviorFeature Feature named "request_rate_{windowSeconds}s".
 */
private function extractRequestRate(array $requests, int $windowSeconds): BehaviorFeature
{
    return BehaviorFeature::frequency(
        name: "request_rate_{$windowSeconds}s",
        count: count($requests),
        timeWindow: $windowSeconds,
    );
}
|
||||
|
||||
/**
 * Find the highest request rate observed in any window of $windowSeconds that
 * starts at a recorded request.
 *
 * The history handed in comes from array_filter(), which preserves keys, so it
 * may be sparse (e.g. keys 2, 3, 4 once older entries were pruned). The
 * timestamps are therefore copied into a sorted, zero-based list before any
 * positional access. The window scan uses two monotonically advancing indices
 * instead of restarting the inner loop for every start position.
 *
 * @param array $requests      Request timestamps (presumably Unix seconds — they originate
 *                             from Timestamp::toUnixTimestamp()); any keys, any order.
 * @param int   $windowSeconds Window length in seconds; a non-positive value yields 0.0.
 *
 * @return BehaviorFeature Feature named "burst_rate_{windowSeconds}s" in requests/second.
 */
private function extractBurstRate(array $requests, int $windowSeconds): BehaviorFeature
{
    $maxRate = 0.0;

    if ($windowSeconds > 0 && count($requests) >= 2) {
        // sort() re-indexes from 0, which makes positional access safe.
        $timeline = $requests;
        sort($timeline);

        $total = count($timeline);
        $peakCount = 0;
        $end = 0; // exclusive end of the window that starts at $start

        for ($start = 0; $start < $total; $start++) {
            // A request exactly $windowSeconds after the start still belongs to the window.
            while ($end < $total && $timeline[$end] - $timeline[$start] <= $windowSeconds) {
                $end++;
            }

            $peakCount = max($peakCount, $end - $start);
        }

        $maxRate = (float) $peakCount / $windowSeconds;
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: "burst_rate_{$windowSeconds}s",
        value: $maxRate,
        unit: 'requests/second'
    );
}
|
||||
|
||||
/**
 * Compute the average request rate over a longer window.
 *
 * The window is anchored at the most recent recorded timestamp rather than the
 * wall clock. The rest of the class works off the request-supplied timestamp,
 * so calling time() here made the result depend on when the code ran (replayed
 * traffic, clock skew, tests) instead of on the data.
 *
 * @param array $requests      Request timestamps retained for the client.
 * @param int   $windowSeconds Window length in seconds; a non-positive value yields 0.0.
 *
 * @return BehaviorFeature Feature named "sustained_rate_{windowSeconds}s" in requests/second.
 */
private function extractSustainedRate(array $requests, int $windowSeconds): BehaviorFeature
{
    $rate = 0.0;

    if ($windowSeconds > 0 && $requests !== []) {
        $referenceTime = max($requests);

        $withinWindow = array_filter(
            $requests,
            static fn ($timestamp): bool => ($referenceTime - $timestamp) <= $windowSeconds
        );

        $rate = (float) count($withinWindow) / $windowSeconds;
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: "sustained_rate_{$windowSeconds}s",
        value: $rate,
        unit: 'requests/second'
    );
}
|
||||
|
||||
/**
 * Variance of the gaps between consecutive requests.
 *
 * With fewer than three requests a fixed 0.0 feature is returned; otherwise
 * the gaps are handed to BehaviorFeature::statistical() with the 'variance'
 * statistic.
 *
 * @param array $requests Request timestamps; sorted locally, the caller's array is untouched.
 *
 * @return BehaviorFeature Feature named 'inter_arrival_variance'.
 */
private function extractInterArrivalVariance(array $requests): BehaviorFeature
{
    if (count($requests) >= 3) {
        sort($requests);

        // Gap between each request and the one before it.
        $gaps = [];
        $last = count($requests) - 1;
        for ($index = 0; $index < $last; $index++) {
            $gaps[] = $requests[$index + 1] - $requests[$index];
        }

        return BehaviorFeature::statistical(
            type: $this->getBehaviorType(),
            name: 'inter_arrival_variance',
            values: $gaps,
            statistic: 'variance'
        );
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'inter_arrival_variance',
        value: 0.0,
        unit: 'seconds²'
    );
}
|
||||
|
||||
/**
 * Score how evenly spaced the requests are.
 *
 * The coefficient of variation (standard deviation / mean) of the inter-arrival
 * gaps is mapped to 1 / (1 + cv), so 1.0 means perfectly regular spacing and
 * values near 0 mean very irregular spacing.
 *
 * The insufficient-data branch previously reported the unit 'coefficient' while
 * the computed branch reported 'regularity_score'; both now use
 * 'regularity_score' so the feature has a single unit.
 *
 * @param array $requests Request timestamps; sorted locally, the caller's array is untouched.
 *
 * @return BehaviorFeature Feature named 'request_spacing_regularity'; 0.0 with fewer than three requests.
 */
private function extractRequestSpacing(array $requests): BehaviorFeature
{
    $score = 0.0;

    if (count($requests) >= 3) {
        sort($requests);

        // Gap between each request and the one before it.
        $interArrivals = [];
        for ($i = 1, $total = count($requests); $i < $total; $i++) {
            $interArrivals[] = $requests[$i] - $requests[$i - 1];
        }

        $mean = array_sum($interArrivals) / count($interArrivals);

        // The variance is taken from a throw-away statistical feature.
        $variance = BehaviorFeature::statistical(
            type: $this->getBehaviorType(),
            name: 'temp_variance',
            values: $interArrivals,
            statistic: 'variance'
        )->value;

        // Coefficient of variation (lower = more regular); 1.0 when the mean gap is zero.
        $variation = $mean > 0 ? sqrt($variance) / $mean : 1.0;

        $score = 1.0 / (1.0 + $variation);
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'request_spacing_regularity',
        value: $score,
        unit: 'regularity_score'
    );
}
|
||||
|
||||
/**
 * Score how periodic the request stream is.
 *
 * Requests are counted into one-second buckets relative to the earliest
 * request, and the bucket series is correlated with itself at lags of 10, 30
 * and 60 seconds. The largest absolute correlation is reported. Lags that are
 * not shorter than the observed time span are skipped.
 *
 * @param array $requests Request timestamps; sorted locally, the caller's array is untouched.
 *
 * @return BehaviorFeature Feature named 'periodicity_score'; 0.0 with fewer than ten
 *                         requests or when all requests share one timestamp.
 */
private function extractPeriodicityScore(array $requests): BehaviorFeature
{
    $score = 0.0;

    if (count($requests) >= 10) {
        sort($requests);

        // After sorting, the first and last entries are the earliest and latest requests.
        $earliest = $requests[0];
        $span = $requests[count($requests) - 1] - $earliest;

        if ($span > 0) {
            // Per-second request counts, keyed by offset from the earliest request.
            $histogram = [];
            foreach ($requests as $moment) {
                $slot = (int) ($moment - $earliest);
                $histogram[$slot] = ($histogram[$slot] ?? 0) + 1;
            }

            foreach ([10, 30, 60] as $lag) {
                if ($lag < $span) {
                    $correlation = $this->calculateAutocorrelation($histogram, $lag, (int) $span);
                    $score = max($score, abs($correlation));
                }
            }
        }
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'periodicity_score',
        value: $score,
        unit: 'correlation'
    );
}
|
||||
|
||||
/**
 * Entropy of the request distribution over the 24 hours of the day.
 *
 * The hour is taken from date('H'), i.e. in PHP's configured default timezone.
 *
 * @param array $requests Request timestamps accepted by date().
 *
 * @return BehaviorFeature Feature named 'time_of_day_entropy'; a fixed 0.0 feature when
 *                         $requests is empty.
 */
private function extractTimeOfDayPattern(array $requests): BehaviorFeature
{
    if ($requests === []) {
        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'time_of_day_entropy',
            value: 0.0,
            unit: 'bits'
        );
    }

    // One counter per hour, 0 through 23.
    $perHour = array_fill(0, 24, 0);
    foreach ($requests as $moment) {
        $perHour[(int) date('H', $moment)] += 1;
    }

    return BehaviorFeature::entropy(
        type: $this->getBehaviorType(),
        name: 'time_of_day_entropy',
        distribution: $perHour
    );
}
|
||||
|
||||
/**
 * Entropy of the request distribution over the seven days of the week.
 *
 * The weekday is taken from date('w') (0 = Sunday … 6 = Saturday), i.e. in
 * PHP's configured default timezone.
 *
 * @param array $requests Request timestamps accepted by date().
 *
 * @return BehaviorFeature Feature named 'weekday_entropy'; a fixed 0.0 feature when
 *                         $requests is empty.
 */
private function extractWeekdayPattern(array $requests): BehaviorFeature
{
    if ($requests === []) {
        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'weekday_entropy',
            value: 0.0,
            unit: 'bits'
        );
    }

    // One counter per weekday, 0 through 6.
    $perWeekday = array_fill(0, 7, 0);
    foreach ($requests as $moment) {
        $perWeekday[(int) date('w', $moment)] += 1;
    }

    return BehaviorFeature::entropy(
        type: $this->getBehaviorType(),
        name: 'weekday_entropy',
        distribution: $perWeekday
    );
}
|
||||
|
||||
/**
 * Entropy of the request counts across 10-second buckets.
 *
 * Buckets are measured from the earliest request. Only buckets that received
 * at least one request are part of the distribution passed to
 * BehaviorFeature::entropy().
 *
 * @param array $requests Request timestamps.
 *
 * @return BehaviorFeature Feature named 'frequency_entropy'; a fixed 0.0 feature with
 *                         fewer than five requests.
 */
private function extractFrequencyEntropy(array $requests): BehaviorFeature
{
    if (count($requests) < 5) {
        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'frequency_entropy',
            value: 0.0,
            unit: 'bits'
        );
    }

    $secondsPerBucket = 10;
    $origin = min($requests);

    $histogram = [];
    foreach ($requests as $moment) {
        $slot = (int) (($moment - $origin) / $secondsPerBucket);
        $histogram[$slot] = ($histogram[$slot] ?? 0) + 1;
    }

    return BehaviorFeature::entropy(
        type: $this->getBehaviorType(),
        name: 'frequency_entropy',
        distribution: array_values($histogram)
    );
}
|
||||
|
||||
/**
 * Burstiness coefficient of the inter-arrival gaps: (σ − μ) / (σ + μ).
 *
 * The value ranges from -1 (perfectly regular) to +1 (highly bursty). It is
 * 0.0 when σ + μ is not positive or when fewer than five requests are
 * available.
 *
 * @param array $requests Request timestamps; sorted locally, the caller's array is untouched.
 *
 * @return BehaviorFeature Feature named 'burstiness'.
 */
private function extractBurstiness(array $requests): BehaviorFeature
{
    $coefficient = 0.0;

    if (count($requests) >= 5) {
        sort($requests);

        // Pair every request with its successor to obtain the gaps.
        $gaps = array_map(
            static fn ($earlier, $later) => $later - $earlier,
            array_slice($requests, 0, -1),
            array_slice($requests, 1)
        );

        $average = array_sum($gaps) / count($gaps);

        // The variance is taken from a throw-away statistical feature.
        $sigma = sqrt(
            BehaviorFeature::statistical(
                type: $this->getBehaviorType(),
                name: 'temp_variance',
                values: $gaps,
                statistic: 'variance'
            )->value
        );

        $denominator = $sigma + $average;
        if ($denominator > 0) {
            $coefficient = ($sigma - $average) / $denominator;
        }
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'burstiness',
        value: $coefficient,
        unit: 'burstiness_coefficient'
    );
}
|
||||
|
||||
/**
 * Append the request's Unix timestamp to the client's history.
 *
 * When the history grows beyond $maxStoredRequests, the oldest entry is
 * dropped (one entry per call).
 *
 * @param string    $clientId  Key of the client's history.
 * @param Timestamp $timestamp Time of the request being recorded.
 */
private function recordRequest(string $clientId, Timestamp $timestamp): void
{
    $this->requestHistory[$clientId] ??= [];
    $this->requestHistory[$clientId][] = $timestamp->toUnixTimestamp();

    if (count($this->requestHistory[$clientId]) <= $this->maxStoredRequests) {
        return;
    }

    // Over the cap: discard the oldest entry to bound memory per client.
    array_shift($this->requestHistory[$clientId]);
}
|
||||
|
||||
/**
 * Drop the client's timestamps that fall outside the analysis window.
 *
 * The surviving timestamps are re-indexed from 0: array_filter() preserves
 * keys, and a sparse history breaks code that walks it by position. A history
 * left with no entries is removed altogether so it does not linger in memory.
 *
 * @param string    $clientId    Key of the client's history.
 * @param Timestamp $currentTime Reference time; entries older than
 *                               $timeWindowSeconds before it are discarded.
 */
private function cleanOldRequests(string $clientId, Timestamp $currentTime): void
{
    if (! isset($this->requestHistory[$clientId])) {
        return;
    }

    $cutoffTime = $currentTime->toUnixTimestamp() - $this->timeWindowSeconds;

    $kept = array_values(array_filter(
        $this->requestHistory[$clientId],
        static fn ($timestamp): bool => $timestamp >= $cutoffTime
    ));

    if ($kept === []) {
        unset($this->requestHistory[$clientId]);

        return;
    }

    $this->requestHistory[$clientId] = $kept;
}
|
||||
|
||||
/**
 * Return the client's timestamps that lie inside the analysis window.
 *
 * The result is always a zero-based list. array_filter() preserves the
 * original keys, and callers index the history positionally, so the filtered
 * entries are re-indexed before being returned.
 *
 * @param string    $clientId    Key of the client's history.
 * @param Timestamp $currentTime Reference time; entries older than
 *                               $timeWindowSeconds before it are excluded.
 *
 * @return array List of timestamps in stored order; empty when the client is unknown.
 */
private function getRequestHistory(string $clientId, Timestamp $currentTime): array
{
    if (! isset($this->requestHistory[$clientId])) {
        return [];
    }

    $cutoffTime = $currentTime->toUnixTimestamp() - $this->timeWindowSeconds;

    return array_values(array_filter(
        $this->requestHistory[$clientId],
        static fn ($timestamp): bool => $timestamp >= $cutoffTime
    ));
}
|
||||
|
||||
/**
 * Derive the key under which a client's history is stored.
 *
 * A non-empty session id wins, then the client IP; requests carrying neither
 * all share the key 'unknown'.
 *
 * @param RequestAnalysisData $requestData Request under analysis.
 *
 * @return string 'session:<id>', 'ip:<address>' or 'unknown'.
 */
private function getClientId(RequestAnalysisData $requestData): string
{
    return match (true) {
        ! empty($requestData->sessionId) => 'session:' . $requestData->sessionId,
        $requestData->clientIp !== null => 'ip:' . $requestData->clientIp->toString(),
        default => 'unknown',
    };
}
|
||||
|
||||
/**
 * Normalised (uncentred) autocorrelation of a bucketed count series at a given lag.
 *
 * Computes Σ x·y / sqrt(Σ x² · Σ y²) over the pairs x = buckets[i],
 * y = buckets[i + lag] for i in [0, duration − lag). Missing buckets count as 0.
 * By the Cauchy–Schwarz inequality the result lies in [0, 1] for non-negative
 * counts.
 *
 * Two defects of the previous version are fixed:
 *  - the zero guard compared a float accumulator with the integer 0 using ===,
 *    which is never true, so an all-zero series raised DivisionByZeroError;
 *  - the normaliser sqrt(Σ(x² + y²) / 2) is not dimensionless, so the
 *    "correlation" grew with the counts and could exceed 1.
 *
 * @param array $buckets  Request counts keyed by integer bucket index; may be sparse.
 * @param int   $lag      Lag in buckets; must be positive and smaller than $duration.
 * @param int   $duration Length of the series in buckets.
 *
 * @return float Correlation in [0, 1]; 0.0 for an invalid lag or when either
 *               side of the comparison is all zeros.
 */
private function calculateAutocorrelation(array $buckets, int $lag, int $duration): float
{
    if ($lag <= 0 || $lag >= $duration) {
        return 0.0;
    }

    $crossProduct = 0.0;
    $energyX = 0.0;
    $energyY = 0.0;

    $pairCount = $duration - $lag;
    for ($i = 0; $i < $pairCount; $i++) {
        $x = $buckets[$i] ?? 0;
        $y = $buckets[$i + $lag] ?? 0;

        $crossProduct += $x * $y;
        $energyX += $x * $x;
        $energyY += $y * $y;
    }

    $norm = sqrt($energyX * $energyY);

    // Float comparison on purpose: the accumulators are floats, never the int 0.
    if ($norm <= 0.0) {
        return 0.0;
    }

    return $crossProduct / $norm;
}
|
||||
|
||||
/**
 * Names of the features this extractor emits, in emission order.
 *
 * Only the first name depends on configuration (the analysis window length);
 * the burst and sustained names carry fixed 60s and 1800s windows.
 *
 * @return array List of ten feature names.
 */
public function getFeatureNames(): array
{
    $requestRateName = sprintf('request_rate_%ds', $this->timeWindowSeconds);

    return [
        $requestRateName,
        'burst_rate_60s',
        'sustained_rate_1800s',
        'inter_arrival_variance',
        'request_spacing_regularity',
        'periodicity_score',
        'time_of_day_entropy',
        'weekday_entropy',
        'frequency_entropy',
        'burstiness',
    ];
}
|
||||
|
||||
/**
 * Snapshot of the extractor's settings.
 *
 * @return array Keys: enabled, time_window_seconds, max_stored_requests,
 *               burst_threshold, feature_count.
 */
public function getConfiguration(): array
{
    $featureCount = count($this->getFeatureNames());

    return [
        'enabled' => $this->enabled,
        'time_window_seconds' => $this->timeWindowSeconds,
        'max_stored_requests' => $this->maxStoredRequests,
        'burst_threshold' => $this->burstThreshold,
        'feature_count' => $featureCount,
    ];
}
|
||||
|
||||
/**
 * Whether this extractor was configured as enabled.
 */
public function isEnabled(): bool
{
    return $this->enabled;
}
|
||||
|
||||
/**
 * Scheduling priority of this extractor.
 *
 * @return int Always 100 (frequency analysis is treated as high priority).
 */
public function getPriority(): int
{
    return 100;
}
|
||||
|
||||
/**
 * Expected processing time for one extraction.
 *
 * @return int Always 50, in milliseconds.
 */
public function getExpectedProcessingTime(): int
{
    return 50;
}
|
||||
|
||||
/**
 * Whether this extractor may run in parallel with others.
 *
 * @return bool Always false: the request history has to be accessed sequentially.
 */
public function supportsParallelExecution(): bool
{
    return false;
}
|
||||
|
||||
/**
 * Dependencies of this extractor.
 *
 * @return array Always empty: this extractor declares no dependencies.
 */
public function getDependencies(): array
{
    return [];
}
|
||||
}
|
||||
Reference in New Issue
Block a user