Enable Discovery debug logging for production troubleshooting
- Add DISCOVERY_LOG_LEVEL=debug
- Add DISCOVERY_SHOW_PROGRESS=true
- Temporary changes for debugging InitializerProcessor fixes on production
This commit is contained in:
@@ -0,0 +1,570 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Waf\MachineLearning\Extractors;
|
||||
|
||||
use App\Framework\Core\ValueObjects\Timestamp;
|
||||
use App\Framework\Waf\Analysis\ValueObjects\RequestAnalysisData;
|
||||
use App\Framework\Waf\MachineLearning\BehaviorType;
|
||||
use App\Framework\Waf\MachineLearning\FeatureExtractorInterface;
|
||||
use App\Framework\Waf\MachineLearning\ValueObjects\BehaviorFeature;
|
||||
|
||||
/**
 * Extracts request frequency and rate-based behavioral features.
 *
 * Request timestamps are tracked per client in an array held by this instance,
 * so the history only lives as long as the object does.
 */
final class FrequencyFeatureExtractor implements FeatureExtractorInterface
{
    /** Window (seconds) scanned for the densest burst of requests. */
    private const BURST_WINDOW_SECONDS = 60;

    /** Window (seconds) used for the sustained-rate feature. */
    private const SUSTAINED_WINDOW_SECONDS = 1800;

    /** Lags (seconds) probed when looking for periodic traffic. */
    private const PERIODICITY_LAGS = [10, 30, 60];

    /**
     * @param bool  $enabled           Value reported by isEnabled().
     * @param int   $timeWindowSeconds Length of the main analysis window in seconds.
     * @param int   $maxStoredRequests Upper bound on timestamps kept per client.
     * @param float $burstThreshold    Only exposed through getConfiguration() in this class.
     * @param array $requestHistory    Map of client id => list of unix timestamps.
     */
    public function __construct(
        private readonly bool $enabled = true,
        private readonly int $timeWindowSeconds = 300, // 5 minutes
        private readonly int $maxStoredRequests = 1000,
        private readonly float $burstThreshold = 10.0, // requests per second
        private array $requestHistory = [] // In-memory storage (would be Redis in production)
    ) {
    }

    public function getBehaviorType(): BehaviorType
    {
        return BehaviorType::REQUEST_FREQUENCY;
    }

    /**
     * A request can be analysed as soon as it carries a client IP.
     */
    public function canExtract(RequestAnalysisData $requestData): bool
    {
        return $requestData->clientIp !== null;
    }

    /**
     * Records the request and derives all frequency features for its client.
     *
     * @return list<BehaviorFeature> Empty when no request falls inside the analysis window.
     */
    public function extractFeatures(RequestAnalysisData $requestData, array $context = []): array
    {
        $clientId = $this->getClientId($requestData);
        $currentTime = $requestData->timestamp ?? Timestamp::now();

        // The current request is part of the history it is judged against.
        $this->recordRequest($clientId, $currentTime);
        $this->cleanOldRequests($clientId, $currentTime);

        $requests = $this->getRequestHistory($clientId, $currentTime);

        if ($requests === []) {
            return [];
        }

        // The sustained rate looks further back than the main analysis window.
        $sustainedRequests = $this->getRequestHistory(
            $clientId,
            $currentTime,
            self::SUSTAINED_WINDOW_SECONDS
        );

        return [
            // Basic frequency features
            $this->extractRequestRate($requests, $this->timeWindowSeconds),
            $this->extractBurstRate($requests, self::BURST_WINDOW_SECONDS),
            $this->extractSustainedRate($sustainedRequests, self::SUSTAINED_WINDOW_SECONDS),
            // Pattern-based features
            $this->extractInterArrivalVariance($requests),
            $this->extractRequestSpacing($requests),
            $this->extractPeriodicityScore($requests),
            // Time-based features
            $this->extractTimeOfDayPattern($requests),
            $this->extractWeekdayPattern($requests),
            // Advanced statistical features
            $this->extractFrequencyEntropy($requests),
            $this->extractBurstiness($requests),
        ];
    }

    /**
     * Request count over the analysis window, delegated to BehaviorFeature::frequency().
     *
     * @param list<int|float> $requests Timestamps inside the window.
     */
    private function extractRequestRate(array $requests, int $windowSeconds): BehaviorFeature
    {
        return BehaviorFeature::frequency(
            name: "request_rate_{$windowSeconds}s",
            count: count($requests),
            timeWindow: $windowSeconds
        );
    }

    /**
     * Highest request rate seen in any window of $windowSeconds that starts at a request.
     *
     * @param list<int|float> $requests Timestamps in ascending order.
     */
    private function extractBurstRate(array $requests, int $windowSeconds): BehaviorFeature
    {
        $name = "burst_rate_{$windowSeconds}s";
        $total = count($requests);

        if ($total < 2 || $windowSeconds <= 0) {
            return $this->feature($name, 0.0, 'requests/second');
        }

        // Two-pointer sweep: $end only ever moves forward because the input is sorted.
        $densest = 0;
        $end = 0;
        for ($start = 0; $start < $total; $start++) {
            $end = max($end, $start);
            while ($end < $total && $requests[$end] - $requests[$start] <= $windowSeconds) {
                $end++;
            }
            $densest = max($densest, $end - $start);
        }

        return $this->feature($name, $densest / $windowSeconds, 'requests/second');
    }

    /**
     * Average request rate over the (longer) sustained window.
     *
     * @param list<int|float> $requests Timestamps already limited to the sustained window.
     */
    private function extractSustainedRate(array $requests, int $windowSeconds): BehaviorFeature
    {
        $rate = $windowSeconds > 0 ? count($requests) / $windowSeconds : 0.0;

        return $this->feature("sustained_rate_{$windowSeconds}s", $rate, 'requests/second');
    }

    /**
     * Variance of the gaps between consecutive requests.
     *
     * @param list<int|float> $requests Timestamps in ascending order.
     */
    private function extractInterArrivalVariance(array $requests): BehaviorFeature
    {
        if (count($requests) < 3) {
            return $this->feature('inter_arrival_variance', 0.0, 'seconds²');
        }

        return BehaviorFeature::statistical(
            type: $this->getBehaviorType(),
            name: 'inter_arrival_variance',
            values: $this->interArrivalTimes($requests),
            statistic: 'variance'
        );
    }

    /**
     * Regularity of request spacing: 1 = perfectly regular, towards 0 = very irregular.
     *
     * @param list<int|float> $requests Timestamps in ascending order.
     */
    private function extractRequestSpacing(array $requests): BehaviorFeature
    {
        if (count($requests) < 3) {
            return $this->feature('request_spacing_regularity', 0.0, 'regularity_score');
        }

        $gaps = $this->interArrivalTimes($requests);
        $mean = array_sum($gaps) / count($gaps);

        // Coefficient of variation (lower = more regular)
        $variation = $mean > 0 ? sqrt($this->varianceOf($gaps)) / $mean : 1.0;

        return $this->feature(
            'request_spacing_regularity',
            1.0 / (1.0 + $variation),
            'regularity_score'
        );
    }

    /**
     * Strongest lagged self-similarity of the per-second request histogram.
     *
     * @param list<int|float> $requests Timestamps inside the analysis window.
     */
    private function extractPeriodicityScore(array $requests): BehaviorFeature
    {
        if (count($requests) < 10) {
            return $this->feature('periodicity_score', 0.0, 'correlation');
        }

        $minTime = min($requests);
        $duration = max($requests) - $minTime;

        if ($duration <= 0) {
            return $this->feature('periodicity_score', 0.0, 'correlation');
        }

        // Histogram with 1-second buckets, offset from the first request.
        $buckets = [];
        foreach ($requests as $timestamp) {
            $bucket = (int) ($timestamp - $minTime);
            $buckets[$bucket] = ($buckets[$bucket] ?? 0) + 1;
        }

        $strongest = 0.0;
        foreach (self::PERIODICITY_LAGS as $lag) {
            if ($lag >= $duration) {
                continue; // not enough data to observe this period
            }

            $correlation = $this->calculateAutocorrelation($buckets, $lag, (int) $duration);
            $strongest = max($strongest, abs($correlation));
        }

        return $this->feature('periodicity_score', $strongest, 'correlation');
    }

    /**
     * Entropy of the hour-of-day distribution of the requests.
     *
     * @param list<int|float> $requests Unix timestamps.
     */
    private function extractTimeOfDayPattern(array $requests): BehaviorFeature
    {
        if (empty($requests)) {
            return $this->feature('time_of_day_entropy', 0.0, 'bits');
        }

        $hourDistribution = array_fill(0, 24, 0);
        foreach ($requests as $timestamp) {
            // date() only accepts an int timestamp under strict types.
            $hourDistribution[(int) date('H', (int) $timestamp)]++;
        }

        return BehaviorFeature::entropy(
            type: $this->getBehaviorType(),
            name: 'time_of_day_entropy',
            distribution: $hourDistribution
        );
    }

    /**
     * Entropy of the day-of-week distribution (0 = Sunday, 6 = Saturday).
     *
     * @param list<int|float> $requests Unix timestamps.
     */
    private function extractWeekdayPattern(array $requests): BehaviorFeature
    {
        if (empty($requests)) {
            return $this->feature('weekday_entropy', 0.0, 'bits');
        }

        $dayDistribution = array_fill(0, 7, 0);
        foreach ($requests as $timestamp) {
            $dayDistribution[(int) date('w', (int) $timestamp)]++;
        }

        return BehaviorFeature::entropy(
            type: $this->getBehaviorType(),
            name: 'weekday_entropy',
            distribution: $dayDistribution
        );
    }

    /**
     * Entropy of the request counts per 10-second bucket.
     *
     * @param list<int|float> $requests Timestamps inside the analysis window.
     */
    private function extractFrequencyEntropy(array $requests): BehaviorFeature
    {
        if (count($requests) < 5) {
            return $this->feature('frequency_entropy', 0.0, 'bits');
        }

        $bucketSize = 10; // seconds
        $minTime = min($requests);

        $buckets = [];
        foreach ($requests as $timestamp) {
            $bucket = (int) (($timestamp - $minTime) / $bucketSize);
            $buckets[$bucket] = ($buckets[$bucket] ?? 0) + 1;
        }

        return BehaviorFeature::entropy(
            type: $this->getBehaviorType(),
            name: 'frequency_entropy',
            distribution: array_values($buckets)
        );
    }

    /**
     * Burstiness coefficient (σ - μ) / (σ + μ) of the inter-arrival times.
     *
     * Range: -1 (regular) to +1 (bursty).
     *
     * @param list<int|float> $requests Timestamps in ascending order.
     */
    private function extractBurstiness(array $requests): BehaviorFeature
    {
        if (count($requests) < 5) {
            return $this->feature('burstiness', 0.0, 'burstiness_coefficient');
        }

        $gaps = $this->interArrivalTimes($requests);
        $mean = array_sum($gaps) / count($gaps);
        $stdDev = sqrt($this->varianceOf($gaps));

        $spread = $stdDev + $mean;
        $burstiness = $spread > 0 ? ($stdDev - $mean) / $spread : 0.0;

        return $this->feature('burstiness', $burstiness, 'burstiness_coefficient');
    }

    /**
     * Appends the request timestamp to the client's history, dropping the oldest
     * entries once more than $maxStoredRequests are stored.
     */
    private function recordRequest(string $clientId, Timestamp $timestamp): void
    {
        $this->requestHistory[$clientId] ??= [];
        $this->requestHistory[$clientId][] = $timestamp->toUnixTimestamp();

        // Limit memory usage
        $excess = count($this->requestHistory[$clientId]) - $this->maxStoredRequests;
        if ($excess > 0) {
            array_splice($this->requestHistory[$clientId], 0, $excess);
        }
    }

    /**
     * Drops timestamps older than the longest window any feature looks at.
     */
    private function cleanOldRequests(string $clientId, Timestamp $currentTime): void
    {
        if (! isset($this->requestHistory[$clientId])) {
            return;
        }

        $retention = max($this->timeWindowSeconds, self::SUSTAINED_WINDOW_SECONDS);
        $cutoffTime = $currentTime->toUnixTimestamp() - $retention;

        // array_filter keeps the original keys; re-index so the history stays a list.
        $this->requestHistory[$clientId] = array_values(array_filter(
            $this->requestHistory[$clientId],
            static fn ($timestamp): bool => $timestamp >= $cutoffTime
        ));
    }

    /**
     * Returns the client's timestamps inside a window ending at $currentTime.
     *
     * @param int|null $windowSeconds Window length; defaults to the main analysis window.
     *
     * @return list<int|float> Ascending, consecutively indexed timestamps.
     */
    private function getRequestHistory(
        string $clientId,
        Timestamp $currentTime,
        ?int $windowSeconds = null
    ): array {
        if (! isset($this->requestHistory[$clientId])) {
            return [];
        }

        $cutoffTime = $currentTime->toUnixTimestamp() - ($windowSeconds ?? $this->timeWindowSeconds);

        $recent = array_filter(
            $this->requestHistory[$clientId],
            static fn ($timestamp): bool => $timestamp >= $cutoffTime
        );

        // sort() also re-indexes, so callers can rely on positions 0..n-1.
        sort($recent);

        return $recent;
    }

    /**
     * Builds the history key: session id when present, otherwise the client IP.
     */
    private function getClientId(RequestAnalysisData $requestData): string
    {
        if (! empty($requestData->sessionId)) {
            return 'session:' . $requestData->sessionId;
        }

        if ($requestData->clientIp !== null) {
            return 'ip:' . $requestData->clientIp->toString();
        }

        return 'unknown';
    }

    /**
     * Similarity between the bucket series and itself shifted by $lag.
     *
     * Computed as 2·Σ(x·y) / Σ(x² + y²), which stays within [0, 1] for the
     * non-negative counts stored in $buckets.
     *
     * @param array<int, int> $buckets  Sparse histogram: second offset => request count.
     * @param int             $lag      Shift in seconds.
     * @param int             $duration Number of seconds covered by the histogram.
     */
    private function calculateAutocorrelation(array $buckets, int $lag, int $duration): float
    {
        if ($lag >= $duration || $lag <= 0) {
            return 0.0;
        }

        $crossProduct = 0.0;
        $energy = 0.0;

        for ($i = 0; $i < $duration - $lag; $i++) {
            $x = $buckets[$i] ?? 0;
            $y = $buckets[$i + $lag] ?? 0;

            $crossProduct += $x * $y;
            $energy += $x * $x + $y * $y;
        }

        if ($energy <= 0.0) {
            return 0.0;
        }

        return (2.0 * $crossProduct) / $energy;
    }

    /**
     * Gaps between consecutive timestamps.
     *
     * @param list<int|float> $requests Timestamps in ascending order.
     *
     * @return list<int|float>
     */
    private function interArrivalTimes(array $requests): array
    {
        $gaps = [];
        for ($i = 1, $total = count($requests); $i < $total; $i++) {
            $gaps[] = $requests[$i] - $requests[$i - 1];
        }

        return $gaps;
    }

    /**
     * Variance as defined by BehaviorFeature::statistical(), so every feature
     * in this class uses the same definition.
     *
     * @param list<int|float> $values
     */
    private function varianceOf(array $values): float
    {
        return BehaviorFeature::statistical(
            type: $this->getBehaviorType(),
            name: 'temp_variance',
            values: $values,
            statistic: 'variance'
        )->value;
    }

    /**
     * Shorthand for a plain feature of this extractor's behavior type.
     */
    private function feature(string $name, int|float $value, string $unit): BehaviorFeature
    {
        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: $name,
            value: $value,
            unit: $unit
        );
    }

    /**
     * Names of the features produced by extractFeatures(), in emission order.
     *
     * @return list<string>
     */
    public function getFeatureNames(): array
    {
        return [
            "request_rate_{$this->timeWindowSeconds}s",
            'burst_rate_' . self::BURST_WINDOW_SECONDS . 's',
            'sustained_rate_' . self::SUSTAINED_WINDOW_SECONDS . 's',
            'inter_arrival_variance',
            'request_spacing_regularity',
            'periodicity_score',
            'time_of_day_entropy',
            'weekday_entropy',
            'frequency_entropy',
            'burstiness',
        ];
    }

    /**
     * @return array<string, bool|int|float>
     */
    public function getConfiguration(): array
    {
        return [
            'enabled' => $this->enabled,
            'time_window_seconds' => $this->timeWindowSeconds,
            'max_stored_requests' => $this->maxStoredRequests,
            'burst_threshold' => $this->burstThreshold,
            'feature_count' => count($this->getFeatureNames()),
        ];
    }

    public function isEnabled(): bool
    {
        return $this->enabled;
    }

    public function getPriority(): int
    {
        return 100; // High priority for frequency analysis
    }

    public function getExpectedProcessingTime(): int
    {
        return 50; // milliseconds
    }

    public function supportsParallelExecution(): bool
    {
        return false; // Needs sequential access for request history
    }

    public function getDependencies(): array
    {
        return []; // No dependencies
    }
}
|
||||
@@ -0,0 +1,914 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Waf\MachineLearning\Extractors;
|
||||
|
||||
use App\Framework\Waf\Analysis\ValueObjects\RequestAnalysisData;
|
||||
use App\Framework\Waf\MachineLearning\BehaviorType;
|
||||
use App\Framework\Waf\MachineLearning\FeatureExtractorInterface;
|
||||
use App\Framework\Waf\MachineLearning\ValueObjects\BehaviorFeature;
|
||||
|
||||
/**
|
||||
* Extracts behavioral patterns from URL paths, parameters, and request structure
|
||||
*/
|
||||
final class PatternFeatureExtractor implements FeatureExtractorInterface
|
||||
{
|
||||
/**
 * @param bool  $enabled          Toggle for this extractor.
 * @param int   $maxPathSegments  Usage is not visible in this part of the file — TODO confirm.
 * @param int   $maxParameterKeys Usage is not visible in this part of the file — TODO confirm.
 * @param int   $historySize      Presumably caps the per-client history length — verify in recordPatterns().
 * @param array $pathHistory      Map of client id => list of requested paths.
 * @param array $parameterHistory Map of client id => list of parameter arrays.
 */
public function __construct(
    private readonly bool $enabled = true,
    private readonly int $maxPathSegments = 20,
    private readonly int $maxParameterKeys = 100,
    private readonly int $historySize = 100,
    private array $pathHistory = [],
    private array $parameterHistory = [],
) {
}
|
||||
|
||||
/**
 * Behavior category reported for the path-level features of this extractor.
 */
public function getBehaviorType(): BehaviorType
{
    return BehaviorType::PATH_PATTERNS;
}
|
||||
|
||||
/**
 * Extraction needs a request path; anything empty() rejects is skipped.
 */
public function canExtract(RequestAnalysisData $requestData): bool
{
    $hasNoPath = empty($requestData->path);

    return ! $hasNoPath;
}
|
||||
|
||||
/**
 * Records the request for its client and gathers path, parameter, sequence
 * and structure features, in that order.
 *
 * @return array Collected features with falsy entries filtered out.
 */
public function extractFeatures(RequestAnalysisData $requestData, array $context = []): array
{
    $clientId = $this->getClientId($requestData);

    // History must contain the current request before the features are derived.
    $this->recordPatterns($clientId, $requestData);

    $collected = [
        ...$this->extractPathFeatures($requestData, $clientId),
        ...$this->extractParameterFeatures($requestData, $clientId),
        ...$this->extractSequenceFeatures($requestData, $clientId),
        ...$this->extractStructureFeatures($requestData),
    ];

    return array_filter($collected);
}
|
||||
|
||||
/**
 * Collects every path-derived feature for the request.
 *
 * @return list<BehaviorFeature> Structure, history-pattern and suspicion features, in that order.
 */
private function extractPathFeatures(RequestAnalysisData $requestData, string $clientId): array
{
    $requestPath = $requestData->path;

    return [
        // Shape of the path itself
        $this->extractPathDepth($requestPath),
        $this->extractPathComplexity($requestPath),
        $this->extractPathEntropy($requestPath),
        // How the path relates to the client's history
        $this->extractPathUniqueness($clientId),
        $this->extractPathRepetition($clientId),
        $this->extractPathDiversity($clientId),
        // Indicators of probing or attacks
        $this->extractSuspiciousPathScore($requestPath),
        $this->extractFileExtensionPattern($requestPath),
        $this->extractDirectoryTraversalScore($requestPath),
    ];
}
|
||||
|
||||
/**
 * Collects every parameter-derived feature for the request.
 *
 * @return list<BehaviorFeature> Empty when the request carries no parameters.
 */
private function extractParameterFeatures(RequestAnalysisData $requestData, string $clientId): array
{
    $params = $requestData->getAllParameters();

    if (! $params) {
        return [];
    }

    return [
        // Count and structure
        $this->extractParameterCount($params),
        $this->extractParameterComplexity($params),
        $this->extractParameterEntropy($params),
        // Patterns across the client's history and the current values
        $this->extractParameterUniqueness($clientId),
        $this->extractParameterKeyDiversity($clientId),
        $this->extractParameterValueEntropy($params),
        // Indicators of malicious input
        $this->extractSuspiciousParameterScore($params),
        $this->extractInjectionPatternScore($params),
    ];
}
|
||||
|
||||
/**
 * Derives features from the order in which the client visited paths.
 *
 * @return list<BehaviorFeature> Empty until at least two paths are recorded for the client.
 */
private function extractSequenceFeatures(RequestAnalysisData $requestData, string $clientId): array
{
    $visitedPaths = $this->pathHistory[$clientId] ?? [];

    if (count($visitedPaths) < 2) {
        return [];
    }

    return [
        $this->extractPathSequenceEntropy($visitedPaths),
        $this->extractPathTransitionScore($visitedPaths),
        $this->extractNavigationPattern($visitedPaths),
    ];
}
|
||||
|
||||
/**
 * Collects features describing the overall structure of the request.
 *
 * @return list<BehaviorFeature>
 */
private function extractStructureFeatures(RequestAnalysisData $requestData): array
{
    return [
        $this->extractRequestComplexity($requestData),
        $this->extractHeaderToBodyRatio($requestData),
        $this->extractContentTypeConsistency($requestData),
    ];
}
|
||||
|
||||
/**
 * Extract path depth (number of non-empty segments).
 *
 * Only empty segments (from repeated slashes) are discarded; a segment such as
 * "0" counts, so "/users/0/edit" has depth 3.
 */
private function extractPathDepth(string $path): BehaviorFeature
{
    // A callback-less array_filter would also drop "0", which is a valid segment.
    $segments = array_filter(
        explode('/', trim($path, '/')),
        static fn (string $segment): bool => $segment !== ''
    );

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_depth',
        value: count($segments),
        unit: 'segments'
    );
}
|
||||
|
||||
/**
 * Extract path complexity score.
 *
 * Each non-empty segment contributes its length / 20, its number of distinct
 * bytes / 10 and 0.5 per character outside [a-zA-Z0-9_-].
 */
private function extractPathComplexity(string $path): BehaviorFeature
{
    // Drop only empty segments; a callback-less array_filter would also drop "0".
    $segments = array_filter(
        explode('/', trim($path, '/')),
        static fn (string $segment): bool => $segment !== ''
    );

    $complexity = 0.0;

    foreach ($segments as $segment) {
        // Length complexity
        $complexity += strlen($segment) / 20.0;

        // Character diversity
        $uniqueChars = count(array_unique(str_split($segment)));
        $complexity += $uniqueChars / 10.0;

        // Special characters
        $specialChars = (int) preg_match_all('/[^a-zA-Z0-9_-]/', $segment);
        $complexity += $specialChars * 0.5;
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_complexity',
        value: $complexity,
        unit: 'complexity_score'
    );
}
|
||||
|
||||
/**
 * Entropy of the path's lower-cased character frequencies.
 */
private function extractPathEntropy(string $path): BehaviorFeature
{
    // Occurrences per character, case-insensitively
    $charCounts = array_count_values(str_split(strtolower($path)));

    return BehaviorFeature::entropy(
        type: $this->getBehaviorType(),
        name: 'path_entropy',
        distribution: array_values($charCounts)
    );
}
|
||||
|
||||
/**
 * Share of distinct paths among everything recorded for the client.
 *
 * A client without history is reported as fully unique (1.0).
 */
private function extractPathUniqueness(string $clientId): BehaviorFeature
{
    $seenPaths = $this->pathHistory[$clientId] ?? [];

    $ratio = 1.0;
    if (! empty($seenPaths)) {
        $total = count($seenPaths);
        $distinct = count(array_unique($seenPaths));
        $ratio = $total > 0 ? $distinct / $total : 0.0;
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_uniqueness',
        value: $ratio,
        unit: 'ratio'
    );
}
|
||||
|
||||
/**
 * Extract path repetition: the share of the history taken by the most
 * frequently requested path.
 *
 * Reported as 0.0 until at least two paths are recorded. The unit is 'ratio'
 * in both cases so consumers see one consistent feature definition.
 */
private function extractPathRepetition(string $clientId): BehaviorFeature
{
    $pathHistory = $this->pathHistory[$clientId] ?? [];
    $totalCount = count($pathHistory);

    $repetition = 0.0;
    if ($totalCount >= 2) {
        $repetition = max(array_count_values($pathHistory)) / $totalCount;
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_repetition',
        value: $repetition,
        unit: 'ratio'
    );
}
|
||||
|
||||
/**
 * Entropy of how often each path occurs in the client's history.
 */
private function extractPathDiversity(string $clientId): BehaviorFeature
{
    $seenPaths = $this->pathHistory[$clientId] ?? [];

    if (! empty($seenPaths)) {
        $occurrences = array_count_values($seenPaths);

        return BehaviorFeature::entropy(
            type: $this->getBehaviorType(),
            name: 'path_diversity',
            distribution: array_values($occurrences)
        );
    }

    // No history yet: nothing to measure
    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_diversity',
        value: 0.0,
        unit: 'bits'
    );
}
|
||||
|
||||
/**
 * Heuristic suspicion score for a path, capped at 1.0.
 *
 * Adds 0.3 per matching admin/system fragment, then fixed weights for
 * percent-encoding, double encoding, encoded null bytes and excessive length.
 */
private function extractSuspiciousPathScore(string $path): BehaviorFeature
{
    $score = 0.0;

    // Admin/system paths (case-insensitive substring match)
    foreach (['/admin', '/administrator', '/config', '/debug', '/test'] as $fragment) {
        if (stripos($path, $fragment) !== false) {
            $score += 0.3;
        }
    }

    // [condition, weight] pairs, applied in this order
    $indicators = [
        [preg_match('/%[0-9a-fA-F]{2}/', $path) === 1, 0.2],   // encoded characters
        [preg_match('/%25[0-9a-fA-F]{2}/', $path) === 1, 0.4], // double encoding
        [str_contains($path, '%00'), 0.5],                      // null bytes
        [strlen($path) > 200, 0.2],                             // excessive length
    ];

    foreach ($indicators as [$triggered, $weight]) {
        if ($triggered) {
            $score += $weight;
        }
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'suspicious_path_score',
        value: min($score, 1.0),
        unit: 'score'
    );
}
|
||||
|
||||
/**
 * Extract a risk score from the path's file extension.
 *
 * Extensions of server-side scripts and executables score 1.0, any other
 * extension 0.1 and a path without extension 0.0.
 */
private function extractFileExtensionPattern(string $path): BehaviorFeature
{
    $extension = strtolower(pathinfo($path, PATHINFO_EXTENSION));

    $dangerousExtensions = [
        'php', 'asp', 'aspx', 'jsp', 'py', 'pl', 'cgi', 'sh', 'bat', 'exe',
    ];

    $riskScore = match (true) {
        in_array($extension, $dangerousExtensions, true) => 1.0,
        // Compared against '' rather than empty() so an extension of "0" still counts.
        $extension !== '' => 0.1, // Any extension is slightly suspicious
        default => 0.0,
    };

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'file_extension_risk',
        value: $riskScore,
        unit: 'risk_score'
    );
}
|
||||
|
||||
/**
 * Extract directory traversal score: 0.3 per "../" or "..\" sequence, capped at 1.0.
 *
 * Percent-encoded dots and separators (%2e, %2f, %5c, any letter case) are
 * decoded before counting, so mixed forms such as "..%2f" or "%2e%2e/" are
 * detected as well as the fully plain and fully encoded ones.
 */
private function extractDirectoryTraversalScore(string $path): BehaviorFeature
{
    // Normalise once instead of lower-casing the path for every pattern.
    $normalized = strtr(strtolower($path), [
        '%2e' => '.',
        '%2f' => '/',
        '%5c' => '\\',
    ]);

    $sequences = substr_count($normalized, '../') + substr_count($normalized, '..\\');
    $traversalScore = $sequences * 0.3;

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'directory_traversal_score',
        value: min($traversalScore, 1.0),
        unit: 'score'
    );
}
|
||||
|
||||
/**
 * Number of top-level parameters in the request.
 */
private function extractParameterCount(array $parameters): BehaviorFeature
{
    $total = count($parameters);

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_count',
        value: $total,
        unit: 'count'
    );
}
|
||||
|
||||
/**
 * Extract parameter complexity.
 *
 * Keys contribute length / 50 plus 0.1 per character outside [a-zA-Z0-9_];
 * string values contribute length / 200 plus 0.05 per character that is
 * neither alphanumeric nor whitespace. Non-string values are ignored.
 */
private function extractParameterComplexity(array $parameters): BehaviorFeature
{
    $complexity = 0.0;

    foreach ($parameters as $key => $value) {
        // PHP turns numeric parameter names into int keys; the string functions
        // below reject ints under strict types, so normalise first.
        $key = (string) $key;

        // Key complexity
        $complexity += strlen($key) / 50.0;
        $complexity += preg_match_all('/[^a-zA-Z0-9_]/', $key) * 0.1;

        // Value complexity
        if (is_string($value)) {
            $complexity += strlen($value) / 200.0;
            $complexity += preg_match_all('/[^a-zA-Z0-9\\s]/', $value) * 0.05;
        }
    }

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_complexity',
        value: $complexity,
        unit: 'complexity_score'
    );
}
|
||||
|
||||
/**
 * Entropy of the lower-cased characters used across all parameter keys.
 */
private function extractParameterEntropy(array $parameters): BehaviorFeature
{
    if (! empty($parameters)) {
        // Concatenate every key and count character occurrences
        $joinedKeys = strtolower(implode('', array_keys($parameters)));
        $charCounts = array_count_values(str_split($joinedKeys));

        return BehaviorFeature::entropy(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_entropy',
            distribution: array_values($charCounts)
        );
    }

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_entropy',
        value: 0.0,
        unit: 'bits'
    );
}
|
||||
|
||||
/**
 * Share of distinct parameter sets among those recorded for the client.
 *
 * A client without parameter history is reported as fully unique (1.0).
 */
private function extractParameterUniqueness(string $clientId): BehaviorFeature
{
    $recorded = $this->parameterHistory[$clientId] ?? [];

    $ratio = 1.0;
    if (! empty($recorded)) {
        $total = count($recorded);
        // SORT_REGULAR lets array_unique compare the entries as arrays
        $distinct = count(array_unique($recorded, SORT_REGULAR));
        $ratio = $total > 0 ? $distinct / $total : 0.0;
    }

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_uniqueness',
        value: $ratio,
        unit: 'ratio'
    );
}
|
||||
|
||||
/**
 * Extract parameter key diversity: entropy of how often each parameter key
 * occurs across the client's recorded requests.
 *
 * History entries that are not arrays are ignored.
 */
private function extractParameterKeyDiversity(string $clientId): BehaviorFeature
{
    $paramHistory = $this->parameterHistory[$clientId] ?? [];

    if (empty($paramHistory)) {
        return BehaviorFeature::create(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_key_diversity',
            value: 0.0,
            unit: 'bits'
        );
    }

    // Tally keys directly; merging arrays inside the loop re-copies the
    // accumulated list on every iteration.
    $keyCounts = [];
    foreach ($paramHistory as $params) {
        if (! is_array($params)) {
            continue;
        }

        foreach (array_keys($params) as $key) {
            $keyCounts[$key] = ($keyCounts[$key] ?? 0) + 1;
        }
    }

    return BehaviorFeature::entropy(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_key_diversity',
        distribution: array_values($keyCounts)
    );
}
|
||||
|
||||
/**
 * Measure how varied the characters used in the parameter values are.
 *
 * Only string values take part. They are concatenated, lower-cased and split
 * into single bytes; the per-byte occurrence counts are passed to
 * BehaviorFeature::entropy().
 *
 * @param array $parameters Request parameters keyed by parameter name.
 * @return BehaviorFeature Feature named 'parameter_value_entropy'.
 */
private function extractParameterValueEntropy(array $parameters): BehaviorFeature
{
    // Concatenate every string value in iteration order.
    $joinedValues = '';
    foreach ($parameters as $candidate) {
        if (is_string($candidate)) {
            $joinedValues .= $candidate;
        }
    }

    // Covers both "no parameters" and "no usable string content".
    // empty() also treats the literal string '0' as empty.
    if (empty($joinedValues)) {
        return BehaviorFeature::create(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_value_entropy',
            value: 0.0,
            unit: 'bits'
        );
    }

    $byteCounts = array_count_values(str_split(strtolower($joinedValues)));

    return BehaviorFeature::entropy(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_value_entropy',
        distribution: array_values($byteCounts)
    );
}
|
||||
|
||||
/**
 * Score how suspicious the request parameters look.
 *
 * Adds 0.3 for every watched fragment (for example 'cmd' or 'token')
 * contained in a lower-cased parameter name, 0.1 for a string value that
 * contains a percent-encoded byte, and 0.2 for a string value longer than
 * 1000 bytes. The total is capped at 1.0.
 *
 * @param array $parameters Request parameters keyed by parameter name.
 * @return BehaviorFeature Feature named 'suspicious_parameter_score'.
 */
private function extractSuspiciousParameterScore(array $parameters): BehaviorFeature
{
    $suspiciousScore = 0.0;

    $suspiciousKeys = [
        'eval', 'exec', 'system', 'cmd', 'command', 'shell',
        'admin', 'root', 'password', 'pass', 'auth', 'token',
        'debug', 'test', 'dev', 'config', 'settings',
    ];

    foreach ($parameters as $key => $value) {
        // PHP stores numeric-looking array keys (e.g. "?0=x") as integers.
        // Cast back to string so strtolower() does not raise a TypeError
        // when strict_types is in effect.
        $lowerKey = strtolower((string) $key);

        // Each matching fragment counts, so one name can score several times.
        foreach ($suspiciousKeys as $suspicious) {
            if (str_contains($lowerKey, $suspicious)) {
                $suspiciousScore += 0.3;
            }
        }

        if (! is_string($value)) {
            continue;
        }

        // Percent-encoded byte somewhere in the value.
        if (preg_match('/%[0-9a-fA-F]{2}/', $value)) {
            $suspiciousScore += 0.1;
        }

        // Extremely long value.
        if (strlen($value) > 1000) {
            $suspiciousScore += 0.2;
        }
    }

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'suspicious_parameter_score',
        value: min($suspiciousScore, 1.0),
        unit: 'score'
    );
}
|
||||
|
||||
/**
 * Score the parameter values against a fixed set of injection signatures.
 *
 * Each string value that matches at least one SQL, XSS or command-injection
 * signature adds 0.4; a value is counted once no matter how many signatures
 * it matches. The total is capped at 1.0.
 *
 * @param array $parameters Request parameters keyed by parameter name.
 * @return BehaviorFeature Feature named 'injection_pattern_score'.
 */
private function extractInjectionPatternScore(array $parameters): BehaviorFeature
{
    // Signatures are tried in this order; the first hit ends the search for a value.
    $signatures = [
        // SQL injection
        '/union\\s+select/i',
        '/or\\s+1\\s*=\\s*1/i',
        '/\\s*;\\s*drop\\s+table/i',
        // Cross-site scripting
        '/<script/i',
        '/javascript:/i',
        '/onerror\\s*=/i',
        // Command injection
        '/;\\s*(cat|ls|pwd|id)/i',
        '/\\|\\s*(nc|netcat)/i',
    ];

    $score = 0.0;

    foreach ($parameters as $candidate) {
        if (! is_string($candidate)) {
            continue;
        }

        foreach ($signatures as $regex) {
            if (preg_match($regex, $candidate)) {
                $score += 0.4;

                break; // One hit per value is enough.
            }
        }
    }

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'injection_pattern_score',
        value: min($score, 1.0),
        unit: 'score'
    );
}
|
||||
|
||||
/**
 * Measure how varied the transitions between consecutive paths are.
 *
 * Every pair of neighbouring entries is turned into a "from -> to" label;
 * the per-label occurrence counts are passed to BehaviorFeature::entropy().
 *
 * @param array $pathHistory Integer-indexed list of visited paths, in visit order.
 * @return BehaviorFeature Feature named 'path_sequence_entropy'.
 */
private function extractPathSequenceEntropy(array $pathHistory): BehaviorFeature
{
    $transitions = [];
    $total = count($pathHistory);

    // Walk the list by its second element so each step pairs it with its predecessor.
    for ($next = 1; $next < $total; $next++) {
        $transitions[] = $pathHistory[$next - 1] . ' -> ' . $pathHistory[$next];
    }

    $occurrences = array_count_values($transitions);

    return BehaviorFeature::entropy(
        type: $this->getBehaviorType(),
        name: 'path_sequence_entropy',
        distribution: array_values($occurrences)
    );
}
|
||||
|
||||
/**
 * Average similarity between consecutive paths in the history.
 *
 * For every neighbouring pair the similarity is
 * 1 - levenshtein(a, b) / max(strlen(a), strlen(b)); the feature value is the
 * mean over all pairs. Fewer than two entries yield 0.0.
 *
 * @param array $pathHistory Integer-indexed list of visited paths, in visit order.
 * @return BehaviorFeature Feature named 'path_transition_score'.
 */
private function extractPathTransitionScore(array $pathHistory): BehaviorFeature
{
    $pairCount = count($pathHistory) - 1;

    if ($pairCount < 1) {
        // NOTE(review): this unit differs from the 'similarity_score' used
        // below; kept unchanged so existing consumers see the same values.
        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'path_transition_score',
            value: 0.0,
            unit: 'score'
        );
    }

    $similaritySum = 0.0;

    for ($i = 0; $i < $pairCount; $i++) {
        $current = $pathHistory[$i];
        $next = $pathHistory[$i + 1];

        $longest = max(strlen($current), strlen($next));

        // Two empty paths are identical. Without this guard the division
        // below would raise DivisionByZeroError.
        $similaritySum += $longest === 0
            ? 1.0
            : 1.0 - (levenshtein($current, $next) / $longest);
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_transition_score',
        value: $similaritySum / $pairCount,
        unit: 'similarity_score'
    );
}
|
||||
|
||||
/**
 * Score how often a client returns to a recently visited path.
 *
 * Starting with the third entry, each path is compared with up to five
 * preceding entries. The nearest earlier occurrence contributes
 * 1 / distance, so an immediate repeat scores 1.0 and a revisit five steps
 * back scores 0.2. The sum is divided by the number of entries examined.
 *
 * @param array $pathHistory Integer-indexed list of visited paths, in visit order.
 * @return BehaviorFeature Feature named 'navigation_backtracking'.
 */
private function extractNavigationPattern(array $pathHistory): BehaviorFeature
{
    $total = count($pathHistory);
    $backtrackingScore = 0.0;

    for ($i = 2; $i < $total; $i++) {
        $current = $pathHistory[$i];
        $windowStart = max(0, $i - 5);

        // Scan from the most recent entry backwards so the closest revisit
        // decides the score. Scanning oldest-first would let an older match
        // hide a more recent one and under-score the pattern.
        for ($j = $i - 1; $j >= $windowStart; $j--) {
            if ($pathHistory[$j] === $current) {
                $backtrackingScore += 1.0 / ($i - $j);

                break;
            }
        }
    }

    // Entries 0 and 1 are never examined, hence the "- 2".
    $normalizedScore = $total > 2 ? $backtrackingScore / ($total - 2) : 0.0;

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'navigation_backtracking',
        value: $normalizedScore,
        unit: 'backtracking_score'
    );
}
|
||||
|
||||
/**
 * Combine the size of the request's parts into one complexity score.
 *
 * The score is the sum of: path length / 100, parameter count / 20,
 * header count / 30 and body length / 5000. It is not capped.
 *
 * @param RequestAnalysisData $requestData Request under analysis.
 * @return BehaviorFeature Feature named 'request_complexity'.
 */
private function extractRequestComplexity(RequestAnalysisData $requestData): BehaviorFeature
{
    $pathTerm = strlen($requestData->path) / 100.0;
    $parameterTerm = count($requestData->getAllParameters()) / 20.0;
    $headerTerm = count($requestData->headers) / 30.0;
    $bodyTerm = strlen($requestData->body) / 5000.0;

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'request_complexity',
        value: $pathTerm + $parameterTerm + $headerTerm + $bodyTerm,
        unit: 'complexity_score'
    );
}
|
||||
|
||||
/**
 * Share of the request's header bytes in header plus body bytes.
 *
 * Header size is the summed length of every header name and value. The
 * ratio is 0.0 when headers and body are both empty.
 *
 * @param RequestAnalysisData $requestData Request under analysis.
 * @return BehaviorFeature Feature named 'header_body_ratio' (unit 'ratio').
 */
private function extractHeaderToBodyRatio(RequestAnalysisData $requestData): BehaviorFeature
{
    $headerBytes = 0;
    foreach ($requestData->headers as $headerName => $headerValue) {
        $headerBytes += strlen($headerName) + strlen($headerValue);
    }

    $totalBytes = $headerBytes + strlen($requestData->body);

    if ($totalBytes > 0) {
        $share = $headerBytes / $totalBytes;
    } else {
        $share = 0.0;
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'header_body_ratio',
        value: $share,
        unit: 'ratio'
    );
}
|
||||
|
||||
/**
 * Check whether the body actually parses as the declared content type.
 *
 * Starts at 1.0 and subtracts 0.5 when the request is flagged as JSON but the
 * body does not decode, and another 0.5 when it is flagged as XML but the
 * body does not parse. Requests without a content type or with an empty body
 * keep the full score. The result never drops below 0.0.
 *
 * @param RequestAnalysisData $requestData Request under analysis.
 * @return BehaviorFeature Feature named 'content_type_consistency'.
 */
private function extractContentTypeConsistency(RequestAnalysisData $requestData): BehaviorFeature
{
    $consistencyScore = 1.0;

    if ($requestData->contentType !== null) {
        if ($requestData->isJson() && ! empty($requestData->body)) {
            json_decode($requestData->body);

            if (json_last_error() !== JSON_ERROR_NONE) {
                $consistencyScore -= 0.5;
            }
        }

        if ($requestData->isXml() && ! empty($requestData->body)) {
            $previousSetting = libxml_use_internal_errors(true);

            // Drop errors left over from earlier parses so they are not
            // attributed to this body.
            libxml_clear_errors();

            try {
                // The body is untrusted input: LIBXML_NONET forbids network
                // access while the document is loaded.
                $document = simplexml_load_string($requestData->body, options: LIBXML_NONET);
                $malformed = $document === false || libxml_get_errors() !== [];
            } finally {
                // Always leave libxml's error buffer and setting as we found them.
                libxml_clear_errors();
                libxml_use_internal_errors($previousSetting);
            }

            if ($malformed) {
                $consistencyScore -= 0.5;
            }
        }
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'content_type_consistency',
        value: max(0.0, $consistencyScore),
        unit: 'consistency_score'
    );
}
|
||||
|
||||
/**
 * Append the current request's path and parameters to the client's history.
 *
 * The path is always recorded; parameters only when the request has any.
 * Each history drops its oldest entry once it holds more than
 * $this->historySize entries.
 *
 * @param string $clientId Key under which the histories are stored.
 * @param RequestAnalysisData $requestData Request under analysis.
 */
private function recordPatterns(string $clientId, RequestAnalysisData $requestData): void
{
    // Path history
    $this->pathHistory[$clientId] ??= [];
    $this->pathHistory[$clientId][] = $requestData->path;

    if (count($this->pathHistory[$clientId]) > $this->historySize) {
        array_shift($this->pathHistory[$clientId]);
    }

    // Parameter history (the slot is created even when nothing gets recorded)
    $this->parameterHistory[$clientId] ??= [];

    $requestParameters = $requestData->getAllParameters();
    if (empty($requestParameters)) {
        return;
    }

    $this->parameterHistory[$clientId][] = $requestParameters;

    if (count($this->parameterHistory[$clientId]) > $this->historySize) {
        array_shift($this->parameterHistory[$clientId]);
    }
}
|
||||
|
||||
/**
 * Build the key under which a client's history is stored.
 *
 * A non-empty session id wins ('session:<id>'), then the client IP
 * ('ip:<address>'); otherwise the literal 'unknown' is used.
 *
 * @param RequestAnalysisData $requestData Request under analysis.
 * @return string Client key.
 */
private function getClientId(RequestAnalysisData $requestData): string
{
    return match (true) {
        ! empty($requestData->sessionId) => 'session:' . $requestData->sessionId,
        $requestData->clientIp !== null => 'ip:' . $requestData->clientIp->toString(),
        default => 'unknown',
    };
}
|
||||
|
||||
/**
 * List the names of all features this extractor reports, grouped as path,
 * parameter, sequence and structure features (in that order).
 *
 * @return array List of feature-name strings.
 */
public function getFeatureNames(): array
{
    $pathFeatures = [
        'path_depth',
        'path_complexity',
        'path_entropy',
        'path_uniqueness',
        'path_repetition',
        'path_diversity',
        'suspicious_path_score',
        'file_extension_risk',
        'directory_traversal_score',
    ];

    $parameterFeatures = [
        'parameter_count',
        'parameter_complexity',
        'parameter_entropy',
        'parameter_uniqueness',
        'parameter_key_diversity',
        'parameter_value_entropy',
        'suspicious_parameter_score',
        'injection_pattern_score',
    ];

    $sequenceFeatures = [
        'path_sequence_entropy',
        'path_transition_score',
        'navigation_backtracking',
    ];

    $structureFeatures = [
        'request_complexity',
        'header_body_ratio',
        'content_type_consistency',
    ];

    return [
        ...$pathFeatures,
        ...$parameterFeatures,
        ...$sequenceFeatures,
        ...$structureFeatures,
    ];
}
|
||||
|
||||
/**
 * Report the extractor's current settings.
 *
 * @return array Map with the keys 'enabled', 'max_path_segments',
 *               'max_parameter_keys', 'history_size' and 'feature_count'.
 */
public function getConfiguration(): array
{
    $featureCount = count($this->getFeatureNames());

    $configuration = [];
    $configuration['enabled'] = $this->enabled;
    $configuration['max_path_segments'] = $this->maxPathSegments;
    $configuration['max_parameter_keys'] = $this->maxParameterKeys;
    $configuration['history_size'] = $this->historySize;
    $configuration['feature_count'] = $featureCount;

    return $configuration;
}
|
||||
|
||||
/**
 * Tell whether this extractor is switched on.
 *
 * @return bool The value of the 'enabled' setting.
 */
public function isEnabled(): bool
{
    $isActive = $this->enabled;

    return $isActive;
}
|
||||
|
||||
/**
 * Priority of this extractor.
 *
 * @return int Always 80 (medium-high).
 */
public function getPriority(): int
{
    $mediumHighPriority = 80;

    return $mediumHighPriority;
}
|
||||
|
||||
/**
 * Expected processing time of one extraction run.
 *
 * @return int Always 75, in milliseconds.
 */
public function getExpectedProcessingTime(): int
{
    $expectedMilliseconds = 75;

    return $expectedMilliseconds;
}
|
||||
|
||||
/**
 * Tell whether this extractor may run in parallel with others.
 *
 * @return bool Always false: the pattern history needs sequential access.
 */
public function supportsParallelExecution(): bool
{
    $parallelSafe = false;

    return $parallelSafe;
}
|
||||
|
||||
/**
 * List what this extractor depends on.
 *
 * @return array Always an empty array: there are no dependencies.
 */
public function getDependencies(): array
{
    $noDependencies = [];

    return $noDependencies;
}
|
||||
}
|
||||
Reference in New Issue
Block a user