Enable Discovery debug logging for production troubleshooting
- Add DISCOVERY_LOG_LEVEL=debug - Add DISCOVERY_SHOW_PROGRESS=true - Temporary changes for debugging InitializerProcessor fixes on production
This commit is contained in:
400
src/Framework/Discovery/Resilience/DiscoveryTimeoutHandler.php
Normal file
400
src/Framework/Discovery/Resilience/DiscoveryTimeoutHandler.php
Normal file
@@ -0,0 +1,400 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Discovery\Resilience;
|
||||
|
||||
use App\Framework\DateTime\Clock;
|
||||
use App\Framework\Discovery\Exceptions\DiscoveryException;
|
||||
use App\Framework\Discovery\ValueObjects\DiscoveryContext;
|
||||
use App\Framework\Logging\Logger;
|
||||
|
||||
/**
|
||||
* Handles timeout management for Discovery operations
|
||||
*
|
||||
* Prevents long-running discovery operations from hanging
|
||||
* the system by implementing configurable timeouts with
|
||||
* proper cleanup and resource management.
|
||||
*/
|
||||
final class DiscoveryTimeoutHandler
|
||||
{
|
||||
private array $activeOperations = [];
|
||||
|
||||
public function __construct(
|
||||
private readonly Clock $clock,
|
||||
private readonly ?Logger $logger = null,
|
||||
private readonly int $defaultTimeoutSeconds = 300, // 5 minutes
|
||||
private readonly int $memoryTimeoutSeconds = 120, // 2 minutes for memory-constrained operations
|
||||
private readonly int $fileTimeoutSeconds = 600, // 10 minutes for file-heavy operations
|
||||
private readonly int $maxOperationsPerContext = 1 // Prevent concurrent operations on same context
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute operation with timeout protection
|
||||
*/
|
||||
public function executeWithTimeout(
|
||||
callable $operation,
|
||||
DiscoveryContext $context,
|
||||
?int $customTimeoutSeconds = null
|
||||
): mixed {
|
||||
$operationId = $this->generateOperationId($context);
|
||||
$timeoutSeconds = $customTimeoutSeconds ?? $this->determineTimeout($context);
|
||||
|
||||
// Check for concurrent operations on same context
|
||||
$this->checkConcurrentOperations($context);
|
||||
|
||||
// Register operation
|
||||
$this->registerOperation($operationId, $context, $timeoutSeconds);
|
||||
|
||||
try {
|
||||
$this->logger?->debug('Starting timed discovery operation', [
|
||||
'operation_id' => $operationId,
|
||||
'timeout_seconds' => $timeoutSeconds,
|
||||
'context_key' => $context->getCacheKey(),
|
||||
]);
|
||||
|
||||
// Set up timeout using pcntl_alarm if available, otherwise use basic timing
|
||||
if (function_exists('pcntl_alarm') && function_exists('pcntl_signal')) {
|
||||
return $this->executeWithSignalTimeout($operation, $operationId, $timeoutSeconds);
|
||||
} else {
|
||||
return $this->executeWithPollingTimeout($operation, $operationId, $timeoutSeconds);
|
||||
}
|
||||
|
||||
} finally {
|
||||
$this->unregisterOperation($operationId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute with signal-based timeout (if available)
|
||||
*/
|
||||
private function executeWithSignalTimeout(callable $operation, string $operationId, int $timeoutSeconds): mixed
|
||||
{
|
||||
$timedOut = false;
|
||||
|
||||
// Set up signal handler
|
||||
pcntl_signal(SIGALRM, function () use (&$timedOut, $operationId) {
|
||||
$timedOut = true;
|
||||
$this->logger?->warning('Discovery operation timed out (signal)', [
|
||||
'operation_id' => $operationId,
|
||||
]);
|
||||
});
|
||||
|
||||
// Set alarm
|
||||
pcntl_alarm($timeoutSeconds);
|
||||
|
||||
try {
|
||||
$result = $operation();
|
||||
|
||||
// Clear alarm
|
||||
pcntl_alarm(0);
|
||||
|
||||
if ($timedOut) {
|
||||
throw DiscoveryException::timeout($timeoutSeconds, $timeoutSeconds);
|
||||
}
|
||||
|
||||
return $result;
|
||||
|
||||
} catch (\Throwable $e) {
|
||||
pcntl_alarm(0); // Clear alarm
|
||||
|
||||
if ($timedOut) {
|
||||
throw DiscoveryException::timeout($timeoutSeconds, $timeoutSeconds);
|
||||
}
|
||||
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute with polling-based timeout (fallback)
|
||||
*/
|
||||
private function executeWithPollingTimeout(callable $operation, string $operationId, int $timeoutSeconds): mixed
|
||||
{
|
||||
$startTime = $this->clock->time();
|
||||
$checkInterval = min(10, $timeoutSeconds / 10); // Check every 10 seconds or 10% of timeout
|
||||
|
||||
// This is a simplified approach - in a real implementation you'd need
|
||||
// to make the operation interruptible or use a separate process
|
||||
$result = $operation();
|
||||
|
||||
$elapsed = $this->clock->time()->diff($startTime);
|
||||
|
||||
if ($elapsed > $timeoutSeconds) {
|
||||
$this->logger?->warning('Discovery operation exceeded timeout (polling)', [
|
||||
'operation_id' => $operationId,
|
||||
'elapsed_seconds' => $elapsed->toHumanReadable(),
|
||||
'timeout_seconds' => $timeoutSeconds,
|
||||
]);
|
||||
|
||||
throw DiscoveryException::timeout($timeoutSeconds, (int) $elapsed);
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine appropriate timeout for context
|
||||
*/
|
||||
private function determineTimeout(DiscoveryContext $context): int
|
||||
{
|
||||
// Estimate timeout based on context characteristics
|
||||
$pathCount = count($context->paths);
|
||||
$isIncremental = $context->isIncremental();
|
||||
|
||||
// Base timeout
|
||||
$timeout = $this->defaultTimeoutSeconds;
|
||||
|
||||
// Adjust for path count
|
||||
if ($pathCount > 5) {
|
||||
$timeout += ($pathCount - 5) * 30; // 30 seconds per additional path
|
||||
}
|
||||
|
||||
// Reduce for incremental scans
|
||||
if ($isIncremental) {
|
||||
$timeout = (int) ($timeout * 0.5);
|
||||
}
|
||||
|
||||
// Estimate based on scan type
|
||||
$timeout = match ($context->scanType->value) {
|
||||
'FULL' => $timeout * 2, // Full scans take longer
|
||||
'INCREMENTAL' => (int) ($timeout * 0.3), // Incremental scans are faster
|
||||
'TARGETED' => (int) ($timeout * 0.7), // Targeted scans are moderate
|
||||
default => $timeout
|
||||
};
|
||||
|
||||
// Apply minimum and maximum bounds
|
||||
return max(30, min(1800, $timeout)); // Between 30 seconds and 30 minutes
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for concurrent operations on same context
|
||||
*/
|
||||
private function checkConcurrentOperations(DiscoveryContext $context): void
|
||||
{
|
||||
$contextKey = $context->getCacheKey();
|
||||
$activeCount = 0;
|
||||
|
||||
foreach ($this->activeOperations as $operation) {
|
||||
if ($operation['context_key'] === $contextKey) {
|
||||
$activeCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if ($activeCount >= $this->maxOperationsPerContext) {
|
||||
throw DiscoveryException::concurrentDiscovery($contextKey);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Register active operation
|
||||
*/
|
||||
private function registerOperation(string $operationId, DiscoveryContext $context, int $timeoutSeconds): void
|
||||
{
|
||||
$this->activeOperations[$operationId] = [
|
||||
'context_key' => $context->getCacheKey(),
|
||||
'start_time' => $this->clock->time(),
|
||||
'timeout_seconds' => $timeoutSeconds,
|
||||
'paths' => $context->paths,
|
||||
'scan_type' => $context->scanType->value,
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Unregister active operation
|
||||
*/
|
||||
private function unregisterOperation(string $operationId): void
|
||||
{
|
||||
if (isset($this->activeOperations[$operationId])) {
|
||||
$operation = $this->activeOperations[$operationId];
|
||||
$duration = $this->clock->time() - $operation['start_time'];
|
||||
|
||||
$this->logger?->debug('Discovery operation completed', [
|
||||
'operation_id' => $operationId,
|
||||
'duration_seconds' => $duration,
|
||||
'timeout_seconds' => $operation['timeout_seconds'],
|
||||
'within_timeout' => $duration <= $operation['timeout_seconds'],
|
||||
]);
|
||||
|
||||
unset($this->activeOperations[$operationId]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate unique operation ID
|
||||
*/
|
||||
private function generateOperationId(DiscoveryContext $context): string
|
||||
{
|
||||
return 'timeout_' . md5($context->getCacheKey() . '_' . microtime(true));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get active operations status
|
||||
*/
|
||||
public function getActiveOperations(): array
|
||||
{
|
||||
$currentTime = $this->clock->time();
|
||||
$operations = [];
|
||||
|
||||
foreach ($this->activeOperations as $operationId => $operation) {
|
||||
$elapsed = $currentTime - $operation['start_time'];
|
||||
$remaining = max(0, $operation['timeout_seconds'] - $elapsed);
|
||||
|
||||
$operations[$operationId] = [
|
||||
'context_key' => $operation['context_key'],
|
||||
'scan_type' => $operation['scan_type'],
|
||||
'paths_count' => count($operation['paths']),
|
||||
'elapsed_seconds' => (int) $elapsed,
|
||||
'timeout_seconds' => $operation['timeout_seconds'],
|
||||
'remaining_seconds' => (int) $remaining,
|
||||
'progress_percentage' => min(100, ($elapsed / $operation['timeout_seconds']) * 100),
|
||||
'is_overdue' => $remaining <= 0,
|
||||
];
|
||||
}
|
||||
|
||||
return $operations;
|
||||
}
|
||||
|
||||
/**
|
||||
* Kill overdue operations (cleanup)
|
||||
*/
|
||||
public function cleanupOverdueOperations(): array
|
||||
{
|
||||
$currentTime = $this->clock->time();
|
||||
$overdueOperations = [];
|
||||
|
||||
foreach ($this->activeOperations as $operationId => $operation) {
|
||||
$elapsed = $currentTime - $operation['start_time'];
|
||||
|
||||
if ($elapsed > $operation['timeout_seconds']) {
|
||||
$overdueOperations[] = [
|
||||
'operation_id' => $operationId,
|
||||
'context_key' => $operation['context_key'],
|
||||
'elapsed_seconds' => (int) $elapsed,
|
||||
'timeout_seconds' => $operation['timeout_seconds'],
|
||||
];
|
||||
|
||||
$this->logger?->warning('Cleaning up overdue discovery operation', [
|
||||
'operation_id' => $operationId,
|
||||
'elapsed_seconds' => (int) $elapsed,
|
||||
'timeout_seconds' => $operation['timeout_seconds'],
|
||||
]);
|
||||
|
||||
unset($this->activeOperations[$operationId]);
|
||||
}
|
||||
}
|
||||
|
||||
return $overdueOperations;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get timeout statistics
|
||||
*/
|
||||
public function getStatistics(): array
|
||||
{
|
||||
$currentTime = $this->clock->time();
|
||||
$stats = [
|
||||
'active_operations' => count($this->activeOperations),
|
||||
'configuration' => [
|
||||
'default_timeout_seconds' => $this->defaultTimeoutSeconds,
|
||||
'memory_timeout_seconds' => $this->memoryTimeoutSeconds,
|
||||
'file_timeout_seconds' => $this->fileTimeoutSeconds,
|
||||
'max_operations_per_context' => $this->maxOperationsPerContext,
|
||||
],
|
||||
'operations_by_type' => [],
|
||||
'operations_by_status' => [
|
||||
'within_timeout' => 0,
|
||||
'near_timeout' => 0, // Within 10% of timeout
|
||||
'overdue' => 0,
|
||||
],
|
||||
];
|
||||
|
||||
foreach ($this->activeOperations as $operation) {
|
||||
$scanType = $operation['scan_type'];
|
||||
$stats['operations_by_type'][$scanType] = ($stats['operations_by_type'][$scanType] ?? 0) + 1;
|
||||
|
||||
$elapsed = $currentTime - $operation['start_time'];
|
||||
$remaining = $operation['timeout_seconds'] - $elapsed;
|
||||
$remainingPercentage = ($remaining / $operation['timeout_seconds']) * 100;
|
||||
|
||||
if ($remaining <= 0) {
|
||||
$stats['operations_by_status']['overdue']++;
|
||||
} elseif ($remainingPercentage <= 10) {
|
||||
$stats['operations_by_status']['near_timeout']++;
|
||||
} else {
|
||||
$stats['operations_by_status']['within_timeout']++;
|
||||
}
|
||||
}
|
||||
|
||||
return $stats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Force timeout for specific operation
|
||||
*/
|
||||
public function forceTimeout(string $operationId): bool
|
||||
{
|
||||
if (isset($this->activeOperations[$operationId])) {
|
||||
$operation = $this->activeOperations[$operationId];
|
||||
|
||||
$this->logger?->warning('Force timing out discovery operation', [
|
||||
'operation_id' => $operationId,
|
||||
'context_key' => $operation['context_key'],
|
||||
]);
|
||||
|
||||
unset($this->activeOperations[$operationId]);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get timeout recommendations for a context
|
||||
*/
|
||||
public function getTimeoutRecommendations(DiscoveryContext $context): array
|
||||
{
|
||||
$recommendedTimeout = $this->determineTimeout($context);
|
||||
$pathCount = count($context->paths);
|
||||
|
||||
return [
|
||||
'recommended_timeout_seconds' => $recommendedTimeout,
|
||||
'factors' => [
|
||||
'path_count' => $pathCount,
|
||||
'scan_type' => $context->scanType->value,
|
||||
'is_incremental' => $context->isIncremental(),
|
||||
],
|
||||
'alternatives' => [
|
||||
'conservative' => (int) ($recommendedTimeout * 1.5),
|
||||
'aggressive' => (int) ($recommendedTimeout * 0.7),
|
||||
'memory_constrained' => $this->memoryTimeoutSeconds,
|
||||
'file_heavy' => $this->fileTimeoutSeconds,
|
||||
],
|
||||
'recommendations' => $this->generateTimeoutRecommendations($context, $recommendedTimeout),
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate timeout recommendations
|
||||
*/
|
||||
private function generateTimeoutRecommendations(DiscoveryContext $context, int $recommendedTimeout): array
|
||||
{
|
||||
$recommendations = [];
|
||||
|
||||
if (count($context->paths) > 10) {
|
||||
$recommendations[] = 'Consider breaking large path sets into smaller batches';
|
||||
}
|
||||
|
||||
if ($recommendedTimeout > 600) {
|
||||
$recommendations[] = 'Very long timeout detected - consider enabling incremental discovery';
|
||||
}
|
||||
|
||||
if ($context->scanType->value === 'FULL' && count($context->paths) > 5) {
|
||||
$recommendations[] = 'Full scan with many paths - consider using TARGETED scan type';
|
||||
}
|
||||
|
||||
return $recommendations;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user