database = $this->createTestDatabase();
        $this->workerRegistry = new WorkerRegistry($this->database);
        $this->healthCheckService = new WorkerHealthCheckService(
            $this->database,
            $this->workerRegistry
        );
        $this->failoverService = new FailoverRecoveryService(
            $this->database,
            $this->workerRegistry
        );
        $this->distributionService = new JobDistributionService(
            $this->database,
            $this->workerRegistry
        );

        $this->cleanupTestData();
        PerformanceTestHelper::warmupDatabase($this->database->getConnection());
    }

    /**
     * Empties the workers and jobs tables after each test.
     */
    protected function tearDown(): void
    {
        $this->cleanupTestData();
    }

    /**
     * Times repeated checkAllWorkers() runs after three of ten workers have
     * been given a two-minute-old heartbeat, then asserts that each of those
     * three appears among the registry's FAILED workers and that the timings
     * stay under the 50ms average / 100ms P95 thresholds.
     */
    public function testWorkerFailureDetectionTime(): void
    {
        // Create workers with recent heartbeats
        $workers = $this->createHealthyWorkers(10);
        $this->registerWorkers($workers);

        // Simulate worker failures by setting old heartbeats
        $failedWorkerIds = [];
        $currentTime = new \DateTimeImmutable();

        // Make 3 workers appear failed (no heartbeat for 2 minutes)
        for ($i = 0; $i < 3; $i++) {
            $workerId = $workers[$i]->id->toString();
            $failedWorkerIds[] = $workerId;
            $this->updateWorkerHeartbeat($workerId, $currentTime->modify('-2 minutes'));
        }

        // Measure failure detection time
        $detectionTimes = [];
        $iterations = 10;

        for ($i = 0; $i < $iterations; $i++) {
            $time = PerformanceTestHelper::measureTime(function () {
                return $this->healthCheckService->checkAllWorkers();
            });
            $detectionTimes[] = $time;

            // Brief pause between checks
            usleep(10000); // 10ms
        }

        $stats = PerformanceTestHelper::calculateStatistics($detectionTimes);

        // Verify failed workers were detected
        $failedWorkers = $this->workerRegistry->getWorkersByStatus(WorkerStatus::FAILED);
        $detectedFailures = array_map(fn ($w) => $w->id->toString(), $failedWorkers);

        echo "\nWorker Failure Detection Results:\n";
        echo "Workers created: " . count($workers) . "\n";
        echo "Workers failed: " . count($failedWorkerIds) . "\n";
        echo "Failures detected: " . count($detectedFailures) . "\n";
        echo "Detection performance: " . PerformanceTestHelper::formatStatistics($stats) . "\n";

        // All failed workers should be detected
        foreach ($failedWorkerIds as $failedId) {
            $this->assertContains($failedId, $detectedFailures, "Failed worker {$failedId} not detected");
        }

        // Detection should be fast
        $this->assertLessThan(50.0, $stats['avg'], 'Average failure detection time too slow');
        $this->assertLessThan(100.0, $stats['p95'], 'P95 failure detection time too slow');

        PerformanceTestHelper::assertPerformance(
            $detectionTimes,
            50.0,
            100.0,
            'Worker failure detection'
        );
    }

    /**
     * Distributes 50 jobs over 5 workers, marks the first worker FAILED and
     * times reassignFailedWorkerJobs() for it.
     *
     * The number of reassigned jobs is measured as the growth in jobs held by
     * the other workers across the reassignment call, so jobs that were
     * already on healthy workers are not mistaken for reassigned ones.
     */
    public function testJobReassignmentSpeed(): void
    {
        // Create workers and assign jobs
        $workers = $this->createHealthyWorkers(5);
        $this->registerWorkers($workers);

        // Distribute jobs to workers
        $jobs = PerformanceTestHelper::createBulkJobs(50);
        $assignedJobs = [];

        foreach ($jobs as $job) {
            $assignedWorker = $this->distributionService->distributeJob($job);
            if ($assignedWorker) {
                $assignedJobs[] = [
                    'job' => $job,
                    'worker_id' => $assignedWorker->id->toString(),
                ];
            }
        }

        // Simulate worker failure
        $failedWorkerId = $workers[0]->id->toString();
        $this->updateWorkerStatus($failedWorkerId, WorkerStatus::FAILED);

        // Find jobs assigned to failed worker
        $jobsToReassign = array_filter(
            $assignedJobs,
            fn ($item) => $item['worker_id'] === $failedWorkerId
        );

        echo "\nJob Reassignment Test:\n";
        echo "Total jobs: " . count($assignedJobs) . "\n";
        echo "Jobs to reassign: " . count($jobsToReassign) . "\n";

        // Baseline taken before reassignment: jobs the other workers already hold.
        $assignedElsewhereBefore = $this->countJobsReassignedFrom($failedWorkerId);

        // Measure job reassignment performance
        $reassignmentTime = PerformanceTestHelper::measureTime(function () use ($failedWorkerId) {
            return $this->failoverService->reassignFailedWorkerJobs($failedWorkerId);
        });

        echo "Reassignment time: {$reassignmentTime}ms\n";

        // Verify jobs were reassigned (only the delta over the baseline counts)
        $reassignedCount = $this->countJobsReassignedFrom($failedWorkerId, $assignedElsewhereBefore);
        echo "Jobs successfully reassigned: {$reassignedCount}\n";

        // Job reassignment should be fast
        $this->assertLessThan(200.0, $reassignmentTime, 'Job reassignment took too long');

        // All jobs should be reassigned
        $this->assertEquals(
            count($jobsToReassign),
            $reassignedCount,
            'Not all jobs were reassigned'
        );

        // Performance should scale reasonably with job count
        $averageTimePerJob = $reassignmentTime / max(1, count($jobsToReassign));
        $this->assertLessThan(5.0, $averageTimePerJob, 'Reassignment time per job too high');
    }

    /**
     * Distributes 200 jobs over 8 workers, marks three workers FAILED and
     * times performFullSystemRecovery(); afterwards asserts the recovery took
     * under 5 seconds, that workers are still available and that pending or
     * processing jobs exist.
     */
    public function testSystemRecoveryTime(): void
    {
        // Create a system with multiple workers and jobs
        $workers = $this->createHealthyWorkers(8);
        $this->registerWorkers($workers);

        // Distribute many jobs
        $jobs = PerformanceTestHelper::createBulkJobs(200);
        foreach ($jobs as $job) {
            $this->distributionService->distributeJob($job);
        }

        // Simulate multiple worker failures
        $failedWorkerIds = [
            $workers[0]->id->toString(),
            $workers[1]->id->toString(),
            $workers[2]->id->toString(),
        ];

        foreach ($failedWorkerIds as $workerId) {
            $this->updateWorkerStatus($workerId, WorkerStatus::FAILED);
        }

        echo "\nSystem Recovery Test:\n";
        echo "Total workers: " . count($workers) . "\n";
        echo "Failed workers: " . count($failedWorkerIds) . "\n";
        echo "Total jobs: " . count($jobs) . "\n";

        // Measure full system recovery time
        $recoveryTime = PerformanceTestHelper::measureTime(function () {
            return $this->failoverService->performFullSystemRecovery();
        });

        echo "Full recovery time: {$recoveryTime}ms\n";

        // Verify system state after recovery
        $activeWorkers = $this->workerRegistry->getAvailableWorkers();
        $pendingJobs = $this->countJobsByStatus(JobStatus::PENDING);
        $processingJobs = $this->countJobsByStatus(JobStatus::PROCESSING);

        echo "Active workers after recovery: " . count($activeWorkers) . "\n";
        echo "Pending jobs: {$pendingJobs}\n";
        echo "Processing jobs: {$processingJobs}\n";

        // Recovery should complete within reasonable time
        $this->assertLessThan(5000.0, $recoveryTime, 'System recovery took too long (>5 seconds)');

        // Should have remaining active workers
        $this->assertGreaterThan(0, count($activeWorkers), 'No workers available after recovery');

        // Jobs should be properly redistributed
        $this->assertGreaterThan(0, $pendingJobs + $processingJobs, 'No jobs available after recovery');
    }

    /**
     * Compares distribution throughput with all six mixed-capacity workers,
     * after the two 50-capacity workers are marked FAILED, and after the two
     * 20-capacity workers are marked FAILED as well.
     *
     * Worker ids are read back from the created worker objects, the same way
     * the other tests in this class address workers.
     */
    public function testPartialFailureGracefulDegradation(): void
    {
        // Create system with mixed capacity workers
        $workers = [
            PerformanceTestHelper::createTestWorker('high_capacity_1', 50),
            PerformanceTestHelper::createTestWorker('high_capacity_2', 50),
            PerformanceTestHelper::createTestWorker('medium_capacity_1', 20),
            PerformanceTestHelper::createTestWorker('medium_capacity_2', 20),
            PerformanceTestHelper::createTestWorker('low_capacity_1', 10),
            PerformanceTestHelper::createTestWorker('low_capacity_2', 10),
        ];

        $this->registerWorkers($workers);

        // Measure baseline throughput
        $baselineThroughput = $this->measureDistributionThroughput(100, 'baseline');

        // Fail high capacity workers
        $this->updateWorkerStatus($workers[0]->id->toString(), WorkerStatus::FAILED);
        $this->updateWorkerStatus($workers[1]->id->toString(), WorkerStatus::FAILED);

        $degradedThroughput = $this->measureDistributionThroughput(100, 'degraded');

        // Fail medium capacity workers too
        $this->updateWorkerStatus($workers[2]->id->toString(), WorkerStatus::FAILED);
        $this->updateWorkerStatus($workers[3]->id->toString(), WorkerStatus::FAILED);

        $severeDegradationThroughput = $this->measureDistributionThroughput(100, 'severe');

        echo "\nGraceful Degradation Results:\n";
        echo "Baseline throughput: {$baselineThroughput} jobs/sec\n";
        echo "After high-capacity failure: {$degradedThroughput} jobs/sec\n";
        echo "After medium-capacity failure: {$severeDegradationThroughput} jobs/sec\n";

        // The ratios below divide by the baseline, so fail with a clear message first.
        $this->assertGreaterThan(0, $baselineThroughput, 'Baseline throughput must be positive');

        $degradationRatio1 = $degradedThroughput / $baselineThroughput;
        $degradationRatio2 = $severeDegradationThroughput / $baselineThroughput;

        echo "First degradation ratio: " . round($degradationRatio1 * 100, 1) . "%\n";
        echo "Severe degradation ratio: " . round($degradationRatio2 * 100, 1) . "%\n";

        // System should degrade gracefully
        $this->assertGreaterThan(0.3, $degradationRatio1, 'Degradation too severe after high-capacity failure');
        $this->assertGreaterThan(0.1, $degradationRatio2, 'System should still function with low-capacity workers');

        // Should maintain some reasonable performance
        $this->assertGreaterThan(10, $severeDegradationThroughput, 'Minimum throughput too low');
    }

    /**
     * Distributes jobs continuously for 20 seconds and, once a third of the
     * duration has elapsed, marks two workers FAILED and runs
     * performFullSystemRecovery() exactly once.
     *
     * The injection is guarded by a flag so it cannot repeat on later loop
     * iterations and cannot be missed if an iteration overshoots the moment.
     */
    public function testFailoverUnderHighLoad(): void
    {
        // Create workers under high load
        $workers = $this->createHealthyWorkers(6);
        $this->registerWorkers($workers);

        // Start high load job distribution
        $jobsDistributed = 0;
        $distributionErrors = 0;
        $startTime = microtime(true);
        $testDuration = 20; // 20 seconds
        $endTime = $startTime + $testDuration;
        $failureAt = $startTime + ($testDuration / 3);
        $failureInjected = false;

        $distributionTimes = [];

        // Simulate ongoing load
        while (microtime(true) < $endTime) {
            $job = PerformanceTestHelper::createTestJob("load_job_{$jobsDistributed}");

            $result = PerformanceTestHelper::measureTimeWithResult(function () use ($job) {
                try {
                    return $this->distributionService->distributeJob($job);
                } catch (\Exception $e) {
                    // Deliberately swallowed: a failed distribution is counted as an error below.
                    return null;
                }
            });

            $distributionTimes[] = $result['time_ms'];

            if ($result['result'] !== null) {
                $jobsDistributed++;
            } else {
                $distributionErrors++;
            }

            // Simulate worker failure at 1/3 of test duration (one-shot)
            if (!$failureInjected && microtime(true) >= $failureAt) {
                $failureInjected = true;

                // Fail 2 workers during high load
                $this->updateWorkerStatus($workers[0]->id->toString(), WorkerStatus::FAILED);
                $this->updateWorkerStatus($workers[1]->id->toString(), WorkerStatus::FAILED);

                // Trigger recovery
                $this->failoverService->performFullSystemRecovery();
            }

            usleep(5000); // 5ms between jobs
        }

        $actualDuration = microtime(true) - $startTime;
        $throughput = $jobsDistributed / $actualDuration;
        $attempts = $jobsDistributed + $distributionErrors;
        $errorRate = $attempts > 0 ? $distributionErrors / $attempts * 100 : 0.0;

        $stats = PerformanceTestHelper::calculateStatistics($distributionTimes);

        echo "\nFailover Under High Load Results:\n";
        echo "Test duration: {$actualDuration} seconds\n";
        echo "Jobs distributed: {$jobsDistributed}\n";
        echo "Distribution errors: {$distributionErrors}\n";
        echo "Throughput: {$throughput} jobs/sec\n";
        echo "Error rate: {$errorRate}%\n";
        echo "Distribution performance: " . PerformanceTestHelper::formatStatistics($stats) . "\n";

        // System should maintain reasonable performance during failover
        $this->assertGreaterThan(20, $throughput, 'Throughput too low during failover');
        $this->assertLessThan(10.0, $errorRate, 'Error rate too high during failover');

        // Distribution times may be higher during failover but should recover
        $this->assertLessThan(100.0, $stats['avg'], 'Average distribution time too high during failover');
    }

    /**
     * Marks three of eight workers FAILED, refreshes each one's heartbeat and
     * times checkWorker() followed by recoverWorker() per worker; asserts the
     * 100ms average / 200ms P95 thresholds and that at least as many workers
     * are available afterwards as were created.
     */
    public function testWorkerRecoveryPerformance(): void
    {
        // Create workers, some initially failed
        $workers = $this->createHealthyWorkers(8);
        $this->registerWorkers($workers);

        // Mark some workers as failed
        $failedWorkerIds = [
            $workers[0]->id->toString(),
            $workers[1]->id->toString(),
            $workers[2]->id->toString(),
        ];

        foreach ($failedWorkerIds as $workerId) {
            $this->updateWorkerStatus($workerId, WorkerStatus::FAILED);
        }

        echo "\nWorker Recovery Performance Test:\n";
        echo "Failed workers: " . count($failedWorkerIds) . "\n";

        // Simulate workers coming back online
        $recoveryTimes = [];

        foreach ($failedWorkerIds as $workerId) {
            // Update heartbeat to simulate worker recovery
            $this->updateWorkerHeartbeat($workerId, new \DateTimeImmutable());

            $recoveryTime = PerformanceTestHelper::measureTime(function () use ($workerId) {
                // Simulate health check detecting recovery
                $this->healthCheckService->checkWorker($workerId);

                // Trigger recovery process
                return $this->failoverService->recoverWorker($workerId);
            });

            $recoveryTimes[] = $recoveryTime;
        }

        $stats = PerformanceTestHelper::calculateStatistics($recoveryTimes);

        echo "Worker recovery performance: " . PerformanceTestHelper::formatStatistics($stats) . "\n";

        // Verify workers are back online
        $availableWorkers = $this->workerRegistry->getAvailableWorkers();
        $availableCount = count($availableWorkers);

        echo "Workers available after recovery: {$availableCount}\n";

        // Recovery should be fast
        $this->assertLessThan(100.0, $stats['avg'], 'Average worker recovery time too slow');
        $this->assertLessThan(200.0, $stats['p95'], 'P95 worker recovery time too slow');

        // All workers should be recovered
        $this->assertGreaterThanOrEqual(
            count($workers),
            $availableCount,
            'Not all workers recovered successfully'
        );

        PerformanceTestHelper::assertPerformance(
            $recoveryTimes,
            100.0,
            200.0,
            'Worker recovery'
        );
    }

    /**
     * Distributes a freshly created batch of jobs and returns the rate in
     * jobs per second, rounded to one decimal place.
     *
     * Timing uses the monotonic hrtime() clock, and the elapsed time is
     * clamped to at least one nanosecond so the division cannot hit zero.
     *
     * @param int    $jobCount Number of jobs to create and distribute.
     * @param string $label    Not used by the measurement; identifies the phase at the call site.
     */
    private function measureDistributionThroughput(int $jobCount, string $label): float
    {
        $jobs = PerformanceTestHelper::createBulkJobs($jobCount);

        $startedAtNs = hrtime(true);

        foreach ($jobs as $job) {
            $this->distributionService->distributeJob($job);
        }

        $elapsedNs = max(1, hrtime(true) - $startedAtNs);
        $elapsedSeconds = $elapsedNs / 1_000_000_000;

        return round($jobCount / $elapsedSeconds, 1);
    }

    /**
     * Builds $count test workers named "healthy_worker_1" .. "healthy_worker_N",
     * each created with the value 20 and WorkerStatus::AVAILABLE.
     *
     * @return array List of the created worker objects (not yet registered).
     */
    private function createHealthyWorkers(int $count): array
    {
        $workers = [];

        for ($i = 1; $i <= $count; $i++) {
            $workers[] = PerformanceTestHelper::createTestWorker(
                "healthy_worker_{$i}",
                20,
                WorkerStatus::AVAILABLE
            );
        }

        return $workers;
    }

    /**
     * Registers every given worker with the worker registry.
     */
    private function registerWorkers(array $workers): void
    {
        foreach ($workers as $worker) {
            $this->workerRegistry->registerWorker($worker);
        }
    }

    /**
     * Overwrites a worker's last_heartbeat column directly in the database,
     * formatted as 'Y-m-d H:i:s'.
     */
    private function updateWorkerHeartbeat(string $workerId, \DateTimeImmutable $heartbeat): void
    {
        $pdo = $this->database->getConnection();
        $stmt = $pdo->prepare('UPDATE workers SET last_heartbeat = ? WHERE id = ?');
        $stmt->execute([$heartbeat->format('Y-m-d H:i:s'), $workerId]);
    }

    /**
     * Overwrites a worker's status column directly in the database.
     */
    private function updateWorkerStatus(string $workerId, WorkerStatus $status): void
    {
        $pdo = $this->database->getConnection();
        $stmt = $pdo->prepare('UPDATE workers SET status = ? WHERE id = ?');
        $stmt->execute([$status->value, $workerId]);
    }

    /**
     * Counts jobs currently assigned to any worker other than the failed one,
     * minus an optional baseline.
     *
     * Call it once before reassignment to obtain the baseline, then again
     * afterwards passing that baseline: the result is the number of jobs that
     * other workers gained in between. With the default baseline of 0 the raw
     * count is returned.
     *
     * @param string $failedWorkerId Worker whose jobs are excluded from the count.
     * @param int    $baseline       Count observed before reassignment.
     */
    private function countJobsReassignedFrom(string $failedWorkerId, int $baseline = 0): int
    {
        $pdo = $this->database->getConnection();
        $stmt = $pdo->prepare('SELECT COUNT(*) FROM jobs WHERE worker_id != ? AND worker_id IS NOT NULL');
        $stmt->execute([$failedWorkerId]);

        return (int) $stmt->fetchColumn() - $baseline;
    }

    /**
     * Counts the rows in the jobs table that have the given status.
     */
    private function countJobsByStatus(JobStatus $status): int
    {
        $pdo = $this->database->getConnection();
        $stmt = $pdo->prepare('SELECT COUNT(*) FROM jobs WHERE status = ?');
        $stmt->execute([$status->value]);

        return (int) $stmt->fetchColumn();
    }

    /**
     * Creates an in-memory SQLite database with the workers and jobs tables
     * plus their indexes, and wraps the connection in a DatabaseManager.
     */
    private function createTestDatabase(): DatabaseManager
    {
        $pdo = new \PDO('sqlite::memory:');
        $pdo->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION);

        $pdo->exec('
            CREATE TABLE workers (
                id TEXT PRIMARY KEY,
                queue_names TEXT NOT NULL,
                capacity INTEGER NOT NULL,
                status TEXT NOT NULL,
                last_heartbeat TEXT NOT NULL,
                metadata TEXT
            )
        ');

        $pdo->exec('
            CREATE TABLE jobs (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL,
                payload TEXT NOT NULL,
                queue_name TEXT NOT NULL,
                priority INTEGER NOT NULL,
                status TEXT NOT NULL,
                worker_id TEXT,
                created_at TEXT NOT NULL,
                started_at TEXT,
                completed_at TEXT,
                attempts INTEGER DEFAULT 0,
                error_message TEXT
            )
        ');

        // Performance indexes
        $pdo->exec('CREATE INDEX idx_workers_status_heartbeat ON workers(status, last_heartbeat)');
        $pdo->exec('CREATE INDEX idx_jobs_worker_status ON jobs(worker_id, status)');
        $pdo->exec('CREATE INDEX idx_jobs_status_created ON jobs(status, created_at)');

        return new DatabaseManager($pdo);
    }

    /**
     * Deletes every row from the workers and jobs tables.
     */
    private function cleanupTestData(): void
    {
        $pdo = $this->database->getConnection();
        $pdo->exec('DELETE FROM workers');
        $pdo->exec('DELETE FROM jobs');
    }
}