Files
michaelschiemer/tests/Framework/Queue/Performance/FailoverPerformanceTest.php
Michael Schiemer 5050c7d73a docs: consolidate documentation into organized structure
- Move 12 markdown files from root to docs/ subdirectories
- Organize documentation by category:
  • docs/troubleshooting/ (1 file)  - Technical troubleshooting guides
  • docs/deployment/      (4 files) - Deployment and security documentation
  • docs/guides/          (3 files) - Feature-specific guides
  • docs/planning/        (4 files) - Planning and improvement proposals

Root directory cleanup:
- Reduced from 16 to 4 markdown files in root
- Only essential project files remain:
  • CLAUDE.md (AI instructions)
  • README.md (Main project readme)
  • CLEANUP_PLAN.md (Current cleanup plan)
  • SRC_STRUCTURE_IMPROVEMENTS.md (Structure improvements)

This improves:
- Documentation discoverability
- Logical organization by purpose
- Clean root directory
- Better maintainability
2025-10-05 11:05:04 +02:00

524 lines
19 KiB
PHP

<?php
declare(strict_types=1);
namespace Tests\Framework\Queue\Performance;
use App\Framework\Database\DatabaseManager;
use App\Framework\Queue\Distribution\JobDistributionService;
use App\Framework\Queue\Failover\FailoverRecoveryService;
use App\Framework\Queue\Health\WorkerHealthCheckService;
use App\Framework\Queue\Jobs\JobStatus;
use App\Framework\Queue\Workers\WorkerRegistry;
use App\Framework\Queue\Workers\WorkerStatus;
use PHPUnit\Framework\TestCase;
/**
 * Performance-oriented tests for worker failover in the queue subsystem.
 *
 * Every test runs against a fresh in-memory SQLite database (see
 * createTestDatabase()) and prints its measurements with echo in addition
 * to asserting on them.
 */
final class FailoverPerformanceTest extends TestCase
{
/** Database wrapper around the in-memory SQLite connection built in setUp(). */
private DatabaseManager $database;
/** Registry used to register workers and to query them by status/availability. */
private WorkerRegistry $workerRegistry;
/** Service whose checkAllWorkers()/checkWorker() calls are timed by the tests. */
private WorkerHealthCheckService $healthCheckService;
/** Service providing job reassignment, worker recovery and full system recovery. */
private FailoverRecoveryService $failoverService;
/** Service used to distribute jobs to the registered workers. */
private JobDistributionService $distributionService;
/**
 * Prepares the fixture for a single test.
 *
 * Creates the in-memory database, wires the registry and the three services
 * against it, empties both tables and finally warms up the connection.
 */
protected function setUp(): void
{
    $database = $this->database = $this->createTestDatabase();
    $registry = $this->workerRegistry = new WorkerRegistry($database);

    // All three services share the same database and registry instances.
    $this->healthCheckService = new WorkerHealthCheckService($database, $registry);
    $this->failoverService = new FailoverRecoveryService($database, $registry);
    $this->distributionService = new JobDistributionService($database, $registry);

    $this->cleanupTestData();
    PerformanceTestHelper::warmupDatabase($database->getConnection());
}
/**
 * Removes all worker and job rows once a test has finished.
 */
protected function tearDown(): void
{
    $this->cleanupTestData();
}
/**
 * Times the health check while three of ten workers have a stale heartbeat.
 *
 * The first three registered workers get a heartbeat two minutes in the past.
 * checkAllWorkers() is then timed ten times; afterwards those three workers
 * must be reported with status FAILED and the timing statistics must stay
 * below the thresholds (50 average / 100 P95 — presumably milliseconds,
 * confirm against PerformanceTestHelper::measureTime()).
 */
public function testWorkerFailureDetectionTime(): void
{
    $workers = $this->createHealthyWorkers(10);
    $this->registerWorkers($workers);

    // Back-date the heartbeat of the first three workers so they look dead.
    $now = new \DateTimeImmutable();
    $staleHeartbeat = $now->modify('-2 minutes');
    $failedWorkerIds = [];
    foreach (array_slice($workers, 0, 3) as $staleWorker) {
        $staleId = $staleWorker->id->toString();
        $failedWorkerIds[] = $staleId;
        $this->updateWorkerHeartbeat($staleId, $staleHeartbeat);
    }

    // Time repeated health-check sweeps with a 10ms pause after each one.
    $iterations = 10;
    $detectionTimes = [];
    $run = 0;
    while ($run < $iterations) {
        $detectionTimes[] = PerformanceTestHelper::measureTime(
            fn () => $this->healthCheckService->checkAllWorkers()
        );
        usleep(10000);
        $run++;
    }
    $stats = PerformanceTestHelper::calculateStatistics($detectionTimes);

    // Collect the ids the registry now reports as failed.
    $detectedFailures = array_map(
        fn($failed) => $failed->id->toString(),
        $this->workerRegistry->getWorkersByStatus(WorkerStatus::FAILED)
    );

    echo "\nWorker Failure Detection Results:\n";
    echo "Workers created: " . count($workers) . "\n";
    echo "Workers failed: " . count($failedWorkerIds) . "\n";
    echo "Failures detected: " . count($detectedFailures) . "\n";
    echo "Detection performance: " . PerformanceTestHelper::formatStatistics($stats) . "\n";

    // Every back-dated worker has to show up among the detected failures.
    foreach ($failedWorkerIds as $expectedId) {
        $this->assertContains($expectedId, $detectedFailures, "Failed worker {$expectedId} not detected");
    }

    $this->assertLessThan(50.0, $stats['avg'], 'Average failure detection time too slow');
    $this->assertLessThan(100.0, $stats['p95'], 'P95 failure detection time too slow');
    PerformanceTestHelper::assertPerformance(
        $detectionTimes,
        50.0,
        100.0,
        'Worker failure detection'
    );
}
/**
 * Times the reassignment of a failed worker's jobs and verifies the count.
 *
 * Fifty jobs are distributed over five workers, the first worker is marked
 * FAILED and reassignFailedWorkerJobs() is timed for it.
 *
 * countJobsReassignedFrom() counts every job held by any other worker, which
 * includes jobs that were never on the failed worker. The number of jobs
 * actually moved is therefore derived as the difference between that count
 * before and after the reassignment.
 */
public function testJobReassignmentSpeed(): void
{
    // Create workers and assign jobs
    $workers = $this->createHealthyWorkers(5);
    $this->registerWorkers($workers);

    // Distribute jobs to workers, remembering who received each job
    $jobs = PerformanceTestHelper::createBulkJobs(50);
    $assignedJobs = [];
    foreach ($jobs as $job) {
        $assignedWorker = $this->distributionService->distributeJob($job);
        if ($assignedWorker) {
            $assignedJobs[] = [
                'job' => $job,
                'worker_id' => $assignedWorker->id->toString(),
            ];
        }
    }

    // Simulate worker failure
    $failedWorkerId = $workers[0]->id->toString();
    $this->updateWorkerStatus($failedWorkerId, WorkerStatus::FAILED);

    // Find jobs assigned to failed worker
    $jobsToReassign = array_filter(
        $assignedJobs,
        fn($item) => $item['worker_id'] === $failedWorkerId
    );

    // Baseline: jobs already sitting on the other workers before reassignment.
    $jobsOnOtherWorkersBefore = $this->countJobsReassignedFrom($failedWorkerId);

    echo "\nJob Reassignment Test:\n";
    echo "Total jobs: " . count($assignedJobs) . "\n";
    echo "Jobs to reassign: " . count($jobsToReassign) . "\n";

    // Measure job reassignment performance
    $reassignmentTime = PerformanceTestHelper::measureTime(function() use ($failedWorkerId) {
        return $this->failoverService->reassignFailedWorkerJobs($failedWorkerId);
    });
    echo "Reassignment time: {$reassignmentTime}ms\n";

    // Only the growth of the "other workers" count represents moved jobs.
    $reassignedCount = $this->countJobsReassignedFrom($failedWorkerId) - $jobsOnOtherWorkersBefore;
    echo "Jobs successfully reassigned: {$reassignedCount}\n";

    // Job reassignment should be fast
    $this->assertLessThan(200.0, $reassignmentTime, 'Job reassignment took too long');

    // All jobs should be reassigned
    $this->assertEquals(
        count($jobsToReassign),
        $reassignedCount,
        'Not all jobs were reassigned'
    );

    // Performance should scale reasonably with job count
    $averageTimePerJob = $reassignmentTime / max(1, count($jobsToReassign));
    $this->assertLessThan(5.0, $averageTimePerJob, 'Reassignment time per job too high');
}
/**
 * Times performFullSystemRecovery() after three of eight workers have failed.
 *
 * Two hundred jobs are distributed first. After recovery the test expects
 * the call to have taken less than 5000 (reported as ms), at least one
 * available worker, and at least one job that is pending or processing.
 */
public function testSystemRecoveryTime(): void
{
    $workers = $this->createHealthyWorkers(8);
    $this->registerWorkers($workers);

    // Put a sizeable amount of work into the system.
    $jobs = PerformanceTestHelper::createBulkJobs(200);
    foreach ($jobs as $bulkJob) {
        $this->distributionService->distributeJob($bulkJob);
    }

    // Resolve the ids of the first three workers, then mark each as failed.
    $failedWorkerIds = [];
    foreach ([0, 1, 2] as $position) {
        $failedWorkerIds[] = $workers[$position]->id->toString();
    }
    foreach ($failedWorkerIds as $failedId) {
        $this->updateWorkerStatus($failedId, WorkerStatus::FAILED);
    }

    echo "\nSystem Recovery Test:\n";
    echo "Total workers: " . count($workers) . "\n";
    echo "Failed workers: " . count($failedWorkerIds) . "\n";
    echo "Total jobs: " . count($jobs) . "\n";

    $recoveryTime = PerformanceTestHelper::measureTime(
        fn () => $this->failoverService->performFullSystemRecovery()
    );
    echo "Full recovery time: {$recoveryTime}ms\n";

    // Snapshot the system state once recovery has returned.
    $activeWorkers = $this->workerRegistry->getAvailableWorkers();
    $pendingJobs = $this->countJobsByStatus(JobStatus::PENDING);
    $processingJobs = $this->countJobsByStatus(JobStatus::PROCESSING);

    echo "Active workers after recovery: " . count($activeWorkers) . "\n";
    echo "Pending jobs: {$pendingJobs}\n";
    echo "Processing jobs: {$processingJobs}\n";

    $this->assertLessThan(5000.0, $recoveryTime, 'System recovery took too long (>5 seconds)');
    $this->assertGreaterThan(0, count($activeWorkers), 'No workers available after recovery');
    $this->assertGreaterThan(0, $pendingJobs + $processingJobs, 'No jobs available after recovery');
}
/**
 * Checks that distribution throughput degrades gracefully as workers fail.
 *
 * Throughput for 100 jobs is measured three times: with all six workers,
 * after the two high-capacity workers are marked FAILED, and after the two
 * medium-capacity workers are marked FAILED as well.
 *
 * Workers are failed by the id read from the worker object, as in the other
 * tests of this class, rather than by the label passed to createTestWorker():
 * an UPDATE keyed on a value that is not the stored id would match no row and
 * silently leave the worker healthy.
 */
public function testPartialFailureGracefulDegradation(): void
{
    // Create system with mixed capacity workers
    $workers = [
        PerformanceTestHelper::createTestWorker('high_capacity_1', 50),
        PerformanceTestHelper::createTestWorker('high_capacity_2', 50),
        PerformanceTestHelper::createTestWorker('medium_capacity_1', 20),
        PerformanceTestHelper::createTestWorker('medium_capacity_2', 20),
        PerformanceTestHelper::createTestWorker('low_capacity_1', 10),
        PerformanceTestHelper::createTestWorker('low_capacity_2', 10),
    ];
    $this->registerWorkers($workers);

    // Resolve the ids used for the status updates below.
    $highCapacityIds = [
        $workers[0]->id->toString(),
        $workers[1]->id->toString(),
    ];
    $mediumCapacityIds = [
        $workers[2]->id->toString(),
        $workers[3]->id->toString(),
    ];

    // Measure baseline throughput
    $baselineThroughput = $this->measureDistributionThroughput(100, 'baseline');

    // Fail high capacity workers
    foreach ($highCapacityIds as $workerId) {
        $this->updateWorkerStatus($workerId, WorkerStatus::FAILED);
    }
    $degradedThroughput = $this->measureDistributionThroughput(100, 'degraded');

    // Fail medium capacity workers too
    foreach ($mediumCapacityIds as $workerId) {
        $this->updateWorkerStatus($workerId, WorkerStatus::FAILED);
    }
    $severeDegradationThroughput = $this->measureDistributionThroughput(100, 'severe');

    echo "\nGraceful Degradation Results:\n";
    echo "Baseline throughput: {$baselineThroughput} jobs/sec\n";
    echo "After high-capacity failure: {$degradedThroughput} jobs/sec\n";
    echo "After medium-capacity failure: {$severeDegradationThroughput} jobs/sec\n";

    // The ratios below divide by the baseline, so it must be strictly positive.
    $this->assertGreaterThan(0.0, $baselineThroughput, 'Baseline throughput must be positive');

    $degradationRatio1 = $degradedThroughput / $baselineThroughput;
    $degradationRatio2 = $severeDegradationThroughput / $baselineThroughput;
    echo "First degradation ratio: " . round($degradationRatio1 * 100, 1) . "%\n";
    echo "Severe degradation ratio: " . round($degradationRatio2 * 100, 1) . "%\n";

    // System should degrade gracefully
    $this->assertGreaterThan(0.3, $degradationRatio1, 'Degradation too severe after high-capacity failure');
    $this->assertGreaterThan(0.1, $degradationRatio2, 'System should still function with low-capacity workers');

    // Should maintain some reasonable performance
    $this->assertGreaterThan(10, $severeDegradationThroughput, 'Minimum throughput too low');
}
/**
 * Distributes jobs continuously for 20 seconds and injects a failure of two
 * workers, followed by a full system recovery, one third of the way through.
 *
 * The failure is injected exactly once, guarded by a flag. A purely
 * time-window based condition would re-run the status updates and the full
 * recovery on every loop iteration that falls into the window, and could be
 * skipped altogether if a single iteration overshoots the window.
 *
 * Exceptions thrown by distributeJob() are deliberately swallowed and counted
 * as distribution errors; a null result is counted as an error as well.
 */
public function testFailoverUnderHighLoad(): void
{
    // Create workers under high load
    $workers = $this->createHealthyWorkers(6);
    $this->registerWorkers($workers);

    $jobsDistributed = 0;
    $distributionErrors = 0;
    $attempts = 0;
    $distributionTimes = [];

    $testDuration = 20; // 20 seconds
    $startTime = microtime(true);
    $endTime = $startTime + $testDuration;
    $failureAt = $startTime + ($testDuration / 3);
    $failureInjected = false;

    // Simulate ongoing load
    while (microtime(true) < $endTime) {
        // Number jobs by attempt so a failed attempt never reuses the
        // previous job's name.
        $job = PerformanceTestHelper::createTestJob("load_job_{$attempts}");
        $attempts++;

        $result = PerformanceTestHelper::measureTimeWithResult(function() use ($job) {
            try {
                return $this->distributionService->distributeJob($job);
            } catch (\Exception $e) {
                // Best effort under load: the caller counts this as an error.
                return null;
            }
        });
        $distributionTimes[] = $result['time_ms'];

        if ($result['result'] !== null) {
            $jobsDistributed++;
        } else {
            $distributionErrors++;
        }

        // Inject the worker failure once, as soon as 1/3 of the duration has passed
        if (!$failureInjected && microtime(true) >= $failureAt) {
            $failureInjected = true;

            // Fail 2 workers during high load
            $this->updateWorkerStatus($workers[0]->id->toString(), WorkerStatus::FAILED);
            $this->updateWorkerStatus($workers[1]->id->toString(), WorkerStatus::FAILED);

            // Trigger recovery
            $this->failoverService->performFullSystemRecovery();
        }

        usleep(5000); // 5ms between jobs
    }

    $actualDuration = microtime(true) - $startTime;
    $throughput = $jobsDistributed / $actualDuration;
    // $attempts equals $jobsDistributed + $distributionErrors; guard the division.
    $errorRate = $attempts > 0 ? $distributionErrors / $attempts * 100 : 0.0;
    $stats = PerformanceTestHelper::calculateStatistics($distributionTimes);

    echo "\nFailover Under High Load Results:\n";
    echo "Test duration: {$actualDuration} seconds\n";
    echo "Jobs distributed: {$jobsDistributed}\n";
    echo "Distribution errors: {$distributionErrors}\n";
    echo "Throughput: {$throughput} jobs/sec\n";
    echo "Error rate: {$errorRate}%\n";
    echo "Distribution performance: " . PerformanceTestHelper::formatStatistics($stats) . "\n";

    // Without the injected failure the test would not exercise failover at all.
    $this->assertTrue($failureInjected, 'Worker failure was never injected during the load run');

    // System should maintain reasonable performance during failover
    $this->assertGreaterThan(20, $throughput, 'Throughput too low during failover');
    $this->assertLessThan(10.0, $errorRate, 'Error rate too high during failover');

    // Distribution times may be higher during failover but should recover
    $this->assertLessThan(100.0, $stats['avg'], 'Average distribution time too high during failover');
}
/**
 * Times how long it takes to bring failed workers back.
 *
 * Three of eight workers are marked FAILED. For each of them the heartbeat
 * is refreshed to "now" and the pair checkWorker() + recoverWorker() is
 * timed. Afterwards at least as many workers as were created must be
 * available, and the timings must stay under 100 average / 200 P95.
 */
public function testWorkerRecoveryPerformance(): void
{
    $workers = $this->createHealthyWorkers(8);
    $this->registerWorkers($workers);

    // Resolve the ids of the first three workers, then mark each as failed.
    $failedWorkerIds = [];
    foreach ([0, 1, 2] as $position) {
        $failedWorkerIds[] = $workers[$position]->id->toString();
    }
    foreach ($failedWorkerIds as $failedId) {
        $this->updateWorkerStatus($failedId, WorkerStatus::FAILED);
    }

    echo "\nWorker Recovery Performance Test:\n";
    echo "Failed workers: " . count($failedWorkerIds) . "\n";

    $recoveryTimes = [];
    foreach ($failedWorkerIds as $returningId) {
        // A fresh heartbeat stands in for the worker coming back online.
        $this->updateWorkerHeartbeat($returningId, new \DateTimeImmutable());

        // The timed section covers the health check plus the recovery call.
        $recoveryTimes[] = PerformanceTestHelper::measureTime(function() use ($returningId) {
            $this->healthCheckService->checkWorker($returningId);

            return $this->failoverService->recoverWorker($returningId);
        });
    }

    $stats = PerformanceTestHelper::calculateStatistics($recoveryTimes);
    echo "Worker recovery performance: " . PerformanceTestHelper::formatStatistics($stats) . "\n";

    $availableCount = count($this->workerRegistry->getAvailableWorkers());
    echo "Workers available after recovery: {$availableCount}\n";

    $this->assertLessThan(100.0, $stats['avg'], 'Average worker recovery time too slow');
    $this->assertLessThan(200.0, $stats['p95'], 'P95 worker recovery time too slow');

    // The available count must be at least the number of workers created.
    $this->assertGreaterThanOrEqual(
        count($workers),
        $availableCount,
        'Not all workers recovered successfully'
    );

    PerformanceTestHelper::assertPerformance(
        $recoveryTimes,
        100.0,
        200.0,
        'Worker recovery'
    );
}
/**
 * Distributes a batch of freshly created jobs and returns the throughput.
 *
 * Timing uses the monotonic hrtime() clock, so a wall-clock adjustment during
 * the run cannot produce a negative duration, and the elapsed time is clamped
 * to at least one nanosecond so the division can never hit zero.
 *
 * @param int    $jobCount Number of jobs to create and distribute.
 * @param string $label    Descriptive tag supplied by callers; not used by
 *                         this method.
 *
 * @return float Jobs per second, rounded to one decimal place.
 */
private function measureDistributionThroughput(int $jobCount, string $label): float
{
    $jobs = PerformanceTestHelper::createBulkJobs($jobCount);

    $startedAt = hrtime(true);
    foreach ($jobs as $job) {
        $this->distributionService->distributeJob($job);
    }
    $elapsedNanoseconds = hrtime(true) - $startedAt;

    // Clamp to 1ns: an empty or extremely fast batch must not divide by zero.
    $elapsedSeconds = max($elapsedNanoseconds, 1) / 1_000_000_000;

    return round($jobCount / $elapsedSeconds, 1);
}
/**
 * Builds the requested number of test workers named healthy_worker_1..N.
 *
 * Each worker is created through PerformanceTestHelper::createTestWorker()
 * with the value 20 as second argument and status AVAILABLE.
 *
 * @param int $count Number of workers to create; zero or less yields an empty list.
 *
 * @return array List of created workers in creation order.
 */
private function createHealthyWorkers(int $count): array
{
    $created = [];
    $number = 1;

    while ($number <= $count) {
        $created[] = PerformanceTestHelper::createTestWorker(
            "healthy_worker_{$number}",
            20,
            WorkerStatus::AVAILABLE
        );
        $number++;
    }

    return $created;
}
/**
 * Registers every given worker with the worker registry, in array order.
 *
 * @param array $workers Workers to pass to WorkerRegistry::registerWorker().
 */
private function registerWorkers(array $workers): void
{
    $registry = $this->workerRegistry;

    foreach ($workers as $candidate) {
        $registry->registerWorker($candidate);
    }
}
/**
 * Overwrites the stored last_heartbeat of one worker directly in the database.
 *
 * @param string             $workerId  Value matched against workers.id.
 * @param \DateTimeImmutable $heartbeat Timestamp to store, written as 'Y-m-d H:i:s'
 *                                      (no timezone information is persisted).
 */
private function updateWorkerHeartbeat(string $workerId, \DateTimeImmutable $heartbeat): void
{
    $formattedHeartbeat = $heartbeat->format('Y-m-d H:i:s');

    $this->database
        ->getConnection()
        ->prepare('UPDATE workers SET last_heartbeat = ? WHERE id = ?')
        ->execute([$formattedHeartbeat, $workerId]);
}
/**
 * Overwrites the stored status of one worker directly in the database.
 *
 * @param string       $workerId Value matched against workers.id.
 * @param WorkerStatus $status   Status whose backing value is written to the row.
 */
private function updateWorkerStatus(string $workerId, WorkerStatus $status): void
{
    $this->database
        ->getConnection()
        ->prepare('UPDATE workers SET status = ? WHERE id = ?')
        ->execute([$status->value, $workerId]);
}
/**
 * Counts the jobs that currently belong to any worker other than the given one.
 *
 * The query only looks at the current worker_id column: every job with a
 * non-null worker_id different from $failedWorkerId is counted, whether or
 * not it was ever assigned to that worker.
 *
 * @param string $failedWorkerId Worker id to exclude.
 *
 * @return int Number of matching job rows.
 */
private function countJobsReassignedFrom(string $failedWorkerId): int
{
    $statement = $this->database
        ->getConnection()
        ->prepare('SELECT COUNT(*) FROM jobs WHERE worker_id != ? AND worker_id IS NOT NULL');
    $statement->execute([$failedWorkerId]);

    $rowCount = $statement->fetchColumn();

    return (int) $rowCount;
}
/**
 * Counts the job rows whose status column equals the given status.
 *
 * @param JobStatus $status Status whose backing value is compared against jobs.status.
 *
 * @return int Number of matching job rows.
 */
private function countJobsByStatus(JobStatus $status): int
{
    $statement = $this->database
        ->getConnection()
        ->prepare('SELECT COUNT(*) FROM jobs WHERE status = ?');
    $statement->execute([$status->value]);

    $rowCount = $statement->fetchColumn();

    return (int) $rowCount;
}
/**
 * Creates an in-memory SQLite database with the workers and jobs tables
 * plus three indexes, and wraps the connection in a DatabaseManager.
 *
 * The connection is switched to exception error mode before any statement
 * is executed. The SQL literals are kept unindented so that the statement
 * text stays exactly as written.
 *
 * @return DatabaseManager Manager wrapping the prepared PDO connection.
 */
private function createTestDatabase(): DatabaseManager
{
    $connection = new \PDO('sqlite::memory:');
    $connection->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION);

    // Executed in order: tables first, then the performance indexes.
    $schemaStatements = [
        '
CREATE TABLE workers (
id TEXT PRIMARY KEY,
queue_names TEXT NOT NULL,
capacity INTEGER NOT NULL,
status TEXT NOT NULL,
last_heartbeat TEXT NOT NULL,
metadata TEXT
)
',
        '
CREATE TABLE jobs (
id TEXT PRIMARY KEY,
type TEXT NOT NULL,
payload TEXT NOT NULL,
queue_name TEXT NOT NULL,
priority INTEGER NOT NULL,
status TEXT NOT NULL,
worker_id TEXT,
created_at TEXT NOT NULL,
started_at TEXT,
completed_at TEXT,
attempts INTEGER DEFAULT 0,
error_message TEXT
)
',
        'CREATE INDEX idx_workers_status_heartbeat ON workers(status, last_heartbeat)',
        'CREATE INDEX idx_jobs_worker_status ON jobs(worker_id, status)',
        'CREATE INDEX idx_jobs_status_created ON jobs(status, created_at)',
    ];

    foreach ($schemaStatements as $sql) {
        $connection->exec($sql);
    }

    return new DatabaseManager($connection);
}
/**
 * Deletes every row from the workers table and then from the jobs table.
 */
private function cleanupTestData(): void
{
    $connection = $this->database->getConnection();

    foreach (['DELETE FROM workers', 'DELETE FROM jobs'] as $deleteSql) {
        $connection->exec($deleteSql);
    }
}
}