- Move 12 markdown files from root to docs/ subdirectories - Organize documentation by category: • docs/troubleshooting/ (1 file) - Technical troubleshooting guides • docs/deployment/ (4 files) - Deployment and security documentation • docs/guides/ (3 files) - Feature-specific guides • docs/planning/ (4 files) - Planning and improvement proposals Root directory cleanup: - Reduced from 16 to 4 markdown files in root - Only essential project files remain: • CLAUDE.md (AI instructions) • README.md (Main project readme) • CLEANUP_PLAN.md (Current cleanup plan) • SRC_STRUCTURE_IMPROVEMENTS.md (Structure improvements) This improves: ✅ Documentation discoverability ✅ Logical organization by purpose ✅ Clean root directory ✅ Better maintainability
524 lines
19 KiB
PHP
524 lines
19 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace Tests\Framework\Queue\Performance;
|
|
|
|
use App\Framework\Database\DatabaseManager;
|
|
use App\Framework\Queue\Distribution\JobDistributionService;
|
|
use App\Framework\Queue\Failover\FailoverRecoveryService;
|
|
use App\Framework\Queue\Health\WorkerHealthCheckService;
|
|
use App\Framework\Queue\Jobs\JobStatus;
|
|
use App\Framework\Queue\Workers\WorkerRegistry;
|
|
use App\Framework\Queue\Workers\WorkerStatus;
|
|
use PHPUnit\Framework\TestCase;
|
|
|
|
final class FailoverPerformanceTest extends TestCase
|
|
{
|
|
private DatabaseManager $database;
|
|
private WorkerRegistry $workerRegistry;
|
|
private WorkerHealthCheckService $healthCheckService;
|
|
private FailoverRecoveryService $failoverService;
|
|
private JobDistributionService $distributionService;
|
|
|
|
/**
 * Creates a fresh in-memory database and wires the registry and the three
 * queue services under test to it before every test.
 */
protected function setUp(): void
{
    $database = $this->createTestDatabase();
    $registry = new WorkerRegistry($database);

    $this->database = $database;
    $this->workerRegistry = $registry;
    $this->healthCheckService = new WorkerHealthCheckService($database, $registry);
    $this->failoverService = new FailoverRecoveryService($database, $registry);
    $this->distributionService = new JobDistributionService($database, $registry);

    // Empty both tables, then hand the connection to the helper's warm-up routine.
    $this->cleanupTestData();
    PerformanceTestHelper::warmupDatabase($database->getConnection());
}
|
|
|
|
/**
 * Removes every worker and job row once a test has finished.
 */
protected function tearDown(): void
{
    $connection = $this->database->getConnection();

    $connection->exec('DELETE FROM workers');
    $connection->exec('DELETE FROM jobs');
}
|
|
|
|
/**
 * Registers ten workers, back-dates the heartbeat of three of them by two
 * minutes, and times repeated health checks over the whole pool.
 *
 * Asserts that each back-dated worker is reported with the FAILED status and
 * that the average / p95 check times stay under 50 / 100 (the helper's unit,
 * presumably milliseconds).
 */
public function testWorkerFailureDetectionTime(): void
{
    $workers = $this->createHealthyWorkers(10);
    $this->registerWorkers($workers);

    // The first three workers get a heartbeat that is two minutes old.
    $staleHeartbeat = (new \DateTimeImmutable())->modify('-2 minutes');
    $failedWorkerIds = [];

    foreach (array_slice($workers, 0, 3) as $staleWorker) {
        $staleId = $staleWorker->id->toString();
        $failedWorkerIds[] = $staleId;

        $this->updateWorkerHeartbeat($staleId, $staleHeartbeat);
    }

    // Time ten consecutive full health checks, pausing 10ms after each one.
    $detectionTimes = [];
    $iterations = 10;

    for ($round = 1; $round <= $iterations; $round++) {
        $detectionTimes[] = PerformanceTestHelper::measureTime(
            fn () => $this->healthCheckService->checkAllWorkers()
        );

        usleep(10000); // 10ms
    }

    $stats = PerformanceTestHelper::calculateStatistics($detectionTimes);

    // Collect the ids of every worker the registry now reports as FAILED.
    $detectedFailures = [];
    foreach ($this->workerRegistry->getWorkersByStatus(WorkerStatus::FAILED) as $failedWorker) {
        $detectedFailures[] = $failedWorker->id->toString();
    }

    echo "\nWorker Failure Detection Results:\n";
    echo "Workers created: " . count($workers) . "\n";
    echo "Workers failed: " . count($failedWorkerIds) . "\n";
    echo "Failures detected: " . count($detectedFailures) . "\n";
    echo "Detection performance: " . PerformanceTestHelper::formatStatistics($stats) . "\n";

    // Every back-dated worker must be among the detected failures.
    foreach ($failedWorkerIds as $expectedId) {
        $this->assertContains($expectedId, $detectedFailures, "Failed worker {$expectedId} not detected");
    }

    // Timing thresholds.
    $this->assertLessThan(50.0, $stats['avg'], 'Average failure detection time too slow');
    $this->assertLessThan(100.0, $stats['p95'], 'P95 failure detection time too slow');

    PerformanceTestHelper::assertPerformance(
        $detectionTimes,
        50.0,
        100.0,
        'Worker failure detection'
    );
}
|
|
|
|
/**
 * Distributes 50 jobs over five workers, marks the first worker FAILED and
 * times how long the failover service needs to reassign that worker's jobs.
 *
 * Asserts that reassignment finishes in under 200 (the helper's unit,
 * presumably milliseconds), that the number of jobs that moved to other
 * workers equals the number that belonged to the failed worker, and that the
 * per-job time stays under 5.
 */
public function testJobReassignmentSpeed(): void
{
    // Create workers and assign jobs
    $workers = $this->createHealthyWorkers(5);
    $this->registerWorkers($workers);

    // Distribute jobs to workers, remembering which worker received each job
    $jobs = PerformanceTestHelper::createBulkJobs(50);
    $assignedJobs = [];

    foreach ($jobs as $job) {
        $assignedWorker = $this->distributionService->distributeJob($job);
        if ($assignedWorker) {
            $assignedJobs[] = [
                'job' => $job,
                'worker_id' => $assignedWorker->id->toString(),
            ];
        }
    }

    // Simulate worker failure
    $failedWorkerId = $workers[0]->id->toString();
    $this->updateWorkerStatus($failedWorkerId, WorkerStatus::FAILED);

    // Find jobs assigned to failed worker
    $jobsToReassign = array_filter(
        $assignedJobs,
        fn ($item) => $item['worker_id'] === $failedWorkerId
    );

    echo "\nJob Reassignment Test:\n";
    echo "Total jobs: " . count($assignedJobs) . "\n";
    echo "Jobs to reassign: " . count($jobsToReassign) . "\n";

    // countJobsReassignedFrom() counts every job owned by any other worker,
    // which includes jobs that were on healthy workers from the start. Take a
    // baseline now so only jobs that actually move are counted afterwards.
    $jobsOnOtherWorkersBefore = $this->countJobsReassignedFrom($failedWorkerId);

    // Measure job reassignment performance
    $reassignmentTime = PerformanceTestHelper::measureTime(function () use ($failedWorkerId) {
        return $this->failoverService->reassignFailedWorkerJobs($failedWorkerId);
    });

    echo "Reassignment time: {$reassignmentTime}ms\n";

    // Verify jobs were reassigned: growth in "jobs on other workers" is the
    // number of jobs that left the failed worker.
    $reassignedCount = $this->countJobsReassignedFrom($failedWorkerId) - $jobsOnOtherWorkersBefore;
    echo "Jobs successfully reassigned: {$reassignedCount}\n";

    // Job reassignment should be fast
    $this->assertLessThan(200.0, $reassignmentTime, 'Job reassignment took too long');

    // All jobs should be reassigned
    $this->assertSame(
        count($jobsToReassign),
        $reassignedCount,
        'Not all jobs were reassigned'
    );

    // Performance should scale reasonably with job count (max() avoids a
    // division by zero when the failed worker happened to own no jobs)
    $averageTimePerJob = $reassignmentTime / max(1, count($jobsToReassign));
    $this->assertLessThan(5.0, $averageTimePerJob, 'Reassignment time per job too high');
}
|
|
|
|
/**
 * Loads eight workers with 200 jobs, marks three workers FAILED and times one
 * full system recovery run.
 *
 * Asserts that the run takes less than 5000 (the helper's unit, presumably
 * milliseconds), that at least one worker is still available afterwards, and
 * that at least one job is still pending or processing.
 */
public function testSystemRecoveryTime(): void
{
    $workers = $this->createHealthyWorkers(8);
    $this->registerWorkers($workers);

    // Push 200 jobs through the distribution service.
    $jobs = PerformanceTestHelper::createBulkJobs(200);
    foreach ($jobs as $queuedJob) {
        $this->distributionService->distributeJob($queuedJob);
    }

    // The first three workers are the ones that go down.
    $failedWorkerIds = array_map(
        static fn ($worker) => $worker->id->toString(),
        array_slice($workers, 0, 3)
    );

    foreach ($failedWorkerIds as $downId) {
        $this->updateWorkerStatus($downId, WorkerStatus::FAILED);
    }

    echo "\nSystem Recovery Test:\n";
    echo "Total workers: " . count($workers) . "\n";
    echo "Failed workers: " . count($failedWorkerIds) . "\n";
    echo "Total jobs: " . count($jobs) . "\n";

    // Time a single full recovery pass.
    $recoveryTime = PerformanceTestHelper::measureTime(
        fn () => $this->failoverService->performFullSystemRecovery()
    );

    echo "Full recovery time: {$recoveryTime}ms\n";

    // Snapshot the system state once recovery has run.
    $activeWorkers = $this->workerRegistry->getAvailableWorkers();
    $pendingJobs = $this->countJobsByStatus(JobStatus::PENDING);
    $processingJobs = $this->countJobsByStatus(JobStatus::PROCESSING);

    echo "Active workers after recovery: " . count($activeWorkers) . "\n";
    echo "Pending jobs: {$pendingJobs}\n";
    echo "Processing jobs: {$processingJobs}\n";

    $this->assertLessThan(5000.0, $recoveryTime, 'System recovery took too long (>5 seconds)');
    $this->assertGreaterThan(0, count($activeWorkers), 'No workers available after recovery');
    $this->assertGreaterThan(0, $pendingJobs + $processingJobs, 'No jobs available after recovery');
}
|
|
|
|
/**
 * Measures distribution throughput with six workers of mixed capacity, then
 * again after the two high-capacity workers are marked FAILED, and once more
 * after the two medium-capacity workers are marked FAILED as well.
 *
 * Asserts that throughput stays above 30% of baseline after the first failure,
 * above 10% of baseline after the second, and above 10 jobs/sec in absolute
 * terms at the end.
 */
public function testPartialFailureGracefulDegradation(): void
{
    // Create system with mixed capacity workers, grouped by tier so each tier
    // can be failed through the worker objects themselves.
    $highCapacityWorkers = [
        PerformanceTestHelper::createTestWorker('high_capacity_1', 50),
        PerformanceTestHelper::createTestWorker('high_capacity_2', 50),
    ];
    $mediumCapacityWorkers = [
        PerformanceTestHelper::createTestWorker('medium_capacity_1', 20),
        PerformanceTestHelper::createTestWorker('medium_capacity_2', 20),
    ];
    $lowCapacityWorkers = [
        PerformanceTestHelper::createTestWorker('low_capacity_1', 10),
        PerformanceTestHelper::createTestWorker('low_capacity_2', 10),
    ];

    $this->registerWorkers([
        ...$highCapacityWorkers,
        ...$mediumCapacityWorkers,
        ...$lowCapacityWorkers,
    ]);

    // Measure baseline throughput
    $baselineThroughput = $this->measureDistributionThroughput(100, 'baseline');

    // Fail high capacity workers. The status update matches on the workers.id
    // column, so use each worker's real id (as every other test in this class
    // does) rather than the label passed to createTestWorker().
    foreach ($highCapacityWorkers as $worker) {
        $this->updateWorkerStatus($worker->id->toString(), WorkerStatus::FAILED);
    }

    $degradedThroughput = $this->measureDistributionThroughput(100, 'degraded');

    // Fail medium capacity workers too
    foreach ($mediumCapacityWorkers as $worker) {
        $this->updateWorkerStatus($worker->id->toString(), WorkerStatus::FAILED);
    }

    $severeDegradationThroughput = $this->measureDistributionThroughput(100, 'severe');

    echo "\nGraceful Degradation Results:\n";
    echo "Baseline throughput: {$baselineThroughput} jobs/sec\n";
    echo "After high-capacity failure: {$degradedThroughput} jobs/sec\n";
    echo "After medium-capacity failure: {$severeDegradationThroughput} jobs/sec\n";

    $degradationRatio1 = $degradedThroughput / $baselineThroughput;
    $degradationRatio2 = $severeDegradationThroughput / $baselineThroughput;

    echo "First degradation ratio: " . round($degradationRatio1 * 100, 1) . "%\n";
    echo "Severe degradation ratio: " . round($degradationRatio2 * 100, 1) . "%\n";

    // System should degrade gracefully
    $this->assertGreaterThan(0.3, $degradationRatio1, 'Degradation too severe after high-capacity failure');
    $this->assertGreaterThan(0.1, $degradationRatio2, 'System should still function with low-capacity workers');

    // Should maintain some reasonable performance
    $this->assertGreaterThan(10, $severeDegradationThroughput, 'Minimum throughput too low');
}
|
|
|
|
/**
 * Distributes jobs continuously for 20 seconds across six workers, marks two
 * of them FAILED one third of the way through, runs a full system recovery,
 * and keeps distributing.
 *
 * Asserts that the failure was injected, that throughput stays above
 * 20 jobs/sec, that the error rate stays below 10%, and that the average
 * distribution time stays below 100 (the helper's `time_ms` value).
 */
public function testFailoverUnderHighLoad(): void
{
    // Create workers under high load
    $workers = $this->createHealthyWorkers(6);
    $this->registerWorkers($workers);

    // Start high load job distribution
    $jobsDistributed = 0;
    $distributionErrors = 0;
    $startTime = microtime(true);
    $testDuration = 20; // 20 seconds
    $endTime = $startTime + $testDuration;

    // The failure is injected exactly once, the first time the loop passes
    // this instant. A time-window check would fire on every iteration inside
    // the window and could be skipped entirely by one slow iteration.
    $failureAt = $startTime + ($testDuration / 3);
    $failureInjected = false;

    $distributionTimes = [];

    // Simulate ongoing load
    while (microtime(true) < $endTime) {
        $job = PerformanceTestHelper::createTestJob("load_job_{$jobsDistributed}");

        $result = PerformanceTestHelper::measureTimeWithResult(function () use ($job) {
            try {
                return $this->distributionService->distributeJob($job);
            } catch (\Exception $e) {
                // A thrown exception is counted as a distribution error below.
                return null;
            }
        });

        $distributionTimes[] = $result['time_ms'];

        if ($result['result'] !== null) {
            $jobsDistributed++;
        } else {
            $distributionErrors++;
        }

        // Simulate worker failure at 1/3 of test duration
        if (!$failureInjected && microtime(true) >= $failureAt) {
            $failureInjected = true;

            // Fail 2 workers during high load
            $this->updateWorkerStatus($workers[0]->id->toString(), WorkerStatus::FAILED);
            $this->updateWorkerStatus($workers[1]->id->toString(), WorkerStatus::FAILED);

            // Trigger recovery
            $this->failoverService->performFullSystemRecovery();
        }

        usleep(5000); // 5ms between jobs
    }

    $actualDuration = microtime(true) - $startTime;
    $throughput = $jobsDistributed / $actualDuration;

    // Guard the ratio so an empty run reports 0% instead of dividing by zero.
    $attempts = $jobsDistributed + $distributionErrors;
    $errorRate = $attempts > 0 ? $distributionErrors / $attempts * 100 : 0.0;

    $stats = PerformanceTestHelper::calculateStatistics($distributionTimes);

    echo "\nFailover Under High Load Results:\n";
    echo "Test duration: {$actualDuration} seconds\n";
    echo "Jobs distributed: {$jobsDistributed}\n";
    echo "Distribution errors: {$distributionErrors}\n";
    echo "Throughput: {$throughput} jobs/sec\n";
    echo "Error rate: {$errorRate}%\n";
    echo "Distribution performance: " . PerformanceTestHelper::formatStatistics($stats) . "\n";

    // Without the injected failure the remaining assertions would only
    // describe a healthy system.
    $this->assertTrue($failureInjected, 'Worker failure was never injected during the load run');

    // System should maintain reasonable performance during failover
    $this->assertGreaterThan(20, $throughput, 'Throughput too low during failover');
    $this->assertLessThan(10.0, $errorRate, 'Error rate too high during failover');

    // Distribution times may be higher during failover but should recover
    $this->assertLessThan(100.0, $stats['avg'], 'Average distribution time too high during failover');
}
|
|
|
|
/**
 * Marks three of eight workers FAILED, then for each one refreshes its
 * heartbeat and times a health check followed by the failover service's
 * worker recovery.
 *
 * Asserts that the average / p95 recovery times stay under 100 / 200 (the
 * helper's unit, presumably milliseconds) and that the number of available
 * workers afterwards is at least the number originally created.
 */
public function testWorkerRecoveryPerformance(): void
{
    $workers = $this->createHealthyWorkers(8);
    $this->registerWorkers($workers);

    // Take the first three workers offline.
    $failedWorkerIds = array_map(
        static fn ($worker) => $worker->id->toString(),
        array_slice($workers, 0, 3)
    );

    foreach ($failedWorkerIds as $offlineId) {
        $this->updateWorkerStatus($offlineId, WorkerStatus::FAILED);
    }

    echo "\nWorker Recovery Performance Test:\n";
    echo "Failed workers: " . count($failedWorkerIds) . "\n";

    // Bring each worker back one at a time and time the recovery path.
    $recoveryTimes = [];

    foreach ($failedWorkerIds as $returningId) {
        // A fresh heartbeat stands in for the worker coming back online.
        $this->updateWorkerHeartbeat($returningId, new \DateTimeImmutable());

        $recoveryTimes[] = PerformanceTestHelper::measureTime(function () use ($returningId) {
            // Health check first, then the recovery call.
            $this->healthCheckService->checkWorker($returningId);

            return $this->failoverService->recoverWorker($returningId);
        });
    }

    $stats = PerformanceTestHelper::calculateStatistics($recoveryTimes);

    echo "Worker recovery performance: " . PerformanceTestHelper::formatStatistics($stats) . "\n";

    // Count the workers the registry reports as available again.
    $availableCount = count($this->workerRegistry->getAvailableWorkers());

    echo "Workers available after recovery: {$availableCount}\n";

    $this->assertLessThan(100.0, $stats['avg'], 'Average worker recovery time too slow');
    $this->assertLessThan(200.0, $stats['p95'], 'P95 worker recovery time too slow');

    // Available count must be greater than or equal to the number of workers created.
    $this->assertGreaterThanOrEqual(
        count($workers),
        $availableCount,
        'Not all workers recovered successfully'
    );

    PerformanceTestHelper::assertPerformance(
        $recoveryTimes,
        100.0,
        200.0,
        'Worker recovery'
    );
}
|
|
|
|
/**
 * Distributes a freshly created batch of jobs and reports the achieved rate.
 *
 * @param int    $jobCount Number of jobs to create and distribute.
 * @param string $label    Name of the measurement phase; currently unused by
 *                         this method and kept for the callers' readability.
 *
 * @return float Jobs distributed per second, rounded to one decimal place.
 */
private function measureDistributionThroughput(int $jobCount, string $label): float
{
    $jobs = PerformanceTestHelper::createBulkJobs($jobCount);

    // hrtime() is a monotonic, nanosecond-resolution clock, so the measurement
    // is immune to wall-clock adjustments that can skew microtime().
    $startedAt = hrtime(true);

    foreach ($jobs as $job) {
        $this->distributionService->distributeJob($job);
    }

    // Floor the elapsed time at 1ns so an empty or instantaneous batch cannot
    // trigger a DivisionByZeroError.
    $elapsedNanoseconds = max(1, hrtime(true) - $startedAt);
    $elapsedSeconds = $elapsedNanoseconds / 1e9;

    return round($jobCount / $elapsedSeconds, 1);
}
|
|
|
|
/**
 * Builds test workers labelled "healthy_worker_1" through "healthy_worker_N",
 * each created with the value 20 and the AVAILABLE status.
 *
 * @param int $count Number of workers to build; zero or less yields an empty list.
 *
 * @return array Workers in creation order, as returned by the test helper.
 */
private function createHealthyWorkers(int $count): array
{
    $pool = [];
    $number = 1;

    while ($number <= $count) {
        $pool[] = PerformanceTestHelper::createTestWorker(
            "healthy_worker_{$number}",
            20,
            WorkerStatus::AVAILABLE
        );
        $number++;
    }

    return $pool;
}
|
|
|
|
/**
 * Registers each given worker with the worker registry, in array order.
 *
 * @param array $workers Worker objects accepted by WorkerRegistry::registerWorker().
 */
private function registerWorkers(array $workers): void
{
    $registry = $this->workerRegistry;

    foreach ($workers as $candidate) {
        $registry->registerWorker($candidate);
    }
}
|
|
|
|
/**
 * Overwrites a worker's last_heartbeat column directly in the database.
 *
 * @param string             $workerId  Value matched against workers.id.
 * @param \DateTimeImmutable $heartbeat Timestamp to store, written as "Y-m-d H:i:s".
 */
private function updateWorkerHeartbeat(string $workerId, \DateTimeImmutable $heartbeat): void
{
    $formattedHeartbeat = $heartbeat->format('Y-m-d H:i:s');

    $statement = $this->database
        ->getConnection()
        ->prepare('UPDATE workers SET last_heartbeat = ? WHERE id = ?');

    $statement->execute([$formattedHeartbeat, $workerId]);
}
|
|
|
|
/**
 * Overwrites a worker's status column directly in the database.
 *
 * @param string       $workerId Value matched against workers.id.
 * @param WorkerStatus $status   Status whose backing value is stored.
 */
private function updateWorkerStatus(string $workerId, WorkerStatus $status): void
{
    $statement = $this->database
        ->getConnection()
        ->prepare('UPDATE workers SET status = ? WHERE id = ?');

    $statement->execute([$status->value, $workerId]);
}
|
|
|
|
/**
 * Counts the jobs whose worker_id is set and differs from the given worker.
 *
 * The query has no record of a job's previous owner, so the result covers
 * every job currently held by any other worker, not only jobs that moved.
 *
 * @param string $failedWorkerId Worker id to exclude from the count.
 *
 * @return int Number of matching rows in the jobs table.
 */
private function countJobsReassignedFrom(string $failedWorkerId): int
{
    $statement = $this->database
        ->getConnection()
        ->prepare('SELECT COUNT(*) FROM jobs WHERE worker_id != ? AND worker_id IS NOT NULL');

    $statement->execute([$failedWorkerId]);

    $total = $statement->fetchColumn();

    return (int) $total;
}
|
|
|
|
/**
 * Counts the rows in the jobs table that carry the given status.
 *
 * @param JobStatus $status Status whose backing value is matched against jobs.status.
 *
 * @return int Number of matching jobs.
 */
private function countJobsByStatus(JobStatus $status): int
{
    $statement = $this->database
        ->getConnection()
        ->prepare('SELECT COUNT(*) FROM jobs WHERE status = ?');

    $statement->execute([$status->value]);

    $total = $statement->fetchColumn();

    return (int) $total;
}
|
|
|
|
/**
 * Creates an in-memory SQLite database containing the workers and jobs tables
 * plus three indexes, and wraps the connection in a DatabaseManager.
 *
 * The connection is configured to throw exceptions on errors.
 *
 * @return DatabaseManager Manager built around the new PDO connection.
 */
private function createTestDatabase(): DatabaseManager
{
    $connection = new \PDO('sqlite::memory:');
    $connection->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION);

    // Executed in order: both tables first, then the performance indexes.
    $schemaStatements = [
        '
        CREATE TABLE workers (
            id TEXT PRIMARY KEY,
            queue_names TEXT NOT NULL,
            capacity INTEGER NOT NULL,
            status TEXT NOT NULL,
            last_heartbeat TEXT NOT NULL,
            metadata TEXT
        )
    ',
        '
        CREATE TABLE jobs (
            id TEXT PRIMARY KEY,
            type TEXT NOT NULL,
            payload TEXT NOT NULL,
            queue_name TEXT NOT NULL,
            priority INTEGER NOT NULL,
            status TEXT NOT NULL,
            worker_id TEXT,
            created_at TEXT NOT NULL,
            started_at TEXT,
            completed_at TEXT,
            attempts INTEGER DEFAULT 0,
            error_message TEXT
        )
    ',
        'CREATE INDEX idx_workers_status_heartbeat ON workers(status, last_heartbeat)',
        'CREATE INDEX idx_jobs_worker_status ON jobs(worker_id, status)',
        'CREATE INDEX idx_jobs_status_created ON jobs(status, created_at)',
    ];

    foreach ($schemaStatements as $sql) {
        $connection->exec($sql);
    }

    return new DatabaseManager($connection);
}
|
|
|
|
/**
 * Deletes every row from the workers table and then from the jobs table.
 */
private function cleanupTestData(): void
{
    $connection = $this->database->getConnection();

    foreach (['workers', 'jobs'] as $table) {
        $connection->exec("DELETE FROM {$table}");
    }
}
|
|
} |