Files
michaelschiemer/tests/debug/test-ab-testing.php
Michael Schiemer 3b623e7afb feat(Deployment): Integrate Ansible deployment via PHP deployment pipeline
- Create AnsibleDeployStage using framework's Process module for secure command execution
- Integrate AnsibleDeployStage into DeploymentPipelineCommands for production deployments
- Add force_deploy flag support in Ansible playbook to override stale locks
- Use PHP deployment module as orchestrator (php console.php deploy:production)
- Fix ErrorAggregationInitializer to use Environment class instead of $_ENV superglobal

Architecture:
- BuildStage → AnsibleDeployStage → HealthCheckStage for production
- Process module provides timeout, error handling, and output capture
- Ansible playbook supports rollback via rollback-git-based.yml
- Zero-downtime deployments with health checks
2025-10-26 14:08:07 +01:00

323 lines
13 KiB
PHP

<?php
declare(strict_types=1);
/**
* A/B Testing System Workflow Tests
*
* Demonstrates A/B testing capabilities:
* 1. Traffic splitting between model versions
* 2. Model performance comparison
* 3. Statistical significance testing
* 4. Winner determination and recommendations
* 5. Gradual rollout planning
*/
require_once __DIR__ . '/../../vendor/autoload.php';
use App\Framework\MachineLearning\ModelManagement\ABTestingService;
use App\Framework\MachineLearning\ModelManagement\InMemoryModelRegistry;
use App\Framework\MachineLearning\ModelManagement\ModelPerformanceMonitor;
use App\Framework\MachineLearning\ModelManagement\InMemoryPerformanceStorage;
use App\Framework\MachineLearning\ModelManagement\NullAlertingService;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ABTestConfig;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelMetadata;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelType;
use App\Framework\Core\ValueObjects\Version;
use App\Framework\Core\ValueObjects\Timestamp;
use App\Framework\Core\ValueObjects\Duration;
use App\Framework\Random\SecureRandomGenerator;
echo "=== A/B Testing System Workflow Tests ===\n\n";
try {
// ========================================================================
// Setup: Initialize infrastructure
// ========================================================================
// All collaborators are in-memory/null implementations. The checkmark lines
// are emitted only after every constructor below has returned.
echo "1. Initializing A/B Testing Infrastructure...\n";

// Shared by the performance monitor and the A/B testing service.
$registry = new InMemoryModelRegistry();
// Receives the simulated prediction records later in the script.
$storage = new InMemoryPerformanceStorage();

// Built to exercise the wiring; not referenced again in the visible script.
$performanceMonitor = new ModelPerformanceMonitor(
    $registry,
    $storage,
    new NullAlertingService(),
);

$abTesting = new ABTestingService(new SecureRandomGenerator(), $registry);

echo " ✓ ModelRegistry created\n",
    " ✓ PerformanceMonitor created\n",
    " ✓ ABTestingService created\n\n";
// ========================================================================
// Setup: Register two model versions with different performance
// ========================================================================
echo "2. Registering two model versions with different performance...\n";

// These three values are reused by every later section of the script.
$modelName = 'fraud-detector';
$versionA = Version::fromString('1.0.0');
$versionB = Version::fromString('2.0.0');

// Version A: the current production model, used as the baseline.
$registry->register(new ModelMetadata(
    modelName: $modelName,
    modelType: ModelType::SUPERVISED,
    version: $versionA,
    configuration: [
        'threshold' => 0.7,
        'algorithm' => 'random_forest',
        'features' => 25,
    ],
    createdAt: Timestamp::now(),
));

// Version B: the new candidate model that the tests compare against A.
$registry->register(new ModelMetadata(
    modelName: $modelName,
    modelType: ModelType::SUPERVISED,
    version: $versionB,
    configuration: [
        'threshold' => 0.65,
        'algorithm' => 'xgboost',
        'features' => 30,
    ],
    createdAt: Timestamp::now(),
));

echo " ✓ Registered version A (1.0.0) - Current production model\n",
    " ✓ Registered version B (2.0.0) - New candidate model\n\n";
// ========================================================================
// Setup: Simulate performance data for both versions
// ========================================================================
echo "3. Simulating performance data...\n";

$timestamp = Timestamp::now();

// Stores one record per simulated prediction for the given model version.
// 'is_correct' is derived by strictly comparing prediction and actual.
$recordPredictions = static function (Version $version, array $samples) use ($storage, $modelName, $timestamp): void {
    foreach ($samples as $sample) {
        $storage->storePrediction([
            'model_name' => $modelName,
            'version' => $version->toString(),
            'prediction' => $sample['prediction'],
            'actual' => $sample['actual'],
            'confidence' => $sample['confidence'],
            'features' => [],
            'timestamp' => $timestamp->toDateTime(),
            'is_correct' => $sample['prediction'] === $sample['actual'],
        ]);
    }
};

// Version A (baseline): 85 matching predictions followed by 15 mismatches.
$recordPredictions($versionA, array_merge(
    array_fill(0, 85, ['confidence' => 0.8, 'actual' => true, 'prediction' => true]),
    array_fill(0, 15, ['confidence' => 0.75, 'actual' => true, 'prediction' => false]),
));

// Version B (candidate): 92 matching predictions followed by 8 mismatches.
$recordPredictions($versionB, array_merge(
    array_fill(0, 92, ['confidence' => 0.85, 'actual' => true, 'prediction' => true]),
    array_fill(0, 8, ['confidence' => 0.7, 'actual' => true, 'prediction' => false]),
));

echo " ✓ Version A: 100 predictions, 85% accuracy\n",
    " ✓ Version B: 100 predictions, 92% accuracy\n\n";
// ========================================================================
// Test 1: Balanced 50/50 A/B Test
// ========================================================================
echo "4. Testing balanced 50/50 traffic split...\n";

// This config is reused by the comparison and automated-test sections.
$balancedConfig = new ABTestConfig(
    modelName: $modelName,
    versionA: $versionA,
    versionB: $versionB,
    trafficSplitA: 0.5,
    primaryMetric: 'accuracy',
);

echo " → Configuration:\n";
echo " {$balancedConfig->getDescription()}\n";

// Ask the service for 1000 routing decisions and tally who served each one.
// Any selection that does not equal version A is counted for version B.
$hitsA = 0;
$hitsB = 0;
for ($request = 1; $request <= 1000; $request++) {
    if ($abTesting->selectVersion($balancedConfig)->equals($versionA)) {
        $hitsA++;
    } else {
        $hitsB++;
    }
}

$shareA = sprintf("%.1f%%", ($hitsA / 1000) * 100);
$shareB = sprintf("%.1f%%", ($hitsB / 1000) * 100);

echo " → Traffic Routing (1000 requests):\n";
echo " Version A: {$hitsA} requests ({$shareA})\n";
echo " Version B: {$hitsB} requests ({$shareB})\n\n";
// ========================================================================
// Test 2: Model Performance Comparison
// ========================================================================
echo "5. Comparing model performance...\n";

// Kept in $comparisonResult because the closing summary reads it again.
$comparisonResult = $abTesting->runTest($balancedConfig);

echo " → Comparison Results:\n";
echo " Winner: {$comparisonResult->winner}\n";

// Each label is computed right before its line is printed, so output that
// precedes a failing accessor has already been emitted.
$significanceLabel = $comparisonResult->isStatisticallySignificant ? 'YES' : 'NO';
echo " Statistically Significant: {$significanceLabel}\n";

$improvementLabel = sprintf("%+.2f%%", $comparisonResult->getPrimaryMetricImprovementPercent());
echo " Primary Metric Improvement: {$improvementLabel}\n";

echo " → Summary:\n";
echo " {$comparisonResult->getSummary()}\n";
echo " → Recommendation:\n";
echo " {$comparisonResult->recommendation}\n\n";
// ========================================================================
// Test 3: Gradual Rollout Configuration
// ========================================================================
echo "6. Testing gradual rollout configuration...\n";

// The traffic ratio is chosen inside ABTestConfig::forGradualRollout();
// this script only observes the resulting distribution.
$gradualConfig = ABTestConfig::forGradualRollout(
    modelName: $modelName,
    currentVersion: $versionA,
    newVersion: $versionB,
);

echo " → Configuration:\n";
echo " {$gradualConfig->getDescription()}\n";

// Tally 1000 routing decisions; anything that is not version A counts as new.
$servedByCurrent = 0;
$servedByNew = 0;
for ($request = 1; $request <= 1000; $request++) {
    if ($abTesting->selectVersion($gradualConfig)->equals($versionA)) {
        $servedByCurrent++;
    } else {
        $servedByNew++;
    }
}

$shareCurrent = sprintf("%.1f%%", ($servedByCurrent / 1000) * 100);
$shareNew = sprintf("%.1f%%", ($servedByNew / 1000) * 100);

echo " → Traffic Routing (1000 requests):\n";
echo " Version A (current): {$servedByCurrent} requests ({$shareCurrent})\n";
echo " Version B (new): {$servedByNew} requests ({$shareNew})\n\n";
// ========================================================================
// Test 4: Champion/Challenger Test
// ========================================================================
echo "7. Testing champion/challenger configuration...\n";

// Version A plays the champion, version B the challenger. The split itself
// is decided by ABTestConfig::forChallenger().
$challengerConfig = ABTestConfig::forChallenger(
    modelName: $modelName,
    champion: $versionA,
    challenger: $versionB,
);

echo " → Configuration:\n";
echo " {$challengerConfig->getDescription()}\n";

// Tally 1000 routing decisions; any non-champion pick counts as challenger.
$championHits = 0;
$challengerHits = 0;
for ($request = 1; $request <= 1000; $request++) {
    if ($abTesting->selectVersion($challengerConfig)->equals($versionA)) {
        $championHits++;
    } else {
        $challengerHits++;
    }
}

$championShare = sprintf("%.1f%%", ($championHits / 1000) * 100);
$challengerShare = sprintf("%.1f%%", ($challengerHits / 1000) * 100);

echo " → Traffic Routing (1000 requests):\n";
echo " Champion (A): {$championHits} requests ({$championShare})\n";
echo " Challenger (B): {$challengerHits} requests ({$challengerShare})\n\n";
// ========================================================================
// Test 5: Automated Test Execution
// ========================================================================
echo "8. Running automated A/B test...\n";

// Second runTest() call on the balanced config; this time the decision
// helpers and the per-metric differences are printed.
$automatedResult = $abTesting->runTest($balancedConfig);

echo " → Automated Test Results:\n";
echo " Winner: {$automatedResult->winner}\n";

$deployLabel = $automatedResult->shouldDeployVersionB() ? 'YES' : 'NO';
echo " Should Deploy Version B: {$deployLabel}\n";

$inconclusiveLabel = $automatedResult->isInconclusive() ? 'YES' : 'NO';
echo " Is Inconclusive: {$inconclusiveLabel}\n";

echo " → Metrics Difference:\n";
foreach ($automatedResult->metricsDifference as $metricName => $delta) {
    // Always show an explicit sign and four decimals.
    $signedDelta = sprintf("%+.4f", $delta);
    echo " {$metricName}: {$signedDelta}\n";
}
echo "\n";
// ========================================================================
// Test 6: Rollout Planning
// ========================================================================
// Prints one line per stage of the plan returned by generateRolloutPlan().
// NOTE(review): each plan value is treated as the fraction of traffic sent to
// version B (it is multiplied by 100 below) — confirm against the service.
echo "9. Generating rollout plan...\n";
$rolloutPlan = $abTesting->generateRolloutPlan(steps: 5);
echo " → Rollout Plan (5 stages):\n";
foreach ($rolloutPlan as $step => $trafficSplitB) {
    // Round before casting: a bare (int) cast truncates, and float products
    // such as 0.29 * 100 evaluate to 28.999999999999996, which would be
    // reported as 28% / 72% instead of 29% / 71%.
    $percentB = (int) round($trafficSplitB * 100);
    // Derived from B so the two printed percentages always add up to 100.
    $percentA = 100 - $percentB;
    echo " Stage {$step}: Version A {$percentA}%, Version B {$percentB}%\n";
}
echo "\n";
// ========================================================================
// Test 7: Sample Size Calculation
// ========================================================================
echo "10. Calculating required sample size...\n";

// 0.95 and 0.05 are the 95% confidence level and 5% margin of error that
// the report lines below spell out as fixed text.
$samplesPerVersion = $abTesting->calculateRequiredSampleSize(
    confidenceLevel: 0.95,
    marginOfError: 0.05,
);

echo " → Sample Size Requirements:\n",
    " Confidence Level: 95%\n",
    " Margin of Error: 5%\n",
    " Required Samples per Version: {$samplesPerVersion}\n\n";
// ========================================================================
// Test Summary
// ========================================================================
// Reaching this point only means no step above threw. The "Working" lines
// and the approximate split figures are fixed text, not checked results.
echo "=== Test Summary ===\n",
    "✓ Balanced 50/50 A/B Test: Working\n",
    "✓ Model Performance Comparison: Working\n",
    "✓ Gradual Rollout Configuration: Working\n",
    "✓ Champion/Challenger Test: Working\n",
    "✓ Automated Test Execution: Working\n",
    "✓ Rollout Planning: Working\n",
    "✓ Sample Size Calculation: Working\n\n";

echo "Key Findings:\n";

// These three lines come from the comparison run in step 5.
$improvementLabel = sprintf("%.1f%%", $comparisonResult->getPrimaryMetricImprovementPercent());
echo " - Version B shows {$improvementLabel} improvement over Version A\n";

$significanceLabel = $comparisonResult->isStatisticallySignificant ? 'YES' : 'NO';
echo " - Winner: {$comparisonResult->winner} (statistically significant: {$significanceLabel})\n";

echo " - Recommendation: {$comparisonResult->recommendation}\n";

echo " - Balanced 50/50 split achieved ~50% traffic to each version\n",
    " - Gradual rollout achieved ~90/10 split for safe deployment\n",
    " - Champion/challenger achieved ~80/20 split for validation\n",
    " - Automated test execution and rollout planning functional\n\n";

echo "=== A/B Testing Workflows PASSED ===\n";
} catch (\Throwable $e) {
// Report what was thrown and where, then stop with exit status 1.
$failureLocation = $e->getFile() . ":" . $e->getLine();
echo "\n!!! TEST FAILED !!!\n",
    "Error: {$e->getMessage()}\n",
    "File: {$failureLocation}\n",
    "\nStack trace:\n{$e->getTraceAsString()}\n";
exit(1);
}