<?php

declare(strict_types=1);

/**
 * A/B Testing System Workflow Tests
 *
 * Demonstrates A/B testing capabilities:
 * 1. Traffic splitting between model versions
 * 2. Model performance comparison
 * 3. Statistical significance testing
 * 4. Winner determination and recommendations
 * 5. Gradual rollout planning
 */

require_once __DIR__ . '/../../vendor/autoload.php';

use App\Framework\MachineLearning\ModelManagement\ABTestingService;
use App\Framework\MachineLearning\ModelManagement\InMemoryModelRegistry;
use App\Framework\MachineLearning\ModelManagement\ModelPerformanceMonitor;
use App\Framework\MachineLearning\ModelManagement\InMemoryPerformanceStorage;
use App\Framework\MachineLearning\ModelManagement\NullAlertingService;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ABTestConfig;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelMetadata;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelType;
use App\Framework\Core\ValueObjects\Version;
use App\Framework\Core\ValueObjects\Timestamp;
use App\Framework\Core\ValueObjects\Duration;
use App\Framework\Random\SecureRandomGenerator;

echo "=== A/B Testing System Workflow Tests ===\n\n";

try {
    // ========================================================================
    // Setup: Initialize infrastructure
    // ========================================================================
    echo "1. Initializing A/B Testing Infrastructure...\n";

    $registry = new InMemoryModelRegistry();
    $storage = new InMemoryPerformanceStorage();
    $alerting = new NullAlertingService();
    $performanceMonitor = new ModelPerformanceMonitor($registry, $storage, $alerting);
    $random = new SecureRandomGenerator();
    $abTesting = new ABTestingService($random, $registry);

    echo " ✓ ModelRegistry created\n";
    echo " ✓ PerformanceMonitor created\n";
    echo " ✓ ABTestingService created\n\n";

    // ========================================================================
    // Setup: Register two model versions with different performance
    // ========================================================================
    echo "2. Registering two model versions with different performance...\n";

    $modelName = 'fraud-detector';
    $versionA = Version::fromString('1.0.0');
    $versionB = Version::fromString('2.0.0');

    // Version A: Current production model (baseline)
    $metadataA = new ModelMetadata(
        modelName: $modelName,
        modelType: ModelType::SUPERVISED,
        version: $versionA,
        configuration: [
            'threshold' => 0.7,
            'algorithm' => 'random_forest',
            'features' => 25
        ],
        createdAt: Timestamp::now()
    );

    $registry->register($metadataA);

    // Version B: New candidate model (improved)
    $metadataB = new ModelMetadata(
        modelName: $modelName,
        modelType: ModelType::SUPERVISED,
        version: $versionB,
        configuration: [
            'threshold' => 0.65,
            'algorithm' => 'xgboost',
            'features' => 30
        ],
        createdAt: Timestamp::now()
    );

    $registry->register($metadataB);

    echo " ✓ Registered version A (1.0.0) - Current production model\n";
    echo " ✓ Registered version B (2.0.0) - New candidate model\n\n";

    // ========================================================================
    // Setup: Simulate performance data for both versions
    // ========================================================================
    echo "3. Simulating performance data...\n";

    $timestamp = Timestamp::now();

    // Version A: 85% accuracy (baseline)
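    // Note: every synthetic record uses actual=true, so accuracy is driven entirely by how
    // many predictions match the label; no negative-class examples are simulated here.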
    $predictionsA = [
        // Correct predictions (85%)
        ...array_fill(0, 85, ['confidence' => 0.8, 'actual' => true, 'prediction' => true]),
        // Incorrect predictions (15%)
        ...array_fill(0, 15, ['confidence' => 0.75, 'actual' => true, 'prediction' => false]),
    ];

    foreach ($predictionsA as $pred) {
        $storage->storePrediction([
            'model_name' => $modelName,
            'version' => $versionA->toString(),
            'prediction' => $pred['prediction'],
            'actual' => $pred['actual'],
            'confidence' => $pred['confidence'],
            'features' => [],
            'timestamp' => $timestamp->toDateTime(),
            'is_correct' => $pred['prediction'] === $pred['actual'],
        ]);
    }

    // Version B: 92% accuracy (improved)
    $predictionsB = [
        // Correct predictions (92%)
        ...array_fill(0, 92, ['confidence' => 0.85, 'actual' => true, 'prediction' => true]),
        // Incorrect predictions (8%)
        ...array_fill(0, 8, ['confidence' => 0.7, 'actual' => true, 'prediction' => false]),
    ];

    foreach ($predictionsB as $pred) {
        $storage->storePrediction([
            'model_name' => $modelName,
            'version' => $versionB->toString(),
            'prediction' => $pred['prediction'],
            'actual' => $pred['actual'],
            'confidence' => $pred['confidence'],
            'features' => [],
            'timestamp' => $timestamp->toDateTime(),
            'is_correct' => $pred['prediction'] === $pred['actual'],
        ]);
    }

    echo " ✓ Version A: 100 predictions, 85% accuracy\n";
    echo " ✓ Version B: 100 predictions, 92% accuracy\n\n";

    // ========================================================================
    // Test 1: Balanced 50/50 A/B Test
    // ========================================================================
    echo "4. Testing balanced 50/50 traffic split...\n";
    $balancedConfig = new ABTestConfig(
        modelName: $modelName,
        versionA: $versionA,
        versionB: $versionB,
        trafficSplitA: 0.5,
        primaryMetric: 'accuracy'
    );

    echo " → Configuration:\n";
    echo " {$balancedConfig->getDescription()}\n";

    // Simulate 1000 routing decisions
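    // With independent random draws, the A-count is Binomial(1000, 0.5): standard deviation
    // ~15.8 requests, so anything roughly in the 47-53% range is consistent with a 50/50 split.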
    $routingResults = ['A' => 0, 'B' => 0];
    for ($i = 0; $i < 1000; $i++) {
        $selected = $abTesting->selectVersion($balancedConfig);
        $routingResults[$selected->equals($versionA) ? 'A' : 'B']++;
    }

    $percentA = ($routingResults['A'] / 1000) * 100;
    $percentB = ($routingResults['B'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Version A: {$routingResults['A']} requests (" . sprintf("%.1f%%", $percentA) . ")\n";
    echo " Version B: {$routingResults['B']} requests (" . sprintf("%.1f%%", $percentB) . ")\n\n";

    // ========================================================================
    // Test 2: Model Performance Comparison
    // ========================================================================
    echo "5. Comparing model performance...\n";
    $comparisonResult = $abTesting->runTest($balancedConfig);

    echo " → Comparison Results:\n";
    echo " Winner: {$comparisonResult->winner}\n";
    echo " Statistically Significant: " . ($comparisonResult->isStatisticallySignificant ? 'YES' : 'NO') . "\n";
    echo " Primary Metric Improvement: " . sprintf("%+.2f%%", $comparisonResult->getPrimaryMetricImprovementPercent()) . "\n";
    echo " → Summary:\n";
    echo " {$comparisonResult->getSummary()}\n";
    echo " → Recommendation:\n";
    echo " {$comparisonResult->recommendation}\n\n";

    // ========================================================================
    // Test 3: Gradual Rollout Configuration
    // ========================================================================
    echo "6. Testing gradual rollout configuration...\n";
    $gradualConfig = ABTestConfig::forGradualRollout(
        modelName: $modelName,
        currentVersion: $versionA,
        newVersion: $versionB
    );

    echo " → Configuration:\n";
    echo " {$gradualConfig->getDescription()}\n";

    // Simulate 1000 routing decisions with gradual rollout
    $gradualResults = ['A' => 0, 'B' => 0];
    for ($i = 0; $i < 1000; $i++) {
        $selected = $abTesting->selectVersion($gradualConfig);
        $gradualResults[$selected->equals($versionA) ? 'A' : 'B']++;
    }

    $percentA = ($gradualResults['A'] / 1000) * 100;
    $percentB = ($gradualResults['B'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Version A (current): {$gradualResults['A']} requests (" . sprintf("%.1f%%", $percentA) . ")\n";
    echo " Version B (new): {$gradualResults['B']} requests (" . sprintf("%.1f%%", $percentB) . ")\n\n";

    // ========================================================================
    // Test 4: Champion/Challenger Test
    // ========================================================================
    echo "7. Testing champion/challenger configuration...\n";
    $challengerConfig = ABTestConfig::forChallenger(
        modelName: $modelName,
        champion: $versionA,
        challenger: $versionB
    );

    echo " → Configuration:\n";
    echo " {$challengerConfig->getDescription()}\n";

    // Simulate 1000 routing decisions with champion/challenger
    $challengerResults = ['Champion' => 0, 'Challenger' => 0];
    for ($i = 0; $i < 1000; $i++) {
        $selected = $abTesting->selectVersion($challengerConfig);
        $challengerResults[$selected->equals($versionA) ? 'Champion' : 'Challenger']++;
    }

    $percentChampion = ($challengerResults['Champion'] / 1000) * 100;
    $percentChallenger = ($challengerResults['Challenger'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Champion (A): {$challengerResults['Champion']} requests (" . sprintf("%.1f%%", $percentChampion) . ")\n";
    echo " Challenger (B): {$challengerResults['Challenger']} requests (" . sprintf("%.1f%%", $percentChallenger) . ")\n\n";

    // ========================================================================
    // Test 5: Automated Test Execution
    // ========================================================================
    echo "8. Running automated A/B test...\n";
    $autoTestResult = $abTesting->runTest($balancedConfig);

    echo " → Automated Test Results:\n";
    echo " Winner: {$autoTestResult->winner}\n";
    echo " Should Deploy Version B: " . ($autoTestResult->shouldDeployVersionB() ? 'YES' : 'NO') . "\n";
    echo " Is Inconclusive: " . ($autoTestResult->isInconclusive() ? 'YES' : 'NO') . "\n";
    echo " → Metrics Difference:\n";
    foreach ($autoTestResult->metricsDifference as $metric => $diff) {
        echo " {$metric}: " . sprintf("%+.4f", $diff) . "\n";
    }
    echo "\n";

    // ========================================================================
    // Test 6: Rollout Planning
    // ========================================================================
    echo "9. Generating rollout plan...\n";
    $rolloutPlan = $abTesting->generateRolloutPlan(steps: 5);

    echo " → Rollout Plan (5 stages):\n";
    foreach ($rolloutPlan as $step => $trafficSplitB) {
        $percentB = (int) ($trafficSplitB * 100);
        $percentA = 100 - $percentB;
        echo " Stage {$step}: Version A {$percentA}%, Version B {$percentB}%\n";
    }
    echo "\n";

    // ========================================================================
    // Test 7: Sample Size Calculation
    // ========================================================================
    echo "10. Calculating required sample size...\n";

    $requiredSamples = $abTesting->calculateRequiredSampleSize(
        confidenceLevel: 0.95, // 95% confidence
        marginOfError: 0.05    // 5% margin of error
    );
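    // For reference, the classic proportion formula n = z^2 * p(1-p) / E^2 with z = 1.96,
    // p = 0.5 (worst case) and E = 0.05 gives about 385 samples per version, assuming the
    // service uses this standard approach.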

    echo " → Sample Size Requirements:\n";
    echo " Confidence Level: 95%\n";
    echo " Margin of Error: 5%\n";
    echo " Required Samples per Version: {$requiredSamples}\n\n";

    // ========================================================================
    // Test Summary
    // ========================================================================
    echo "=== Test Summary ===\n";
    echo "✓ Balanced 50/50 A/B Test: Working\n";
    echo "✓ Model Performance Comparison: Working\n";
    echo "✓ Gradual Rollout Configuration: Working\n";
    echo "✓ Champion/Challenger Test: Working\n";
    echo "✓ Automated Test Execution: Working\n";
    echo "✓ Rollout Planning: Working\n";
    echo "✓ Sample Size Calculation: Working\n\n";

    echo "Key Findings:\n";
    echo " - Version B shows " . sprintf("%.1f%%", $comparisonResult->getPrimaryMetricImprovementPercent()) . " improvement over Version A\n";
    echo " - Winner: {$comparisonResult->winner} (statistically significant: " . ($comparisonResult->isStatisticallySignificant ? 'YES' : 'NO') . ")\n";
    echo " - Recommendation: {$comparisonResult->recommendation}\n";
    echo " - Balanced 50/50 split achieved ~50% traffic to each version\n";
    echo " - Gradual rollout achieved ~90/10 split for safe deployment\n";
    echo " - Champion/challenger achieved ~80/20 split for validation\n";
    echo " - Automated test execution and rollout planning functional\n\n";

    echo "=== A/B Testing Workflows PASSED ===\n";

} catch (\Throwable $e) {
    echo "\n!!! TEST FAILED !!!\n";
    echo "Error: " . $e->getMessage() . "\n";
    echo "File: " . $e->getFile() . ":" . $e->getLine() . "\n";
    echo "\nStack trace:\n" . $e->getTraceAsString() . "\n";
    exit(1);
}