- Move 12 markdown files from root to docs/ subdirectories - Organize documentation by category: • docs/troubleshooting/ (1 file) - Technical troubleshooting guides • docs/deployment/ (4 files) - Deployment and security documentation • docs/guides/ (3 files) - Feature-specific guides • docs/planning/ (4 files) - Planning and improvement proposals Root directory cleanup: - Reduced from 16 to 4 markdown files in root - Only essential project files remain: • CLAUDE.md (AI instructions) • README.md (Main project readme) • CLEANUP_PLAN.md (Current cleanup plan) • SRC_STRUCTURE_IMPROVEMENTS.md (Structure improvements) This improves: ✅ Documentation discoverability ✅ Logical organization by purpose ✅ Clean root directory ✅ Better maintainability
386 lines
13 KiB
PHP
386 lines
13 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Framework\Database\Profiling;
|
|
|
|
use App\Framework\Database\ConnectionInterface;
|
|
|
|
/**
 * Advanced query analysis and optimization suggestions.
 *
 * Combines keyword/regex heuristics over the SQL text with the execution
 * metrics recorded on a QueryProfile, and makes a best-effort attempt to
 * fetch an EXPLAIN plan through the injected connection.
 *
 * All SQL inspection here is textual: keywords appearing inside string
 * literals or comments are treated as if they were part of the statement.
 */
final class QueryAnalyzer
{
    public function __construct(
        private readonly ConnectionInterface $connection
    ) {
    }

    /**
     * Analyze a profiled query and provide optimization suggestions.
     *
     * Runs every textual heuristic, then the metric-based checks, then tries
     * to obtain an execution plan, and bundles the results together with a
     * 0-100 optimization score.
     */
    public function analyzeQuery(QueryProfile $profile): QueryAnalysis
    {
        $sql = $profile->query->sql;
        $suggestions = [];
        $issues = [];
        $indexRecommendations = [];

        // Basic SQL analysis
        $this->analyzeSelectStatements($sql, $suggestions, $issues);
        // $issues is passed too so that WHERE-related problems are reported and scored.
        $this->analyzeWhereClause($sql, $suggestions, $issues, $indexRecommendations);
        $this->analyzeJoins($sql, $suggestions, $issues);
        $this->analyzeAggregations($sql, $suggestions);
        $this->analyzeSubqueries($sql, $suggestions, $issues);

        // Performance analysis based on execution metrics
        $this->analyzePerformanceMetrics($profile, $suggestions, $issues);

        // Try to get execution plan if possible
        $executionPlan = $this->getExecutionPlan($sql);

        return new QueryAnalysis(
            profile: $profile,
            suggestions: $suggestions,
            issues: $issues,
            indexRecommendations: $indexRecommendations,
            executionPlan: $executionPlan,
            optimizationScore: $this->calculateOptimizationScore($profile, $issues, $suggestions)
        );
    }

    /**
     * Analyze SELECT statements.
     *
     * @param list<string> $suggestions Appended to in place.
     * @param list<string> $issues      Appended to in place.
     */
    private function analyzeSelectStatements(string $sql, array &$suggestions, array &$issues): void
    {
        $upperSql = strtoupper($sql);

        // Check for SELECT * (any whitespace, including newlines, between the two tokens)
        if ($this->countMatches('/\bSELECT\s+\*/', $upperSql) > 0) {
            $issues[] = 'Using SELECT * can fetch unnecessary columns and hurt performance';
            $suggestions[] = 'Specify only the columns you need instead of using SELECT *';
        }

        // Check for DISTINCT usage
        if (str_contains($upperSql, 'DISTINCT') && str_contains($upperSql, 'ORDER BY')) {
            $suggestions[] = 'DISTINCT with ORDER BY can be expensive - consider if both are necessary';
        }

        // Count aggregate function *calls*: the name must be a whole word followed by "(",
        // so identifiers such as "account_id" or "summary" are not counted.
        $functionCount = $this->countMatches('/\b(?:COUNT|SUM|AVG|MAX|MIN)\s*\(/', $upperSql);

        if ($functionCount > 5) {
            $suggestions[] = 'Multiple aggregate functions detected - consider if they can be optimized or cached';
        }
    }

    /**
     * Analyze WHERE clauses.
     *
     * @param list<string> $suggestions          Appended to in place.
     * @param list<string> $issues               Appended to in place.
     * @param list<string> $indexRecommendations Appended to in place.
     */
    private function analyzeWhereClause(
        string $sql,
        array &$suggestions,
        array &$issues,
        array &$indexRecommendations
    ): void {
        $upperSql = strtoupper($sql);

        if ($this->countMatches('/\bWHERE\b/', $upperSql) === 0) {
            // Leading whitespace must not hide the statement type.
            if (str_starts_with(ltrim($upperSql), 'SELECT')) {
                $issues[] = 'SELECT without WHERE clause may result in full table scan';
                $suggestions[] = 'Add WHERE clause to limit result set';
            }

            return;
        }

        // Check for functions in WHERE clause
        if (preg_match('/WHERE\s+\w+\([^)]+\)\s*(=|<|>|<=|>=)/', $upperSql)) {
            $issues[] = 'Functions in WHERE clause prevent index usage';
            $suggestions[] = 'Avoid functions on columns in WHERE clause to enable index usage';
        }

        // Check for LIKE with leading wildcard
        if (preg_match('/LIKE\s+\'%[^%]*\'/', $upperSql)) {
            $issues[] = 'LIKE with leading wildcard prevents index usage';
            $suggestions[] = 'Avoid leading wildcards in LIKE patterns for better performance';
        }

        // Extract potential index candidates: the first column compared directly after WHERE.
        // IN needs word boundaries, otherwise "WHERE ADMIN LIKE ..." backtracks to column
        // "ADM" followed by "IN". The column is reported upper-cased because the match runs
        // on the upper-cased SQL.
        if (preg_match_all('/\bWHERE\s+(\w+)\s*(?:<=|>=|=|<|>|\bIN\b)/', $upperSql, $matches)) {
            foreach ($matches[1] as $column) {
                $indexRecommendations[] = "Consider index on column: {$column}";
            }
        }

        // Check for OR conditions
        if (str_contains($upperSql, ' OR ')) {
            $suggestions[] = 'OR conditions can be slower than UNION - consider rewriting if appropriate';
        }
    }

    /**
     * Analyze JOIN clauses.
     *
     * @param list<string> $suggestions Appended to in place.
     * @param list<string> $issues      Appended to in place.
     */
    private function analyzeJoins(string $sql, array &$suggestions, array &$issues): void
    {
        $upperSql = strtoupper($sql);
        $joinCount = $this->countMatches('/\bJOIN\b/', $upperSql);

        if ($joinCount === 0) {
            return;
        }

        if ($joinCount > 5) {
            $issues[] = "High number of JOINs ({$joinCount}) may impact performance";
            $suggestions[] = 'Consider denormalization or caching for queries with many JOINs';
        }

        // Check for Cartesian products (JOIN without a join condition).
        // CROSS and NATURAL joins never carry ON/USING, so they are not expected to have one.
        // Heuristic: any standalone ON (e.g. "ON DUPLICATE KEY") also counts as a condition.
        $unconditionalJoins = $this->countMatches(
            '/\b(?:CROSS|NATURAL(?:\s+(?:LEFT|RIGHT|FULL|INNER))?(?:\s+OUTER)?)\s+JOIN\b/',
            $upperSql
        );
        $conditionCount = $this->countMatches('/\b(?:ON|USING)\b/', $upperSql);

        if ($joinCount - $unconditionalJoins > $conditionCount) {
            $issues[] = 'Potential Cartesian product detected - missing JOIN conditions';
            $suggestions[] = 'Ensure all JOINs have proper ON conditions';
        }

        // Check for table order in JOINs
        if ($this->countMatches('/\b(?:LEFT|RIGHT)\s+(?:OUTER\s+)?JOIN\b/', $upperSql) > 0) {
            $suggestions[] = 'Consider JOIN order - start with the most selective table';
        }
    }

    /**
     * Analyze aggregation clauses (GROUP BY / HAVING).
     *
     * @param list<string> $suggestions Appended to in place.
     */
    private function analyzeAggregations(string $sql, array &$suggestions): void
    {
        $upperSql = strtoupper($sql);

        if (str_contains($upperSql, 'GROUP BY')) {
            if (! str_contains($upperSql, 'ORDER BY')) {
                $suggestions[] = 'GROUP BY without ORDER BY - consider if ordering is needed';
            }

            // Check for GROUP BY with many columns. The "s" modifier lets the column list
            // span several lines; LIMIT is a terminator so "LIMIT 10, 20" commas are not
            // mistaken for grouping columns.
            $groupByMatches = [];
            $pattern = '/GROUP BY\s+(.+?)(?:\s+ORDER|\s+HAVING|\s+LIMIT|\s*$)/is';
            if (preg_match($pattern, $sql, $groupByMatches)) {
                $columns = explode(',', $groupByMatches[1]);
                if (count($columns) > 3) {
                    $suggestions[] = 'GROUP BY with many columns can be expensive - verify all are necessary';
                }
            }
        }

        if (str_contains($upperSql, 'HAVING') && str_contains($upperSql, 'WHERE')) {
            $suggestions[] = 'Move non-aggregate conditions from HAVING to WHERE for better performance';
        }
    }

    /**
     * Analyze subqueries.
     *
     * Every SELECT keyword after the first is counted as a subquery (so UNION
     * branches are counted as well).
     *
     * @param list<string> $suggestions Appended to in place.
     * @param list<string> $issues      Appended to in place.
     */
    private function analyzeSubqueries(string $sql, array &$suggestions, array &$issues): void
    {
        $upperSql = strtoupper($sql);

        // Whole-word match so identifiers such as "selected" are not counted.
        $selectCount = $this->countMatches('/\bSELECT\b/', $upperSql);
        if ($selectCount <= 1) {
            return;
        }

        $subqueryCount = $selectCount - 1;

        if ($subqueryCount > 2) {
            $issues[] = "Multiple subqueries ({$subqueryCount}) detected";
            $suggestions[] = 'Consider rewriting subqueries as JOINs for better performance';
        }

        // Check for correlated subqueries (simplified detection)
        if (str_contains($upperSql, 'WHERE EXISTS') || str_contains($upperSql, 'WHERE NOT EXISTS')) {
            $suggestions[] = 'Correlated subqueries can be expensive - consider JOIN alternatives';
        }
    }

    /**
     * Analyze performance metrics recorded on the profile.
     *
     * @param list<string> $suggestions Appended to in place.
     * @param list<string> $issues      Appended to in place.
     */
    private function analyzePerformanceMetrics(QueryProfile $profile, array &$suggestions, array &$issues): void
    {
        $executionTimeMs = $profile->executionTime->toMilliseconds();
        // The division by 1024 * 1024 treats memoryUsage as a byte count.
        $memoryUsageMB = $profile->memoryUsage / (1024 * 1024);

        if ($executionTimeMs > 5000) {
            $issues[] = 'Very slow execution time (>5 seconds)';
            $suggestions[] = 'Consider breaking this query into smaller parts or adding appropriate indexes';
        } elseif ($executionTimeMs > 1000) {
            $issues[] = 'Slow execution time (>1 second)';
            $suggestions[] = 'Review query optimization opportunities';
        }

        if ($memoryUsageMB > 50) {
            $issues[] = 'High memory usage (>50MB)';
            $suggestions[] = 'Consider limiting result set size or using pagination';
        } elseif ($memoryUsageMB > 10) {
            $suggestions[] = 'Moderate memory usage detected - monitor if consistent';
        }

        if ($profile->getComplexityScore() > 15) {
            $issues[] = 'High query complexity score';
            $suggestions[] = 'Consider simplifying the query or breaking it into multiple queries';
        }
    }

    /**
     * Get execution plan (simplified - database specific).
     *
     * Best effort by design: any failure is swallowed and null is returned,
     * because a missing plan must not abort the analysis.
     *
     * @return string|null Textual plan, or null when none could be obtained.
     */
    private function getExecutionPlan(string $sql): ?string
    {
        try {
            // Try MySQL EXPLAIN
            $explainSql = "EXPLAIN " . $sql;
            $result = $this->connection->query($explainSql);

            $plan = '';
            while ($row = $result->fetch()) {
                $plan .= print_r($row, true) . "\n";
            }

            return $plan !== '' ? $plan : null;
        } catch (\Throwable) {
            // If EXPLAIN fails, try other database-specific approaches
            try {
                // Try PostgreSQL EXPLAIN
                $explainSql = "EXPLAIN (FORMAT JSON) " . $sql;
                $result = $this->connection->queryScalar($explainSql);

                return is_string($result) ? $result : null;
            } catch (\Throwable) {
                // Return null if we can't get execution plan
                return null;
            }
        }
    }

    /**
     * Calculate optimization score (0-100, higher is better).
     *
     * Starts at 100 and subtracts tiered penalties for execution time, memory
     * usage and complexity, then 5 points per issue and 2 per suggestion.
     *
     * @param list<string> $issues
     * @param list<string> $suggestions
     */
    private function calculateOptimizationScore(QueryProfile $profile, array $issues, array $suggestions): int
    {
        $baseScore = 100;

        // Penalize execution time
        $executionTimeMs = $profile->executionTime->toMilliseconds();
        $baseScore -= match (true) {
            $executionTimeMs > 5000 => 40,
            $executionTimeMs > 1000 => 25,
            $executionTimeMs > 500 => 15,
            $executionTimeMs > 100 => 10,
            default => 0,
        };

        // Penalize memory usage
        $memoryUsageMB = $profile->memoryUsage / (1024 * 1024);
        $baseScore -= match (true) {
            $memoryUsageMB > 50 => 20,
            $memoryUsageMB > 10 => 10,
            $memoryUsageMB > 5 => 5,
            default => 0,
        };

        // Penalize complexity
        $complexityScore = $profile->getComplexityScore();
        $baseScore -= match (true) {
            $complexityScore > 15 => 15,
            $complexityScore > 10 => 10,
            $complexityScore > 5 => 5,
            default => 0,
        };

        // Penalize issues and suggestions
        $baseScore -= count($issues) * 5;
        $baseScore -= count($suggestions) * 2;

        return max(0, min(100, $baseScore));
    }

    /**
     * Batch analyze multiple profiles.
     *
     * @param array<QueryProfile> $profiles
     *
     * @return list<QueryAnalysis> One analysis per profile, in input order, re-indexed from 0.
     */
    public function batchAnalyze(array $profiles): array
    {
        $analyses = [];

        foreach ($profiles as $profile) {
            $analyses[] = $this->analyzeQuery($profile);
        }

        return $analyses;
    }

    /**
     * Get optimization summary for multiple analyses.
     *
     * @param array<QueryAnalysis> $analyses
     *
     * @return array<string, mixed> Empty array for empty input; otherwise totals, the
     *                              rounded average score, the five most frequent issues
     *                              and suggestions (message => count) and an overall
     *                              assessment label.
     */
    public function getOptimizationSummary(array $analyses): array
    {
        if ($analyses === []) {
            return [];
        }

        $totalScore = 0;
        $totalIssues = 0;
        $totalSuggestions = 0;
        $commonIssues = [];
        $commonSuggestions = [];

        foreach ($analyses as $analysis) {
            $totalScore += $analysis->optimizationScore;
            $totalIssues += count($analysis->issues);
            $totalSuggestions += count($analysis->suggestions);

            foreach ($analysis->issues as $issue) {
                $commonIssues[$issue] = ($commonIssues[$issue] ?? 0) + 1;
            }

            foreach ($analysis->suggestions as $suggestion) {
                $commonSuggestions[$suggestion] = ($commonSuggestions[$suggestion] ?? 0) + 1;
            }
        }

        // Sort by frequency
        arsort($commonIssues);
        arsort($commonSuggestions);

        $analysisCount = count($analyses);
        $averageScore = $totalScore / $analysisCount;

        return [
            'total_queries_analyzed' => $analysisCount,
            'average_optimization_score' => round($averageScore),
            'total_issues' => $totalIssues,
            'total_suggestions' => $totalSuggestions,
            'most_common_issues' => array_slice($commonIssues, 0, 5, true),
            'most_common_suggestions' => array_slice($commonSuggestions, 0, 5, true),
            'overall_assessment' => $this->getOverallAssessment($averageScore),
        ];
    }

    /**
     * Get overall assessment based on average score.
     */
    private function getOverallAssessment(float $averageScore): string
    {
        return match (true) {
            $averageScore >= 90 => 'excellent',
            $averageScore >= 75 => 'good',
            $averageScore >= 60 => 'fair',
            $averageScore >= 40 => 'poor',
            default => 'critical'
        };
    }

    /**
     * Count non-overlapping matches of a PCRE pattern, treating a PCRE failure as zero.
     */
    private function countMatches(string $pattern, string $subject): int
    {
        $count = preg_match_all($pattern, $subject);

        return $count === false ? 0 : $count;
    }
}
|