Files
michaelschiemer/src/Framework/Database/Profiling/QueryAnalyzer.php
Michael Schiemer 5050c7d73a docs: consolidate documentation into organized structure
- Move 12 markdown files from root to docs/ subdirectories
- Organize documentation by category:
  • docs/troubleshooting/ (1 file)  - Technical troubleshooting guides
  • docs/deployment/      (4 files) - Deployment and security documentation
  • docs/guides/          (3 files) - Feature-specific guides
  • docs/planning/        (4 files) - Planning and improvement proposals

Root directory cleanup:
- Reduced from 16 to 4 markdown files in root
- Only essential project files remain:
  • CLAUDE.md (AI instructions)
  • README.md (Main project readme)
  • CLEANUP_PLAN.md (Current cleanup plan)
  • SRC_STRUCTURE_IMPROVEMENTS.md (Structure improvements)

This improves:
  • Documentation discoverability
  • Logical organization by purpose
  • Clean root directory
  • Better maintainability
2025-10-05 11:05:04 +02:00

386 lines
13 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Framework\Database\Profiling;
use App\Framework\Database\ConnectionInterface;
/**
 * Heuristic query analysis and optimization suggestions.
 *
 * The SQL text is inspected with string/regex heuristics (no SQL parser is
 * involved), the recorded execution metrics are compared against fixed
 * thresholds, and an EXPLAIN plan is requested from the injected connection
 * on a best-effort basis. Because the checks are heuristics, keywords that
 * appear inside string literals or comments of the SQL can still trigger them.
 */
final class QueryAnalyzer
{
    public function __construct(
        private readonly ConnectionInterface $connection
    ) {
    }

    /**
     * Analyze a single profiled query and collect optimization hints.
     *
     * @param QueryProfile $profile Profile whose `query->sql`, `executionTime`,
     *                              `memoryUsage` and `getComplexityScore()` are read.
     *
     * @return QueryAnalysis Built from the profile, the collected suggestions,
     *                       issues and index recommendations, the execution plan
     *                       (null when unavailable) and the optimization score.
     */
    public function analyzeQuery(QueryProfile $profile): QueryAnalysis
    {
        $sql = $profile->query->sql;

        $suggestions = [];
        $issues = [];
        $indexRecommendations = [];

        // Static heuristics on the SQL text. Every helper appends to the
        // arrays it receives by reference.
        $this->analyzeSelectStatements($sql, $suggestions, $issues);
        $this->analyzeWhereClause($sql, $suggestions, $issues, $indexRecommendations);
        $this->analyzeJoins($sql, $suggestions, $issues);
        $this->analyzeAggregations($sql, $suggestions);
        $this->analyzeSubqueries($sql, $suggestions, $issues);

        // Findings derived from the measured execution metrics.
        $this->analyzePerformanceMetrics($profile, $suggestions, $issues);

        // Best effort: null when the connection cannot produce a plan.
        $executionPlan = $this->getExecutionPlan($sql);

        return new QueryAnalysis(
            profile: $profile,
            suggestions: $suggestions,
            issues: $issues,
            indexRecommendations: $indexRecommendations,
            executionPlan: $executionPlan,
            optimizationScore: $this->calculateOptimizationScore($profile, $issues, $suggestions)
        );
    }

    /**
     * Inspect the SELECT part: `SELECT *`, DISTINCT combined with ORDER BY,
     * and a large number of aggregate function calls.
     *
     * @param string       $sql         Raw SQL text.
     * @param list<string> $suggestions Receives suggestions (appended).
     * @param list<string> $issues      Receives issues (appended).
     */
    private function analyzeSelectStatements(string $sql, array &$suggestions, array &$issues): void
    {
        $upperSql = strtoupper($sql);

        // Any whitespace (including newlines) may separate SELECT and "*".
        if (preg_match('/\bSELECT\s+(?:DISTINCT\s+)?\*/', $upperSql) === 1) {
            $issues[] = 'Using SELECT * can fetch unnecessary columns and hurt performance';
            $suggestions[] = 'Specify only the columns you need instead of using SELECT *';
        }

        if (
            preg_match('/\bDISTINCT\b/', $upperSql) === 1
            && preg_match('/\bORDER\s+BY\b/', $upperSql) === 1
        ) {
            $suggestions[] = 'DISTINCT with ORDER BY can be expensive - consider if both are necessary';
        }

        // Count real aggregate calls only: the name must be a whole word that
        // is followed by "(", so identifiers such as ADMIN or ACCOUNT are not
        // counted as MIN / COUNT.
        $aggregateCalls = (int) preg_match_all('/\b(?:COUNT|SUM|AVG|MAX|MIN)\s*\(/', $upperSql);
        if ($aggregateCalls > 5) {
            $suggestions[] = 'Multiple aggregate functions detected - consider if they can be optimized or cached';
        }
    }

    /**
     * Inspect the WHERE clause: missing WHERE on a SELECT, functions applied
     * to the filtered column, leading-wildcard LIKE patterns, index candidates
     * and OR conditions.
     *
     * @param string       $sql                  Raw SQL text.
     * @param list<string> $suggestions          Receives suggestions (appended).
     * @param list<string> $issues               Receives issues (appended).
     * @param list<string> $indexRecommendations Receives index recommendations (appended).
     */
    private function analyzeWhereClause(
        string $sql,
        array &$suggestions,
        array &$issues,
        array &$indexRecommendations
    ): void {
        $upperSql = strtoupper($sql);

        if (preg_match('/\bWHERE\b/', $upperSql) !== 1) {
            // Only a SELECT that actually reads from a table can cause a table
            // scan; "SELECT 1" style statements are left alone.
            $isSelect = str_starts_with(ltrim($upperSql), 'SELECT');
            if ($isSelect && preg_match('/\bFROM\b/', $upperSql) === 1) {
                $issues[] = 'SELECT without WHERE clause may result in full table scan';
                $suggestions[] = 'Add WHERE clause to limit result set';
            }

            return;
        }

        // FUNC(column) <comparison> directly after WHERE.
        if (preg_match('/\bWHERE\s+\w+\([^)]+\)\s*(?:<=|>=|=|<|>)/', $upperSql) === 1) {
            $issues[] = 'Functions in WHERE clause prevent index usage';
            $suggestions[] = 'Avoid functions on columns in WHERE clause to enable index usage';
        }

        // A pattern starting with "%" is a leading wildcard no matter what
        // follows it, so '%foo%' is reported as well as '%foo'.
        if (preg_match('/\bLIKE\s+[\'"]%/', $upperSql) === 1) {
            $issues[] = 'LIKE with leading wildcard prevents index usage';
            $suggestions[] = 'Avoid leading wildcards in LIKE patterns for better performance';
        }

        // Index candidates: the identifier compared directly after WHERE.
        // Matching runs on the original SQL so the column keeps its casing,
        // qualified names (alias.column) are accepted, and IN must be a
        // separate word so that "login IS NULL" is not read as "log" + "IN".
        $found = preg_match_all(
            '/\bWHERE\s+([a-z_][\w.]*)\s*(?:<=|>=|=|<|>|\s+IN\b)/i',
            $sql,
            $matches
        );
        if ($found !== false && $found > 0) {
            foreach (array_unique($matches[1]) as $column) {
                $indexRecommendations[] = "Consider index on column: {$column}";
            }
        }

        if (str_contains($upperSql, ' OR ')) {
            $suggestions[] = 'OR conditions can be slower than UNION - consider rewriting if appropriate';
        }
    }

    /**
     * Inspect JOIN usage: number of joins, joins without a join condition and
     * outer joins.
     *
     * @param string       $sql         Raw SQL text.
     * @param list<string> $suggestions Receives suggestions (appended).
     * @param list<string> $issues      Receives issues (appended).
     */
    private function analyzeJoins(string $sql, array &$suggestions, array &$issues): void
    {
        $upperSql = strtoupper($sql);

        // Whole-word JOIN keywords only (plus MySQL's STRAIGHT_JOIN), so that
        // identifiers merely containing "JOIN" are not counted.
        $joinCount = (int) preg_match_all('/\b(?:STRAIGHT_)?JOIN\b/', $upperSql);
        if ($joinCount === 0) {
            return;
        }

        if ($joinCount > 5) {
            $issues[] = "High number of JOINs ({$joinCount}) may impact performance";
            $suggestions[] = 'Consider denormalization or caching for queries with many JOINs';
        }

        // A join condition is given with ON or USING; word boundaries make the
        // check work for multi-line SQL where ON is followed by a newline.
        $conditionCount = (int) preg_match_all('/\b(?:ON|USING)\b/', $upperSql);

        // CROSS JOIN and NATURAL JOIN take no explicit condition by design.
        $unconditionedJoins = (int) preg_match_all(
            '/\b(?:CROSS|NATURAL(?:\s+(?:INNER|(?:LEFT|RIGHT|FULL)(?:\s+OUTER)?))?)\s+JOIN\b/',
            $upperSql
        );

        if ($joinCount - $unconditionedJoins > $conditionCount) {
            $issues[] = 'Potential Cartesian product detected - missing JOIN conditions';
            $suggestions[] = 'Ensure all JOINs have proper ON conditions';
        }

        if (preg_match('/\b(?:LEFT|RIGHT)\s+(?:OUTER\s+)?JOIN\b/', $upperSql) === 1) {
            $suggestions[] = 'Consider JOIN order - start with the most selective table';
        }
    }

    /**
     * Inspect GROUP BY / HAVING usage.
     *
     * @param string       $sql         Raw SQL text.
     * @param list<string> $suggestions Receives suggestions (appended).
     */
    private function analyzeAggregations(string $sql, array &$suggestions): void
    {
        $upperSql = strtoupper($sql);

        if (preg_match('/\bGROUP\s+BY\b/', $upperSql) === 1) {
            if (preg_match('/\bORDER\s+BY\b/', $upperSql) !== 1) {
                $suggestions[] = 'GROUP BY without ORDER BY - consider if ordering is needed';
            }

            // Capture the GROUP BY list up to the next clause or the end of the
            // statement. The "s" flag lets the list span several lines.
            $groupByMatches = [];
            $pattern = '/\bGROUP\s+BY\s+(.+?)(?:\s+ORDER\b|\s+HAVING\b|\s+LIMIT\b|\s*$)/is';
            if (preg_match($pattern, $sql, $groupByMatches) === 1) {
                // Comma count approximates the number of grouped expressions.
                $columns = explode(',', $groupByMatches[1]);
                if (count($columns) > 3) {
                    $suggestions[] = 'GROUP BY with many columns can be expensive - verify all are necessary';
                }
            }
        }

        if (str_contains($upperSql, 'HAVING') && str_contains($upperSql, 'WHERE')) {
            $suggestions[] = 'Move non-aggregate conditions from HAVING to WHERE for better performance';
        }
    }

    /**
     * Inspect subquery usage, approximated by the number of SELECT keywords.
     *
     * @param string       $sql         Raw SQL text.
     * @param list<string> $suggestions Receives suggestions (appended).
     * @param list<string> $issues      Receives issues (appended).
     */
    private function analyzeSubqueries(string $sql, array &$suggestions, array &$issues): void
    {
        $upperSql = strtoupper($sql);

        // Whole-word matches only, so identifiers such as "selected" do not count.
        $selectCount = (int) preg_match_all('/\bSELECT\b/', $upperSql);
        if ($selectCount <= 1) {
            return;
        }

        // Every SELECT beyond the first one is treated as a subquery.
        $subqueryCount = $selectCount - 1;
        if ($subqueryCount > 2) {
            $issues[] = "Multiple subqueries ({$subqueryCount}) detected";
            $suggestions[] = 'Consider rewriting subqueries as JOINs for better performance';
        }

        // Simplified detection of correlated subqueries: WHERE [NOT] EXISTS.
        if (preg_match('/\bWHERE\s+(?:NOT\s+)?EXISTS\b/', $upperSql) === 1) {
            $suggestions[] = 'Correlated subqueries can be expensive - consider JOIN alternatives';
        }
    }

    /**
     * Derive findings from the measured execution time, memory usage and the
     * profile's complexity score.
     *
     * @param QueryProfile $profile     Profile providing the metrics.
     * @param list<string> $suggestions Receives suggestions (appended).
     * @param list<string> $issues      Receives issues (appended).
     */
    private function analyzePerformanceMetrics(QueryProfile $profile, array &$suggestions, array &$issues): void
    {
        $executionTimeMs = $profile->executionTime->toMilliseconds();
        // The divisor converts bytes to MiB, i.e. memoryUsage is treated as a byte count.
        $memoryUsageMB = $profile->memoryUsage / (1024 * 1024);

        if ($executionTimeMs > 5000) {
            $issues[] = 'Very slow execution time (>5 seconds)';
            $suggestions[] = 'Consider breaking this query into smaller parts or adding appropriate indexes';
        } elseif ($executionTimeMs > 1000) {
            $issues[] = 'Slow execution time (>1 second)';
            $suggestions[] = 'Review query optimization opportunities';
        }

        if ($memoryUsageMB > 50) {
            $issues[] = 'High memory usage (>50MB)';
            $suggestions[] = 'Consider limiting result set size or using pagination';
        } elseif ($memoryUsageMB > 10) {
            // Moderate usage is only a hint, not an issue.
            $suggestions[] = 'Moderate memory usage detected - monitor if consistent';
        }

        if ($profile->getComplexityScore() > 15) {
            $issues[] = 'High query complexity score';
            $suggestions[] = 'Consider simplifying the query or breaking it into multiple queries';
        }
    }

    /**
     * Ask the connection for an execution plan (best effort).
     *
     * A plain `EXPLAIN <sql>` is tried first and its rows are rendered with
     * print_r(); if that throws, `EXPLAIN (FORMAT JSON) <sql>` is tried via
     * queryScalar().
     *
     * @param string $sql Raw SQL text.
     *
     * @return string|null The rendered plan, or null when the SQL is empty,
     *                     no plan rows were returned, or both attempts failed.
     */
    private function getExecutionPlan(string $sql): ?string
    {
        // Strip surrounding whitespace and trailing semicolons so the
        // statement is not sent as "EXPLAIN ;" or "EXPLAIN <sql>;".
        $statement = rtrim(ltrim($sql), " \t\n\r\0\x0B;");
        if ($statement === '') {
            return null;
        }

        try {
            // Plain EXPLAIN (MySQL style).
            $result = $this->connection->query('EXPLAIN ' . $statement);

            $plan = '';
            while ($row = $result->fetch()) {
                $plan .= print_r($row, true) . "\n";
            }

            return $plan !== '' ? $plan : null;
        } catch (\Throwable) {
            // Deliberately swallowed: the plan is optional and analysis must
            // not fail because of it. Fall through to the next dialect.
        }

        try {
            // PostgreSQL style EXPLAIN with JSON output.
            $result = $this->connection->queryScalar('EXPLAIN (FORMAT JSON) ' . $statement);

            return is_string($result) ? $result : null;
        } catch (\Throwable) {
            // No execution plan available for this connection/statement.
            return null;
        }
    }

    /**
     * Calculate the optimization score (0-100, higher is better).
     *
     * Starts at 100 and subtracts tiered penalties for execution time, memory
     * usage and complexity, plus 5 points per issue and 2 per suggestion. The
     * result is clamped to the 0-100 range.
     *
     * @param QueryProfile $profile     Profile providing the metrics.
     * @param list<string> $issues      Issues found for the query.
     * @param list<string> $suggestions Suggestions produced for the query.
     */
    private function calculateOptimizationScore(QueryProfile $profile, array $issues, array $suggestions): int
    {
        $executionTimeMs = $profile->executionTime->toMilliseconds();
        $timePenalty = match (true) {
            $executionTimeMs > 5000 => 40,
            $executionTimeMs > 1000 => 25,
            $executionTimeMs > 500 => 15,
            $executionTimeMs > 100 => 10,
            default => 0,
        };

        $memoryUsageMB = $profile->memoryUsage / (1024 * 1024);
        $memoryPenalty = match (true) {
            $memoryUsageMB > 50 => 20,
            $memoryUsageMB > 10 => 10,
            $memoryUsageMB > 5 => 5,
            default => 0,
        };

        $complexityScore = $profile->getComplexityScore();
        $complexityPenalty = match (true) {
            $complexityScore > 15 => 15,
            $complexityScore > 10 => 10,
            $complexityScore > 5 => 5,
            default => 0,
        };

        $findingsPenalty = count($issues) * 5 + count($suggestions) * 2;

        $score = 100 - $timePenalty - $memoryPenalty - $complexityPenalty - $findingsPenalty;

        return max(0, min(100, $score));
    }

    /**
     * Analyze several profiles in order.
     *
     * @param array<array-key, QueryProfile> $profiles Profiles to analyze.
     *
     * @return list<QueryAnalysis> One analysis per profile, re-indexed from 0.
     */
    public function batchAnalyze(array $profiles): array
    {
        $analyses = [];
        foreach ($profiles as $profile) {
            $analyses[] = $this->analyzeQuery($profile);
        }

        return $analyses;
    }

    /**
     * Aggregate several analyses into a summary.
     *
     * @param array<array-key, QueryAnalysis> $analyses Analyses whose
     *        `optimizationScore`, `issues` and `suggestions` are read.
     *
     * @return array<string, mixed> Empty array for empty input; otherwise the keys
     *         total_queries_analyzed, average_optimization_score (rounded),
     *         total_issues, total_suggestions, most_common_issues and
     *         most_common_suggestions (top 5 each, message => occurrences) and
     *         overall_assessment.
     */
    public function getOptimizationSummary(array $analyses): array
    {
        if ($analyses === []) {
            return [];
        }

        $totalScore = 0;
        $totalIssues = 0;
        $totalSuggestions = 0;
        $commonIssues = [];
        $commonSuggestions = [];

        foreach ($analyses as $analysis) {
            $totalScore += $analysis->optimizationScore;
            $totalIssues += count($analysis->issues);
            $totalSuggestions += count($analysis->suggestions);

            // Tally how often each distinct message occurs.
            foreach ($analysis->issues as $issue) {
                $commonIssues[$issue] = ($commonIssues[$issue] ?? 0) + 1;
            }
            foreach ($analysis->suggestions as $suggestion) {
                $commonSuggestions[$suggestion] = ($commonSuggestions[$suggestion] ?? 0) + 1;
            }
        }

        // Most frequent first; keys (the messages) are preserved.
        arsort($commonIssues);
        arsort($commonSuggestions);

        $analysisCount = count($analyses);
        $averageScore = $totalScore / $analysisCount;

        return [
            'total_queries_analyzed' => $analysisCount,
            'average_optimization_score' => round($averageScore),
            'total_issues' => $totalIssues,
            'total_suggestions' => $totalSuggestions,
            'most_common_issues' => array_slice($commonIssues, 0, 5, true),
            'most_common_suggestions' => array_slice($commonSuggestions, 0, 5, true),
            // The assessment uses the unrounded average.
            'overall_assessment' => $this->getOverallAssessment($averageScore),
        ];
    }

    /**
     * Map an average score to a label: excellent (>= 90), good (>= 75),
     * fair (>= 60), poor (>= 40), otherwise critical.
     */
    private function getOverallAssessment(float $averageScore): string
    {
        return match (true) {
            $averageScore >= 90 => 'excellent',
            $averageScore >= 75 => 'good',
            $averageScore >= 60 => 'fair',
            $averageScore >= 40 => 'poor',
            default => 'critical'
        };
    }
}