- Add comprehensive health check system with multiple endpoints - Add Prometheus metrics endpoint - Add production logging configurations (5 strategies) - Add complete deployment documentation suite: * QUICKSTART.md - 30-minute deployment guide * DEPLOYMENT_CHECKLIST.md - Printable verification checklist * DEPLOYMENT_WORKFLOW.md - Complete deployment lifecycle * PRODUCTION_DEPLOYMENT.md - Comprehensive technical reference * production-logging.md - Logging configuration guide * ANSIBLE_DEPLOYMENT.md - Infrastructure as Code automation * README.md - Navigation hub * DEPLOYMENT_SUMMARY.md - Executive summary - Add deployment scripts and automation - Add DEPLOYMENT_PLAN.md - Concrete plan for immediate deployment - Update README with production-ready features All production infrastructure is now complete and ready for deployment.
317 lines
9.4 KiB
PHP
317 lines
9.4 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Framework\View\Lexer;
|
|
|
|
/**
 * Byte-oriented lexer that splits an HTML string into a flat list of tokens.
 *
 * The lexer recognises comments, CDATA sections, doctypes, opening tags,
 * self-closing tags, closing tags and plain content. The body of a raw text
 * element (see RAW_TEXT_TAGS) is emitted as a single content token.
 *
 * All offsets are byte offsets. Every delimiter the lexer looks for is ASCII,
 * so multi-byte UTF-8 sequences pass through untouched.
 */
final class HtmlLexer
{
    private const COMMENT_START = '!--';

    private const COMMENT_END = '-->';

    /** Compared case-insensitively, so any casing of "<!doctype" is accepted. */
    private const DOCTYPE_MARKER = '!doctype';

    private const CDATA_START = '![CDATA[';

    private const CDATA_END = ']]>';

    /**
     * Captures the tag name of an opening tag using the same delimiters as
     * consumeOpeningTag() (whitespace, '/' or '>'), so names containing '.'
     * or ':' are captured in full.
     */
    private const TAG_NAME_PATTERN = '/^<([a-z][^\s\/>]*)/i';

    /** Bytes treated as whitespace inside a tag. */
    private const WHITESPACE = " \t\n\r\v\f";

    /** @var array<string> Raw content tags (script, style, etc.) */
    private const RAW_TEXT_TAGS = ['script', 'style'];

    private string $html;

    /** Byte offset of the next unread byte. */
    private int $position;

    /** Length of $html in bytes. */
    private int $length;

    /** Lower-cased name of the raw text element currently open, or null. */
    private ?string $currentTagName = null;

    /**
     * @param string $html Markup to tokenize
     */
    public function __construct(string $html)
    {
        $this->html = $html;
        $this->position = 0;
        $this->length = strlen($html);
    }

    /**
     * Tokenize HTML and return array of tokens
     *
     * Concatenating the content of all returned tokens reproduces the input.
     * The lexer state is fully reset on every call, so the method can be
     * called repeatedly with identical results.
     *
     * @return array<Token>
     */
    public function tokenize(): array
    {
        $tokens = [];
        $this->position = 0;
        // A previous run may have ended inside an unclosed <script>/<style>.
        $this->currentTagName = null;

        while ($this->position < $this->length) {
            // Inside a raw text element only its own closing tag counts as markup.
            $atMarkup = $this->current() === '<'
                && ($this->currentTagName === null || $this->atRawTextEnd());

            if ($atMarkup) {
                $tokens[] = $this->consumeTag();

                continue;
            }

            $content = $this->consumeUntil('<');
            if ($content !== '') {
                $tokens[] = new Token($content, TokenType::CONTENT);
            }
        }

        return $tokens;
    }

    /**
     * Normalize self-closing x-components to explicit closing tags
     *
     * Every self-closing tag whose name starts with "x-" is rewritten from
     * <x-foo attr="v" /> to <x-foo attr="v"></x-foo>. All other tokens are
     * copied verbatim.
     *
     * @return string The markup with x-components expanded
     */
    public function normalizeXComponents(): string
    {
        $parts = [];

        foreach ($this->tokenize() as $token) {
            $isSelfClosingComponent = $token->type === TokenType::SELF_CLOSING_TAG
                && $this->isXComponent($token->content);

            $parts[] = $isSelfClosingComponent
                ? $this->expandSelfClosingTag($token->content)
                : $token->content;
        }

        return implode('', $parts);
    }

    /**
     * Turn "<name attrs />" into "<name attrs></name>".
     *
     * Only the terminating "/>" is removed; attribute values that happen to
     * end in '/' or '>' are left intact.
     */
    private function expandSelfClosingTag(string $tag): string
    {
        $opening = trim($tag);

        // The '>' may be missing when the input ended in the middle of the tag.
        if (substr($opening, -1) === '>') {
            $opening = substr($opening, 0, -1);
        }
        if (substr($opening, -1) === '/') {
            $opening = substr($opening, 0, -1);
        }
        $opening = rtrim($opening);

        if (preg_match(self::TAG_NAME_PATTERN, $opening, $matches) !== 1) {
            // Fallback: keep original if we can't parse
            return $tag;
        }

        return $opening . '></' . $matches[1] . '>';
    }

    /**
     * Consume one markup construct starting at '<' and dispatch on its kind.
     */
    private function consumeTag(): Token
    {
        $start = $this->position;

        // Consume '<'
        $this->advance();

        if ($this->peek(strlen(self::COMMENT_START)) === self::COMMENT_START) {
            return $this->consumeComment($start);
        }

        if ($this->peek(strlen(self::CDATA_START)) === self::CDATA_START) {
            return $this->consumeCData($start);
        }

        // The doctype keyword is matched ASCII case-insensitively.
        if (strcasecmp($this->peek(strlen(self::DOCTYPE_MARKER)), self::DOCTYPE_MARKER) === 0) {
            return $this->consumeDoctype($start);
        }

        if ($this->current() === '/') {
            return $this->consumeClosingTag($start);
        }

        return $this->consumeOpeningTag($start);
    }

    /**
     * Consume a CDATA section up to and including ']]>' (or to end of input).
     */
    private function consumeCData(int $start): Token
    {
        $this->skipPast(self::CDATA_END);

        return new Token($this->slice($start), TokenType::CDATA);
    }

    /**
     * Consume an opening or self-closing tag, including its attributes.
     *
     * A '>' inside a quoted attribute value does not terminate the tag. A
     * quote only opens a quoted value when it directly follows '=' (optionally
     * separated by whitespace); quotes anywhere else are ordinary characters.
     *
     * @param int $start Byte offset of the tag's '<'
     */
    private function consumeOpeningTag(int $start): Token
    {
        // Tag name runs up to whitespace, '>' or '/'.
        $nameLength = strcspn($this->html, self::WHITESPACE . '>/', $this->position);
        $tagName = strtolower(substr($this->html, $this->position, $nameLength));
        $this->position += $nameLength;

        $quote = null;          // active quote character, null outside a quoted value
        $awaitingValue = false; // true after '=' until the first byte of the value
        $inBareValue = false;   // true while inside an unquoted attribute value

        while ($this->position < $this->length) {
            $char = $this->html[$this->position];

            if ($quote !== null) {
                if ($char === $quote) {
                    $quote = null;
                }
            } elseif ($char === '>') {
                break;
            } elseif (strpos(self::WHITESPACE, $char) !== false) {
                // Whitespace ends an unquoted value but may still precede a value.
                $inBareValue = false;
            } elseif ($awaitingValue) {
                $awaitingValue = false;
                if ($char === '"' || $char === "'") {
                    $quote = $char;
                } else {
                    $inBareValue = true;
                }
            } elseif ($char === '=' && !$inBareValue) {
                $awaitingValue = true;
            }

            $this->advance();
        }

        // Self-closing when the last byte before '>' (or end of input) is '/'.
        // $position is at least 1 here because '<' has already been consumed.
        $selfClosing = $quote === null && $this->html[$this->position - 1] === '/';

        // Only step over '>' when it is really there, so $position never
        // runs past the end of the input on a truncated tag.
        if ($this->current() === '>') {
            $this->advance();
        }

        if ($selfClosing) {
            return new Token($this->slice($start), TokenType::SELF_CLOSING_TAG);
        }

        // Track if we entered a raw text tag
        if (in_array($tagName, self::RAW_TEXT_TAGS, true)) {
            $this->currentTagName = $tagName;
        }

        return new Token($this->slice($start), TokenType::OPEN_TAG_START);
    }

    /**
     * Consume a closing tag; leaves raw text mode when it closes the open raw text element.
     *
     * @param int $start Byte offset of the tag's '<'
     */
    private function consumeClosingTag(int $start): Token
    {
        // Consume '/'
        $this->advance();

        // Tag name runs up to whitespace or '>'.
        $nameLength = strcspn($this->html, self::WHITESPACE . '>', $this->position);
        $tagName = strtolower(substr($this->html, $this->position, $nameLength));
        $this->position += $nameLength;

        $this->skipPast('>');

        // Reset current tag if we're closing a raw text tag
        if ($this->currentTagName === $tagName) {
            $this->currentTagName = null;
        }

        return new Token($this->slice($start), TokenType::CLOSING_TAG);
    }

    /**
     * Consume a comment up to and including '-->' (or to end of input).
     */
    private function consumeComment(int $start): Token
    {
        $this->skipPast(self::COMMENT_END);

        return new Token($this->slice($start), TokenType::COMMENT);
    }

    /**
     * Consume a doctype declaration up to and including '>' (or to end of input).
     */
    private function consumeDoctype(int $start): Token
    {
        $this->skipPast('>');

        return new Token($this->slice($start), TokenType::DOCTYPE);
    }

    /**
     * Consume content up to, but not including, the next delimiter.
     *
     * Outside raw text elements the delimiter is $char. Inside a raw text
     * element it is the element's closing tag, matched case-insensitively.
     * Without a delimiter the rest of the input is consumed.
     *
     * @param string $char Single-byte delimiter used outside raw text elements
     */
    private function consumeUntil(string $char): string
    {
        $start = $this->position;

        if ($this->currentTagName !== null) {
            $end = stripos($this->html, '</' . $this->currentTagName, $start);
        } elseif ($char === '') {
            // An empty needle would match at $start and stall the lexer.
            $end = false;
        } else {
            $end = strpos($this->html, $char, $start);
        }

        $this->position = $end === false ? $this->length : $end;

        return $this->slice($start);
    }

    /**
     * Whether the cursor sits on the closing tag of the open raw text element.
     */
    private function atRawTextEnd(): bool
    {
        if ($this->currentTagName === null) {
            return false;
        }

        $closingTag = '</' . $this->currentTagName;

        return strcasecmp($this->peek(strlen($closingTag)), $closingTag) === 0;
    }

    /**
     * Move the cursor just past the next occurrence of $terminator, or to the
     * end of the input when it does not occur.
     */
    private function skipPast(string $terminator): void
    {
        $found = strpos($this->html, $terminator, $this->position);

        $this->position = $found === false
            ? $this->length
            : $found + strlen($terminator);
    }

    /**
     * Input bytes from $start up to (not including) the cursor.
     */
    private function slice(int $start): string
    {
        return substr($this->html, $start, $this->position - $start);
    }

    /**
     * Whether the tag source starts with "<x-" followed by a letter or digit (case-insensitive).
     */
    private function isXComponent(string $tagContent): bool
    {
        return preg_match('/^<x-[a-z0-9][a-z0-9-]*/i', $tagContent) === 1;
    }

    /**
     * Byte under the cursor, or '' at end of input.
     */
    private function current(): string
    {
        if ($this->position >= $this->length) {
            return '';
        }

        return $this->html[$this->position];
    }

    /**
     * Next $length bytes from the cursor, or '' when fewer than $length remain.
     */
    private function peek(int $length): string
    {
        if ($this->position + $length > $this->length) {
            return '';
        }

        return substr($this->html, $this->position, $length);
    }

    /**
     * Move the cursor forward by one byte.
     */
    private function advance(): void
    {
        $this->position++;
    }
}
|