Files
michaelschiemer/src/Framework/View/Lexer/HtmlLexer.php
Michael Schiemer fc3d7e6357 feat(Production): Complete production deployment infrastructure
- Add comprehensive health check system with multiple endpoints
- Add Prometheus metrics endpoint
- Add production logging configurations (5 strategies)
- Add complete deployment documentation suite:
  * QUICKSTART.md - 30-minute deployment guide
  * DEPLOYMENT_CHECKLIST.md - Printable verification checklist
  * DEPLOYMENT_WORKFLOW.md - Complete deployment lifecycle
  * PRODUCTION_DEPLOYMENT.md - Comprehensive technical reference
  * production-logging.md - Logging configuration guide
  * ANSIBLE_DEPLOYMENT.md - Infrastructure as Code automation
  * README.md - Navigation hub
  * DEPLOYMENT_SUMMARY.md - Executive summary
- Add deployment scripts and automation
- Add DEPLOYMENT_PLAN.md - Concrete plan for immediate deployment
- Update README with production-ready features

All production infrastructure is now complete and ready for deployment.
2025-10-25 19:18:37 +02:00

317 lines
9.4 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Framework\View\Lexer;
/**
 * Byte-oriented lexer that splits an HTML template into a flat list of tokens.
 *
 * The lexer recognises comments, CDATA sections, doctypes, opening tags,
 * self-closing tags, closing tags and plain content. The content of raw text
 * elements (see RAW_TEXT_TAGS) is emitted as CONTENT and is never interpreted
 * as markup. Concatenating the content of all tokens reproduces the input.
 *
 * All offsets are byte offsets; multi-byte characters pass through untouched
 * because every delimiter the lexer looks for is a single ASCII byte.
 */
final class HtmlLexer
{
    private const COMMENT_START = '!--';
    private const COMMENT_END = '-->';
    private const DOCTYPE_START = '!DOCTYPE';
    private const CDATA_START = '![CDATA[';
    private const CDATA_END = ']]>';
    private const TAG_NAME_PATTERN = '/<([a-z][a-z0-9-]*)/i';

    /** @var array<string> Lowercase names of tags whose content is raw text */
    private const RAW_TEXT_TAGS = ['script', 'style'];

    private string $html;
    private int $position;
    private int $length;

    /** Lowercase name of the raw text tag currently open, or null outside of one. */
    private ?string $currentTagName = null;

    /**
     * @param string $html Markup to tokenize
     */
    public function __construct(string $html)
    {
        $this->html = $html;
        $this->position = 0;
        $this->length = strlen($html);
    }

    /**
     * Tokenize HTML and return array of tokens
     *
     * Every call starts from a clean state, so calling it repeatedly on the
     * same instance always yields the same result.
     *
     * @return array<Token>
     */
    public function tokenize(): array
    {
        $tokens = [];
        $this->position = 0;
        // Raw text state left over from a previous run (e.g. an unclosed
        // <script>) must not leak into this one.
        $this->currentTagName = null;

        while ($this->position < $this->length) {
            // Inside <script>/<style> everything up to the matching end tag
            // is content, even when it starts with '<'.
            $insideRawText = $this->currentTagName !== null && !$this->atRawTextEndTag();

            if (!$insideRawText && $this->current() === '<') {
                $tokens[] = $this->consumeTag();
                continue;
            }

            $content = $this->consumeUntil('<');
            if ($content !== '') {
                $tokens[] = new Token($content, TokenType::CONTENT);
            }
        }

        return $tokens;
    }

    /**
     * Normalize self-closing x-components to explicit closing tags
     *
     * Rewrites every self-closing tag whose name starts with "x-"
     * (e.g. <x-foo attr="v" />) to <x-foo attr="v"></x-foo>. All other
     * tokens are emitted unchanged.
     *
     * @return string The markup with self-closing x-components expanded
     */
    public function normalizeXComponents(): string
    {
        $parts = [];

        foreach ($this->tokenize() as $token) {
            $isSelfClosingComponent = $token->type === TokenType::SELF_CLOSING_TAG
                && $this->isXComponent($token->content);

            $parts[] = $isSelfClosingComponent
                ? $this->expandSelfClosingTag($token->content)
                : $token->content;
        }

        return implode('', $parts);
    }

    /**
     * Turn "<name attrs />" into "<name attrs></name>".
     *
     * Only the terminating "/>" is removed; rtrim() with a character list
     * would also eat slashes that belong to an unquoted attribute value.
     * Returns the tag unchanged when no tag name can be extracted.
     */
    private function expandSelfClosingTag(string $tag): string
    {
        $head = rtrim($tag);
        if (substr($head, -1) === '>') {
            $head = substr($head, 0, -1);
        }
        if (substr($head, -1) === '/') {
            $head = substr($head, 0, -1);
        }
        $head = rtrim($head);

        if (preg_match(self::TAG_NAME_PATTERN, $head, $matches) !== 1) {
            return $tag;
        }

        return $head . '></' . $matches[1] . '>';
    }

    /**
     * Consume one construct starting at '<' and classify it.
     */
    private function consumeTag(): Token
    {
        $start = $this->position;
        $this->advance(); // consume '<'

        if ($this->peek(strlen(self::COMMENT_START)) === self::COMMENT_START) {
            return $this->consumeComment($start);
        }

        if ($this->peek(strlen(self::CDATA_START)) === self::CDATA_START) {
            return $this->consumeCData($start);
        }

        // The DOCTYPE keyword is matched case-insensitively, so "<!DocType"
        // is recognised as well as "<!DOCTYPE" and "<!doctype".
        if (strcasecmp($this->peek(strlen(self::DOCTYPE_START)), self::DOCTYPE_START) === 0) {
            return $this->consumeDoctype($start);
        }

        if ($this->current() === '/') {
            return $this->consumeClosingTag($start);
        }

        return $this->consumeOpeningTag($start);
    }

    /**
     * Consume a CDATA section up to and including ']]>' (or to end of input).
     */
    private function consumeCData(int $start): Token
    {
        $this->consumeThrough(self::CDATA_END);

        return new Token($this->sliceFrom($start), TokenType::CDATA);
    }

    /**
     * Consume an opening or self-closing tag, honouring quoted attribute values.
     *
     * A '>' inside a single- or double-quoted attribute value does not end
     * the tag. An unterminated quote therefore extends the tag to the end of
     * the input.
     */
    private function consumeOpeningTag(int $start): Token
    {
        $tagNameStart = $this->position;

        while ($this->position < $this->length) {
            $char = $this->current();
            if ($char === '>' || $char === '/' || ctype_space($char)) {
                break;
            }
            $this->advance();
        }

        // Lowercased so it can be compared against RAW_TEXT_TAGS
        $tagName = strtolower(substr($this->html, $tagNameStart, $this->position - $tagNameStart));

        // Walk the attributes; $quoteChar holds the quote that is currently open
        $quoteChar = null;
        while ($this->position < $this->length) {
            $char = $this->current();

            if ($quoteChar !== null) {
                if ($char === $quoteChar) {
                    $quoteChar = null;
                }
            } elseif ($char === '"' || $char === "'") {
                $quoteChar = $char;
            } elseif ($char === '>') {
                break;
            }

            $this->advance();
        }

        // A trailing '/' only marks a self-closing tag when it is not part
        // of a still-open quoted value.
        $isSelfClosing = $quoteChar === null
            && $this->position > 0
            && $this->html[$this->position - 1] === '/';

        // Consume '>' only if it is really there, so the position never
        // runs past the end of the input.
        if ($this->current() === '>') {
            $this->advance();
        }

        if ($isSelfClosing) {
            return new Token($this->sliceFrom($start), TokenType::SELF_CLOSING_TAG);
        }

        // Entering <script>/<style>: following content is raw text
        if (in_array($tagName, self::RAW_TEXT_TAGS, true)) {
            $this->currentTagName = $tagName;
        }

        return new Token($this->sliceFrom($start), TokenType::OPEN_TAG_START);
    }

    /**
     * Consume a closing tag and leave raw text mode if it closes the open raw text tag.
     */
    private function consumeClosingTag(int $start): Token
    {
        $this->advance(); // consume '/'

        $tagNameStart = $this->position;
        while ($this->position < $this->length) {
            $char = $this->current();
            if ($char === '>' || ctype_space($char)) {
                break;
            }
            $this->advance();
        }

        $tagName = strtolower(substr($this->html, $tagNameStart, $this->position - $tagNameStart));

        $this->consumeThrough('>');

        if ($this->currentTagName === $tagName) {
            $this->currentTagName = null;
        }

        return new Token($this->sliceFrom($start), TokenType::CLOSING_TAG);
    }

    /**
     * Consume a comment up to and including '-->' (or to end of input).
     */
    private function consumeComment(int $start): Token
    {
        $this->consumeThrough(self::COMMENT_END);

        return new Token($this->sliceFrom($start), TokenType::COMMENT);
    }

    /**
     * Consume a doctype declaration up to and including '>' (or to end of input).
     */
    private function consumeDoctype(int $start): Token
    {
        $this->consumeThrough('>');

        return new Token($this->sliceFrom($start), TokenType::DOCTYPE);
    }

    /**
     * Consume content up to (not including) the next delimiter.
     *
     * Outside raw text mode the delimiter is $char. Inside raw text mode
     * $char is ignored and content runs up to the end tag of the open raw
     * text element, matched case-insensitively. If the delimiter is not
     * found, the rest of the input is consumed.
     */
    private function consumeUntil(string $char): string
    {
        $start = $this->position;

        $delimiterAt = $this->currentTagName !== null
            ? stripos($this->html, '</' . $this->currentTagName, $this->position)
            : strpos($this->html, $char, $this->position);

        $this->position = $delimiterAt === false ? $this->length : $delimiterAt;

        return $this->sliceFrom($start);
    }

    /**
     * Move the position just past the next occurrence of $terminator,
     * or to the end of the input when it does not occur.
     */
    private function consumeThrough(string $terminator): void
    {
        $found = strpos($this->html, $terminator, $this->position);

        $this->position = $found === false
            ? $this->length
            : $found + strlen($terminator);
    }

    /**
     * Whether the input at the current position starts the end tag of the
     * open raw text element (case-insensitive prefix match on "</name").
     */
    private function atRawTextEndTag(): bool
    {
        $closingTag = '</' . $this->currentTagName;

        return strcasecmp($this->peek(strlen($closingTag)), $closingTag) === 0;
    }

    /**
     * Whether the tag text starts with "<x-" followed by a letter or digit (case-insensitive).
     */
    private function isXComponent(string $tagContent): bool
    {
        return preg_match('/^<x-[a-z0-9][a-z0-9-]*/i', $tagContent) === 1;
    }

    /**
     * Input between $start and the current position.
     */
    private function sliceFrom(int $start): string
    {
        return substr($this->html, $start, $this->position - $start);
    }

    /**
     * Byte at the current position, or '' at end of input.
     */
    private function current(): string
    {
        if ($this->position >= $this->length) {
            return '';
        }

        return $this->html[$this->position];
    }

    /**
     * The next $length bytes, or '' when fewer than $length bytes remain.
     */
    private function peek(int $length): string
    {
        if ($this->position + $length > $this->length) {
            return '';
        }

        return substr($this->html, $this->position, $length);
    }

    private function advance(): void
    {
        $this->position++;
    }
}