├── LICENSE ├── README.mdown ├── composer.json └── src └── zz └── Html ├── HTMLMinify.php ├── HTMLNames.php ├── HTMLToken.php ├── HTMLTokenizer.php └── SegmentedString.php /LICENSE: -------------------------------------------------------------------------------- 1 | All codes without notes are distributed under MIT License. 2 | 3 | Copyright (c) zaininnari 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is furnished 10 | to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | 24 | 25 | Specific license. 26 | 27 | The BSD 3-Clause License 28 | - HTMLNames.php 29 | - HTMLToken.php 30 | - HTMLTokenizer.php 31 | 32 | GNU General Public License 33 | - SegmentedString.php 34 | -------------------------------------------------------------------------------- /README.mdown: -------------------------------------------------------------------------------- 1 | html-minifier 2 | ============= 3 | The Blink HTMLTokenizer ported to PHP and minify HTML. 
4 | 5 | [![Build Status](https://travis-ci.org/zaininnari/html-minifier.png?branch=master)](https://travis-ci.org/zaininnari/html-minifier) 6 | [![Coverage Status](https://coveralls.io/repos/zaininnari/html-minifier/badge.png?branch=master)](https://coveralls.io/r/zaininnari/html-minifier?branch=master) 7 | 8 | Requirements 9 | ------------ 10 | 11 | - PHP 5.3 or later 12 | - [optional] PHPUnit 3.5+ to execute the test suite (phpunit --version) 13 | 14 | Use 15 | --- 16 | Create composer.json. 17 | [composer.json] 18 | ```json 19 | { 20 | "require": { 21 | "zaininnari/html-minifier": "*" 22 | } 23 | } 24 | ``` 25 | 26 | Download composer.phar and install. 27 | ```sh 28 | curl -sS https://getcomposer.org/installer | php 29 | php composer.phar install 30 | ``` 31 | 32 | ```php 33 | 39 |

40 | text 41 |

42 | '; 43 | 44 | // shortcut: returns the minified HTML 45 | $minify = HTMLMinify::minify($html); 46 | 47 | // detail 48 | $HTMLMinify = new HTMLMinify($html); 49 | $minify = $HTMLMinify->process(); 50 | ``` 51 | 52 | output html 53 | ```HTML 54 |
55 |

56 | text 57 |

58 |
59 | ``` 60 | 61 | Option 62 | ------ 63 | 64 | ### optimizationLevel 65 | #### OPTIMIZATION_SIMPLE(default) 66 | 67 | Collapse each run of whitespace into a single whitespace character. 68 | A run that contains a line break is reduced to a single newline. 69 | 70 | [input] 71 | ```HTML 72 |
73 |

Example of paragraphs

74 | This is the first paragraph in this example. 75 |

This is the second.

76 | 77 | 78 | HTML 79 |
80 | ``` 81 | 82 | [output] 83 | ```HTML 84 |
85 |

Example of paragraphs

86 | This is the first paragraph in this example. 87 |

This is the second.

88 | 89 | HTML 90 |
91 | ``` 92 | 93 | #### OPTIMIZATION_ADVANCED 94 | 95 | Remove as much whitespace as possible. 96 | 97 | - Remove whitespace 98 | - between a block element and a block element 99 | - between a block element and an inline element 100 | - trim the contents of style, script and downlevel-revealed conditional comments 101 | - **[future]** consider comments 102 | - Preserve whitespace 103 | - between an inline element and an inline element 104 | 105 | [input] 106 | ```HTML 107 |
108 |

Example of paragraphs

109 | This is the first paragraph in this example. 110 |

This is the second.

111 | 112 | 113 | HTML 114 |
115 | ``` 116 | 117 | [output] 118 | ```HTML 119 |

Example of paragraphs

This is thefirstparagraph in this example.

This is the second.

HTML
120 | ``` 121 | 122 | Author 123 | ------ 124 | 125 | zaininnari 126 | http://www.zay.jp/ 127 | 128 | Original source 129 | 130 | http://www.chromium.org/blink 131 | 132 | License 133 | ------- 134 | 135 | Licensed under the MIT License and other License - see the `LICENSE` file for details 136 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zaininnari/html-minifier", 3 | "description": "The Blink HTMLTokenizer ported to PHP.", 4 | "type": "library", 5 | "keywords": ["PHP", "Blink", "HTML minify"], 6 | "homepage": "https://github.com/zaininnari/html-minifier", 7 | "license": "The BSD 3-Clause License", 8 | "authors": [ 9 | { 10 | "name": "Google inc", 11 | "homepage": "http://www.chromium.org/blink", 12 | "role": "Author" 13 | }, 14 | { 15 | "name": "zaininnari", 16 | "homepage": "https://github.com/zaininnari/", 17 | "role": "Developer" 18 | } 19 | ], 20 | "require": { 21 | "php": ">=5.3.0" 22 | }, 23 | "require-dev": { 24 | "satooshi/php-coveralls": "dev-master" 25 | }, 26 | "minimum-stability": "dev", 27 | "autoload": { 28 | "psr-0": { 29 | "zz": "src/" 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/zz/Html/HTMLMinify.php: -------------------------------------------------------------------------------- 1 | 'inline', 33 | 'abbr' => 'inline', 34 | 'acronym' => 'inline', 35 | 'address' => 'block', 36 | 'applet' => 'inline', 37 | 'area' => 'none', 38 | 'article' => 'block', 39 | 'aside' => 'block', 40 | 'audio' => 'inline', 41 | 'b' => 'inline', 42 | 'base' => 'inline', 43 | 'basefont' => 'inline', 44 | 'bdo' => 'inline', 45 | 'bgsound' => 'inline', 46 | 'big' => 'inline', 47 | 'blockquote' => 'block', 48 | 'body' => 'block', 49 | 'br' => 'inline', 50 | 'button' => 'inline-block', 51 | 'canvas' => 'inline', 52 | 'caption' => 'table-caption', 53 | 
'center' => 'block', 54 | 'cite' => 'inline', 55 | 'code' => 'inline', 56 | 'col' => 'table-column', 57 | 'colgroup' => 'table-column-group', 58 | 'command' => 'inline', 59 | 'datalist' => 'none', 60 | 'dd' => 'block', 61 | 'del' => 'inline', 62 | 'details' => 'block', 63 | 'dfn' => 'inline', 64 | 'dir' => 'block', 65 | 'div' => 'block', 66 | 'dl' => 'block', 67 | 'dt' => 'block', 68 | 'em' => 'inline', 69 | 'embed' => 'inline', 70 | 'fieldset' => 'block', 71 | 'figcaption' => 'block', 72 | 'figure' => 'block', 73 | 'font' => 'inline', 74 | 'footer' => 'block', 75 | 'form' => 'block', 76 | 'frame' => 'block', 77 | 'frameset' => 'block', 78 | 'h1' => 'block', 79 | 'h2' => 'block', 80 | 'h3' => 'block', 81 | 'h4' => 'block', 82 | 'h5' => 'block', 83 | 'h6' => 'block', 84 | 'head' => 'none', 85 | 'header' => 'block', 86 | 'hgroup' => 'block', 87 | 'hr' => 'block', 88 | 'html' => 'block', 89 | 'i' => 'inline', 90 | 'iframe' => 'inline', 91 | 'image' => 'inline', 92 | 'img' => 'inline', 93 | 'input' => 'inline-block', 94 | 'ins' => 'inline', 95 | 'isindex' => 'inline-block', 96 | 'kbd' => 'inline', 97 | 'keygen' => 'inline-block', 98 | 'label' => 'inline', 99 | 'layer' => 'block', 100 | 'legend' => 'block', 101 | 'li' => 'list-item', 102 | 'link' => 'none', 103 | 'listing' => 'block', 104 | 'map' => 'inline', 105 | 'mark' => 'inline', 106 | 'marquee' => 'inline-block', 107 | 'menu' => 'block', 108 | 'meta' => 'none', 109 | 'meter' => 'inline-block', 110 | 'nav' => 'block', 111 | 'nobr' => 'inline', 112 | 'noembed' => 'inline', 113 | 'noframes' => 'none', 114 | 'nolayer' => 'inline', 115 | 'noscript' => 'inline', 116 | 'object' => 'inline', 117 | 'ol' => 'block', 118 | 'optgroup' => 'inline', 119 | 'option' => 'inline', 120 | 'output' => 'inline', 121 | 'p' => 'block', 122 | 'param' => 'none', 123 | 'plaintext' => 'block', 124 | 'pre' => 'block', 125 | 'progress' => 'inline-block', 126 | 'q' => 'inline', 127 | 'rp' => 'inline', 128 | 'rt' => 'inline', 129 | 'ruby' => 
'inline', 130 | 's' => 'inline', 131 | 'samp' => 'inline', 132 | 'script' => 'none', 133 | 'section' => 'block', 134 | 'select' => 'inline-block', 135 | 'small' => 'inline', 136 | 'source' => 'inline', 137 | 'span' => 'inline', 138 | 'strike' => 'inline', 139 | 'strong' => 'inline', 140 | 'style' => 'none', 141 | 'sub' => 'inline', 142 | 'summary' => 'block', 143 | 'sup' => 'inline', 144 | 'table' => 'table', 145 | 'tbody' => 'table-row-group', 146 | 'td' => 'table-cell', 147 | 'textarea' => 'inline-block', 148 | 'tfoot' => 'table-footer-group', 149 | 'th' => 'table-cell', 150 | 'thead' => 'table-header-group', 151 | 'title' => 'none', 152 | 'tr' => 'table-row', 153 | 'track' => 'inline', 154 | 'tt' => 'inline', 155 | 'u' => 'inline', 156 | 'ul' => 'inline-block', 157 | 'var' => 'inline', 158 | 'video' => 'inline', 159 | 'wbr' => 'inline', 160 | 'xmp' => 'block', 161 | ); 162 | 163 | protected $emptyTag = array( 164 | 'area' => 'area', 165 | 'base' => 'base', 166 | 'basefont' => 'basefont', 167 | 'br' => 'br', 168 | 'col' => 'col', 169 | 'embed' => 'embed', 170 | 'frame' => 'frame', 171 | 'hr' => 'hr', 172 | 'img' => 'img', 173 | 'input' => 'input', 174 | 'isindex' => 'isindex', 175 | 'link' => 'link', 176 | 'meta' => 'meta', 177 | 'param' => 'param', 178 | ); 179 | 180 | /** 181 | * @param string $html 182 | * @param array $options 183 | */ 184 | public function __construct($html, $options = array()) { 185 | $html = ltrim($html); 186 | $this->html = $html; 187 | $this->options = $this->options($options); 188 | 189 | $SegmentedString = new SegmentedString($html); 190 | $HTMLTokenizer = new HTMLTokenizer($SegmentedString, $options); 191 | $this->tokens = $HTMLTokenizer->tokenizer(); 192 | } 193 | 194 | /** 195 | * 'optimizationLevel' 196 | * OPTIMIZATION_SIMPLE(default) 197 | * : replace many whitespace to a single whitespace 198 | * this option leave a new line of one 199 | * OPTIMIZATION_ADVANCED 200 | * : remove the white space of all as much as possible 201 | * 
202 | * 'emptyElementAddSlash' 203 | * HTML4.01 no slash :
204 | * XHTML1.0 add slash :
205 | * HTML5 mixed OK :
206 | * 207 | * example : 208 | * true(default) : 209 | * false : 210 | * 211 | * 'emptyElementAddWhitespaceBeforeSlash' 212 | * HTML4.01 no slash :
213 | * XHTML1.0 add slash :
214 | * HTML5 mixed OK :
215 | * 216 | * example : 217 | * true(default) : 218 | * false : 219 | * 220 | * 'removeComment' 221 | * example : HTML 222 | * true(default) => HTML 223 | * false => do nothing 224 | * 225 | * 'excludeComment' 226 | * example : content 227 | * array('//')(default) => content 228 | * array('//') => content 229 | * 230 | * 'removeDuplicateAttribute' 231 | * example : 232 | * true(default) => 233 | * false => do nothing 234 | * 235 | * @param array $options 236 | * @return array 237 | */ 238 | protected function options(Array $options) { 239 | $_options = array( 240 | 'doctype' => static::DOCTYPE_XHTML1, 241 | 'optimizationLevel' => static::OPTIMIZATION_SIMPLE, 242 | 'emptyElementAddSlash' => false, 243 | 'emptyElementAddWhitespaceBeforeSlash' => false, 244 | 'removeComment' => true, 245 | 'excludeComment' => array(), 246 | 'removeDuplicateAttribute' => true, 247 | ); 248 | $documentTypeOptions = array( 249 | static::DOCTYPE_HTML4 => array( 250 | 'doctype' => static::DOCTYPE_HTML4, 251 | 'emptyElementAddSlash' => false, 252 | 'emptyElementAddWhitespaceBeforeSlash' => false, 253 | ), 254 | static::DOCTYPE_XHTML1 => array( 255 | 'doctype' => static::DOCTYPE_XHTML1, 256 | 'emptyElementAddSlash' => true, 257 | 'emptyElementAddWhitespaceBeforeSlash' => true, 258 | ), 259 | static::DOCTYPE_HTML5 => array( 260 | 'doctype' => static::DOCTYPE_HTML5, 261 | 'emptyElementAddSlash' => false, 262 | 'emptyElementAddWhitespaceBeforeSlash' => false, 263 | ), 264 | ); 265 | 266 | $documentTypeOption = $documentTypeOptions[static::DOCTYPE_XHTML1]; 267 | if (isset($options['doctype'])) { 268 | $doctype = $options['doctype']; 269 | if (isset($documentTypeOptions[$doctype])) { 270 | $documentTypeOption = $documentTypeOptions[$doctype]; 271 | } 272 | } 273 | 274 | return $options + $documentTypeOption + $_options; 275 | } 276 | 277 | /** 278 | * @param $html 279 | * @param array $options 280 | * @return string 281 | */ 282 | public static function minify($html, $options = array()) { 283 | 
$instance = new self($html, $options); 284 | return $instance->process(); 285 | } 286 | 287 | /** 288 | * @return HtmlToken[] 289 | */ 290 | public function getTokens() { 291 | return $this->tokens; 292 | } 293 | 294 | /** 295 | * @return string 296 | */ 297 | public function process() { 298 | $this->beforeFilter(); 299 | $html = $this->_buildHtml($this->tokens); 300 | return $html; 301 | } 302 | 303 | /** 304 | * @param array $tokens 305 | * @return string 306 | */ 307 | protected function _buildHtml(Array $tokens) { 308 | $html = ''; 309 | foreach ($tokens as $token) { 310 | $html .= $this->_buildElement($token); 311 | } 312 | return $html; 313 | } 314 | 315 | protected function _buildElement(HTMLToken $token) { 316 | switch ($token->getType()) { 317 | case HTMLToken::DOCTYPE: 318 | $html = $token->getHtmlOrigin(); 319 | break; 320 | case HTMLToken::StartTag: 321 | $tagName = $token->getTagName(); 322 | $selfClosing = ''; 323 | if (isset($this->emptyTag[$tagName]) && $this->options['emptyElementAddSlash']) { 324 | $selfClosing = '/'; 325 | $selfClosing = ($this->options['emptyElementAddWhitespaceBeforeSlash'] ? ' ' : '') . 
$selfClosing; 326 | } 327 | 328 | $attributes = $this->_buildAttributes($token); 329 | $beforeAttributeSpace = ''; 330 | if ($attributes) { 331 | $beforeAttributeSpace = ' '; 332 | } 333 | $html = sprintf('<%s%s%s%s>', $token->getTagName(), $beforeAttributeSpace, $attributes, $selfClosing); 334 | break; 335 | case HTMLToken::EndTag: 336 | $html = sprintf('', $token->getTagName()); 337 | break; 338 | default : 339 | $html = $token->getData(); 340 | break; 341 | } 342 | return $html; 343 | } 344 | 345 | /** 346 | * @param HTMLToken $token 347 | * @return string 348 | */ 349 | protected function _buildAttributes(HTMLToken $token) { 350 | $attr = array(); 351 | $format = '%s=%s%s%s'; 352 | foreach ($token->getAttributes() as $attribute) { 353 | $name = $attribute['name']; 354 | $value = $attribute['value']; 355 | switch ($attribute['quoted']) { 356 | case HTMLToken::DoubleQuoted: 357 | $quoted = '"'; 358 | break; 359 | case HTMLToken::SingleQuoted: 360 | $quoted = '\''; 361 | break; 362 | default: 363 | $quoted = ''; 364 | break; 365 | } 366 | if ($quoted === '' && $value === '') { 367 | $attr[] = $name; 368 | } else { 369 | $attr[] = sprintf($format, $name, $quoted, $value, $quoted); 370 | } 371 | } 372 | return join(' ', $attr); 373 | } 374 | 375 | protected function beforeFilter() { 376 | if ($this->options['removeComment']) { 377 | $this->removeWhitespaceFromComment(); 378 | } 379 | 380 | $this->removeWhitespaceFromCharacter(); 381 | 382 | if ($this->options['removeDuplicateAttribute']) { 383 | $this->optimizeStartTagAttributes(); 384 | } 385 | } 386 | 387 | protected function removeWhitespaceFromComment() { 388 | $tokens = $this->tokens; 389 | $regexps = $this->options['excludeComment']; 390 | $HTMLTokenStartTag = HTMLToken::StartTag; 391 | $HTMLTokenComment = HTMLToken::Comment; 392 | $HTMLTokenCharacter = HTMLToken::Character; 393 | $HTMLNamesScriptTag = HTMLNames::scriptTag; 394 | $HTMLNamesStyleTag = HTMLNames::styleTag; 395 | $removes = array(); 396 | 
$combineIndex = null; 397 | 398 | $len = count($tokens); 399 | for ($i = 0; $i < $len; $i++) { 400 | $token = $tokens[$i]; 401 | $type = $token->getType(); 402 | if ($type === $HTMLTokenStartTag) { 403 | $combineIndex = null; 404 | $tagName = $token->getTagName(); 405 | if ($tagName === $HTMLNamesScriptTag || $tagName === $HTMLNamesStyleTag) { 406 | $i++; 407 | } 408 | continue; 409 | } else if ($type === $HTMLTokenCharacter) { 410 | if ($combineIndex > 0) { 411 | $tokens[$combineIndex]->setData($tokens[$combineIndex] . $token); 412 | $removes[] = $i; 413 | } 414 | continue; 415 | } else if ($type !== $HTMLTokenComment) { 416 | $combineIndex = null; 417 | continue; 418 | } 419 | 420 | $comment = $token->getData(); 421 | if ($this->_isConditionalComment($comment)) { 422 | $combineIndex = null; 423 | continue; 424 | } 425 | if ($regexps) { 426 | foreach ($regexps as $regexp) { 427 | if (preg_match($regexp, $comment)) { 428 | $combineIndex = null; 429 | continue 2; 430 | } 431 | } 432 | } 433 | $combineIndex = $i - 1; 434 | $removes[] = $i; 435 | } 436 | 437 | foreach ($removes as $remove) { 438 | unset($tokens[$remove]); 439 | } 440 | 441 | if ($len !== count($tokens)) { 442 | $tokens = array_merge($tokens,array()); 443 | } 444 | $this->tokens = $tokens; 445 | return true; 446 | } 447 | 448 | protected function isInlineTag($tag) { 449 | $tags = $this->tagDisplay; 450 | if (!isset($tags[$tag])) { 451 | return true; 452 | } 453 | return $tags[$tag] === 'inline'; 454 | } 455 | 456 | protected function removeWhitespaceFromCharacter() { 457 | $tokens = $this->tokens; 458 | $isEditable = true; 459 | $isBeforeInline = false; 460 | $uneditableTag = null; 461 | $type = null; 462 | $token = null; 463 | $isOptimize = $this->options['optimizationLevel'] === static::OPTIMIZATION_ADVANCED; 464 | 465 | for ($i = 0, $len = count($tokens); $i < $len; $i++) { 466 | /** 467 | * @var HTMLToken $tokenBefore 468 | */ 469 | $tokenBefore = $token; 470 | $token = $tokens[$i]; 471 | $type = 
$token->getType(); 472 | if ($type === HTMLToken::StartTag) { 473 | $tagName = $token->getName(); 474 | $isBeforeInline = $this->isInlineTag($tagName); 475 | switch ($tagName) { 476 | case HTMLNames::scriptTag: 477 | case HTMLNames::styleTag: 478 | case HTMLNames::textareaTag: 479 | case HTMLNames::preTag: 480 | $isEditable = false; 481 | $uneditableTag = $tagName; 482 | continue 2; 483 | break; 484 | default: 485 | break; 486 | } 487 | } else if ($type === HTMLToken::EndTag) { 488 | $tagName = $token->getName(); 489 | $isBeforeInline = $this->isInlineTag($tagName); 490 | if (!$isEditable && $tagName === $uneditableTag) { 491 | $uneditableTag = null; 492 | $isEditable = true; 493 | continue; 494 | } 495 | } 496 | if ($type !== HTMLToken::Character) { 497 | continue; 498 | } 499 | 500 | $characters = $token->getData(); 501 | 502 | if ($isEditable) { 503 | if ($isOptimize && $i < ($len - 1)) { 504 | $afterToken = $tokens[$i + 1]; 505 | $afterType = $afterToken->getType(); 506 | if (!$tokenBefore) { 507 | $tokenBefore = new HTMLToken(); 508 | } 509 | $typeBefore = $tokenBefore->getType(); 510 | $isTagBefore = $typeBefore === HTMLToken::StartTag || $typeBefore === HTMLToken::EndTag; 511 | $isAfterTag = $afterType === HTMLToken::StartTag || $afterType === HTMLToken::EndTag; 512 | $isAfterInline = $isAfterTag ? 
$this->isInlineTag($afterToken->getTagName()) : false; 513 | 514 | if (($i === 0 || $isTagBefore) && $isAfterTag && (!$isBeforeInline || !$isAfterInline)) { 515 | $characters = trim($characters); 516 | } else if (($i === 0 || !$isBeforeInline) && !$isAfterInline) { 517 | $characters = trim($characters); 518 | } 519 | } 520 | $characters = $this->_removeWhitespaceFromCharacter($characters); 521 | if ($i === ($len - 1)) { 522 | $characters = rtrim($characters); 523 | } 524 | } else if ($isOptimize && ($uneditableTag === HTMLNames::scriptTag || $uneditableTag === HTMLNames::styleTag)) { 525 | $characters = trim($characters); 526 | } 527 | $tokens[$i]->setData($characters); 528 | } 529 | $this->tokens = $tokens; 530 | } 531 | 532 | /** 533 | * @param string $characters 534 | * @return string 535 | */ 536 | protected function _removeWhitespaceFromCharacter($characters) { 537 | $compactCharacters = ''; 538 | $hasWhiteSpace = false; 539 | 540 | for ($i = 0, $len = strlen($characters); $i < $len; $i++) { 541 | $char = $characters[$i]; 542 | if ($char === "\x0A") { 543 | // remove before whitespace char 544 | if ($hasWhiteSpace) { 545 | $compactCharacters = substr($compactCharacters, 0, -1); 546 | } 547 | $compactCharacters .= $char; 548 | $hasWhiteSpace = true; 549 | } else if ($char === ' ' || $char === "\x09" || $char === "\x0C") { 550 | if (!$hasWhiteSpace) { 551 | $compactCharacters .= ' '; 552 | $hasWhiteSpace = true; 553 | } 554 | } else { 555 | $hasWhiteSpace = false; 556 | $compactCharacters .= $char; 557 | } 558 | } 559 | 560 | return $compactCharacters; 561 | } 562 | 563 | protected function optimizeStartTagAttributes() { 564 | $tokens = $this->tokens; 565 | for ($i = 0, $len = count($tokens); $i < $len; $i++) { 566 | $token = $tokens[$i]; 567 | if ($token->getType() !== HTMLToken::StartTag) { 568 | continue; 569 | } 570 | 571 | $attributes_old = $token->getAttributes(); 572 | $attributes_new =array(); 573 | $attributes_name = array(); 574 | 575 | foreach 
($attributes_old as $attribute) { 576 | if (!isset($attributes_name[$attribute['name']])) { 577 | $attributes_name[$attribute['name']] = true; 578 | $attributes_new[] = $attribute; 579 | } 580 | } 581 | if ($attributes_old !== $attributes_new) { 582 | $token->setAttributes($attributes_new); 583 | } 584 | } 585 | $this->tokens = $tokens; 586 | } 587 | 588 | /** 589 | * downlevel-hidden : 590 | * downlevel-revealed : HTML 591 | * @param string $comment 592 | * @return bool 593 | */ 594 | protected function _isConditionalComment($comment) { 595 | $pattern = '/\A/s'; 596 | if (preg_match($pattern, $comment)) { 597 | return true; 598 | } 599 | $pattern = '/\Z/s'; 600 | if (preg_match($pattern, $comment)) { 601 | return true; 602 | } 603 | return false; 604 | } 605 | 606 | } -------------------------------------------------------------------------------- /src/zz/Html/HTMLNames.php: -------------------------------------------------------------------------------- 1 | static::aTag, 469 | static::abbrTag => static::abbrTag, 470 | static::acronymTag => static::acronymTag, 471 | static::addressTag => static::addressTag, 472 | static::appletTag => static::appletTag, 473 | static::areaTag => static::areaTag, 474 | static::articleTag => static::articleTag, 475 | static::asideTag => static::asideTag, 476 | static::audioTag => static::audioTag, 477 | static::bTag => static::bTag, 478 | static::baseTag => static::baseTag, 479 | static::basefontTag => static::basefontTag, 480 | static::bdoTag => static::bdoTag, 481 | static::bgsoundTag => static::bgsoundTag, 482 | static::bigTag => static::bigTag, 483 | static::blockquoteTag => static::blockquoteTag, 484 | static::bodyTag => static::bodyTag, 485 | static::brTag => static::brTag, 486 | static::buttonTag => static::buttonTag, 487 | static::canvasTag => static::canvasTag, 488 | static::captionTag => static::captionTag, 489 | static::centerTag => static::centerTag, 490 | static::citeTag => static::citeTag, 491 | static::codeTag => 
static::codeTag, 492 | static::colTag => static::colTag, 493 | static::colgroupTag => static::colgroupTag, 494 | static::commandTag => static::commandTag, 495 | static::datalistTag => static::datalistTag, 496 | static::ddTag => static::ddTag, 497 | static::delTag => static::delTag, 498 | static::detailsTag => static::detailsTag, 499 | static::dfnTag => static::dfnTag, 500 | static::dirTag => static::dirTag, 501 | static::divTag => static::divTag, 502 | static::dlTag => static::dlTag, 503 | static::dtTag => static::dtTag, 504 | static::emTag => static::emTag, 505 | static::embedTag => static::embedTag, 506 | static::fieldsetTag => static::fieldsetTag, 507 | static::figcaptionTag => static::figcaptionTag, 508 | static::figureTag => static::figureTag, 509 | static::fontTag => static::fontTag, 510 | static::footerTag => static::footerTag, 511 | static::formTag => static::formTag, 512 | static::frameTag => static::frameTag, 513 | static::framesetTag => static::framesetTag, 514 | static::h1Tag => static::h1Tag, 515 | static::h2Tag => static::h2Tag, 516 | static::h3Tag => static::h3Tag, 517 | static::h4Tag => static::h4Tag, 518 | static::h5Tag => static::h5Tag, 519 | static::h6Tag => static::h6Tag, 520 | static::headTag => static::headTag, 521 | static::headerTag => static::headerTag, 522 | static::hgroupTag => static::hgroupTag, 523 | static::hrTag => static::hrTag, 524 | static::htmlTag => static::htmlTag, 525 | static::iTag => static::iTag, 526 | static::iframeTag => static::iframeTag, 527 | static::imageTag => static::imageTag, 528 | static::imgTag => static::imgTag, 529 | static::inputTag => static::inputTag, 530 | static::insTag => static::insTag, 531 | static::isindexTag => static::isindexTag, 532 | static::kbdTag => static::kbdTag, 533 | static::keygenTag => static::keygenTag, 534 | static::labelTag => static::labelTag, 535 | static::layerTag => static::layerTag, 536 | static::legendTag => static::legendTag, 537 | static::liTag => static::liTag, 538 | 
static::linkTag => static::linkTag, 539 | static::listingTag => static::listingTag, 540 | static::mapTag => static::mapTag, 541 | static::markTag => static::markTag, 542 | static::marqueeTag => static::marqueeTag, 543 | static::menuTag => static::menuTag, 544 | static::metaTag => static::metaTag, 545 | static::meterTag => static::meterTag, 546 | static::navTag => static::navTag, 547 | static::nobrTag => static::nobrTag, 548 | static::noembedTag => static::noembedTag, 549 | static::noframesTag => static::noframesTag, 550 | static::nolayerTag => static::nolayerTag, 551 | static::noscriptTag => static::noscriptTag, 552 | static::objectTag => static::objectTag, 553 | static::olTag => static::olTag, 554 | static::optgroupTag => static::optgroupTag, 555 | static::optionTag => static::optionTag, 556 | static::outputTag => static::outputTag, 557 | static::pTag => static::pTag, 558 | static::paramTag => static::paramTag, 559 | static::plaintextTag => static::plaintextTag, 560 | static::preTag => static::preTag, 561 | static::progressTag => static::progressTag, 562 | static::qTag => static::qTag, 563 | static::rpTag => static::rpTag, 564 | static::rtTag => static::rtTag, 565 | static::rubyTag => static::rubyTag, 566 | static::sTag => static::sTag, 567 | static::sampTag => static::sampTag, 568 | static::scriptTag => static::scriptTag, 569 | static::sectionTag => static::sectionTag, 570 | static::selectTag => static::selectTag, 571 | static::smallTag => static::smallTag, 572 | static::sourceTag => static::sourceTag, 573 | static::spanTag => static::spanTag, 574 | static::strikeTag => static::strikeTag, 575 | static::strongTag => static::strongTag, 576 | static::styleTag => static::styleTag, 577 | static::subTag => static::subTag, 578 | static::summaryTag => static::summaryTag, 579 | static::supTag => static::supTag, 580 | static::tableTag => static::tableTag, 581 | static::tbodyTag => static::tbodyTag, 582 | static::tdTag => static::tdTag, 583 | static::textareaTag => 
static::textareaTag, 584 | static::tfootTag => static::tfootTag, 585 | static::thTag => static::thTag, 586 | static::theadTag => static::theadTag, 587 | static::titleTag => static::titleTag, 588 | static::trTag => static::trTag, 589 | static::trackTag => static::trackTag, 590 | static::ttTag => static::ttTag, 591 | static::uTag => static::uTag, 592 | static::ulTag => static::ulTag, 593 | static::varTag => static::varTag, 594 | static::videoTag => static::videoTag, 595 | static::wbrTag => static::wbrTag, 596 | static::xmpTag => static::xmpTag, 597 | ); 598 | } 599 | 600 | public static function getHTMLAttrs() { 601 | return array( 602 | static::abbrAttr => static::abbrAttr, 603 | static::acceptAttr => static::acceptAttr, 604 | static::accept_charsetAttr => static::accept_charsetAttr, 605 | static::accesskeyAttr => static::accesskeyAttr, 606 | static::actionAttr => static::actionAttr, 607 | static::alignAttr => static::alignAttr, 608 | static::alinkAttr => static::alinkAttr, 609 | static::altAttr => static::altAttr, 610 | static::archiveAttr => static::archiveAttr, 611 | static::aria_activedescendantAttr => static::aria_activedescendantAttr, 612 | static::aria_atomicAttr => static::aria_atomicAttr, 613 | static::aria_busyAttr => static::aria_busyAttr, 614 | static::aria_checkedAttr => static::aria_checkedAttr, 615 | static::aria_controlsAttr => static::aria_controlsAttr, 616 | static::aria_describedbyAttr => static::aria_describedbyAttr, 617 | static::aria_disabledAttr => static::aria_disabledAttr, 618 | static::aria_dropeffectAttr => static::aria_dropeffectAttr, 619 | static::aria_expandedAttr => static::aria_expandedAttr, 620 | static::aria_flowtoAttr => static::aria_flowtoAttr, 621 | static::aria_grabbedAttr => static::aria_grabbedAttr, 622 | static::aria_haspopupAttr => static::aria_haspopupAttr, 623 | static::aria_helpAttr => static::aria_helpAttr, 624 | static::aria_hiddenAttr => static::aria_hiddenAttr, 625 | static::aria_invalidAttr => 
static::aria_invalidAttr, 626 | static::aria_labelAttr => static::aria_labelAttr, 627 | static::aria_labeledbyAttr => static::aria_labeledbyAttr, 628 | static::aria_labelledbyAttr => static::aria_labelledbyAttr, 629 | static::aria_levelAttr => static::aria_levelAttr, 630 | static::aria_liveAttr => static::aria_liveAttr, 631 | static::aria_multilineAttr => static::aria_multilineAttr, 632 | static::aria_multiselectableAttr => static::aria_multiselectableAttr, 633 | static::aria_orientationAttr => static::aria_orientationAttr, 634 | static::aria_ownsAttr => static::aria_ownsAttr, 635 | static::aria_pressedAttr => static::aria_pressedAttr, 636 | static::aria_readonlyAttr => static::aria_readonlyAttr, 637 | static::aria_relevantAttr => static::aria_relevantAttr, 638 | static::aria_requiredAttr => static::aria_requiredAttr, 639 | static::aria_selectedAttr => static::aria_selectedAttr, 640 | static::aria_sortAttr => static::aria_sortAttr, 641 | static::aria_valuemaxAttr => static::aria_valuemaxAttr, 642 | static::aria_valueminAttr => static::aria_valueminAttr, 643 | static::aria_valuenowAttr => static::aria_valuenowAttr, 644 | static::aria_valuetextAttr => static::aria_valuetextAttr, 645 | static::asyncAttr => static::asyncAttr, 646 | static::autocompleteAttr => static::autocompleteAttr, 647 | static::autofocusAttr => static::autofocusAttr, 648 | static::autoplayAttr => static::autoplayAttr, 649 | static::autosaveAttr => static::autosaveAttr, 650 | static::axisAttr => static::axisAttr, 651 | static::backgroundAttr => static::backgroundAttr, 652 | static::behaviorAttr => static::behaviorAttr, 653 | static::bgcolorAttr => static::bgcolorAttr, 654 | static::bgpropertiesAttr => static::bgpropertiesAttr, 655 | static::borderAttr => static::borderAttr, 656 | static::bordercolorAttr => static::bordercolorAttr, 657 | static::cellborderAttr => static::cellborderAttr, 658 | static::cellpaddingAttr => static::cellpaddingAttr, 659 | static::cellspacingAttr => static::cellspacingAttr, 
660 | static::challengeAttr => static::challengeAttr, 661 | static::charAttr => static::charAttr, 662 | static::charoffAttr => static::charoffAttr, 663 | static::charsetAttr => static::charsetAttr, 664 | static::checkedAttr => static::checkedAttr, 665 | static::citeAttr => static::citeAttr, 666 | static::classAttr => static::classAttr, 667 | static::classidAttr => static::classidAttr, 668 | static::clearAttr => static::clearAttr, 669 | static::codeAttr => static::codeAttr, 670 | static::codebaseAttr => static::codebaseAttr, 671 | static::codetypeAttr => static::codetypeAttr, 672 | static::colorAttr => static::colorAttr, 673 | static::colsAttr => static::colsAttr, 674 | static::colspanAttr => static::colspanAttr, 675 | static::compactAttr => static::compactAttr, 676 | static::compositeAttr => static::compositeAttr, 677 | static::contentAttr => static::contentAttr, 678 | static::contenteditableAttr => static::contenteditableAttr, 679 | static::controlsAttr => static::controlsAttr, 680 | static::coordsAttr => static::coordsAttr, 681 | static::dataAttr => static::dataAttr, 682 | static::datetimeAttr => static::datetimeAttr, 683 | static::declareAttr => static::declareAttr, 684 | static::defaultAttr => static::defaultAttr, 685 | static::deferAttr => static::deferAttr, 686 | static::dirAttr => static::dirAttr, 687 | static::directionAttr => static::directionAttr, 688 | static::disabledAttr => static::disabledAttr, 689 | static::draggableAttr => static::draggableAttr, 690 | static::enctypeAttr => static::enctypeAttr, 691 | static::endAttr => static::endAttr, 692 | static::eventAttr => static::eventAttr, 693 | static::expandedAttr => static::expandedAttr, 694 | static::faceAttr => static::faceAttr, 695 | static::focusedAttr => static::focusedAttr, 696 | static::forAttr => static::forAttr, 697 | static::formAttr => static::formAttr, 698 | static::formactionAttr => static::formactionAttr, 699 | static::formenctypeAttr => static::formenctypeAttr, 700 | static::formmethodAttr 
=> static::formmethodAttr, 701 | static::formnovalidateAttr => static::formnovalidateAttr, 702 | static::formtargetAttr => static::formtargetAttr, 703 | static::frameAttr => static::frameAttr, 704 | static::frameborderAttr => static::frameborderAttr, 705 | static::headersAttr => static::headersAttr, 706 | static::heightAttr => static::heightAttr, 707 | static::hiddenAttr => static::hiddenAttr, 708 | static::highAttr => static::highAttr, 709 | static::hrefAttr => static::hrefAttr, 710 | static::hreflangAttr => static::hreflangAttr, 711 | static::hspaceAttr => static::hspaceAttr, 712 | static::http_equivAttr => static::http_equivAttr, 713 | static::idAttr => static::idAttr, 714 | static::incrementalAttr => static::incrementalAttr, 715 | static::indeterminateAttr => static::indeterminateAttr, 716 | static::ismapAttr => static::ismapAttr, 717 | static::keytypeAttr => static::keytypeAttr, 718 | static::kindAttr => static::kindAttr, 719 | static::labelAttr => static::labelAttr, 720 | static::langAttr => static::langAttr, 721 | static::languageAttr => static::languageAttr, 722 | static::leftmarginAttr => static::leftmarginAttr, 723 | static::linkAttr => static::linkAttr, 724 | static::listAttr => static::listAttr, 725 | static::longdescAttr => static::longdescAttr, 726 | static::loopAttr => static::loopAttr, 727 | static::loopendAttr => static::loopendAttr, 728 | static::loopstartAttr => static::loopstartAttr, 729 | static::lowAttr => static::lowAttr, 730 | static::lowsrcAttr => static::lowsrcAttr, 731 | static::manifestAttr => static::manifestAttr, 732 | static::marginheightAttr => static::marginheightAttr, 733 | static::marginwidthAttr => static::marginwidthAttr, 734 | static::maxAttr => static::maxAttr, 735 | static::maxlengthAttr => static::maxlengthAttr, 736 | static::mayscriptAttr => static::mayscriptAttr, 737 | static::mediaAttr => static::mediaAttr, 738 | static::methodAttr => static::methodAttr, 739 | static::minAttr => static::minAttr, 740 | static::multipleAttr 
=> static::multipleAttr, 741 | static::nameAttr => static::nameAttr, 742 | static::nohrefAttr => static::nohrefAttr, 743 | static::noresizeAttr => static::noresizeAttr, 744 | static::noshadeAttr => static::noshadeAttr, 745 | static::novalidateAttr => static::novalidateAttr, 746 | static::nowrapAttr => static::nowrapAttr, 747 | static::objectAttr => static::objectAttr, 748 | static::onabortAttr => static::onabortAttr, 749 | static::onbeforecopyAttr => static::onbeforecopyAttr, 750 | static::onbeforecutAttr => static::onbeforecutAttr, 751 | static::onbeforeloadAttr => static::onbeforeloadAttr, 752 | static::onbeforepasteAttr => static::onbeforepasteAttr, 753 | static::onbeforeprocessAttr => static::onbeforeprocessAttr, 754 | static::onbeforeunloadAttr => static::onbeforeunloadAttr, 755 | static::onblurAttr => static::onblurAttr, 756 | static::oncanplayAttr => static::oncanplayAttr, 757 | static::oncanplaythroughAttr => static::oncanplaythroughAttr, 758 | static::onchangeAttr => static::onchangeAttr, 759 | static::onclickAttr => static::onclickAttr, 760 | static::oncontextmenuAttr => static::oncontextmenuAttr, 761 | static::oncopyAttr => static::oncopyAttr, 762 | static::oncutAttr => static::oncutAttr, 763 | static::ondblclickAttr => static::ondblclickAttr, 764 | static::ondragAttr => static::ondragAttr, 765 | static::ondragendAttr => static::ondragendAttr, 766 | static::ondragenterAttr => static::ondragenterAttr, 767 | static::ondragleaveAttr => static::ondragleaveAttr, 768 | static::ondragoverAttr => static::ondragoverAttr, 769 | static::ondragstartAttr => static::ondragstartAttr, 770 | static::ondropAttr => static::ondropAttr, 771 | static::ondurationchangeAttr => static::ondurationchangeAttr, 772 | static::onemptiedAttr => static::onemptiedAttr, 773 | static::onendedAttr => static::onendedAttr, 774 | static::onerrorAttr => static::onerrorAttr, 775 | static::onfocusAttr => static::onfocusAttr, 776 | static::onfocusinAttr => static::onfocusinAttr, 777 | 
static::onfocusoutAttr => static::onfocusoutAttr, 778 | static::onhashchangeAttr => static::onhashchangeAttr, 779 | static::oninputAttr => static::oninputAttr, 780 | static::oninvalidAttr => static::oninvalidAttr, 781 | static::onkeydownAttr => static::onkeydownAttr, 782 | static::onkeypressAttr => static::onkeypressAttr, 783 | static::onkeyupAttr => static::onkeyupAttr, 784 | static::onloadAttr => static::onloadAttr, 785 | static::onloadeddataAttr => static::onloadeddataAttr, 786 | static::onloadedmetadataAttr => static::onloadedmetadataAttr, 787 | static::onloadstartAttr => static::onloadstartAttr, 788 | static::onmousedownAttr => static::onmousedownAttr, 789 | static::onmousemoveAttr => static::onmousemoveAttr, 790 | static::onmouseoutAttr => static::onmouseoutAttr, 791 | static::onmouseoverAttr => static::onmouseoverAttr, 792 | static::onmouseupAttr => static::onmouseupAttr, 793 | static::onmousewheelAttr => static::onmousewheelAttr, 794 | static::onofflineAttr => static::onofflineAttr, 795 | static::ononlineAttr => static::ononlineAttr, 796 | static::onorientationchangeAttr => static::onorientationchangeAttr, 797 | static::onpagehideAttr => static::onpagehideAttr, 798 | static::onpageshowAttr => static::onpageshowAttr, 799 | static::onpasteAttr => static::onpasteAttr, 800 | static::onpauseAttr => static::onpauseAttr, 801 | static::onplayAttr => static::onplayAttr, 802 | static::onplayingAttr => static::onplayingAttr, 803 | static::onpopstateAttr => static::onpopstateAttr, 804 | static::onprogressAttr => static::onprogressAttr, 805 | static::onratechangeAttr => static::onratechangeAttr, 806 | static::onresetAttr => static::onresetAttr, 807 | static::onresizeAttr => static::onresizeAttr, 808 | static::onscrollAttr => static::onscrollAttr, 809 | static::onsearchAttr => static::onsearchAttr, 810 | static::onseekedAttr => static::onseekedAttr, 811 | static::onseekingAttr => static::onseekingAttr, 812 | static::onselectAttr => static::onselectAttr, 813 | 
static::onselectionchangeAttr => static::onselectionchangeAttr, 814 | static::onselectstartAttr => static::onselectstartAttr, 815 | static::onstalledAttr => static::onstalledAttr, 816 | static::onstorageAttr => static::onstorageAttr, 817 | static::onsubmitAttr => static::onsubmitAttr, 818 | static::onsuspendAttr => static::onsuspendAttr, 819 | static::ontimeupdateAttr => static::ontimeupdateAttr, 820 | static::ontouchcancelAttr => static::ontouchcancelAttr, 821 | static::ontouchendAttr => static::ontouchendAttr, 822 | static::ontouchmoveAttr => static::ontouchmoveAttr, 823 | static::ontouchstartAttr => static::ontouchstartAttr, 824 | static::onunloadAttr => static::onunloadAttr, 825 | static::onvolumechangeAttr => static::onvolumechangeAttr, 826 | static::onwaitingAttr => static::onwaitingAttr, 827 | static::onwebkitanimationendAttr => static::onwebkitanimationendAttr, 828 | static::onwebkitanimationiterationAttr => static::onwebkitanimationiterationAttr, 829 | static::onwebkitanimationstartAttr => static::onwebkitanimationstartAttr, 830 | static::onwebkitbeginfullscreenAttr => static::onwebkitbeginfullscreenAttr, 831 | static::onwebkitendfullscreenAttr => static::onwebkitendfullscreenAttr, 832 | static::onwebkitfullscreenchangeAttr => static::onwebkitfullscreenchangeAttr, 833 | static::onwebkitspeechchangeAttr => static::onwebkitspeechchangeAttr, 834 | static::onwebkittransitionendAttr => static::onwebkittransitionendAttr, 835 | static::openAttr => static::openAttr, 836 | static::optimumAttr => static::optimumAttr, 837 | static::patternAttr => static::patternAttr, 838 | static::pingAttr => static::pingAttr, 839 | static::placeholderAttr => static::placeholderAttr, 840 | static::playcountAttr => static::playcountAttr, 841 | static::pluginspageAttr => static::pluginspageAttr, 842 | static::pluginurlAttr => static::pluginurlAttr, 843 | static::posterAttr => static::posterAttr, 844 | static::precisionAttr => static::precisionAttr, 845 | static::preloadAttr => 
static::preloadAttr, 846 | static::primaryAttr => static::primaryAttr, 847 | static::profileAttr => static::profileAttr, 848 | static::progressAttr => static::progressAttr, 849 | static::promptAttr => static::promptAttr, 850 | static::readonlyAttr => static::readonlyAttr, 851 | static::relAttr => static::relAttr, 852 | static::requiredAttr => static::requiredAttr, 853 | static::resultsAttr => static::resultsAttr, 854 | static::revAttr => static::revAttr, 855 | static::roleAttr => static::roleAttr, 856 | static::rowsAttr => static::rowsAttr, 857 | static::rowspanAttr => static::rowspanAttr, 858 | static::rulesAttr => static::rulesAttr, 859 | static::sandboxAttr => static::sandboxAttr, 860 | static::schemeAttr => static::schemeAttr, 861 | static::scopeAttr => static::scopeAttr, 862 | static::scrollamountAttr => static::scrollamountAttr, 863 | static::scrolldelayAttr => static::scrolldelayAttr, 864 | static::scrollingAttr => static::scrollingAttr, 865 | static::selectedAttr => static::selectedAttr, 866 | static::shapeAttr => static::shapeAttr, 867 | static::sizeAttr => static::sizeAttr, 868 | static::sortableAttr => static::sortableAttr, 869 | static::sortdirectionAttr => static::sortdirectionAttr, 870 | static::spanAttr => static::spanAttr, 871 | static::spellcheckAttr => static::spellcheckAttr, 872 | static::srcAttr => static::srcAttr, 873 | static::srclangAttr => static::srclangAttr, 874 | static::standbyAttr => static::standbyAttr, 875 | static::startAttr => static::startAttr, 876 | static::stepAttr => static::stepAttr, 877 | static::styleAttr => static::styleAttr, 878 | static::summaryAttr => static::summaryAttr, 879 | static::tabindexAttr => static::tabindexAttr, 880 | static::tableborderAttr => static::tableborderAttr, 881 | static::targetAttr => static::targetAttr, 882 | static::textAttr => static::textAttr, 883 | static::titleAttr => static::titleAttr, 884 | static::topAttr => static::topAttr, 885 | static::topmarginAttr => static::topmarginAttr, 886 | 
static::truespeedAttr => static::truespeedAttr, 887 | static::typeAttr => static::typeAttr, 888 | static::usemapAttr => static::usemapAttr, 889 | static::valignAttr => static::valignAttr, 890 | static::valueAttr => static::valueAttr, 891 | static::valuetypeAttr => static::valuetypeAttr, 892 | static::versionAttr => static::versionAttr, 893 | static::viewsourceAttr => static::viewsourceAttr, 894 | static::vlinkAttr => static::vlinkAttr, 895 | static::vspaceAttr => static::vspaceAttr, 896 | static::webkitallowfullscreenAttr => static::webkitallowfullscreenAttr, 897 | static::webkitdirectoryAttr => static::webkitdirectoryAttr, 898 | static::webkitgrammarAttr => static::webkitgrammarAttr, 899 | static::webkitspeechAttr => static::webkitspeechAttr, 900 | static::widthAttr => static::widthAttr, 901 | static::wrapAttr => static::wrapAttr, 902 | ); 903 | } 904 | 905 | } 906 | -------------------------------------------------------------------------------- /src/zz/Html/HTMLToken.php: -------------------------------------------------------------------------------- 1 | false, 55 | 'hasSystemIdentifier' => false, 56 | 'publicIdentifier' => '', 57 | 'systemIdentifier' => '', 58 | 'forceQuirks' => false, 59 | ); 60 | 61 | protected $_html = ''; 62 | protected $_state = array(); 63 | 64 | public function __construct() { 65 | $this->_type = static::Uninitialized; 66 | } 67 | 68 | public function __toString() { 69 | return $this->_data; 70 | } 71 | 72 | public function toArray() { 73 | $data = array( 74 | 'type' => $this->_type, 75 | 'data' => $this->_data, 76 | 'selfClosing' => $this->_selfClosing, 77 | 'attributes' => $this->_attributes, 78 | 'parseError' => $this->_parseError, 79 | 'html' => $this->_html, 80 | 'state' => $this->_state, 81 | ); 82 | if ($this->getType() === static::DOCTYPE) { 83 | $doctypeData = $this->_doctypeData; 84 | if ($doctypeData['forceQuirks']) { 85 | $mode = static::QuirksMode; 86 | } else { 87 | $mode = 
$this->setCompatibilityModeFromDoctype($this->_data, $doctypeData['publicIdentifier'], $doctypeData['systemIdentifier']); 88 | } 89 | $doctypeData['mode'] = $mode; 90 | $data['doctypeData'] = $doctypeData; 91 | } 92 | return $data; 93 | } 94 | 95 | /** 96 | * Source/core/html/parser/HTMLConstructionSite.cpp 97 | * HTMLConstructionSite::setCompatibilityModeFromDoctype 98 | * 99 | * [QuirksMode] 100 | * startsWith publicId 101 | * `+//Silmaril//dtd html Pro v0r11 19970101//` 102 | * `-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//` 103 | * `-//AS//DTD HTML 3.0 asWedit + extensions//` 104 | * `-//IETF//DTD HTML 2.0 Level 1//` 105 | * `-//IETF//DTD HTML 2.0 Level 2//` 106 | * `-//IETF//DTD HTML 2.0 Strict Level 1//` 107 | * `-//IETF//DTD HTML 2.0 Strict Level 2//` 108 | * `-//IETF//DTD HTML 2.0 Strict//` 109 | * `-//IETF//DTD HTML 2.0//` 110 | * `-//IETF//DTD HTML 2.1E//` 111 | * `-//IETF//DTD HTML 3.0//` 112 | * `-//IETF//DTD HTML 3.2 Final//` 113 | * `-//IETF//DTD HTML 3.2//` 114 | * `-//IETF//DTD HTML 3//` 115 | * `-//IETF//DTD HTML Level 0//` 116 | * `-//IETF//DTD HTML Level 1//` 117 | * `-//IETF//DTD HTML Level 2//` 118 | * `-//IETF//DTD HTML Level 3//` 119 | * `-//IETF//DTD HTML Strict Level 0//` 120 | * `-//IETF//DTD HTML Strict Level 1//` 121 | * `-//IETF//DTD HTML Strict Level 2//` 122 | * `-//IETF//DTD HTML Strict Level 3//` 123 | * `-//IETF//DTD HTML Strict//` 124 | * `-//IETF//DTD HTML//` 125 | * `-//Metrius//DTD Metrius Presentational//` 126 | * `-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//` 127 | * `-//Microsoft//DTD Internet Explorer 2.0 HTML//` 128 | * `-//Microsoft//DTD Internet Explorer 2.0 Tables//` 129 | * `-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//` 130 | * `-//Microsoft//DTD Internet Explorer 3.0 HTML//` 131 | * `-//Microsoft//DTD Internet Explorer 3.0 Tables//` 132 | * `-//Netscape Comm. Corp.//DTD HTML//` 133 | * `-//Netscape Comm. 
Corp.//DTD Strict HTML//` 134 | * `-//O'Reilly and Associates//DTD HTML 2.0//` 135 | * `-//O'Reilly and Associates//DTD HTML Extended 1.0//` 136 | * `-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//` 137 | * `-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//` 138 | * `-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//` 139 | * `-//Spyglass//DTD HTML 2.0 Extended//` 140 | * `-//SQ//DTD HTML 2.0 HoTMetaL + extensions//` 141 | * `-//Sun Microsystems Corp.//DTD HotJava HTML//` 142 | * `-//Sun Microsystems Corp.//DTD HotJava Strict HTML//` 143 | * `-//W3C//DTD HTML 3 1995-03-24//` 144 | * `-//W3C//DTD HTML 3.2 Draft//` 145 | * `-//W3C//DTD HTML 3.2 Final//` 146 | * `-//W3C//DTD HTML 3.2//` 147 | * `-//W3C//DTD HTML 3.2S Draft//` 148 | * `-//W3C//DTD HTML 4.0 Frameset//` 149 | * `-//W3C//DTD HTML 4.0 Transitional//` 150 | * `-//W3C//DTD HTML Experimental 19960712//` 151 | * `-//W3C//DTD HTML Experimental 970421//` 152 | * `-//W3C//DTD W3 HTML//` 153 | * `-//W3O//DTD W3 HTML 3.0//` 154 | * `-//WebTechs//DTD Mozilla HTML 2.0//` 155 | * `-//WebTechs//DTD Mozilla HTML//` 156 | * 157 | * IgnoringCase publicId 158 | * `-//W3O//DTD W3 HTML Strict 3.0//EN//` 159 | * `-/W3C/DTD HTML 4.0 Transitional/EN` 160 | * `HTML` 161 | * 162 | * IgnoringCase systemId 163 | * `http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd` 164 | * 165 | * systemId.isEmpty() && publicId.startsWith 166 | * `-//W3C//DTD HTML 4.01 Frameset//` 167 | * `-//W3C//DTD HTML 4.01 Transitional//` 168 | * 169 | * [LimitedQuirksMode] 170 | * startsWith publicId 171 | * `-//W3C//DTD XHTML 1.0 Frameset//` 172 | * `-//W3C//DTD XHTML 1.0 Transitional//` 173 | * 174 | * !systemId.isEmpty() && publicId.startsWith 175 | * `-//W3C//DTD HTML 4.01 Frameset//` 176 | * `-//W3C//DTD HTML 4.01 Transitional//` 177 | */ 178 | protected function setCompatibilityModeFromDoctype($name, $publicId, $systemId) { 179 | 180 | if ($name !== 'html') { 181 | return 
static::QuirksMode; 182 | } 183 | $startsWithPublicId = "/^(?:-\/\/(?:S(?:oftQuad(?: Software\/\/DTD HoTMetaL PRO 6\.0::19990601|\/\/DTD HoTMetaL PRO 4\.0::19971010)::extensions to HTML 4\.0|un Microsystems Corp\.\/\/DTD HotJava(?: Strict)? HTML|Q\/\/DTD HTML 2\.0 HoTMetaL \+ extensions|pyglass\/\/DTD HTML 2\.0 Extended)|W(?:3(?:C\/\/DTD (?:HTML (?:3(?:\.2(?: (?:Draft|Final)|S Draft)?| 1995-03-24)|Experimental (?:19960712|970421)|4\.0 (?:Transitional|Frameset))|W3 HTML)|O\/\/DTD W3 HTML 3\.0)|ebTechs\/\/DTD Mozilla HTML(?: 2\.0)?)|IETF\/\/DTD HTML(?: (?:2\.(?:0(?: (?:Strict(?: Level [12])?|Level [12]))?|1E)|3(?:\.(?:2(?: Final)?|0))?|Strict(?: Level [0123])?|Level [0123]))?|M(?:icrosoft\/\/DTD Internet Explorer [23]\.0 (?:HTML(?: Strict)?|Tables)|etrius\/\/DTD Metrius Presentational)|O'Reilly and Associates\/\/DTD HTML (?:Extend(?:ed Relax)?ed 1|2)\.0|A(?:dvaSoft Ltd|S)\/\/DTD HTML 3\.0 asWedit \+ extensions|Netscape Comm\. Corp\.\/\/DTD(?: Strict)? HTML)|\+\/\/Silmaril\/\/dtd html Pro v0r11 19970101)\/\//"; 184 | $ignoringCasePublicId = '/^(?:-\/(?:\/W3O\/\/DTD W3 HTML Strict 3\.0\/\/EN\/\/|W3C\/DTD HTML 4\.0 Transitional\/EN)|HTML)$/i'; 185 | $ignoringCaseSystemId = '/^http:\/\/www\.ibm\.com\/data\/dtd\/v11\/ibmxhtml1-transitional\.dtd$/i'; 186 | $startsWithPublicId2 = '/^-\/\/W3C\/\/DTD HTML 4\.01 (?:Transitional|Frameset)\/\//'; 187 | 188 | if (preg_match($startsWithPublicId, $publicId) || preg_match($ignoringCasePublicId, $publicId) || preg_match($ignoringCaseSystemId, $systemId)) { 189 | return static::QuirksMode; 190 | } 191 | 192 | if ($systemId === '' && preg_match($startsWithPublicId2, $publicId)) { 193 | return static::QuirksMode; 194 | } 195 | 196 | $pattern1 = '/^-\/\/W3C\/\/DTD XHTML 1\.0 (?:Transitional|Frameset)\/\//'; 197 | $pattern2 = ' /^-\/\/W3C\/\/DTD HTML 4\.01 (?:Transitional|Frameset)\/\//'; 198 | if (preg_match($pattern1, $publicId) || ($systemId !== '' && preg_match($pattern2, $publicId))) { 199 | return static::LimitedQuirksMode; 200 | } 
201 | 202 | return static::NoQuirksMode; 203 | } 204 | 205 | public function clean() { 206 | unset($this->_currentAttribute); 207 | } 208 | 209 | public function getType() { 210 | return $this->_type; 211 | } 212 | 213 | public function getName() { 214 | return $this->_data; 215 | } 216 | 217 | public function setType($type) { 218 | $this->_type = $type; 219 | } 220 | 221 | public function getHtmlOrigin() { 222 | return $this->_html; 223 | } 224 | 225 | public function setHtmlOrigin($html) { 226 | $this->_html = $html; 227 | } 228 | 229 | public function getState() { 230 | return $this->_state; 231 | } 232 | 233 | public function setState($states) { 234 | $this->_state = $states; 235 | } 236 | 237 | public function getTagName() { 238 | $type = $this->getType(); 239 | if ($type !== static::StartTag && $type !== static::EndTag) { 240 | return false; 241 | } 242 | return $this->getName(); 243 | } 244 | 245 | public function setData($data) { 246 | $this->_data = $data; 247 | } 248 | 249 | public function getData() { 250 | return $this->_data; 251 | } 252 | 253 | public function getAttributes() { 254 | return $this->_attributes; 255 | } 256 | 257 | public function setAttributes($attributes) { 258 | $this->_attributes = $attributes; 259 | } 260 | 261 | public function getDoctypeData() { 262 | return $this->_doctypeData; 263 | } 264 | 265 | public function hasSelfClosing() { 266 | return $this->_selfClosing; 267 | } 268 | 269 | public function hasParseError() { 270 | return $this->_parseError; 271 | } 272 | 273 | public function parseError() { 274 | $this->_parseError = true; 275 | } 276 | 277 | public function clear() { 278 | $this->_type = static::Uninitialized; 279 | $this->_data = ''; 280 | } 281 | 282 | public function ensureIsCharacterToken() { 283 | $this->_type = static::Character; 284 | } 285 | 286 | public function makeEndOfFile() { 287 | $this->_type = static::EndOfFile; 288 | } 289 | 290 | public function appendToCharacter($character) { 291 | $this->_data .= 
$character; 292 | } 293 | 294 | public function beginComment() { 295 | $this->_type = static::Comment; 296 | } 297 | 298 | public function appendToComment($character) { 299 | $this->_data .= $character; 300 | } 301 | 302 | public function appendToName($character) { 303 | $this->_data .= $character; 304 | } 305 | 306 | public function setDoubleQuoted() { 307 | $this->_currentAttribute['quoted'] = static::DoubleQuoted; 308 | } 309 | 310 | public function setSingleQuoted() { 311 | $this->_currentAttribute['quoted'] = static::SingleQuoted; 312 | } 313 | 314 | /* Start/End Tag Tokens */ 315 | 316 | public function selfClosing() { 317 | return $this->_selfClosing; 318 | } 319 | 320 | public function setSelfClosing() { 321 | $this->_selfClosing = true; 322 | } 323 | 324 | public function beginStartTag($character) { 325 | $this->setType(static::StartTag); 326 | $this->_selfClosing = false; 327 | $this->_currentAttribute = 0; 328 | $this->_attributes = array(); 329 | $this->_data .= $character; 330 | } 331 | 332 | public function beginEndTag($character) { 333 | $this->setType(static::EndTag); 334 | $this->_selfClosing = false; 335 | $this->_currentAttribute = 0; 336 | $this->_attributes = array(); 337 | $this->_data .= $character; 338 | } 339 | 340 | public function addNewAttribute() { 341 | // m_attributes.grow(m_attributes.size() + 1); 342 | // m_currentAttribute = &m_attributes.last(); 343 | $_default = array( 344 | 'name' => '', 345 | 'value' => '', 346 | 'quoted' => false, 347 | ); 348 | unset($this->_currentAttribute); 349 | $this->_currentAttribute = $_default; 350 | $this->_attributes[] = & $this->_currentAttribute; 351 | } 352 | 353 | public function beginAttributeName($offset) { 354 | // m_currentAttribute->nameRange.start = offset - m_baseOffset; 355 | // $this->_currentAttribute['nameRange']['start'] = $offset; 356 | } 357 | 358 | public function endAttributeName($offset) { 359 | // int index = offset - m_baseOffset; 360 | // m_currentAttribute->nameRange.end = 
index; 361 | // m_currentAttribute->valueRange.start = index; 362 | // m_currentAttribute->valueRange.end = index; 363 | // $this->_currentAttribute['nameRange']['end'] = $offset; 364 | // $this->_currentAttribute['valueRange']['start'] = $offset; 365 | // $this->_currentAttribute['valueRange']['end'] = $offset; 366 | } 367 | 368 | public function beginAttributeValue($offset) { 369 | // m_currentAttribute->valueRange.start = offset - m_baseOffset; 370 | // #ifndef NDEBUG 371 | // m_currentAttribute->valueRange.end = 0; 372 | // #endif 373 | // $this->_currentAttribute['valueRange']['start'] = $offset; 374 | } 375 | 376 | public function endAttributeValue($offset) { 377 | // m_currentAttribute->valueRange.end = offset - m_baseOffset; 378 | // $this->_currentAttribute['valueRange']['end'] = $offset; 379 | } 380 | 381 | public function appendToAttributeName($character) { 382 | // FIXME: We should be able to add the following ASSERT once we fix 383 | // https://bugs.webkit.org/show_bug.cgi?id=62971 384 | // ASSERT(m_currentAttribute->nameRange.start); 385 | // m_currentAttribute->name.append(character); 386 | $this->_currentAttribute['name'] .= $character; 387 | } 388 | 389 | public function appendToAttributeValue($character) { 390 | // FIXME: We should be able to add the following ASSERT once we fix 391 | // m_currentAttribute->value.append(character); 392 | $this->_currentAttribute['value'] .= $character; 393 | } 394 | 395 | /* DOCTYPE Tokens */ 396 | 397 | public function forceQuirks() { 398 | // return m_doctypeData->m_forceQuirks; 399 | return $this->_doctypeData['forceQuirks']; 400 | } 401 | 402 | public function setForceQuirks() { 403 | // m_doctypeData->m_forceQuirks = true; 404 | $this->_doctypeData['forceQuirks'] = true; 405 | } 406 | 407 | protected function _beginDOCTYPE() { 408 | $this->_type = static::DOCTYPE; 409 | // m_doctypeData = adoptPtr(new DoctypeData); 410 | } 411 | 412 | public function beginDOCTYPE($character = null) { 413 | 
$this->_beginDOCTYPE(); 414 | if (is_string($character) && $character !== '') { // explicit check: a truthiness test drops '0', which PHP treats as false 415 | $this->_data .= $character; 416 | } 417 | } 418 | 419 | public function setPublicIdentifierToEmptyString() { 420 | // m_doctypeData->m_hasPublicIdentifier = true; 421 | // m_doctypeData->m_publicIdentifier.clear(); 422 | $this->_doctypeData['hasPublicIdentifier'] = true; 423 | $this->_doctypeData['publicIdentifier'] = ''; 424 | } 425 | 426 | public function setSystemIdentifierToEmptyString() { 427 | // m_doctypeData->m_hasSystemIdentifier = true; 428 | // m_doctypeData->m_systemIdentifier.clear(); 429 | $this->_doctypeData['hasSystemIdentifier'] = true; 430 | $this->_doctypeData['systemIdentifier'] = ''; 431 | } 432 | 433 | 434 | public function appendToPublicIdentifier($character) { 435 | // m_doctypeData->m_publicIdentifier.append(character); 436 | $this->_doctypeData['publicIdentifier'] .= $character; 437 | } 438 | 439 | public function appendToSystemIdentifier($character) { 440 | // m_doctypeData->m_systemIdentifier.append(character); 441 | $this->_doctypeData['systemIdentifier'] .= $character; 442 | } 443 | 444 | } -------------------------------------------------------------------------------- /src/zz/Html/HTMLTokenizer.php: -------------------------------------------------------------------------------- 1 | _SegmentedString = $SegmentedString; 156 | $this->_Token = new HTMLToken(); 157 | $this->_state = static::DataState; 158 | $this->_startState = static::DataState; 159 | $this->_option = $option + array('debug' => false); 160 | $this->_debug = !!$this->_option['debug']; 161 | } 162 | 163 | /** 164 | * @param string $state 165 | */ 166 | public function setState($state) { 167 | $this->_state = $state; 168 | } 169 | 170 | /** 171 | * @return string 172 | */ 173 | public function getState() { 174 | return $this->_state; 175 | } 176 | 177 | /** 178 | * @throws \InvalidArgumentException 179 | * @return HtmlToken[] 180 | */ 181 | public function tokenizer() { 182 | if ($this->_SegmentedString->eos()) { 183 | 
return array(); 184 | } 185 | 186 | while (true) { 187 | $this->_startPos = $startPos = $this->_SegmentedString->tell(); 188 | $result = $this->nextToken($this->_SegmentedString); 189 | $this->_state = static::DataState; 190 | $endPos = $this->_SegmentedString->tell(); 191 | 192 | if ($result === false && (($endPos - $startPos) === 0)) { 193 | throw new \InvalidArgumentException('Given invalid string or invalid statement.'); 194 | } 195 | 196 | $startState = $this->_startState; 197 | // When this pass started in RAWTEXT, RCDATA or ScriptData state, nextToken() can return a Character token while an end tag name is already buffered. 198 | // In that case the SegmentedString is moved back to the end of the character data so the end tag is tokenized again. 199 | $type = $this->_Token->getType(); 200 | if ($type === HTMLToken::Character && $this->_bufferedEndTagName !== '' && ($startState === static::RAWTEXTState || $startState === static::RCDATAState || $startState === static::ScriptDataState)) { 201 | $length = strlen($this->_Token->getData()); 202 | 203 | // Store the HTMLToken::Character token, limited to the length of its own data. 204 | $this->_buffer = array_slice($this->_buffer, 0, $length); 205 | $this->_compactBuffer($startPos, $startPos + $length, $type); 206 | $token = $this->_Token; 207 | $this->_tokens[] = $token; 208 | 209 | // Seek to just after the character data and restore the start state so the EndTag is processed on the next pass. 210 | $this->_SegmentedString->seek($startPos + $length); 211 | $this->_state = $startState; 212 | } else { 213 | $this->_compactBuffer($startPos, $endPos, $type); 214 | $token = $this->_Token; 215 | $this->_tokens[] = $token; 216 | // FIXME: The tokenizer should do this work for us. 
217 | if ($type === HTMLToken::StartTag) { 218 | $this->_updateStateFor($token->getTagName()); 219 | } else { 220 | $this->_state = static::DataState; 221 | } 222 | } 223 | $this->_startState = $this->_state; 224 | 225 | $this->_buffer = array(); 226 | $this->_bufferedEndTagName = ''; 227 | $this->_temporaryBuffer = ''; 228 | $this->_Token = new HTMLToken(); 229 | if ($this->_SegmentedString->eos()) { 230 | break; 231 | } 232 | } 233 | return $this->_tokens; 234 | } 235 | 236 | public function getTokensAsArray() { 237 | $result = array(); 238 | foreach ($this->_tokens as $token) { 239 | $result[] = $token->toArray(); 240 | } 241 | return $result; 242 | } 243 | 244 | protected function _compactBuffer($startPos, $endPos, $type) { 245 | $compactBuffer = array(); 246 | $before = static::kEndOfFileMarker; 247 | $html = $this->_SegmentedString->substr($startPos, $endPos - $startPos); 248 | foreach ($this->_buffer as $i => $state) { 249 | if ($before !== $state) { 250 | $before = $compactBuffer[$i] = $state; 251 | } 252 | } 253 | switch ($type) { 254 | case HTMLToken::Uninitialized: 255 | case HTMLToken::EndOfFile: 256 | case HTMLToken::Character: 257 | case HTMLToken::Comment: 258 | $this->_Token->setData($html); 259 | break; 260 | } 261 | 262 | if ($this->_debug) { 263 | $this->_Token->setHtmlOrigin($html); 264 | $this->_Token->setState($compactBuffer); 265 | } else if ($type === HTMLToken::DOCTYPE) { 266 | $this->_Token->setHtmlOrigin($html); 267 | } 268 | $this->_Token->clean(); 269 | } 270 | 271 | protected function _updateStateFor($tagName) { 272 | if ($tagName === HTMLNames::textareaTag || $tagName === HTMLNames::titleTag) { 273 | $this->_state = static::RCDATAState; 274 | } else if ($tagName === HTMLNames::plaintextTag) { 275 | $this->_state = static::PLAINTEXTState; 276 | } else if ($tagName === HTMLNames::scriptTag) { 277 | $this->_state = static::ScriptDataState; 278 | } else if ($tagName === HTMLNames::styleTag || $tagName === HTMLNames::iframeTag || $tagName 
=== HTMLNames::xmpTag || ($tagName === HTMLNames::noembedTag && $this->_pluginsEnabled) || $tagName === HTMLNames::noframesTag || ($tagName === HTMLNames::noscriptTag && $this->_scriptEnabled)) { 279 | $this->_state = static::RAWTEXTState; 280 | } 281 | } 282 | 283 | // http://www.whatwg.org/specs/web-apps/current-work/#tokenization 284 | protected function nextToken(SegmentedString $source) { 285 | while (true) { 286 | $char = $this->_SegmentedString->getCurrentChar(); 287 | switch ($this->_state) { 288 | case static::DataState: 289 | if ($char === '&') { 290 | $this->_HTML_ADVANCE_TO(static::CharacterReferenceInDataState); 291 | } else if ($char === '<') { 292 | if ($this->_Token->getType() === HTMLToken::Character) { 293 | // We have a bunch of character tokens queued up that we 294 | // are emitting lazily here. 295 | return true; 296 | } 297 | $this->_HTML_ADVANCE_TO(static::TagOpenState); 298 | } else if ($char === static::kEndOfFileMarker) { 299 | return $this->_emitEndOfFile(); 300 | } else { 301 | $this->_bufferCharacter($char); 302 | $this->_HTML_ADVANCE_TO(static::DataState); 303 | } 304 | break; 305 | 306 | case static::CharacterReferenceInDataState: 307 | // TODO Do not expand the reference, so skip parse Character references. 308 | $this->_HTML_SWITCH_TO(static::DataState); 309 | break; 310 | 311 | case static::RCDATAState: 312 | if ($char === '&') { 313 | $this->_HTML_ADVANCE_TO(static::CharacterReferenceInRCDATAState); 314 | } else if ($char === '<') { 315 | $this->_HTML_ADVANCE_TO(static::RCDATALessThanSignState); 316 | } else if ($char === static::kEndOfFileMarker) { 317 | return $this->_emitEndOfFile(); 318 | } else { 319 | $this->_bufferCharacter($char); 320 | $this->_HTML_ADVANCE_TO(static::RCDATAState); 321 | } 322 | break; 323 | 324 | case static::CharacterReferenceInRCDATAState: 325 | // TODO Do not expand the reference, so skip parse Character references. 
326 | $this->_HTML_SWITCH_TO(static::RCDATAState); 327 | break; 328 | 329 | case static::RAWTEXTState: 330 | if ($char === '<') { 331 | $this->_HTML_ADVANCE_TO(static::RAWTEXTLessThanSignState); 332 | } else if ($char === static::kEndOfFileMarker) { 333 | return $this->_emitEndOfFile(); 334 | } else { 335 | $this->_bufferCharacter($char); 336 | $this->_HTML_ADVANCE_TO(static::RAWTEXTState); 337 | } 338 | break; 339 | 340 | case static::ScriptDataState: 341 | if ($char === '<') { 342 | $this->_HTML_ADVANCE_TO(static::ScriptDataLessThanSignState); 343 | } else if ($char === static::kEndOfFileMarker) { 344 | return $this->_emitEndOfFile(); 345 | } else { 346 | $this->_bufferCharacter($char); 347 | $this->_HTML_ADVANCE_TO(static::ScriptDataState); 348 | } 349 | break; 350 | 351 | case static::PLAINTEXTState: 352 | if ($char === static::kEndOfFileMarker) { 353 | return $this->_emitEndOfFile(); 354 | } else { 355 | $this->_bufferCharacter($char); 356 | $this->_HTML_ADVANCE_TO(static::PLAINTEXTState); 357 | } 358 | break; 359 | 360 | case static::TagOpenState: 361 | if ($char === '!') { 362 | $this->_HTML_ADVANCE_TO(static::MarkupDeclarationOpenState); 363 | } else if ($char === '/') { 364 | $this->_HTML_ADVANCE_TO(static::EndTagOpenState); 365 | } else if (ctype_upper($char)) { 366 | $this->_Token->beginStartTag(strtolower($char)); 367 | $this->_HTML_ADVANCE_TO(static::TagNameState); 368 | } else if (ctype_lower($char)) { 369 | $this->_Token->beginStartTag(strtolower($char)); 370 | $this->_HTML_ADVANCE_TO(static::TagNameState); 371 | } else if ($char === '?') { 372 | $this->_parseError(); 373 | // The spec consumes the current character before switching 374 | // to the bogus comment state, but it's easier to implement 375 | // if we reconsume the current character. 
376 | $this->_HTML_RECONSUME_IN(static::BogusCommentState); 377 | } else { 378 | $this->_parseError(); 379 | $this->_bufferCharacter('<'); 380 | $this->_HTML_RECONSUME_IN(static::DataState); 381 | } 382 | break; 383 | 384 | case static::EndTagOpenState: 385 | if (ctype_upper($char)) { 386 | $this->_Token->beginEndTag(strtolower($char)); 387 | $this->_HTML_ADVANCE_TO(static::TagNameState); 388 | } else if (ctype_lower($char)) { 389 | $this->_Token->beginEndTag(strtolower($char)); 390 | $this->_HTML_ADVANCE_TO(static::TagNameState); 391 | } else if ($char === '>') { 392 | $this->_parseError(); 393 | $this->_HTML_ADVANCE_TO(static::DataState); 394 | } else if ($char === static::kEndOfFileMarker) { 395 | $this->_parseError(); 396 | $this->_bufferCharacter('<'); 397 | $this->_bufferCharacter('/'); 398 | $this->_HTML_RECONSUME_IN(static::DataState); 399 | } else { 400 | $this->_parseError(); 401 | $this->_HTML_RECONSUME_IN(static::BogusCommentState); 402 | } 403 | break; 404 | 405 | case static::TagNameState: 406 | if ($this->_isTokenizerWhitespace($char)) { 407 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeNameState); 408 | } else if ($char === '/') { 409 | $this->_HTML_ADVANCE_TO(static::SelfClosingStartTagState); 410 | } else if ($char === '>') { 411 | return $this->_emitAndResumeIn(); 412 | } else if (ctype_upper($char)) { 413 | $this->_Token->appendToName(strtolower($char)); 414 | $this->_HTML_ADVANCE_TO(static::TagNameState); 415 | } else if ($char === static::kEndOfFileMarker) { 416 | $this->_parseError(); 417 | $this->_HTML_RECONSUME_IN(static::DataState); 418 | } else { 419 | $this->_Token->appendToName($char); 420 | $this->_HTML_ADVANCE_TO(static::TagNameState); 421 | } 422 | break; 423 | 424 | case static::RCDATALessThanSignState: 425 | if ($char === '/') { 426 | $this->_temporaryBuffer = ''; 427 | $this->_HTML_ADVANCE_TO(static::RCDATAEndTagOpenState); 428 | } else { 429 | $this->_bufferCharacter('<'); 430 | $this->_HTML_RECONSUME_IN(static::RCDATAState); 
431 | } 432 | break; 433 | 434 | case static::RCDATAEndTagOpenState: 435 | if (ctype_upper($char)) { 436 | $this->_temporaryBuffer .= $char; 437 | $this->_bufferedEndTagName .= strtolower($char); 438 | $this->_HTML_ADVANCE_TO(static::RCDATAEndTagNameState); 439 | } else if (ctype_lower($char)) { 440 | $this->_temporaryBuffer .= $char; 441 | $this->_bufferedEndTagName .= $char; 442 | $this->_HTML_ADVANCE_TO(static::RCDATAEndTagNameState); 443 | } else { 444 | $this->_bufferCharacter('<'); 445 | $this->_bufferCharacter('/'); 446 | $this->_HTML_RECONSUME_IN(static::RCDATAState); 447 | } 448 | break; 449 | 450 | case static::RCDATAEndTagNameState: 451 | if (ctype_upper($char)) { 452 | $this->_temporaryBuffer .= $char; 453 | $this->_bufferedEndTagName .= strtolower($char); 454 | $this->_HTML_ADVANCE_TO(static::RCDATAEndTagNameState); 455 | } else if (ctype_lower($char)) { 456 | $this->_temporaryBuffer .= $char; 457 | $this->_bufferedEndTagName .= $char; 458 | $this->_HTML_ADVANCE_TO(static::RCDATAEndTagNameState); 459 | } else { 460 | if ($this->_isTokenizerWhitespace($char)) { 461 | if ($this->_isAppropriateEndTag()) { 462 | $this->_temporaryBuffer .= $char; 463 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::BeforeAttributeNameState); 464 | if ($result !== null) { 465 | return $result; 466 | } 467 | break; 468 | } 469 | } else if ($char === '/') { 470 | if ($this->_isAppropriateEndTag()) { 471 | $this->_temporaryBuffer .= $char; 472 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::SelfClosingStartTagState); 473 | if ($result !== null) { 474 | return $result; 475 | } 476 | break; 477 | } 478 | } else if ($char === '>') { 479 | if ($this->_isAppropriateEndTag()) { 480 | $this->_temporaryBuffer .= $char; 481 | return $this->_flushEmitAndResumeIn($source, HTMLTokenizer::DataState); 482 | } 483 | } 484 | $this->_bufferCharacter('<'); 485 | $this->_bufferCharacter('/'); 486 | $this->_Token->appendToCharacter($this->_temporaryBuffer); 487 | $this->_bufferedEndTagName = ''; 
488 | $this->_temporaryBuffer = ''; 489 | $this->_HTML_RECONSUME_IN(static::RCDATAState); 490 | } 491 | break; 492 | 493 | case static::RAWTEXTLessThanSignState: 494 | if ($char === '/') { 495 | $this->_temporaryBuffer = ''; 496 | $this->_HTML_ADVANCE_TO(static::RAWTEXTEndTagOpenState); 497 | } else { 498 | $this->_bufferCharacter('<'); 499 | $this->_HTML_RECONSUME_IN(static::RAWTEXTState); 500 | } 501 | break; 502 | 503 | case static::RAWTEXTEndTagOpenState: 504 | if (ctype_upper($char)) { 505 | $this->_temporaryBuffer .= $char; 506 | $this->_bufferedEndTagName .= strtolower($char); 507 | $this->_HTML_ADVANCE_TO(static::RAWTEXTEndTagNameState); 508 | } else if (ctype_lower($char)) { 509 | $this->_temporaryBuffer .= $char; 510 | $this->_bufferedEndTagName .= $char; 511 | $this->_HTML_ADVANCE_TO(static::RAWTEXTEndTagNameState); 512 | } else { 513 | $this->_bufferCharacter('<'); 514 | $this->_bufferCharacter('/'); 515 | $this->_HTML_RECONSUME_IN(static::RAWTEXTState); 516 | } 517 | break; 518 | 519 | case static::RAWTEXTEndTagNameState: 520 | if (ctype_upper($char)) { 521 | $this->_temporaryBuffer .= $char; 522 | $this->_bufferedEndTagName .= strtolower($char); 523 | $this->_HTML_ADVANCE_TO(static::RAWTEXTEndTagNameState); 524 | } else if (ctype_lower($char)) { 525 | $this->_temporaryBuffer .= $char; 526 | $this->_bufferedEndTagName .= $char; 527 | $this->_HTML_ADVANCE_TO(static::RAWTEXTEndTagNameState); 528 | } else { 529 | if ($this->_isTokenizerWhitespace($char)) { 530 | if ($this->_isAppropriateEndTag()) { 531 | $this->_temporaryBuffer .= $char; 532 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::BeforeAttributeNameState); 533 | if ($result !== null) { 534 | return $result; 535 | } 536 | break; 537 | } 538 | } else if ($char === '/') { 539 | if ($this->_isAppropriateEndTag()) { 540 | $this->_temporaryBuffer .= $char; 541 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::SelfClosingStartTagState); 542 | if ($result !== null) { 543 | return $result; 544 | } 545 | 
break; 546 | } 547 | } else if ($char === '>') { 548 | if ($this->_isAppropriateEndTag()) { 549 | $this->_temporaryBuffer .= $char; 550 | return $this->_flushEmitAndResumeIn($source, HTMLTokenizer::DataState); 551 | } 552 | } 553 | $this->_bufferCharacter('<'); 554 | $this->_bufferCharacter('/'); 555 | $this->_Token->appendToCharacter($this->_temporaryBuffer); 556 | $this->_bufferedEndTagName = ''; 557 | $this->_temporaryBuffer = ''; 558 | $this->_HTML_RECONSUME_IN(static::RAWTEXTState); 559 | } 560 | break; 561 | 562 | case static::ScriptDataLessThanSignState: 563 | if ($char === '/') { 564 | $this->_temporaryBuffer = ''; 565 | $this->_HTML_ADVANCE_TO(static::ScriptDataEndTagOpenState); 566 | } else if ($char === '!') { 567 | $this->_bufferCharacter('<'); 568 | $this->_bufferCharacter('!'); 569 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapeStartState); 570 | } else { 571 | $this->_bufferCharacter('<'); 572 | $this->_HTML_RECONSUME_IN(static::ScriptDataState); 573 | } 574 | break; 575 | 576 | case static::ScriptDataEndTagOpenState: 577 | if (ctype_upper($char)) { 578 | $this->_temporaryBuffer .= $char; 579 | $this->_bufferedEndTagName .= strtolower($char); 580 | $this->_HTML_ADVANCE_TO(static::ScriptDataEndTagNameState); 581 | } else if (ctype_lower($char)) { 582 | $this->_temporaryBuffer .= $char; 583 | $this->_bufferedEndTagName .= $char; 584 | $this->_HTML_ADVANCE_TO(static::ScriptDataEndTagNameState); 585 | } else { 586 | $this->_bufferCharacter('<'); 587 | $this->_bufferCharacter('/'); 588 | $this->_HTML_RECONSUME_IN(static::ScriptDataState); 589 | } 590 | break; 591 | 592 | case static::ScriptDataEndTagNameState: 593 | if (ctype_upper($char)) { 594 | $this->_temporaryBuffer .= $char; 595 | $this->_bufferedEndTagName .= strtolower($char); 596 | $this->_HTML_ADVANCE_TO(static::ScriptDataEndTagNameState); 597 | } else if (ctype_lower($char)) { 598 | $this->_temporaryBuffer .= $char; 599 | $this->_bufferedEndTagName .= $char; 600 | 
$this->_HTML_ADVANCE_TO(static::ScriptDataEndTagNameState); 601 | } else { 602 | if ($this->_isTokenizerWhitespace($char)) { 603 | if ($this->_isAppropriateEndTag()) { 604 | $this->_temporaryBuffer .= $char; 605 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::BeforeAttributeNameState); 606 | if ($result !== null) { 607 | return $result; 608 | } 609 | break; 610 | } 611 | } else if ($char === '/') { 612 | if ($this->_isAppropriateEndTag()) { 613 | $this->_temporaryBuffer .= $char; 614 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::SelfClosingStartTagState); 615 | if ($result !== null) { 616 | return $result; 617 | } 618 | break; 619 | } 620 | } else if ($char === '>') { 621 | if ($this->_isAppropriateEndTag()) { 622 | $this->_temporaryBuffer .= $char; 623 | return $this->_flushEmitAndResumeIn($source, HTMLTokenizer::DataState); 624 | } 625 | } 626 | $this->_bufferCharacter('<'); 627 | $this->_bufferCharacter('/'); 628 | $this->_Token->appendToCharacter($this->_temporaryBuffer); 629 | $this->_bufferedEndTagName = ''; 630 | $this->_temporaryBuffer = ''; 631 | $this->_HTML_RECONSUME_IN(static::ScriptDataState); 632 | } 633 | break; 634 | 635 | case static::ScriptDataEscapeStartState: 636 | if ($char === '-') { 637 | $this->_bufferCharacter($char); 638 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapeStartDashState); 639 | } else { 640 | $this->_HTML_RECONSUME_IN(static::ScriptDataState); 641 | } 642 | break; 643 | 644 | case static::ScriptDataEscapeStartDashState: 645 | if ($char === '-') { 646 | $this->_bufferCharacter($char); 647 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedDashDashState); 648 | } else { 649 | $this->_HTML_RECONSUME_IN(static::ScriptDataState); 650 | } 651 | break; 652 | 653 | case static::ScriptDataEscapedState: 654 | if ($char === '-') { 655 | $this->_bufferCharacter($char); 656 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedDashState); 657 | } else if ($char === '<') { 658 | 
$this->_HTML_ADVANCE_TO(static::ScriptDataEscapedLessThanSignState); 659 | } else if ($char === static::kEndOfFileMarker) { 660 | $this->_parseError(); 661 | $this->_HTML_RECONSUME_IN(static::DataState); 662 | } else { 663 | $this->_bufferCharacter($char); 664 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedState); 665 | } 666 | break; 667 | 668 | case static::ScriptDataEscapedDashState: 669 | if ($char === '-') { 670 | $this->_bufferCharacter($char); 671 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedDashDashState); 672 | } else if ($char === '<') { 673 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedLessThanSignState); 674 | } else if ($char === static::kEndOfFileMarker) { 675 | $this->_parseError(); 676 | $this->_HTML_RECONSUME_IN(static::DataState); 677 | } else { 678 | $this->_bufferCharacter($char); 679 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedState); 680 | } 681 | break; 682 | 683 | case static::ScriptDataEscapedDashDashState: 684 | if ($char === '-') { 685 | $this->_bufferCharacter($char); 686 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedDashDashState); 687 | } else if ($char === '<') { 688 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedLessThanSignState); 689 | } else if ($char === '>') { 690 | $this->_bufferCharacter($char); 691 | $this->_HTML_ADVANCE_TO(static::ScriptDataState); 692 | } else if ($char === static::kEndOfFileMarker) { 693 | $this->_parseError(); 694 | $this->_HTML_RECONSUME_IN(static::DataState); 695 | } else { 696 | $this->_bufferCharacter($char); 697 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedState); 698 | } 699 | break; 700 | 701 | case static::ScriptDataEscapedLessThanSignState: 702 | if ($char === '/') { 703 | $this->_temporaryBuffer = ''; 704 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedEndTagOpenState); 705 | } else if (ctype_upper($char)) { 706 | $this->_bufferCharacter('<'); 707 | $this->_bufferCharacter($char); 708 | $this->_temporaryBuffer = ''; 709 | $this->_temporaryBuffer = 
strtolower($char); 710 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeStartState); 711 | } else if (ctype_lower($char)) { 712 | $this->_bufferCharacter('<'); 713 | $this->_bufferCharacter($char); 714 | $this->_temporaryBuffer = ''; 715 | $this->_temporaryBuffer .= $char; 716 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeStartState); 717 | } else { 718 | $this->_bufferCharacter('<'); 719 | $this->_HTML_RECONSUME_IN(static::ScriptDataEscapedState); 720 | } 721 | break; 722 | 723 | case static::ScriptDataEscapedEndTagOpenState: 724 | if (ctype_upper($char)) { 725 | $this->_temporaryBuffer .= $char; 726 | $this->_bufferedEndTagName .= strtolower($char); 727 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedEndTagNameState); 728 | } else if (ctype_lower($char)) { 729 | $this->_temporaryBuffer .= $char; 730 | $this->_bufferedEndTagName .= $char; 731 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedEndTagNameState); 732 | } else { 733 | $this->_bufferCharacter('<'); 734 | $this->_bufferCharacter('/'); 735 | $this->_HTML_RECONSUME_IN(static::ScriptDataEscapedState); 736 | } 737 | break; 738 | 739 | case static::ScriptDataEscapedEndTagNameState: 740 | if (ctype_upper($char)) { 741 | $this->_temporaryBuffer .= $char; 742 | $this->_bufferedEndTagName .= strtolower($char); 743 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedEndTagNameState); 744 | } else if (ctype_lower($char)) { 745 | $this->_temporaryBuffer .= $char; 746 | $this->_bufferedEndTagName .= $char; 747 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedEndTagNameState); 748 | } else { 749 | if ($this->_isTokenizerWhitespace($char)) { 750 | if ($this->_isAppropriateEndTag()) { 751 | $this->_temporaryBuffer .= $char; 752 | // ScriptDataEscapeStartState called bufferCharacter, so `_FLUSH_AND_ADVANCE_TO` always returns true. 
753 | return $this->_FLUSH_AND_ADVANCE_TO(static::BeforeAttributeNameState); 754 | } 755 | } else if ($char === '/') { 756 | if ($this->_isAppropriateEndTag()) { 757 | $this->_temporaryBuffer .= $char; 758 | // ScriptDataEscapeStartState called bufferCharacter, so `_FLUSH_AND_ADVANCE_TO` always returns true. 759 | return $this->_FLUSH_AND_ADVANCE_TO(static::SelfClosingStartTagState); 760 | } 761 | } else if ($char === '>') { 762 | if ($this->_isAppropriateEndTag()) { 763 | $this->_temporaryBuffer .= $char; 764 | $this->_temporaryBuffer .= $char; 765 | return $this->_flushEmitAndResumeIn($source, HTMLTokenizer::DataState); 766 | } 767 | } 768 | $this->_bufferCharacter('<'); 769 | $this->_bufferCharacter('/'); 770 | $this->_Token->appendToCharacter($this->_temporaryBuffer); 771 | $this->_bufferedEndTagName = ''; 772 | $this->_temporaryBuffer = ''; 773 | $this->_HTML_RECONSUME_IN(static::ScriptDataEscapedState); 774 | } 775 | break; 776 | 777 | case static::ScriptDataDoubleEscapeStartState: 778 | if ($this->_isTokenizerWhitespace($char) || $char === '/' || $char === '>') { 779 | $this->_bufferCharacter($char); 780 | if ($this->_temporaryBufferIs(HTMLNames::scriptTag)) { 781 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedState); 782 | } else { 783 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedState); 784 | } 785 | } else if (ctype_upper($char)) { 786 | $this->_bufferCharacter($char); 787 | $this->_temporaryBuffer .= strtolower($char); 788 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeStartState); 789 | } else if (ctype_lower($char)) { 790 | $this->_bufferCharacter($char); 791 | $this->_temporaryBuffer .= $char; 792 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeStartState); 793 | } else { 794 | $this->_HTML_RECONSUME_IN(static::ScriptDataEscapedState); 795 | } 796 | break; 797 | 798 | case static::ScriptDataDoubleEscapedState: 799 | if ($char === '-') { 800 | $this->_bufferCharacter($char); 801 | 
$this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedDashState); 802 | } else if ($char === '<') { 803 | $this->_bufferCharacter($char); 804 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedLessThanSignState); 805 | } else if ($char === static::kEndOfFileMarker) { 806 | $this->_parseError(); 807 | $this->_HTML_RECONSUME_IN(static::DataState); 808 | } else { 809 | $this->_bufferCharacter($char); 810 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedState); 811 | } 812 | break; 813 | 814 | case static::ScriptDataDoubleEscapedDashState: 815 | if ($char === '-') { 816 | $this->_bufferCharacter($char); 817 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedDashDashState); 818 | } else if ($char === '<') { 819 | $this->_bufferCharacter($char); 820 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedLessThanSignState); 821 | } else if ($char === static::kEndOfFileMarker) { 822 | $this->_parseError(); 823 | $this->_HTML_RECONSUME_IN(static::DataState); 824 | } else { 825 | $this->_bufferCharacter($char); 826 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedState); 827 | } 828 | break; 829 | 830 | case static::ScriptDataDoubleEscapedDashDashState: 831 | if ($char === '-') { 832 | $this->_bufferCharacter($char); 833 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedDashDashState); 834 | } else if ($char === '<') { 835 | $this->_bufferCharacter($char); 836 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedLessThanSignState); 837 | } else if ($char === '>') { 838 | $this->_bufferCharacter($char); 839 | $this->_HTML_ADVANCE_TO(static::ScriptDataState); 840 | } else if ($char === static::kEndOfFileMarker) { 841 | $this->_parseError(); 842 | $this->_HTML_RECONSUME_IN(static::DataState); 843 | } else { 844 | $this->_bufferCharacter($char); 845 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedState); 846 | } 847 | break; 848 | 849 | case static::ScriptDataDoubleEscapedLessThanSignState: 850 | if ($char === '/') { 851 | 
$this->_bufferCharacter($char); 852 | $this->_temporaryBuffer = ''; 853 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeEndState); 854 | } else 855 | $this->_HTML_RECONSUME_IN(static::ScriptDataDoubleEscapedState); 856 | break; 857 | 858 | case static::ScriptDataDoubleEscapeEndState: 859 | if ($this->_isTokenizerWhitespace($char) || $char === '/' || $char === '>') { 860 | $this->_bufferCharacter($char); 861 | if ($this->_temporaryBufferIs(HTMLNames::scriptTag)) { 862 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedState); 863 | } else { 864 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedState); 865 | } 866 | } else if (ctype_upper($char)) { 867 | $this->_bufferCharacter($char); 868 | $this->_temporaryBuffer .= strtolower($char); 869 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeEndState); 870 | } else if (ctype_lower($char)) { 871 | $this->_bufferCharacter($char); 872 | $this->_temporaryBuffer .= $char; 873 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeEndState); 874 | } else { 875 | $this->_HTML_RECONSUME_IN(static::ScriptDataDoubleEscapedState); 876 | } 877 | break; 878 | 879 | case static::BeforeAttributeNameState: 880 | if ($this->_isTokenizerWhitespace($char)) { 881 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeNameState); 882 | } else if ($char === '/') { 883 | $this->_HTML_ADVANCE_TO(static::SelfClosingStartTagState); 884 | } else if ($char === '>') { 885 | return $this->_emitAndResumeIn(); 886 | } else if (ctype_upper($char)) { 887 | $this->_Token->addNewAttribute(); 888 | $this->_Token->beginAttributeName($source->numberOfCharactersConsumed()); 889 | $this->_Token->appendToAttributeName(strtolower($char)); 890 | $this->_HTML_ADVANCE_TO(static::AttributeNameState); 891 | } else if ($char === static::kEndOfFileMarker) { 892 | $this->_parseError(); 893 | $this->_HTML_RECONSUME_IN(static::DataState); 894 | } else { 895 | if ($char === '"' || $char === '\'' || $char === '<' || $char === '=') { 896 | 
$this->_parseError(); 897 | } 898 | $this->_Token->addNewAttribute(); 899 | $this->_Token->beginAttributeName($source->numberOfCharactersConsumed()); 900 | $this->_Token->appendToAttributeName($char); 901 | $this->_HTML_ADVANCE_TO(static::AttributeNameState); 902 | } 903 | break; 904 | 905 | case static::AttributeNameState: 906 | if ($this->_isTokenizerWhitespace($char)) { 907 | $this->_Token->endAttributeName($source->numberOfCharactersConsumed()); 908 | $this->_HTML_ADVANCE_TO(static::AfterAttributeNameState); 909 | } else if ($char === '/') { 910 | $this->_Token->endAttributeName($source->numberOfCharactersConsumed()); 911 | $this->_HTML_ADVANCE_TO(static::SelfClosingStartTagState); 912 | } else if ($char === '=') { 913 | $this->_Token->endAttributeName($source->numberOfCharactersConsumed()); 914 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeValueState); 915 | } else if ($char === '>') { 916 | $this->_Token->endAttributeName($source->numberOfCharactersConsumed()); 917 | return $this->_emitAndResumeIn(); 918 | } else if (ctype_upper($char)) { 919 | $this->_Token->appendToAttributeName(strtolower($char)); 920 | $this->_HTML_ADVANCE_TO(static::AttributeNameState); 921 | } else if ($char === static::kEndOfFileMarker) { 922 | $this->_parseError(); 923 | $this->_Token->endAttributeName($source->numberOfCharactersConsumed()); 924 | $this->_HTML_RECONSUME_IN(static::DataState); 925 | } else { 926 | if ($char === '"' || $char === '\'' || $char === '<' || $char === '=') { 927 | $this->_parseError(); 928 | } 929 | $this->_Token->appendToAttributeName($char); 930 | $this->_HTML_ADVANCE_TO(static::AttributeNameState); 931 | } 932 | break; 933 | 934 | case static::AfterAttributeNameState: 935 | if ($this->_isTokenizerWhitespace($char)) { 936 | $this->_HTML_ADVANCE_TO(static::AfterAttributeNameState); 937 | } else if ($char === '/') { 938 | $this->_HTML_ADVANCE_TO(static::SelfClosingStartTagState); 939 | } else if ($char === '=') { 940 | 
$this->_HTML_ADVANCE_TO(static::BeforeAttributeValueState); 941 | } else if ($char === '>') { 942 | return $this->_emitAndResumeIn(); 943 | } else if (ctype_upper($char)) { 944 | $this->_Token->addNewAttribute(); 945 | $this->_Token->beginAttributeName($source->numberOfCharactersConsumed()); 946 | $this->_Token->appendToAttributeName(strtolower($char)); 947 | $this->_HTML_ADVANCE_TO(static::AttributeNameState); 948 | } else if ($char === static::kEndOfFileMarker) { 949 | $this->_parseError(); 950 | $this->_HTML_RECONSUME_IN(static::DataState); 951 | } else { 952 | if ($char === '"' || $char === '\'' || $char === '<') { 953 | $this->_parseError(); 954 | } 955 | $this->_Token->addNewAttribute(); 956 | $this->_Token->beginAttributeName($source->numberOfCharactersConsumed()); 957 | $this->_Token->appendToAttributeName($char); 958 | $this->_HTML_ADVANCE_TO(static::AttributeNameState); 959 | } 960 | break; 961 | 962 | case static::BeforeAttributeValueState: 963 | if ($this->_isTokenizerWhitespace($char)) { 964 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeValueState); 965 | } else if ($char === '"') { 966 | $this->_Token->beginAttributeValue($source->numberOfCharactersConsumed() + 1); 967 | $this->_HTML_ADVANCE_TO(static::AttributeValueDoubleQuotedState); 968 | } else if ($char === '&') { 969 | $this->_Token->beginAttributeValue($source->numberOfCharactersConsumed()); 970 | $this->_HTML_RECONSUME_IN(static::AttributeValueUnquotedState); 971 | } else if ($char === '\'') { 972 | $this->_Token->beginAttributeValue($source->numberOfCharactersConsumed() + 1); 973 | $this->_HTML_ADVANCE_TO(static::AttributeValueSingleQuotedState); 974 | } else if ($char === '>') { 975 | $this->_parseError(); 976 | return $this->_emitAndResumeIn(); 977 | } else if ($char === static::kEndOfFileMarker) { 978 | $this->_parseError(); 979 | $this->_HTML_RECONSUME_IN(static::DataState); 980 | } else { 981 | if ($char === '<' || $char === '=' || $char === '`') { 982 | $this->_parseError(); 983 | } 
984 | $this->_Token->beginAttributeValue($source->numberOfCharactersConsumed()); 985 | $this->_Token->appendToAttributeValue($char); 986 | $this->_HTML_ADVANCE_TO(static::AttributeValueUnquotedState); 987 | } 988 | break; 989 | 990 | case static::AttributeValueDoubleQuotedState: 991 | if ($char === '"') { 992 | $this->_Token->setDoubleQuoted(); 993 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed()); 994 | $this->_HTML_ADVANCE_TO(static::AfterAttributeValueQuotedState); 995 | } else if ($char === '&') { 996 | $this->_additionalAllowedCharacter = '"'; 997 | $this->_HTML_ADVANCE_TO(static::CharacterReferenceInAttributeValueState); 998 | } else if ($char === static::kEndOfFileMarker) { 999 | $this->_parseError(); 1000 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed()); 1001 | $this->_HTML_RECONSUME_IN(static::DataState); 1002 | } else { 1003 | $this->_Token->appendToAttributeValue($char); 1004 | $this->_HTML_ADVANCE_TO(static::AttributeValueDoubleQuotedState); 1005 | } 1006 | break; 1007 | 1008 | case static::AttributeValueSingleQuotedState: 1009 | if ($char === '\'') { 1010 | $this->_Token->setSingleQuoted(); 1011 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed()); 1012 | $this->_HTML_ADVANCE_TO(static::AfterAttributeValueQuotedState); 1013 | } else if ($char === '&') { 1014 | $this->_additionalAllowedCharacter = '\''; 1015 | $this->_HTML_ADVANCE_TO(static::CharacterReferenceInAttributeValueState); 1016 | } else if ($char === static::kEndOfFileMarker) { 1017 | $this->_parseError(); 1018 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed()); 1019 | $this->_HTML_RECONSUME_IN(static::DataState); 1020 | } else { 1021 | $this->_Token->appendToAttributeValue($char); 1022 | $this->_HTML_ADVANCE_TO(static::AttributeValueSingleQuotedState); 1023 | } 1024 | break; 1025 | 1026 | case static::AttributeValueUnquotedState: 1027 | if ($this->_isTokenizerWhitespace($char)) { 1028 | 
$this->_Token->endAttributeValue($source->numberOfCharactersConsumed()); 1029 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeNameState); 1030 | } else if ($char === '&') { 1031 | $this->_additionalAllowedCharacter = '>'; 1032 | $this->_HTML_ADVANCE_TO(static::CharacterReferenceInAttributeValueState); 1033 | } else if ($char === '>') { 1034 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed()); 1035 | return $this->_emitAndResumeIn(); 1036 | } else if ($char === static::kEndOfFileMarker) { 1037 | $this->_parseError(); 1038 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed()); 1039 | $this->_HTML_RECONSUME_IN(static::DataState); 1040 | } else { 1041 | if ($char === '"' || $char === '\'' || $char === '<' || $char === '=' || $char === '`') { 1042 | $this->_parseError(); 1043 | } 1044 | $this->_Token->appendToAttributeValue($char); 1045 | $this->_HTML_ADVANCE_TO(static::AttributeValueUnquotedState); 1046 | } 1047 | break; 1048 | 1049 | case static::CharacterReferenceInAttributeValueState: 1050 | // TODO Do not expand the reference, so skip parse Character references. 1051 | $this->_Token->appendToAttributeValue('&'); 1052 | // We're supposed to switch back to the attribute value state that 1053 | // we were in when we were switched into this state. Rather than 1054 | // keeping track of this explictly, we observe that the previous 1055 | // state can be determined by $this->_additionalAllowedCharacter. 
1056 | if ($this->_additionalAllowedCharacter === '"') { 1057 | $this->_HTML_SWITCH_TO(static::AttributeValueDoubleQuotedState); 1058 | } else if ($this->_additionalAllowedCharacter === '\'') { 1059 | $this->_HTML_SWITCH_TO(static::AttributeValueSingleQuotedState); 1060 | } else if ($this->_additionalAllowedCharacter === '>') { 1061 | $this->_HTML_SWITCH_TO(static::AttributeValueUnquotedState); 1062 | } else { 1063 | // ASSERT_NOT_REACHED(); 1064 | } 1065 | break; 1066 | 1067 | case static::AfterAttributeValueQuotedState: 1068 | if ($this->_isTokenizerWhitespace($char)) { 1069 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeNameState); 1070 | } else if ($char === '/') { 1071 | $this->_HTML_ADVANCE_TO(static::SelfClosingStartTagState); 1072 | } else if ($char === '>') { 1073 | return $this->_emitAndResumeIn(); 1074 | } else if ($char === static::kEndOfFileMarker) { 1075 | $this->_parseError(); 1076 | $this->_HTML_RECONSUME_IN(static::DataState); 1077 | } else { 1078 | $this->_parseError(); 1079 | $this->_HTML_RECONSUME_IN(static::BeforeAttributeNameState); 1080 | } 1081 | break; 1082 | 1083 | case static::SelfClosingStartTagState: 1084 | if ($char === '>') { 1085 | $this->_Token->setSelfClosing(); 1086 | return $this->_emitAndResumeIn(); 1087 | } else if ($char === static::kEndOfFileMarker) { 1088 | $this->_parseError(); 1089 | $this->_HTML_RECONSUME_IN(static::DataState); 1090 | } else { 1091 | $this->_parseError(); 1092 | $this->_HTML_RECONSUME_IN(static::BeforeAttributeNameState); 1093 | } 1094 | break; 1095 | 1096 | case static::BogusCommentState: 1097 | $this->_Token->beginComment(); 1098 | $this->_HTML_RECONSUME_IN(static::ContinueBogusCommentState); 1099 | break; 1100 | 1101 | case static::ContinueBogusCommentState: 1102 | if ($char === '>') { 1103 | return $this->_emitAndResumeIn(); 1104 | } else if ($char === static::kEndOfFileMarker) { 1105 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1106 | } else { 1107 | 
$this->_Token->appendToComment($char); 1108 | $this->_HTML_ADVANCE_TO(static::ContinueBogusCommentState); 1109 | } 1110 | break; 1111 | 1112 | case static::MarkupDeclarationOpenState: 1113 | $dashDashString = '--'; 1114 | $doctypeString = 'doctype'; 1115 | $cdataString = '[CDATA['; 1116 | if ($char === '-') { 1117 | $result = $source->lookAhead($dashDashString); 1118 | if ($result === SegmentedString::DidMatch) { 1119 | $this->addState(); 1120 | $this->_SegmentedString->read(strlen('--')); 1121 | $this->_Token->beginComment(); 1122 | $this->_HTML_SWITCH_TO(static::CommentStartState); 1123 | continue; 1124 | } else if ($result === SegmentedString::NotEnoughCharacters) { 1125 | $this->addState(); 1126 | return $this->_haveBufferedCharacterToken(); 1127 | } 1128 | } else if ($char === 'D' || $char === 'd') { 1129 | $result = $this->_SegmentedString->lookAheadIgnoringCase($doctypeString); 1130 | if ($result === SegmentedString::DidMatch) { 1131 | $this->addState(); 1132 | $this->_SegmentedString->read(strlen($doctypeString)); 1133 | $this->_HTML_SWITCH_TO(static::DOCTYPEState); 1134 | continue; 1135 | } else if ($result === SegmentedString::NotEnoughCharacters) { 1136 | $this->addState(); 1137 | return $this->_haveBufferedCharacterToken(); 1138 | } 1139 | } else if ($char === '[' && $this->_shouldAllowCDATA()) { 1140 | $result = $source->lookAhead($cdataString); 1141 | if ($result === SegmentedString::DidMatch) { 1142 | $this->addState(); 1143 | $this->_SegmentedString->read(strlen($cdataString)); 1144 | $this->_HTML_SWITCH_TO(static::CDATASectionState); 1145 | continue; 1146 | } else if ($result === SegmentedString::NotEnoughCharacters) { 1147 | $this->addState(); 1148 | return $this->_haveBufferedCharacterToken(); 1149 | } 1150 | } 1151 | $this->_parseError(); 1152 | $this->_HTML_RECONSUME_IN(static::BogusCommentState); 1153 | break; 1154 | 1155 | case static::CommentStartState: 1156 | if ($char === '-') { 1157 | 
$this->_HTML_ADVANCE_TO(static::CommentStartDashState); 1158 | } else if ($char === '>') { 1159 | $this->_parseError(); 1160 | return $this->_emitAndResumeIn(); 1161 | } else if ($char === static::kEndOfFileMarker) { 1162 | $this->_parseError(); 1163 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1164 | } else { 1165 | $this->_Token->appendToComment($char); 1166 | $this->_HTML_ADVANCE_TO(static::CommentState); 1167 | } 1168 | break; 1169 | 1170 | case static::CommentStartDashState: 1171 | if ($char === '-') { 1172 | $this->_HTML_ADVANCE_TO(static::CommentEndState); 1173 | } else if ($char === '>') { 1174 | $this->_parseError(); 1175 | return $this->_emitAndResumeIn(); 1176 | } else if ($char === static::kEndOfFileMarker) { 1177 | $this->_parseError(); 1178 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1179 | } else { 1180 | $this->_Token->appendToComment('-'); 1181 | $this->_Token->appendToComment($char); 1182 | $this->_HTML_ADVANCE_TO(static::CommentState); 1183 | } 1184 | break; 1185 | 1186 | case static::CommentState: 1187 | if ($char === '-') { 1188 | $this->_HTML_ADVANCE_TO(static::CommentEndDashState); 1189 | } else if ($char === static::kEndOfFileMarker) { 1190 | $this->_parseError(); 1191 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1192 | } else { 1193 | $this->_Token->appendToComment($char); 1194 | $this->_HTML_ADVANCE_TO(static::CommentState); 1195 | } 1196 | break; 1197 | 1198 | case static::CommentEndDashState: 1199 | if ($char === '-') { 1200 | $this->_HTML_ADVANCE_TO(static::CommentEndState); 1201 | } else if ($char === static::kEndOfFileMarker) { 1202 | $this->_parseError(); 1203 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1204 | } else { 1205 | $this->_Token->appendToComment('-'); 1206 | $this->_Token->appendToComment($char); 1207 | $this->_HTML_ADVANCE_TO(static::CommentState); 1208 | } 1209 | break; 1210 | 1211 | case static::CommentEndState: 
1212 | if ($char === '>') { 1213 | return $this->_emitAndResumeIn(); 1214 | } else if ($char === '!') { 1215 | $this->_parseError(); 1216 | $this->_HTML_ADVANCE_TO(static::CommentEndBangState); 1217 | } else if ($char === '-') { 1218 | $this->_parseError(); 1219 | $this->_Token->appendToComment('-'); 1220 | $this->_HTML_ADVANCE_TO(static::CommentEndState); 1221 | } else if ($char === static::kEndOfFileMarker) { 1222 | $this->_parseError(true); 1223 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1224 | } else { 1225 | $this->_parseError(); 1226 | $this->_Token->appendToComment('-'); 1227 | $this->_Token->appendToComment('-'); 1228 | $this->_Token->appendToComment($char); 1229 | $this->_HTML_ADVANCE_TO(static::CommentState); 1230 | } 1231 | break; 1232 | 1233 | case static::CommentEndBangState: 1234 | if ($char === '-') { 1235 | $this->_Token->appendToComment('-'); 1236 | $this->_Token->appendToComment('-'); 1237 | $this->_Token->appendToComment('!'); 1238 | $this->_HTML_ADVANCE_TO(static::CommentEndDashState); 1239 | } else if ($char === '>') { 1240 | return $this->_emitAndResumeIn(); 1241 | } else if ($char === static::kEndOfFileMarker) { 1242 | $this->_parseError(true); 1243 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1244 | } else { 1245 | $this->_Token->appendToComment('-'); 1246 | $this->_Token->appendToComment('-'); 1247 | $this->_Token->appendToComment('!'); 1248 | $this->_Token->appendToComment($char); 1249 | $this->_HTML_ADVANCE_TO(static::CommentState); 1250 | } 1251 | break; 1252 | 1253 | case static::DOCTYPEState: 1254 | if ($this->_isTokenizerWhitespace($char)) { 1255 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPENameState); 1256 | } else if ($char === static::kEndOfFileMarker) { 1257 | $this->_parseError(); 1258 | $this->_Token->beginDOCTYPE(); 1259 | $this->_Token->setForceQuirks(); 1260 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1261 | } else { 1262 | 
$this->_parseError(); 1263 | $this->_HTML_RECONSUME_IN(static::BeforeDOCTYPENameState); 1264 | } 1265 | break; 1266 | 1267 | case static::BeforeDOCTYPENameState: 1268 | if ($this->_isTokenizerWhitespace($char)) { 1269 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPENameState); 1270 | } else if (ctype_upper($char)) { 1271 | $this->_Token->beginDOCTYPE(strtolower($char)); 1272 | $this->_HTML_ADVANCE_TO(static::DOCTYPENameState); 1273 | } else if ($char === '>') { 1274 | $this->_parseError(); 1275 | $this->_Token->beginDOCTYPE(); 1276 | $this->_Token->setForceQuirks(); 1277 | return $this->_emitAndResumeIn(); 1278 | } else if ($char === static::kEndOfFileMarker) { 1279 | $this->_parseError(true); 1280 | $this->_Token->beginDOCTYPE(); 1281 | $this->_Token->setForceQuirks(); 1282 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1283 | } else { 1284 | $this->_Token->beginDOCTYPE($char); 1285 | $this->_HTML_ADVANCE_TO(static::DOCTYPENameState); 1286 | } 1287 | break; 1288 | 1289 | case static::DOCTYPENameState: 1290 | if ($this->_isTokenizerWhitespace($char)) { 1291 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPENameState); 1292 | } else if ($char === '>') { 1293 | return $this->_emitAndResumeIn(); 1294 | } else if (ctype_upper($char)) { 1295 | $this->_Token->appendToName(strtolower($char)); 1296 | $this->_HTML_ADVANCE_TO(static::DOCTYPENameState); 1297 | } else if ($char === static::kEndOfFileMarker) { 1298 | $this->_parseError(true); 1299 | $this->_Token->setForceQuirks(); 1300 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1301 | } else { 1302 | $this->_Token->appendToName($char); 1303 | $this->_HTML_ADVANCE_TO(static::DOCTYPENameState); 1304 | } 1305 | break; 1306 | 1307 | case static::AfterDOCTYPENameState: 1308 | if ($this->_isTokenizerWhitespace($char)) { 1309 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPENameState); 1310 | } else if ($char === '>') { 1311 | return $this->_emitAndResumeIn(); 1312 | } else if ($char === 
static::kEndOfFileMarker) { 1313 | $this->_parseError(true); 1314 | $this->_Token->setForceQuirks(); 1315 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1316 | } else { 1317 | // DEFINE_STATIC_LOCAL(String, publicString, (ASCIILiteral("public"))); 1318 | $publicString = 'public'; 1319 | // DEFINE_STATIC_LOCAL(String, systemString, (ASCIILiteral("system"))); 1320 | $systemString = 'system'; 1321 | if ($char === 'P' || $char === 'p') { 1322 | $result = $source->lookAheadIgnoringCase($publicString); 1323 | if ($result === SegmentedString::DidMatch) { 1324 | $this->addState(); 1325 | $this->_HTML_SWITCH_TO(static::AfterDOCTYPEPublicKeywordState); 1326 | $this->_SegmentedString->read(strlen($publicString)); 1327 | continue; 1328 | } 1329 | // @todo 1330 | // else if ($result === SegmentedString::NotEnoughCharacters) { 1331 | // $this->addState(); 1332 | // return $this->_haveBufferedCharacterToken(); 1333 | // } 1334 | } else if ($char === 'S' || $char === 's') { 1335 | $result = $source->lookAheadIgnoringCase($systemString); 1336 | if ($result === SegmentedString::DidMatch) { 1337 | $this->addState(); 1338 | $this->_HTML_SWITCH_TO(static::AfterDOCTYPESystemKeywordState); 1339 | $this->_SegmentedString->read(strlen($systemString)); 1340 | continue; 1341 | } 1342 | // @todo 1343 | // else if ($result === SegmentedString::NotEnoughCharacters) { 1344 | // $this->addState(); 1345 | // return $this->_haveBufferedCharacterToken(); 1346 | // } 1347 | } 1348 | $this->_parseError(); 1349 | $this->_Token->setForceQuirks(); 1350 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState); 1351 | } 1352 | break; 1353 | 1354 | case static::AfterDOCTYPEPublicKeywordState: 1355 | if ($this->_isTokenizerWhitespace($char)) { 1356 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPEPublicIdentifierState); 1357 | } else if ($char === '"') { 1358 | $this->_parseError(); 1359 | $this->_Token->setPublicIdentifierToEmptyString(); 1360 | 
$this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierDoubleQuotedState); 1361 | } else if ($char === '\'') { 1362 | $this->_parseError(); 1363 | $this->_Token->setPublicIdentifierToEmptyString(); 1364 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierSingleQuotedState); 1365 | } else if ($char === '>') { 1366 | $this->_parseError(); 1367 | $this->_Token->setForceQuirks(); 1368 | return $this->_emitAndResumeIn(); 1369 | } else if ($char === static::kEndOfFileMarker) { 1370 | $this->_parseError(true); 1371 | $this->_Token->setForceQuirks(); 1372 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1373 | } else { 1374 | $this->_parseError(); 1375 | $this->_Token->setForceQuirks(); 1376 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState); 1377 | } 1378 | break; 1379 | 1380 | case static::BeforeDOCTYPEPublicIdentifierState: 1381 | if ($this->_isTokenizerWhitespace($char)) { 1382 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPEPublicIdentifierState); 1383 | } else if ($char === '"') { 1384 | $this->_Token->setPublicIdentifierToEmptyString(); 1385 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierDoubleQuotedState); 1386 | } else if ($char === '\'') { 1387 | $this->_Token->setPublicIdentifierToEmptyString(); 1388 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierSingleQuotedState); 1389 | } else if ($char === '>') { 1390 | $this->_parseError(); 1391 | $this->_Token->setForceQuirks(); 1392 | return $this->_emitAndResumeIn(); 1393 | } else if ($char === static::kEndOfFileMarker) { 1394 | $this->_parseError(true); 1395 | $this->_Token->setForceQuirks(); 1396 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1397 | } else { 1398 | $this->_parseError(); 1399 | $this->_Token->setForceQuirks(); 1400 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState); 1401 | } 1402 | break; 1403 | 1404 | case static::DOCTYPEPublicIdentifierDoubleQuotedState: 1405 | if ($char === '"') { 1406 | 
$this->_HTML_ADVANCE_TO(static::AfterDOCTYPEPublicIdentifierState); 1407 | } else if ($char === '>') { 1408 | $this->_parseError(); 1409 | $this->_Token->setForceQuirks(); 1410 | return $this->_emitAndResumeIn(); 1411 | } else if ($char === static::kEndOfFileMarker) { 1412 | $this->_parseError(); 1413 | $this->_Token->setForceQuirks(); 1414 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1415 | } else { 1416 | $this->_Token->appendToPublicIdentifier($char); 1417 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierDoubleQuotedState); 1418 | } 1419 | break; 1420 | 1421 | case static::DOCTYPEPublicIdentifierSingleQuotedState: 1422 | if ($char === '\'') { 1423 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPEPublicIdentifierState); 1424 | } else if ($char === '>') { 1425 | $this->_parseError(); 1426 | $this->_Token->setForceQuirks(); 1427 | return $this->_emitAndResumeIn(); 1428 | } else if ($char === static::kEndOfFileMarker) { 1429 | $this->_parseError(); 1430 | $this->_Token->setForceQuirks(); 1431 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1432 | } else { 1433 | $this->_Token->appendToPublicIdentifier($char); 1434 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierSingleQuotedState); 1435 | } 1436 | break; 1437 | 1438 | case static::AfterDOCTYPEPublicIdentifierState: 1439 | if ($this->_isTokenizerWhitespace($char)) { 1440 | $this->_HTML_ADVANCE_TO(static::BetweenDOCTYPEPublicAndSystemIdentifiersState); 1441 | } else if ($char === '>') { 1442 | return $this->_emitAndResumeIn(); 1443 | } else if ($char === '"') { 1444 | $this->_parseError(); 1445 | $this->_Token->setSystemIdentifierToEmptyString(); 1446 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierDoubleQuotedState); 1447 | } else if ($char === '\'') { 1448 | $this->_parseError(); 1449 | $this->_Token->setSystemIdentifierToEmptyString(); 1450 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierSingleQuotedState); 1451 | } else if ($char === 
static::kEndOfFileMarker) { 1452 | $this->_parseError(); 1453 | $this->_Token->setForceQuirks(); 1454 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1455 | } else { 1456 | $this->_parseError(); 1457 | $this->_Token->setForceQuirks(); 1458 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState); 1459 | } 1460 | break; 1461 | 1462 | case static::BetweenDOCTYPEPublicAndSystemIdentifiersState: 1463 | if ($this->_isTokenizerWhitespace($char)) { 1464 | $this->_HTML_ADVANCE_TO(static::BetweenDOCTYPEPublicAndSystemIdentifiersState); 1465 | } else if ($char === '>') { 1466 | return $this->_emitAndResumeIn(); 1467 | } else if ($char === '"') { 1468 | $this->_Token->setSystemIdentifierToEmptyString(); 1469 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierDoubleQuotedState); 1470 | } else if ($char === '\'') { 1471 | $this->_Token->setSystemIdentifierToEmptyString(); 1472 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierSingleQuotedState); 1473 | } else if ($char === static::kEndOfFileMarker) { 1474 | $this->_parseError(); 1475 | $this->_Token->setForceQuirks(); 1476 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1477 | } else { 1478 | $this->_parseError(); 1479 | $this->_Token->setForceQuirks(); 1480 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState); 1481 | } 1482 | break; 1483 | 1484 | case static::AfterDOCTYPESystemKeywordState: 1485 | if ($this->_isTokenizerWhitespace($char)) { 1486 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPESystemIdentifierState); 1487 | } else if ($char === '"') { 1488 | $this->_parseError(); 1489 | $this->_Token->setSystemIdentifierToEmptyString(); 1490 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierDoubleQuotedState); 1491 | } else if ($char === '\'') { 1492 | $this->_parseError(); 1493 | $this->_Token->setSystemIdentifierToEmptyString(); 1494 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierSingleQuotedState); 1495 | } else if ($char === '>') { 1496 | 
$this->_parseError(); 1497 | $this->_Token->setForceQuirks(); 1498 | return $this->_emitAndResumeIn(); 1499 | } else if ($char === static::kEndOfFileMarker) { 1500 | $this->_parseError(); 1501 | $this->_Token->setForceQuirks(); 1502 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1503 | } else { 1504 | $this->_parseError(); 1505 | $this->_Token->setForceQuirks(); 1506 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState); 1507 | } 1508 | break; 1509 | 1510 | case static::BeforeDOCTYPESystemIdentifierState: 1511 | if ($this->_isTokenizerWhitespace($char)) { 1512 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPESystemIdentifierState); 1513 | continue; 1514 | } 1515 | if ($char === '"') { 1516 | $this->_Token->setSystemIdentifierToEmptyString(); 1517 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierDoubleQuotedState); 1518 | } else if ($char === '\'') { 1519 | $this->_Token->setSystemIdentifierToEmptyString(); 1520 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierSingleQuotedState); 1521 | } else if ($char === '>') { 1522 | $this->_parseError(); 1523 | $this->_Token->setForceQuirks(); 1524 | return $this->_emitAndResumeIn(); 1525 | } else if ($char === static::kEndOfFileMarker) { 1526 | $this->_parseError(); 1527 | $this->_Token->setForceQuirks(); 1528 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1529 | } else { 1530 | $this->_parseError(); 1531 | $this->_Token->setForceQuirks(); 1532 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState); 1533 | } 1534 | break; 1535 | 1536 | case static::DOCTYPESystemIdentifierDoubleQuotedState: 1537 | if ($char === '"') { 1538 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPESystemIdentifierState); 1539 | } else if ($char === '>') { 1540 | $this->_parseError(); 1541 | $this->_Token->setForceQuirks(); 1542 | return $this->_emitAndResumeIn(); 1543 | } else if ($char === static::kEndOfFileMarker) { 1544 | $this->_parseError(); 1545 | $this->_Token->setForceQuirks(); 1546 | 
return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1547 | } else { 1548 | $this->_Token->appendToSystemIdentifier($char); 1549 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierDoubleQuotedState); 1550 | } 1551 | break; 1552 | 1553 | case static::DOCTYPESystemIdentifierSingleQuotedState: 1554 | if ($char === '\'') { 1555 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPESystemIdentifierState); 1556 | } else if ($char === '>') { 1557 | $this->_parseError(); 1558 | $this->_Token->setForceQuirks(); 1559 | return $this->_emitAndResumeIn(); 1560 | } else if ($char === static::kEndOfFileMarker) { 1561 | $this->_parseError(); 1562 | $this->_Token->setForceQuirks(); 1563 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1564 | } else { 1565 | $this->_Token->appendToSystemIdentifier($char); 1566 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierSingleQuotedState); 1567 | } 1568 | break; 1569 | 1570 | case static::AfterDOCTYPESystemIdentifierState: 1571 | if ($this->_isTokenizerWhitespace($char)) { 1572 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPESystemIdentifierState); 1573 | } else if ($char === '>') { 1574 | return $this->_emitAndResumeIn(); 1575 | } else if ($char === static::kEndOfFileMarker) { 1576 | $this->_parseError(); 1577 | $this->_Token->setForceQuirks(); 1578 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1579 | } else { 1580 | $this->_parseError(); 1581 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState); 1582 | } 1583 | break; 1584 | 1585 | case static::BogusDOCTYPEState: 1586 | if ($char === '>') { 1587 | return $this->_emitAndResumeIn(); 1588 | } else if ($char === static::kEndOfFileMarker) { 1589 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState); 1590 | } 1591 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState); 1592 | break; 1593 | 1594 | case static::CDATASectionState: 1595 | if ($char === ']') { 1596 | 
$this->_HTML_ADVANCE_TO(static::CDATASectionRightSquareBracketState); 1597 | } else if ($char === static::kEndOfFileMarker) { 1598 | $this->_HTML_RECONSUME_IN(static::DataState); 1599 | } else { 1600 | $this->_bufferCharacter($char); 1601 | $this->_HTML_ADVANCE_TO(static::CDATASectionState); 1602 | } 1603 | break; 1604 | 1605 | case static::CDATASectionRightSquareBracketState: 1606 | if ($char === ']') { 1607 | $this->_HTML_ADVANCE_TO(static::CDATASectionDoubleRightSquareBracketState); 1608 | } else { 1609 | $this->_bufferCharacter(']'); 1610 | $this->_HTML_RECONSUME_IN(static::CDATASectionState); 1611 | } 1612 | break; 1613 | 1614 | case static::CDATASectionDoubleRightSquareBracketState: 1615 | if ($char === '>') { 1616 | $this->_HTML_ADVANCE_TO(static::DataState); 1617 | } else { 1618 | $this->_bufferCharacter(']'); 1619 | $this->_bufferCharacter(']'); 1620 | $this->_HTML_RECONSUME_IN(static::CDATASectionState); 1621 | } 1622 | break; 1623 | default: 1624 | break 2; 1625 | } 1626 | } 1627 | // ASSERT_NOT_REACHED 1628 | return false; 1629 | } 1630 | 1631 | /** Calls parseError() on the current token, then the _notImplemented() stub. The method declares no parameters, so the `true` argument that some call sites pass has no effect. */ protected function _parseError() { 1632 | $this->_Token->parseError(); 1633 | $this->_notImplemented(); 1634 | } 1635 | 1636 | /** Empty placeholder: the body holds only comments (a pointer to Source/core/platform/NotImplemented.h and "logger"), so calling it does nothing. */ protected function _notImplemented() { 1637 | // Source/core/platform/NotImplemented.h 1638 | // logger 1639 | } 1640 | 1641 | /** Returns whether $this->_temporaryBuffer is identical to $expectedString (delegates to _vectorEqualsString()). */ protected function _temporaryBufferIs($expectedString) { 1642 | return $this->_vectorEqualsString($this->_temporaryBuffer, $expectedString); 1643 | } 1644 | 1645 | /** Strict (===) comparison of the two arguments; no type conversion is performed. */ protected function _vectorEqualsString($vector, $string) { 1646 | return $vector === $string; 1647 | } 1648 | 1649 | /** Returns true when $this->_bufferedEndTagName is identical (===) to $this->_appropriateEndTagName, the name stored by _saveEndTagNameIfNeeded(). */ protected function _isAppropriateEndTag() { 1650 | return $this->_bufferedEndTagName === $this->_appropriateEndTagName; 1651 | } 1652 | 1653 | /** Saves the start tag name if needed, stores $state as the next state and returns true. The input is not advanced here, and $source is accepted but never used. */ protected function _emitAndReconsumeIn(SegmentedString $source, $state) { 1654 | $this->_saveEndTagNameIfNeeded(); 1655 | $this->_state = $state; 1656 | return true; 1657 | } 1658 | 1659 | /** When the current token is a start tag, copies its name into $this->_appropriateEndTagName; otherwise does nothing. */ protected function _saveEndTagNameIfNeeded() { 1660 | if 
($this->_Token->getType() === HTMLToken::StartTag) { 1661 | $this->_appropriateEndTagName = $this->_Token->getName(); 1662 | } 1663 | } 1664 | 1665 | /** Returns true immediately, leaving state and token untouched, while a character token is still buffered. Otherwise sets the state to DataState, calls makeEndOfFile() on the token and returns true. */ protected function _emitEndOfFile() { 1666 | if ($this->_haveBufferedCharacterToken()) { 1667 | return true; 1668 | } 1669 | 1670 | $this->_state = HTMLTokenizer::DataState; 1671 | //source.advanceAndUpdateLineNumber(); 1672 | //$this->_Token->clear(); 1673 | $this->_Token->makeEndOfFile(); 1674 | return true; 1675 | } 1676 | 1677 | /** Records the debug state, saves the start tag name if needed, sets the state to static::DataState and advances the input by one character. Takes no state parameter (the next state is always DataState) and always returns true. */ protected function _emitAndResumeIn() { 1678 | $this->addState(); 1679 | $this->_saveEndTagNameIfNeeded(); 1680 | //m_state = state; 1681 | $this->_state = static::DataState; 1682 | //source.advanceAndUpdateLineNumber(); 1683 | $this->_SegmentedString->advance(); 1684 | return true; 1685 | } 1686 | 1687 | /** Stores $state, then calls _flushBufferedEndTag($source). That call's return value is ignored; this method always returns true. */ protected function _flushEmitAndResumeIn(SegmentedString $source, $state) { 1688 | // m_state = state; 1689 | $this->_state = $state; 1690 | $this->_flushBufferedEndTag($source); 1691 | return true; 1692 | } 1693 | 1694 | /** Advances $source by one character. If the current token is a character token, returns true and leaves the buffered end tag data in place. Otherwise calls beginEndTag() with $this->_bufferedEndTagName, resets _bufferedEndTagName, _appropriateEndTagName and _temporaryBuffer to '' and returns false. */ protected function _flushBufferedEndTag(SegmentedString $source) { 1695 | $source->advance(); 1696 | if ($this->_Token->getType() === HTMLToken::Character) { 1697 | return true; 1698 | } 1699 | $this->_Token->beginEndTag($this->_bufferedEndTagName); 1700 | $this->_bufferedEndTagName = ''; 1701 | $this->_appropriateEndTagName = ''; 1702 | $this->_temporaryBuffer = ''; 1703 | return false; 1704 | } 1705 | 1706 | /** Returns true when the current token's type is HTMLToken::Character. */ protected function _haveBufferedCharacterToken() { 1707 | return $this->_Token->getType() === HTMLToken::Character; 1708 | } 1709 | 1710 | /** Calls ensureIsCharacterToken() on the current token, then appends $char to it with appendToCharacter(). */ protected function _bufferCharacter($char) { 1711 | $this->_Token->ensureIsCharacterToken(); 1712 | $this->_Token->appendToCharacter($char); 1713 | } 1714 | 1715 | // todo 1716 | /** Currently hard-coded to return true. */ protected function _shouldAllowCDATA() { 1717 | return true; 1718 | } 1719 | 1720 | /** Returns true when $char is a space, line feed (0x0A), tab (0x09) or form feed (0x0C). Carriage return (0x0D) is not in the list. */ protected function _isTokenizerWhitespace($char) { 1721 | return $char === ' ' || $char === "\x0A" || $char === "\x09" || $char === "\x0C"; 1722 | } 1723 | 1724 | 
/** Records the debug state, stores $state, then flushes the buffered end tag against $this->_SegmentedString. Returns true when _flushBufferedEndTag() returned true, otherwise null. */ protected function _FLUSH_AND_ADVANCE_TO($state) { 1725 | $this->addState(); 1726 | $this->_state = $state; 1727 | if ($this->_flushBufferedEndTag($this->_SegmentedString)) { 1728 | return true; 1729 | } 1730 | // if ( !m_inputStreamPreprocessor.peek(source)) return haveBufferedCharacterToken(); 1731 | return null; 1732 | } 1733 | 1734 | /** Sets the next state; the input cursor is not moved. */ protected function _HTML_RECONSUME_IN($state) { 1735 | $this->_state = $state; 1736 | } 1737 | 1738 | /** Sets the next state; the input cursor is not moved (same body as _HTML_RECONSUME_IN()). */ protected function _HTML_SWITCH_TO($state) { 1739 | $this->_state = $state; 1740 | } 1741 | 1742 | /** Records the debug state, sets the next state and advances the input by one character. */ protected function _HTML_ADVANCE_TO($state) { 1743 | $this->addState(); 1744 | $this->_state = $state; 1745 | $this->_SegmentedString->advance(); 1746 | } 1747 | 1748 | /** Does nothing unless $this->_debug is truthy. Otherwise stores the current state in $this->_buffer under the key tell() - $this->_startPos. */ protected function addState() { 1749 | if (!$this->_debug) { 1750 | return; 1751 | } 1752 | $this->_buffer[$this->_SegmentedString->tell() - $this->_startPos] = $this->_state; 1753 | } 1754 | 1755 | } -------------------------------------------------------------------------------- /src/zz/Html/SegmentedString.php: -------------------------------------------------------------------------------- 1 | str = $str; 44 | $this->len = strlen($str); 45 | } 46 | 47 | /** Returns the byte at the cursor without moving the cursor. 48 | * @return bool|string false when the cursor is at or past the end of the string 49 | */ 50 | public function getCurrentChar() { 51 | $i = $this->i; 52 | if ($this->len <= $i) { 53 | return false; 54 | } 55 | return $this->str[$i]; 56 | } 57 | 58 | /** Moves the cursor forward by one; no bounds check is performed. */ public function advance() { 59 | $this->i += 1; 60 | } 61 | 62 | /** Consumes $i bytes: moves the cursor forward by $i and returns the bytes that were skipped. The cursor is not clamped, so it can end up beyond the string length. 63 | * @param int $i number of bytes to consume 64 | * @return string|bool false (cursor unchanged) when already at the end and $i > 0; NOTE(review): this tag previously said only "string" 65 | */ 66 | public function read($i) { 67 | if ($this->eos() && $i > 0) { 68 | return false; 69 | } 70 | $this->i += $i; 71 | return substr($this->str, $this->i - $i, $i); 72 | } 73 | 74 | /** Returns a slice of the underlying string via PHP's substr(); the cursor is neither read nor changed. 75 | * @param int $startPos 76 | * @param int $length 77 | * @return string 78 | */ 79 | public function substr($startPos, $length) { 80 | return substr($this->str, $startPos, $length); 81 | } 82 | 83 | /** Moves the cursor to $offset counted from the start (begin) or relative to the current position (current). Returns false and leaves the cursor unchanged when the target is rejected. NOTE(review): the begin branch only rejects offsets greater than the length, so a negative offset is accepted there, unlike the current branch - confirm this is intended. 84 | * @param int $offset 85 | * @param int $whence self::begin or self::current 86 | * @throws \InvalidArgumentException when $whence matches neither case 87 | * @return bool 88 
| */ 89 | public function seek($offset, $whence = self::begin) { 90 | switch ($whence) { 91 | case static::begin: 92 | if ($this->len < $offset) { 93 | return false; 94 | } 95 | $this->i = $offset; 96 | return true; 97 | break; 98 | case static::current: 99 | $lookAhead = $this->i + $offset; 100 | if ($lookAhead < 0 || $lookAhead > $this->len) { 101 | return false; 102 | } 103 | $this->i = $lookAhead; 104 | return true; 105 | break; 106 | } 107 | 108 | throw new \InvalidArgumentException; 109 | } 110 | 111 | /** 112 | * @return int the current cursor position 113 | */ 114 | public function tell() { 115 | return $this->i; 116 | } 117 | 118 | /** 119 | * @return bool true when the cursor is at or past the end of the string 120 | */ 121 | public function eos() { 122 | return $this->len <= $this->i; 123 | } 124 | 125 | /** Returns the whole underlying string. */ public function get() { 126 | return $this->str; 127 | } 128 | 129 | /** Returns the stored length ($this->len). */ public function len() { 130 | return $this->len; 131 | } 132 | 133 | /** Consumes strlen($str) bytes with read() and compares them with $str. read() moves the cursor before the comparison, so a mismatch still consumes the bytes (the cursor stays put only when read() itself returns false). Case-sensitive mode returns $str on an exact match; otherwise both sides are lowercased with strtolower() and the bytes actually read are returned. Returns false on mismatch. */ public function token($str, $caseSensitive = true) { 134 | $matched = $this->read(strlen($str)); 135 | if ($caseSensitive) { 136 | return $str === $matched ? $str : false; 137 | } else { 138 | return strtolower($str) === strtolower($matched) ? 
$matched : false; 139 | } 140 | } 141 | 142 | /** Case-insensitive variant of lookAhead(); delegates to _lookAhead($str, false). */ public function lookAheadIgnoringCase($str) { 143 | return $this->_lookAhead($str, false); 144 | } 145 | 146 | /** Case-sensitive look-ahead; delegates to _lookAhead($str, true). */ public function lookAhead($str) { 147 | return $this->_lookAhead($str, true); 148 | } 149 | 150 | /** Saves the cursor, runs token() from it, then seeks back to the saved position. Returns NotEnoughCharacters when fewer than strlen($str) bytes remain after the saved cursor, whatever the comparison result; otherwise DidMatch or DidNotMatch. */ protected function _lookAhead($str, $caseSensitive = true) { 151 | $i = $this->i; 152 | $result = $this->token($str, $caseSensitive) !== false; 153 | $this->seek($i); 154 | if (strlen($str) + $i <= $this->len) { 155 | if ($result) { 156 | return static::DidMatch; 157 | } 158 | return static::DidNotMatch; 159 | } 160 | return static::NotEnoughCharacters; 161 | } 162 | 163 | // int numberOfCharactersConsumed() const { return m_string.length() - m_length; } 164 | /** Returns the cursor position ($this->i). The pushed-character bookkeeping shown in the commented-out lines is not implemented here. */ public function numberOfCharactersConsumed() { 165 | // int numberOfPushedCharacters = 0; 166 | // if (m_pushedChar1) { 167 | // ++numberOfPushedCharacters; 168 | // if (m_pushedChar2) 169 | // ++numberOfPushedCharacters; 170 | // } 171 | // return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters; 172 | return $this->i; 173 | } 174 | 175 | } 176 | --------------------------------------------------------------------------------