├── LICENSE
├── README.mdown
├── composer.json
└── src
└── zz
└── Html
├── HTMLMinify.php
├── HTMLNames.php
├── HTMLToken.php
├── HTMLTokenizer.php
└── SegmentedString.php
/LICENSE:
--------------------------------------------------------------------------------
1 | All codes without notes are distributed under MIT License.
2 |
3 | Copyright (c) zaininnari
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is furnished
10 | to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
23 |
24 |
25 | Specific license.
26 |
27 | The BSD 3-Clause License
28 | - HTMLNames.php
29 | - HTMLToken.php
30 | - HTMLTokenizer.php
31 |
32 | GNU General Public License
33 | - SegmentedString.php
34 |
--------------------------------------------------------------------------------
/README.mdown:
--------------------------------------------------------------------------------
1 | html-minifier
2 | =============
3 | A PHP port of the Blink HTMLTokenizer, used to minify HTML.
4 |
5 | [Build Status](https://travis-ci.org/zaininnari/html-minifier)
6 | [Coverage Status](https://coveralls.io/r/zaininnari/html-minifier?branch=master)
7 |
8 | Requirements
9 | ------------
10 |
11 | - PHP 5.3 or later
12 | - [optional] PHPUnit 3.5+ to execute the test suite (phpunit --version)
13 |
14 | Use
15 | ---
16 | Create composer.json.
17 | [composer.json]
18 | ```json
19 | {
20 | "require": {
21 | "zaininnari/html-minifier": "*"
22 | }
23 | }
24 | ```
25 |
26 | Download composer.phar and install.
27 | ```sh
28 | curl -sS https://getcomposer.org/installer | php
29 | php composer.phar install
30 | ```
31 |
32 | ```php
33 |
39 |
40 | text
41 |
42 | ';
43 |
44 | // shortcut: returns the minified HTML
45 | $minify = HTMLMinify::minify($html);
46 |
47 | // detail
48 | $HTMLMinify = new HTMLMinify($html);
49 | $minify = $HTMLMinify->process();
50 | ```
51 |
52 | output html
53 | ```HTML
54 |
59 | ```
60 |
61 | Option
62 | ------
63 |
64 | ### optimizationLevel
65 | #### OPTIMIZATION_SIMPLE(default)
66 |
67 | Collapses each run of whitespace into a single space.
68 | A run that contains a line break is reduced to a single newline instead.
69 |
70 | [input]
71 | ```HTML
72 |
73 | Example of paragraphs
74 | This is the first paragraph in this example.
75 | This is the second.
76 |
77 |
78 | HTML
79 |
80 | ```
81 |
82 | [output]
83 | ```HTML
84 |
85 | Example of paragraphs
86 | This is the first paragraph in this example.
87 | This is the second.
88 |
89 | HTML
90 |
91 | ```
92 |
93 | #### OPTIMIZATION_ADVANCED
94 |
95 | Removes as much whitespace as possible.
96 |
97 | - Remove whitespace
98 | - between block element and block element
99 | - between block element and inline element
100 | - run trim in style, script and downlevel-revealed conditional comment
101 | - **[future]** consider comments
102 | - Preserve whitespace
103 | - between inline element and inline element
104 |
105 | [input]
106 | ```HTML
107 |
108 | Example of paragraphs
109 | This is the first paragraph in this example.
110 | This is the second.
111 |
112 |
113 | HTML
114 |
115 | ```
116 |
117 | [output]
118 | ```HTML
119 | Example of paragraphs
This is thefirstparagraph in this example.This is the second.
HTML
120 | ```
121 |
122 | Author
123 | ------
124 |
125 | zaininnari
126 | http://www.zay.jp/
127 |
128 | Original source
129 |
130 | http://www.chromium.org/blink
131 |
132 | License
133 | -------
134 |
135 | Licensed under the MIT License and other licenses; see the `LICENSE` file for details.
136 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "zaininnari/html-minifier",
3 | "description": "The Blink HTMLTokenizer ported to PHP.",
4 | "type": "library",
5 | "keywords": ["PHP", "Blink", "HTML minify"],
6 | "homepage": "https://github.com/zaininnari/html-minifier",
7 | "license": "BSD-3-Clause",
8 | "authors": [
9 | {
10 | "name": "Google inc",
11 | "homepage": "http://www.chromium.org/blink",
12 | "role": "Author"
13 | },
14 | {
15 | "name": "zaininnari",
16 | "homepage": "https://github.com/zaininnari/",
17 | "role": "Developer"
18 | }
19 | ],
20 | "require": {
21 | "php": ">=5.3.0"
22 | },
23 | "require-dev": {
24 | "satooshi/php-coveralls": "dev-master"
25 | },
26 | "minimum-stability": "dev",
27 | "autoload": {
28 | "psr-0": {
29 | "zz": "src/"
30 | }
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/zz/Html/HTMLMinify.php:
--------------------------------------------------------------------------------
1 | 'inline',
33 | 'abbr' => 'inline',
34 | 'acronym' => 'inline',
35 | 'address' => 'block',
36 | 'applet' => 'inline',
37 | 'area' => 'none',
38 | 'article' => 'block',
39 | 'aside' => 'block',
40 | 'audio' => 'inline',
41 | 'b' => 'inline',
42 | 'base' => 'inline',
43 | 'basefont' => 'inline',
44 | 'bdo' => 'inline',
45 | 'bgsound' => 'inline',
46 | 'big' => 'inline',
47 | 'blockquote' => 'block',
48 | 'body' => 'block',
49 | 'br' => 'inline',
50 | 'button' => 'inline-block',
51 | 'canvas' => 'inline',
52 | 'caption' => 'table-caption',
53 | 'center' => 'block',
54 | 'cite' => 'inline',
55 | 'code' => 'inline',
56 | 'col' => 'table-column',
57 | 'colgroup' => 'table-column-group',
58 | 'command' => 'inline',
59 | 'datalist' => 'none',
60 | 'dd' => 'block',
61 | 'del' => 'inline',
62 | 'details' => 'block',
63 | 'dfn' => 'inline',
64 | 'dir' => 'block',
65 | 'div' => 'block',
66 | 'dl' => 'block',
67 | 'dt' => 'block',
68 | 'em' => 'inline',
69 | 'embed' => 'inline',
70 | 'fieldset' => 'block',
71 | 'figcaption' => 'block',
72 | 'figure' => 'block',
73 | 'font' => 'inline',
74 | 'footer' => 'block',
75 | 'form' => 'block',
76 | 'frame' => 'block',
77 | 'frameset' => 'block',
78 | 'h1' => 'block',
79 | 'h2' => 'block',
80 | 'h3' => 'block',
81 | 'h4' => 'block',
82 | 'h5' => 'block',
83 | 'h6' => 'block',
84 | 'head' => 'none',
85 | 'header' => 'block',
86 | 'hgroup' => 'block',
87 | 'hr' => 'block',
88 | 'html' => 'block',
89 | 'i' => 'inline',
90 | 'iframe' => 'inline',
91 | 'image' => 'inline',
92 | 'img' => 'inline',
93 | 'input' => 'inline-block',
94 | 'ins' => 'inline',
95 | 'isindex' => 'inline-block',
96 | 'kbd' => 'inline',
97 | 'keygen' => 'inline-block',
98 | 'label' => 'inline',
99 | 'layer' => 'block',
100 | 'legend' => 'block',
101 | 'li' => 'list-item',
102 | 'link' => 'none',
103 | 'listing' => 'block',
104 | 'map' => 'inline',
105 | 'mark' => 'inline',
106 | 'marquee' => 'inline-block',
107 | 'menu' => 'block',
108 | 'meta' => 'none',
109 | 'meter' => 'inline-block',
110 | 'nav' => 'block',
111 | 'nobr' => 'inline',
112 | 'noembed' => 'inline',
113 | 'noframes' => 'none',
114 | 'nolayer' => 'inline',
115 | 'noscript' => 'inline',
116 | 'object' => 'inline',
117 | 'ol' => 'block',
118 | 'optgroup' => 'inline',
119 | 'option' => 'inline',
120 | 'output' => 'inline',
121 | 'p' => 'block',
122 | 'param' => 'none',
123 | 'plaintext' => 'block',
124 | 'pre' => 'block',
125 | 'progress' => 'inline-block',
126 | 'q' => 'inline',
127 | 'rp' => 'inline',
128 | 'rt' => 'inline',
129 | 'ruby' => 'inline',
130 | 's' => 'inline',
131 | 'samp' => 'inline',
132 | 'script' => 'none',
133 | 'section' => 'block',
134 | 'select' => 'inline-block',
135 | 'small' => 'inline',
136 | 'source' => 'inline',
137 | 'span' => 'inline',
138 | 'strike' => 'inline',
139 | 'strong' => 'inline',
140 | 'style' => 'none',
141 | 'sub' => 'inline',
142 | 'summary' => 'block',
143 | 'sup' => 'inline',
144 | 'table' => 'table',
145 | 'tbody' => 'table-row-group',
146 | 'td' => 'table-cell',
147 | 'textarea' => 'inline-block',
148 | 'tfoot' => 'table-footer-group',
149 | 'th' => 'table-cell',
150 | 'thead' => 'table-header-group',
151 | 'title' => 'none',
152 | 'tr' => 'table-row',
153 | 'track' => 'inline',
154 | 'tt' => 'inline',
155 | 'u' => 'inline',
156 | 'ul' => 'inline-block',
157 | 'var' => 'inline',
158 | 'video' => 'inline',
159 | 'wbr' => 'inline',
160 | 'xmp' => 'block',
161 | );
162 |
/**
 * Void ("empty") elements: tags that never take an end tag.
 *
 * Keyed by tag name so membership can be tested with isset(); the value
 * repeats the key. _buildElement() consults this list to decide whether a
 * start tag is written with a trailing slash when the
 * 'emptyElementAddSlash' option is enabled.
 *
 * In addition to the HTML 4 empty elements, the list contains the void
 * elements introduced by HTML5 (command, keygen, source, track, wbr) so
 * that they are serialized consistently with the others.
 *
 * @var array
 */
protected $emptyTag = array(
    'area' => 'area',
    'base' => 'base',
    'basefont' => 'basefont',
    'br' => 'br',
    'col' => 'col',
    'command' => 'command',
    'embed' => 'embed',
    'frame' => 'frame',
    'hr' => 'hr',
    'img' => 'img',
    'input' => 'input',
    'isindex' => 'isindex',
    'keygen' => 'keygen',
    'link' => 'link',
    'meta' => 'meta',
    'param' => 'param',
    'source' => 'source',
    'track' => 'track',
    'wbr' => 'wbr',
);
179 |
/**
 * Prepares a minifier for one document: resolves the options and tokenizes
 * the markup up front.
 *
 * @param string $html    markup to minify; leading whitespace is stripped before tokenizing
 * @param array  $options caller options; merged with the defaults by options() and
 *                        also handed, unmerged, to the tokenizer
 */
public function __construct($html, $options = array()) {
    $source = ltrim($html);
    $this->html = $source;
    $this->options = $this->options($options);

    // The tokenizer receives the caller's raw options, not the merged set.
    $tokenizer = new HTMLTokenizer(new SegmentedString($source), $options);
    $this->tokens = $tokenizer->tokenizer();
}
193 |
/**
 * Merges the caller's options with a doctype preset and the base defaults.
 *
 * Precedence, highest first: caller options, doctype preset, base defaults.
 *
 * 'doctype'
 *     DOCTYPE_HTML4, DOCTYPE_XHTML1 or DOCTYPE_HTML5. Selects the preset for
 *     the two slash options below. A missing or unrecognized value uses the
 *     DOCTYPE_XHTML1 preset.
 *
 * 'optimizationLevel'
 *     OPTIMIZATION_SIMPLE (default)
 *         : collapse each run of whitespace into a single whitespace
 *           character, keeping one newline where the run had a line break
 *     OPTIMIZATION_ADVANCED
 *         : additionally remove whitespace around tags wherever possible
 *
 * 'emptyElementAddSlash'
 *     When true, void elements are written with a trailing slash.
 *     Preset: true for DOCTYPE_XHTML1, false for DOCTYPE_HTML4 and DOCTYPE_HTML5.
 *
 * 'emptyElementAddWhitespaceBeforeSlash'
 *     When true (and a slash is written), a space precedes the slash.
 *     Preset: true for DOCTYPE_XHTML1, false for DOCTYPE_HTML4 and DOCTYPE_HTML5.
 *
 * 'removeComment'
 *     true (default) : remove comments other than conditional comments
 *     false          : leave comments untouched
 *
 * 'excludeComment'
 *     List of PCRE patterns (default: empty). A comment matching any of the
 *     patterns is kept even when 'removeComment' is true.
 *
 * 'removeDuplicateAttribute'
 *     true (default) : when a start tag repeats an attribute name, keep only
 *                      the first occurrence
 *     false          : leave attributes untouched
 *
 * @param array $options
 * @return array
 */
protected function options(Array $options) {
    // Slash behaviour per doctype; both slash options share the same flag.
    $slashByDoctype = array(
        static::DOCTYPE_HTML4 => false,
        static::DOCTYPE_XHTML1 => true,
        static::DOCTYPE_HTML5 => false,
    );
    $presets = array();
    foreach ($slashByDoctype as $doctype => $addSlash) {
        $presets[$doctype] = array(
            'doctype' => $doctype,
            'emptyElementAddSlash' => $addSlash,
            'emptyElementAddWhitespaceBeforeSlash' => $addSlash,
        );
    }

    $preset = $presets[static::DOCTYPE_XHTML1];
    if (isset($options['doctype'])) {
        $requested = $options['doctype'];
        if (isset($presets[$requested])) {
            $preset = $presets[$requested];
        }
    }

    $defaults = array(
        'doctype' => static::DOCTYPE_XHTML1,
        'optimizationLevel' => static::OPTIMIZATION_SIMPLE,
        'emptyElementAddSlash' => false,
        'emptyElementAddWhitespaceBeforeSlash' => false,
        'removeComment' => true,
        'excludeComment' => array(),
        'removeDuplicateAttribute' => true,
    );

    // Array union keeps the left-most value for each key.
    return $options + $preset + $defaults;
}
276 |
/**
 * One-call shortcut: builds a minifier for $html and returns its output.
 *
 * @param string $html    markup to minify
 * @param array  $options same options as the constructor
 * @return string minified markup
 */
public static function minify($html, $options = array()) {
    $minifier = new self($html, $options);
    $minified = $minifier->process();
    return $minified;
}
286 |
/**
 * Returns the current token list (as produced by the tokenizer and
 * modified by any filters that have run so far).
 *
 * @return HTMLToken[]
 */
public function getTokens() {
    return $this->tokens;
}
293 |
/**
 * Runs the token filters, then serializes the tokens back to markup.
 *
 * @return string minified markup
 */
public function process() {
    $this->beforeFilter();
    return $this->_buildHtml($this->tokens);
}
302 |
/**
 * Serializes every token and concatenates the results in order.
 *
 * @param array $tokens list of HTMLToken
 * @return string
 */
protected function _buildHtml(Array $tokens) {
    $fragments = array_map(array($this, '_buildElement'), $tokens);
    return implode('', $fragments);
}
314 |
/**
 * Serializes a single token back to markup.
 *
 * - DOCTYPE tokens are written back from their original source text.
 * - Start tags are rebuilt from the tag name and attributes; tags listed in
 *   $emptyTag get a trailing slash (optionally preceded by a space) when
 *   the corresponding options are enabled.
 * - End tags are written as "</name>".
 * - Every other token type is written from its data unchanged.
 *
 * @param HTMLToken $token
 * @return string
 */
protected function _buildElement(HTMLToken $token) {
    switch ($token->getType()) {
        case HTMLToken::DOCTYPE:
            $html = $token->getHtmlOrigin();
            break;
        case HTMLToken::StartTag:
            $tagName = $token->getTagName();
            $selfClosing = '';
            if (isset($this->emptyTag[$tagName]) && $this->options['emptyElementAddSlash']) {
                $selfClosing = ($this->options['emptyElementAddWhitespaceBeforeSlash'] ? ' ' : '') . '/';
            }

            $attributes = $this->_buildAttributes($token);
            // Compare against '' explicitly: a lone attribute named "0" is a
            // non-empty attribute string but is falsy in a boolean test.
            $beforeAttributeSpace = $attributes !== '' ? ' ' : '';
            $html = sprintf('<%s%s%s%s>', $tagName, $beforeAttributeSpace, $attributes, $selfClosing);
            break;
        case HTMLToken::EndTag:
            // An end tag needs its "</" opener; without it the tag name is emitted as text.
            $html = sprintf('</%s>', $token->getTagName());
            break;
        default:
            $html = $token->getData();
            break;
    }
    return $html;
}
344 |
/**
 * Serializes the attributes of a token as a space-separated list.
 *
 * Each attribute is written as name=value using the quote style recorded
 * on the attribute (double, single or none). An unquoted attribute with an
 * empty value is written as the bare name.
 *
 * @param HTMLToken $token
 * @return string empty string when the token has no attributes
 */
protected function _buildAttributes(HTMLToken $token) {
    $parts = array();
    foreach ($token->getAttributes() as $attribute) {
        $name = $attribute['name'];
        $value = $attribute['value'];
        $style = $attribute['quoted'];

        if ($style == HTMLToken::DoubleQuoted) {
            $quote = '"';
        } elseif ($style == HTMLToken::SingleQuoted) {
            $quote = '\'';
        } else {
            $quote = '';
        }

        if ($quote === '' && $value === '') {
            $parts[] = $name;
            continue;
        }
        $parts[] = $name . '=' . $quote . $value . $quote;
    }
    return implode(' ', $parts);
}
374 |
/**
 * Applies the token filters in a fixed order: comment removal (optional),
 * whitespace reduction (always), duplicate-attribute removal (optional).
 */
protected function beforeFilter() {
    $options = $this->options;

    if ($options['removeComment']) {
        $this->removeWhitespaceFromComment();
    }

    $this->removeWhitespaceFromCharacter();

    if ($options['removeDuplicateAttribute']) {
        $this->optimizeStartTagAttributes();
    }
}
386 |
/**
 * Removes comment tokens from the token list.
 *
 * Kept as they are: conditional comments, comments matching any pattern in
 * the 'excludeComment' option, and the token that directly follows a
 * <script> or <style> start tag (it is skipped without being inspected).
 *
 * When a comment is removed between two pieces of text, the text after the
 * comment is appended to the Character token before it, so the later
 * whitespace pass sees one continuous run of text. Text is only ever merged
 * into a Character token that is itself kept; merging into a tag or into a
 * token that is about to be removed would lose the text.
 *
 * @return bool always true
 */
protected function removeWhitespaceFromComment() {
    $tokens = $this->tokens;
    $keepPatterns = $this->options['excludeComment'];
    $removes = array();
    // Index of the kept Character token that is currently the last kept
    // token, or null when the last kept token is not text.
    $lastText = null;
    // True once a comment has been removed after $lastText.
    $pendingMerge = false;

    $len = count($tokens);
    for ($i = 0; $i < $len; $i++) {
        $token = $tokens[$i];
        $type = $token->getType();

        if ($type === HTMLToken::StartTag) {
            $lastText = null;
            $pendingMerge = false;
            $tagName = $token->getTagName();
            if ($tagName === HTMLNames::scriptTag || $tagName === HTMLNames::styleTag) {
                // Skip the token that follows the start tag.
                $i++;
            }
            continue;
        }

        if ($type === HTMLToken::Character) {
            if ($lastText !== null && $pendingMerge) {
                // NOTE(review): concatenates via getData() rather than casting
                // the tokens to string; confirm HTMLToken has no __toString
                // that differs from getData() for Character tokens.
                $target = $tokens[$lastText];
                $target->setData($target->getData() . $token->getData());
                $removes[] = $i;
            } else {
                $lastText = $i;
                $pendingMerge = false;
            }
            continue;
        }

        if ($type !== HTMLToken::Comment) {
            $lastText = null;
            $pendingMerge = false;
            continue;
        }

        $comment = $token->getData();
        $keep = $this->_isConditionalComment($comment);
        if (!$keep && $keepPatterns) {
            foreach ($keepPatterns as $pattern) {
                if (preg_match($pattern, $comment)) {
                    $keep = true;
                    break;
                }
            }
        }
        if ($keep) {
            $lastText = null;
            $pendingMerge = false;
            continue;
        }

        $removes[] = $i;
        $pendingMerge = true;
    }

    foreach ($removes as $remove) {
        unset($tokens[$remove]);
    }

    if ($len !== count($tokens)) {
        // Close the gaps left by unset() so later passes can index 0..n-1.
        $tokens = array_values($tokens);
    }
    $this->tokens = $tokens;
    return true;
}
447 |
/**
 * Tells whether a tag is treated as an inline element.
 *
 * Tags missing from $tagDisplay count as inline; listed tags are inline
 * only when their display value is exactly 'inline'.
 *
 * @param string $tag tag name
 * @return bool
 */
protected function isInlineTag($tag) {
    $display = $this->tagDisplay;
    return !isset($display[$tag]) || $display[$tag] === 'inline';
}
455 |
/**
 * Reduces whitespace in every Character token.
 *
 * Text inside script, style, textarea and pre elements is not collapsed.
 * With OPTIMIZATION_ADVANCED, script and style content is trimmed, and text
 * elsewhere is trimmed when the neighbouring tokens allow it (see the
 * conditions in the loop). All other text has its whitespace runs collapsed
 * by _removeWhitespaceFromCharacter(); the final token is also right-trimmed.
 */
protected function removeWhitespaceFromCharacter() {
    $tokens = $this->tokens;
    $aggressive = $this->options['optimizationLevel'] === static::OPTIMIZATION_ADVANCED;
    // Elements whose text content must not be collapsed.
    $protectedTags = array(
        HTMLNames::scriptTag,
        HTMLNames::styleTag,
        HTMLNames::textareaTag,
        HTMLNames::preTag,
    );

    $editable = true;
    $openProtectedTag = null;
    // Display class of the most recent start/end tag seen.
    $lastTagInline = false;
    $current = null;
    $lastIndex = count($tokens) - 1;

    for ($pos = 0; $pos <= $lastIndex; $pos++) {
        /**
         * @var HTMLToken $previous
         */
        $previous = $current;
        $current = $tokens[$pos];
        $kind = $current->getType();

        if ($kind === HTMLToken::StartTag) {
            $name = $current->getName();
            $lastTagInline = $this->isInlineTag($name);
            if (in_array($name, $protectedTags)) {
                $editable = false;
                $openProtectedTag = $name;
            }
            continue;
        }
        if ($kind === HTMLToken::EndTag) {
            $name = $current->getName();
            $lastTagInline = $this->isInlineTag($name);
            if (!$editable && $name === $openProtectedTag) {
                $openProtectedTag = null;
                $editable = true;
            }
            continue;
        }
        if ($kind !== HTMLToken::Character) {
            continue;
        }

        $text = $current->getData();

        if ($editable) {
            if ($aggressive && $pos < $lastIndex) {
                $next = $tokens[$pos + 1];
                $nextKind = $next->getType();
                if (!$previous) {
                    $previous = new HTMLToken();
                }
                $previousKind = $previous->getType();
                $tagBefore = $previousKind === HTMLToken::StartTag || $previousKind === HTMLToken::EndTag;
                $tagAfter = $nextKind === HTMLToken::StartTag || $nextKind === HTMLToken::EndTag;
                $nextTagInline = $tagAfter ? $this->isInlineTag($next->getTagName()) : false;
                $atStart = $pos === 0;

                // Text sitting directly between two tags, at least one of them non-inline.
                $betweenTags = ($atStart || $tagBefore) && $tagAfter && (!$lastTagInline || !$nextTagInline);
                // Text with no inline tag on either side.
                $noInlineNeighbour = ($atStart || !$lastTagInline) && !$nextTagInline;

                if ($betweenTags || $noInlineNeighbour) {
                    $text = trim($text);
                }
            }
            $text = $this->_removeWhitespaceFromCharacter($text);
            if ($pos === $lastIndex) {
                $text = rtrim($text);
            }
        } elseif ($aggressive && ($openProtectedTag === HTMLNames::scriptTag || $openProtectedTag === HTMLNames::styleTag)) {
            $text = trim($text);
        }
        $current->setData($text);
    }
    $this->tokens = $tokens;
}
531 |
/**
 * Collapses every run of whitespace in a piece of text.
 *
 * A run containing a line break (LF or CR) becomes a single "\n"; any other
 * run of spaces, tabs and form feeds becomes a single space. CR is handled
 * as whitespace so that CRLF input collapses the same way as LF input; as a
 * consequence CRLF line endings in the text are written out as LF.
 *
 * @param string $characters
 * @return string
 */
protected function _removeWhitespaceFromCharacter($characters) {
    return preg_replace_callback(
        '/[ \t\n\f\r]+/',
        function ($run) {
            return strpbrk($run[0], "\r\n") === false ? ' ' : "\n";
        },
        (string)$characters
    );
}
562 |
/**
 * Drops repeated attributes from every start tag, keeping the first
 * occurrence of each attribute name. Tokens whose attribute list is
 * already free of duplicates are left untouched.
 */
protected function optimizeStartTagAttributes() {
    $tokens = $this->tokens;
    foreach ($tokens as $token) {
        if ($token->getType() !== HTMLToken::StartTag) {
            continue;
        }

        $original = $token->getAttributes();
        $seen = array();
        $unique = array();
        foreach ($original as $attribute) {
            $name = $attribute['name'];
            if (isset($seen[$name])) {
                continue;
            }
            $seen[$name] = true;
            $unique[] = $attribute;
        }

        if ($original !== $unique) {
            $token->setAttributes($unique);
        }
    }
    $this->tokens = $tokens;
}
587 |
/**
 * Tells whether a comment is an Internet Explorer conditional comment.
 *
 * downlevel-hidden   : <!--[if expression]> HTML <![endif]-->
 * downlevel-revealed : <![if expression]> HTML <![endif]>
 *
 * A comment qualifies when it starts with an "[if ...]>" opener or ends with
 * an "<![endif]" closer, with or without the "--" comment dashes.
 *
 * NOTE(review): assumes $comment is the full comment text including its
 * delimiters; _buildElement() writes comment data back verbatim, which
 * suggests so. Confirm against the tokenizer.
 *
 * @param string $comment
 * @return bool
 */
protected function _isConditionalComment($comment) {
    // Opener: "<!--[if IE]>" or "<![if IE]>" at the very start.
    if (preg_match('/\A<!(?:--)?\[if [^\]]+\]>/s', $comment)) {
        return true;
    }
    // Closer: "<![endif]-->" or "<![endif]>" at the very end.
    if (preg_match('/<!\[endif\](?:--)?>\Z/s', $comment)) {
        return true;
    }
    return false;
}
605 |
606 | }
--------------------------------------------------------------------------------
/src/zz/Html/HTMLNames.php:
--------------------------------------------------------------------------------
1 | static::aTag,
469 | static::abbrTag => static::abbrTag,
470 | static::acronymTag => static::acronymTag,
471 | static::addressTag => static::addressTag,
472 | static::appletTag => static::appletTag,
473 | static::areaTag => static::areaTag,
474 | static::articleTag => static::articleTag,
475 | static::asideTag => static::asideTag,
476 | static::audioTag => static::audioTag,
477 | static::bTag => static::bTag,
478 | static::baseTag => static::baseTag,
479 | static::basefontTag => static::basefontTag,
480 | static::bdoTag => static::bdoTag,
481 | static::bgsoundTag => static::bgsoundTag,
482 | static::bigTag => static::bigTag,
483 | static::blockquoteTag => static::blockquoteTag,
484 | static::bodyTag => static::bodyTag,
485 | static::brTag => static::brTag,
486 | static::buttonTag => static::buttonTag,
487 | static::canvasTag => static::canvasTag,
488 | static::captionTag => static::captionTag,
489 | static::centerTag => static::centerTag,
490 | static::citeTag => static::citeTag,
491 | static::codeTag => static::codeTag,
492 | static::colTag => static::colTag,
493 | static::colgroupTag => static::colgroupTag,
494 | static::commandTag => static::commandTag,
495 | static::datalistTag => static::datalistTag,
496 | static::ddTag => static::ddTag,
497 | static::delTag => static::delTag,
498 | static::detailsTag => static::detailsTag,
499 | static::dfnTag => static::dfnTag,
500 | static::dirTag => static::dirTag,
501 | static::divTag => static::divTag,
502 | static::dlTag => static::dlTag,
503 | static::dtTag => static::dtTag,
504 | static::emTag => static::emTag,
505 | static::embedTag => static::embedTag,
506 | static::fieldsetTag => static::fieldsetTag,
507 | static::figcaptionTag => static::figcaptionTag,
508 | static::figureTag => static::figureTag,
509 | static::fontTag => static::fontTag,
510 | static::footerTag => static::footerTag,
511 | static::formTag => static::formTag,
512 | static::frameTag => static::frameTag,
513 | static::framesetTag => static::framesetTag,
514 | static::h1Tag => static::h1Tag,
515 | static::h2Tag => static::h2Tag,
516 | static::h3Tag => static::h3Tag,
517 | static::h4Tag => static::h4Tag,
518 | static::h5Tag => static::h5Tag,
519 | static::h6Tag => static::h6Tag,
520 | static::headTag => static::headTag,
521 | static::headerTag => static::headerTag,
522 | static::hgroupTag => static::hgroupTag,
523 | static::hrTag => static::hrTag,
524 | static::htmlTag => static::htmlTag,
525 | static::iTag => static::iTag,
526 | static::iframeTag => static::iframeTag,
527 | static::imageTag => static::imageTag,
528 | static::imgTag => static::imgTag,
529 | static::inputTag => static::inputTag,
530 | static::insTag => static::insTag,
531 | static::isindexTag => static::isindexTag,
532 | static::kbdTag => static::kbdTag,
533 | static::keygenTag => static::keygenTag,
534 | static::labelTag => static::labelTag,
535 | static::layerTag => static::layerTag,
536 | static::legendTag => static::legendTag,
537 | static::liTag => static::liTag,
538 | static::linkTag => static::linkTag,
539 | static::listingTag => static::listingTag,
540 | static::mapTag => static::mapTag,
541 | static::markTag => static::markTag,
542 | static::marqueeTag => static::marqueeTag,
543 | static::menuTag => static::menuTag,
544 | static::metaTag => static::metaTag,
545 | static::meterTag => static::meterTag,
546 | static::navTag => static::navTag,
547 | static::nobrTag => static::nobrTag,
548 | static::noembedTag => static::noembedTag,
549 | static::noframesTag => static::noframesTag,
550 | static::nolayerTag => static::nolayerTag,
551 | static::noscriptTag => static::noscriptTag,
552 | static::objectTag => static::objectTag,
553 | static::olTag => static::olTag,
554 | static::optgroupTag => static::optgroupTag,
555 | static::optionTag => static::optionTag,
556 | static::outputTag => static::outputTag,
557 | static::pTag => static::pTag,
558 | static::paramTag => static::paramTag,
559 | static::plaintextTag => static::plaintextTag,
560 | static::preTag => static::preTag,
561 | static::progressTag => static::progressTag,
562 | static::qTag => static::qTag,
563 | static::rpTag => static::rpTag,
564 | static::rtTag => static::rtTag,
565 | static::rubyTag => static::rubyTag,
566 | static::sTag => static::sTag,
567 | static::sampTag => static::sampTag,
568 | static::scriptTag => static::scriptTag,
569 | static::sectionTag => static::sectionTag,
570 | static::selectTag => static::selectTag,
571 | static::smallTag => static::smallTag,
572 | static::sourceTag => static::sourceTag,
573 | static::spanTag => static::spanTag,
574 | static::strikeTag => static::strikeTag,
575 | static::strongTag => static::strongTag,
576 | static::styleTag => static::styleTag,
577 | static::subTag => static::subTag,
578 | static::summaryTag => static::summaryTag,
579 | static::supTag => static::supTag,
580 | static::tableTag => static::tableTag,
581 | static::tbodyTag => static::tbodyTag,
582 | static::tdTag => static::tdTag,
583 | static::textareaTag => static::textareaTag,
584 | static::tfootTag => static::tfootTag,
585 | static::thTag => static::thTag,
586 | static::theadTag => static::theadTag,
587 | static::titleTag => static::titleTag,
588 | static::trTag => static::trTag,
589 | static::trackTag => static::trackTag,
590 | static::ttTag => static::ttTag,
591 | static::uTag => static::uTag,
592 | static::ulTag => static::ulTag,
593 | static::varTag => static::varTag,
594 | static::videoTag => static::videoTag,
595 | static::wbrTag => static::wbrTag,
596 | static::xmpTag => static::xmpTag,
597 | );
598 | }
599 |
600 | public static function getHTMLAttrs() {
601 | return array(
602 | static::abbrAttr => static::abbrAttr,
603 | static::acceptAttr => static::acceptAttr,
604 | static::accept_charsetAttr => static::accept_charsetAttr,
605 | static::accesskeyAttr => static::accesskeyAttr,
606 | static::actionAttr => static::actionAttr,
607 | static::alignAttr => static::alignAttr,
608 | static::alinkAttr => static::alinkAttr,
609 | static::altAttr => static::altAttr,
610 | static::archiveAttr => static::archiveAttr,
611 | static::aria_activedescendantAttr => static::aria_activedescendantAttr,
612 | static::aria_atomicAttr => static::aria_atomicAttr,
613 | static::aria_busyAttr => static::aria_busyAttr,
614 | static::aria_checkedAttr => static::aria_checkedAttr,
615 | static::aria_controlsAttr => static::aria_controlsAttr,
616 | static::aria_describedbyAttr => static::aria_describedbyAttr,
617 | static::aria_disabledAttr => static::aria_disabledAttr,
618 | static::aria_dropeffectAttr => static::aria_dropeffectAttr,
619 | static::aria_expandedAttr => static::aria_expandedAttr,
620 | static::aria_flowtoAttr => static::aria_flowtoAttr,
621 | static::aria_grabbedAttr => static::aria_grabbedAttr,
622 | static::aria_haspopupAttr => static::aria_haspopupAttr,
623 | static::aria_helpAttr => static::aria_helpAttr,
624 | static::aria_hiddenAttr => static::aria_hiddenAttr,
625 | static::aria_invalidAttr => static::aria_invalidAttr,
626 | static::aria_labelAttr => static::aria_labelAttr,
627 | static::aria_labeledbyAttr => static::aria_labeledbyAttr,
628 | static::aria_labelledbyAttr => static::aria_labelledbyAttr,
629 | static::aria_levelAttr => static::aria_levelAttr,
630 | static::aria_liveAttr => static::aria_liveAttr,
631 | static::aria_multilineAttr => static::aria_multilineAttr,
632 | static::aria_multiselectableAttr => static::aria_multiselectableAttr,
633 | static::aria_orientationAttr => static::aria_orientationAttr,
634 | static::aria_ownsAttr => static::aria_ownsAttr,
635 | static::aria_pressedAttr => static::aria_pressedAttr,
636 | static::aria_readonlyAttr => static::aria_readonlyAttr,
637 | static::aria_relevantAttr => static::aria_relevantAttr,
638 | static::aria_requiredAttr => static::aria_requiredAttr,
639 | static::aria_selectedAttr => static::aria_selectedAttr,
640 | static::aria_sortAttr => static::aria_sortAttr,
641 | static::aria_valuemaxAttr => static::aria_valuemaxAttr,
642 | static::aria_valueminAttr => static::aria_valueminAttr,
643 | static::aria_valuenowAttr => static::aria_valuenowAttr,
644 | static::aria_valuetextAttr => static::aria_valuetextAttr,
645 | static::asyncAttr => static::asyncAttr,
646 | static::autocompleteAttr => static::autocompleteAttr,
647 | static::autofocusAttr => static::autofocusAttr,
648 | static::autoplayAttr => static::autoplayAttr,
649 | static::autosaveAttr => static::autosaveAttr,
650 | static::axisAttr => static::axisAttr,
651 | static::backgroundAttr => static::backgroundAttr,
652 | static::behaviorAttr => static::behaviorAttr,
653 | static::bgcolorAttr => static::bgcolorAttr,
654 | static::bgpropertiesAttr => static::bgpropertiesAttr,
655 | static::borderAttr => static::borderAttr,
656 | static::bordercolorAttr => static::bordercolorAttr,
657 | static::cellborderAttr => static::cellborderAttr,
658 | static::cellpaddingAttr => static::cellpaddingAttr,
659 | static::cellspacingAttr => static::cellspacingAttr,
660 | static::challengeAttr => static::challengeAttr,
661 | static::charAttr => static::charAttr,
662 | static::charoffAttr => static::charoffAttr,
663 | static::charsetAttr => static::charsetAttr,
664 | static::checkedAttr => static::checkedAttr,
665 | static::citeAttr => static::citeAttr,
666 | static::classAttr => static::classAttr,
667 | static::classidAttr => static::classidAttr,
668 | static::clearAttr => static::clearAttr,
669 | static::codeAttr => static::codeAttr,
670 | static::codebaseAttr => static::codebaseAttr,
671 | static::codetypeAttr => static::codetypeAttr,
672 | static::colorAttr => static::colorAttr,
673 | static::colsAttr => static::colsAttr,
674 | static::colspanAttr => static::colspanAttr,
675 | static::compactAttr => static::compactAttr,
676 | static::compositeAttr => static::compositeAttr,
677 | static::contentAttr => static::contentAttr,
678 | static::contenteditableAttr => static::contenteditableAttr,
679 | static::controlsAttr => static::controlsAttr,
680 | static::coordsAttr => static::coordsAttr,
681 | static::dataAttr => static::dataAttr,
682 | static::datetimeAttr => static::datetimeAttr,
683 | static::declareAttr => static::declareAttr,
684 | static::defaultAttr => static::defaultAttr,
685 | static::deferAttr => static::deferAttr,
686 | static::dirAttr => static::dirAttr,
687 | static::directionAttr => static::directionAttr,
688 | static::disabledAttr => static::disabledAttr,
689 | static::draggableAttr => static::draggableAttr,
690 | static::enctypeAttr => static::enctypeAttr,
691 | static::endAttr => static::endAttr,
692 | static::eventAttr => static::eventAttr,
693 | static::expandedAttr => static::expandedAttr,
694 | static::faceAttr => static::faceAttr,
695 | static::focusedAttr => static::focusedAttr,
696 | static::forAttr => static::forAttr,
697 | static::formAttr => static::formAttr,
698 | static::formactionAttr => static::formactionAttr,
699 | static::formenctypeAttr => static::formenctypeAttr,
700 | static::formmethodAttr => static::formmethodAttr,
701 | static::formnovalidateAttr => static::formnovalidateAttr,
702 | static::formtargetAttr => static::formtargetAttr,
703 | static::frameAttr => static::frameAttr,
704 | static::frameborderAttr => static::frameborderAttr,
705 | static::headersAttr => static::headersAttr,
706 | static::heightAttr => static::heightAttr,
707 | static::hiddenAttr => static::hiddenAttr,
708 | static::highAttr => static::highAttr,
709 | static::hrefAttr => static::hrefAttr,
710 | static::hreflangAttr => static::hreflangAttr,
711 | static::hspaceAttr => static::hspaceAttr,
712 | static::http_equivAttr => static::http_equivAttr,
713 | static::idAttr => static::idAttr,
714 | static::incrementalAttr => static::incrementalAttr,
715 | static::indeterminateAttr => static::indeterminateAttr,
716 | static::ismapAttr => static::ismapAttr,
717 | static::keytypeAttr => static::keytypeAttr,
718 | static::kindAttr => static::kindAttr,
719 | static::labelAttr => static::labelAttr,
720 | static::langAttr => static::langAttr,
721 | static::languageAttr => static::languageAttr,
722 | static::leftmarginAttr => static::leftmarginAttr,
723 | static::linkAttr => static::linkAttr,
724 | static::listAttr => static::listAttr,
725 | static::longdescAttr => static::longdescAttr,
726 | static::loopAttr => static::loopAttr,
727 | static::loopendAttr => static::loopendAttr,
728 | static::loopstartAttr => static::loopstartAttr,
729 | static::lowAttr => static::lowAttr,
730 | static::lowsrcAttr => static::lowsrcAttr,
731 | static::manifestAttr => static::manifestAttr,
732 | static::marginheightAttr => static::marginheightAttr,
733 | static::marginwidthAttr => static::marginwidthAttr,
734 | static::maxAttr => static::maxAttr,
735 | static::maxlengthAttr => static::maxlengthAttr,
736 | static::mayscriptAttr => static::mayscriptAttr,
737 | static::mediaAttr => static::mediaAttr,
738 | static::methodAttr => static::methodAttr,
739 | static::minAttr => static::minAttr,
740 | static::multipleAttr => static::multipleAttr,
741 | static::nameAttr => static::nameAttr,
742 | static::nohrefAttr => static::nohrefAttr,
743 | static::noresizeAttr => static::noresizeAttr,
744 | static::noshadeAttr => static::noshadeAttr,
745 | static::novalidateAttr => static::novalidateAttr,
746 | static::nowrapAttr => static::nowrapAttr,
747 | static::objectAttr => static::objectAttr,
748 | static::onabortAttr => static::onabortAttr,
749 | static::onbeforecopyAttr => static::onbeforecopyAttr,
750 | static::onbeforecutAttr => static::onbeforecutAttr,
751 | static::onbeforeloadAttr => static::onbeforeloadAttr,
752 | static::onbeforepasteAttr => static::onbeforepasteAttr,
753 | static::onbeforeprocessAttr => static::onbeforeprocessAttr,
754 | static::onbeforeunloadAttr => static::onbeforeunloadAttr,
755 | static::onblurAttr => static::onblurAttr,
756 | static::oncanplayAttr => static::oncanplayAttr,
757 | static::oncanplaythroughAttr => static::oncanplaythroughAttr,
758 | static::onchangeAttr => static::onchangeAttr,
759 | static::onclickAttr => static::onclickAttr,
760 | static::oncontextmenuAttr => static::oncontextmenuAttr,
761 | static::oncopyAttr => static::oncopyAttr,
762 | static::oncutAttr => static::oncutAttr,
763 | static::ondblclickAttr => static::ondblclickAttr,
764 | static::ondragAttr => static::ondragAttr,
765 | static::ondragendAttr => static::ondragendAttr,
766 | static::ondragenterAttr => static::ondragenterAttr,
767 | static::ondragleaveAttr => static::ondragleaveAttr,
768 | static::ondragoverAttr => static::ondragoverAttr,
769 | static::ondragstartAttr => static::ondragstartAttr,
770 | static::ondropAttr => static::ondropAttr,
771 | static::ondurationchangeAttr => static::ondurationchangeAttr,
772 | static::onemptiedAttr => static::onemptiedAttr,
773 | static::onendedAttr => static::onendedAttr,
774 | static::onerrorAttr => static::onerrorAttr,
775 | static::onfocusAttr => static::onfocusAttr,
776 | static::onfocusinAttr => static::onfocusinAttr,
777 | static::onfocusoutAttr => static::onfocusoutAttr,
778 | static::onhashchangeAttr => static::onhashchangeAttr,
779 | static::oninputAttr => static::oninputAttr,
780 | static::oninvalidAttr => static::oninvalidAttr,
781 | static::onkeydownAttr => static::onkeydownAttr,
782 | static::onkeypressAttr => static::onkeypressAttr,
783 | static::onkeyupAttr => static::onkeyupAttr,
784 | static::onloadAttr => static::onloadAttr,
785 | static::onloadeddataAttr => static::onloadeddataAttr,
786 | static::onloadedmetadataAttr => static::onloadedmetadataAttr,
787 | static::onloadstartAttr => static::onloadstartAttr,
788 | static::onmousedownAttr => static::onmousedownAttr,
789 | static::onmousemoveAttr => static::onmousemoveAttr,
790 | static::onmouseoutAttr => static::onmouseoutAttr,
791 | static::onmouseoverAttr => static::onmouseoverAttr,
792 | static::onmouseupAttr => static::onmouseupAttr,
793 | static::onmousewheelAttr => static::onmousewheelAttr,
794 | static::onofflineAttr => static::onofflineAttr,
795 | static::ononlineAttr => static::ononlineAttr,
796 | static::onorientationchangeAttr => static::onorientationchangeAttr,
797 | static::onpagehideAttr => static::onpagehideAttr,
798 | static::onpageshowAttr => static::onpageshowAttr,
799 | static::onpasteAttr => static::onpasteAttr,
800 | static::onpauseAttr => static::onpauseAttr,
801 | static::onplayAttr => static::onplayAttr,
802 | static::onplayingAttr => static::onplayingAttr,
803 | static::onpopstateAttr => static::onpopstateAttr,
804 | static::onprogressAttr => static::onprogressAttr,
805 | static::onratechangeAttr => static::onratechangeAttr,
806 | static::onresetAttr => static::onresetAttr,
807 | static::onresizeAttr => static::onresizeAttr,
808 | static::onscrollAttr => static::onscrollAttr,
809 | static::onsearchAttr => static::onsearchAttr,
810 | static::onseekedAttr => static::onseekedAttr,
811 | static::onseekingAttr => static::onseekingAttr,
812 | static::onselectAttr => static::onselectAttr,
813 | static::onselectionchangeAttr => static::onselectionchangeAttr,
814 | static::onselectstartAttr => static::onselectstartAttr,
815 | static::onstalledAttr => static::onstalledAttr,
816 | static::onstorageAttr => static::onstorageAttr,
817 | static::onsubmitAttr => static::onsubmitAttr,
818 | static::onsuspendAttr => static::onsuspendAttr,
819 | static::ontimeupdateAttr => static::ontimeupdateAttr,
820 | static::ontouchcancelAttr => static::ontouchcancelAttr,
821 | static::ontouchendAttr => static::ontouchendAttr,
822 | static::ontouchmoveAttr => static::ontouchmoveAttr,
823 | static::ontouchstartAttr => static::ontouchstartAttr,
824 | static::onunloadAttr => static::onunloadAttr,
825 | static::onvolumechangeAttr => static::onvolumechangeAttr,
826 | static::onwaitingAttr => static::onwaitingAttr,
827 | static::onwebkitanimationendAttr => static::onwebkitanimationendAttr,
828 | static::onwebkitanimationiterationAttr => static::onwebkitanimationiterationAttr,
829 | static::onwebkitanimationstartAttr => static::onwebkitanimationstartAttr,
830 | static::onwebkitbeginfullscreenAttr => static::onwebkitbeginfullscreenAttr,
831 | static::onwebkitendfullscreenAttr => static::onwebkitendfullscreenAttr,
832 | static::onwebkitfullscreenchangeAttr => static::onwebkitfullscreenchangeAttr,
833 | static::onwebkitspeechchangeAttr => static::onwebkitspeechchangeAttr,
834 | static::onwebkittransitionendAttr => static::onwebkittransitionendAttr,
835 | static::openAttr => static::openAttr,
836 | static::optimumAttr => static::optimumAttr,
837 | static::patternAttr => static::patternAttr,
838 | static::pingAttr => static::pingAttr,
839 | static::placeholderAttr => static::placeholderAttr,
840 | static::playcountAttr => static::playcountAttr,
841 | static::pluginspageAttr => static::pluginspageAttr,
842 | static::pluginurlAttr => static::pluginurlAttr,
843 | static::posterAttr => static::posterAttr,
844 | static::precisionAttr => static::precisionAttr,
845 | static::preloadAttr => static::preloadAttr,
846 | static::primaryAttr => static::primaryAttr,
847 | static::profileAttr => static::profileAttr,
848 | static::progressAttr => static::progressAttr,
849 | static::promptAttr => static::promptAttr,
850 | static::readonlyAttr => static::readonlyAttr,
851 | static::relAttr => static::relAttr,
852 | static::requiredAttr => static::requiredAttr,
853 | static::resultsAttr => static::resultsAttr,
854 | static::revAttr => static::revAttr,
855 | static::roleAttr => static::roleAttr,
856 | static::rowsAttr => static::rowsAttr,
857 | static::rowspanAttr => static::rowspanAttr,
858 | static::rulesAttr => static::rulesAttr,
859 | static::sandboxAttr => static::sandboxAttr,
860 | static::schemeAttr => static::schemeAttr,
861 | static::scopeAttr => static::scopeAttr,
862 | static::scrollamountAttr => static::scrollamountAttr,
863 | static::scrolldelayAttr => static::scrolldelayAttr,
864 | static::scrollingAttr => static::scrollingAttr,
865 | static::selectedAttr => static::selectedAttr,
866 | static::shapeAttr => static::shapeAttr,
867 | static::sizeAttr => static::sizeAttr,
868 | static::sortableAttr => static::sortableAttr,
869 | static::sortdirectionAttr => static::sortdirectionAttr,
870 | static::spanAttr => static::spanAttr,
871 | static::spellcheckAttr => static::spellcheckAttr,
872 | static::srcAttr => static::srcAttr,
873 | static::srclangAttr => static::srclangAttr,
874 | static::standbyAttr => static::standbyAttr,
875 | static::startAttr => static::startAttr,
876 | static::stepAttr => static::stepAttr,
877 | static::styleAttr => static::styleAttr,
878 | static::summaryAttr => static::summaryAttr,
879 | static::tabindexAttr => static::tabindexAttr,
880 | static::tableborderAttr => static::tableborderAttr,
881 | static::targetAttr => static::targetAttr,
882 | static::textAttr => static::textAttr,
883 | static::titleAttr => static::titleAttr,
884 | static::topAttr => static::topAttr,
885 | static::topmarginAttr => static::topmarginAttr,
886 | static::truespeedAttr => static::truespeedAttr,
887 | static::typeAttr => static::typeAttr,
888 | static::usemapAttr => static::usemapAttr,
889 | static::valignAttr => static::valignAttr,
890 | static::valueAttr => static::valueAttr,
891 | static::valuetypeAttr => static::valuetypeAttr,
892 | static::versionAttr => static::versionAttr,
893 | static::viewsourceAttr => static::viewsourceAttr,
894 | static::vlinkAttr => static::vlinkAttr,
895 | static::vspaceAttr => static::vspaceAttr,
896 | static::webkitallowfullscreenAttr => static::webkitallowfullscreenAttr,
897 | static::webkitdirectoryAttr => static::webkitdirectoryAttr,
898 | static::webkitgrammarAttr => static::webkitgrammarAttr,
899 | static::webkitspeechAttr => static::webkitspeechAttr,
900 | static::widthAttr => static::widthAttr,
901 | static::wrapAttr => static::wrapAttr,
902 | );
903 | }
904 |
905 | }
906 |
--------------------------------------------------------------------------------
/src/zz/Html/HTMLToken.php:
--------------------------------------------------------------------------------
1 | false,
55 | 'hasSystemIdentifier' => false,
56 | 'publicIdentifier' => '',
57 | 'systemIdentifier' => '',
58 | 'forceQuirks' => false,
59 | );
60 |
/**
 * Markup stored through setHtmlOrigin() and returned by getHtmlOrigin().
 *
 * @var string
 */
protected $_html = '';

/**
 * Value stored through setState() and returned by getState().
 *
 * @var array
 */
protected $_state = array();
63 |
/**
 * Create a token whose type is Uninitialized.
 */
public function __construct() {
    $this->_type = static::Uninitialized;
}
67 |
/**
 * String conversion yields the token's data buffer.
 *
 * @return string
 */
public function __toString() {
    return $this->_data;
}
71 |
/**
 * Export the token as a plain array.
 *
 * The array always carries the keys type, data, selfClosing, attributes, parseError, html
 * and state. For DOCTYPE tokens a `doctypeData` entry is added: a copy of the DOCTYPE data
 * extended with a `mode` key holding the computed compatibility mode.
 *
 * @return array
 */
public function toArray() {
    $exported = array(
        'type' => $this->_type,
        'data' => $this->_data,
        'selfClosing' => $this->_selfClosing,
        'attributes' => $this->_attributes,
        'parseError' => $this->_parseError,
        'html' => $this->_html,
        'state' => $this->_state,
    );

    // Only DOCTYPE tokens carry the extra doctype section.
    if ($this->getType() !== static::DOCTYPE) {
        return $exported;
    }

    $doctype = $this->_doctypeData;
    // The force-quirks flag wins; otherwise the mode is derived from name/public/system ids.
    $doctype['mode'] = $doctype['forceQuirks']
        ? static::QuirksMode
        : $this->setCompatibilityModeFromDoctype($this->_data, $doctype['publicIdentifier'], $doctype['systemIdentifier']);
    $exported['doctypeData'] = $doctype;

    return $exported;
}
94 |
95 | /**
96 | * Source/core/html/parser/HTMLConstructionSite.cpp
97 | * HTMLConstructionSite::setCompatibilityModeFromDoctype
98 | *
99 | * [QuirksMode]
100 | * startsWith publicId
101 | * `+//Silmaril//dtd html Pro v0r11 19970101//`
102 | * `-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//`
103 | * `-//AS//DTD HTML 3.0 asWedit + extensions//`
104 | * `-//IETF//DTD HTML 2.0 Level 1//`
105 | * `-//IETF//DTD HTML 2.0 Level 2//`
106 | * `-//IETF//DTD HTML 2.0 Strict Level 1//`
107 | * `-//IETF//DTD HTML 2.0 Strict Level 2//`
108 | * `-//IETF//DTD HTML 2.0 Strict//`
109 | * `-//IETF//DTD HTML 2.0//`
110 | * `-//IETF//DTD HTML 2.1E//`
111 | * `-//IETF//DTD HTML 3.0//`
112 | * `-//IETF//DTD HTML 3.2 Final//`
113 | * `-//IETF//DTD HTML 3.2//`
114 | * `-//IETF//DTD HTML 3//`
115 | * `-//IETF//DTD HTML Level 0//`
116 | * `-//IETF//DTD HTML Level 1//`
117 | * `-//IETF//DTD HTML Level 2//`
118 | * `-//IETF//DTD HTML Level 3//`
119 | * `-//IETF//DTD HTML Strict Level 0//`
120 | * `-//IETF//DTD HTML Strict Level 1//`
121 | * `-//IETF//DTD HTML Strict Level 2//`
122 | * `-//IETF//DTD HTML Strict Level 3//`
123 | * `-//IETF//DTD HTML Strict//`
124 | * `-//IETF//DTD HTML//`
125 | * `-//Metrius//DTD Metrius Presentational//`
126 | * `-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//`
127 | * `-//Microsoft//DTD Internet Explorer 2.0 HTML//`
128 | * `-//Microsoft//DTD Internet Explorer 2.0 Tables//`
129 | * `-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//`
130 | * `-//Microsoft//DTD Internet Explorer 3.0 HTML//`
131 | * `-//Microsoft//DTD Internet Explorer 3.0 Tables//`
132 | * `-//Netscape Comm. Corp.//DTD HTML//`
133 | * `-//Netscape Comm. Corp.//DTD Strict HTML//`
134 | * `-//O'Reilly and Associates//DTD HTML 2.0//`
135 | * `-//O'Reilly and Associates//DTD HTML Extended 1.0//`
136 | * `-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//`
137 | * `-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//`
138 | * `-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//`
139 | * `-//Spyglass//DTD HTML 2.0 Extended//`
140 | * `-//SQ//DTD HTML 2.0 HoTMetaL + extensions//`
141 | * `-//Sun Microsystems Corp.//DTD HotJava HTML//`
142 | * `-//Sun Microsystems Corp.//DTD HotJava Strict HTML//`
143 | * `-//W3C//DTD HTML 3 1995-03-24//`
144 | * `-//W3C//DTD HTML 3.2 Draft//`
145 | * `-//W3C//DTD HTML 3.2 Final//`
146 | * `-//W3C//DTD HTML 3.2//`
147 | * `-//W3C//DTD HTML 3.2S Draft//`
148 | * `-//W3C//DTD HTML 4.0 Frameset//`
149 | * `-//W3C//DTD HTML 4.0 Transitional//`
150 | * `-//W3C//DTD HTML Experimental 19960712//`
151 | * `-//W3C//DTD HTML Experimental 970421//`
152 | * `-//W3C//DTD W3 HTML//`
153 | * `-//W3O//DTD W3 HTML 3.0//`
154 | * `-//WebTechs//DTD Mozilla HTML 2.0//`
155 | * `-//WebTechs//DTD Mozilla HTML//`
156 | *
157 | * IgnoringCase publicId
158 | * `-//W3O//DTD W3 HTML Strict 3.0//EN//`
159 | * `-/W3C/DTD HTML 4.0 Transitional/EN`
160 | * `HTML`
161 | *
162 | * IgnoringCase systemId
163 | * `http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd`
164 | *
165 | * systemId.isEmpty() && publicId.startsWith
166 | * `-//W3C//DTD HTML 4.01 Frameset//`
167 | * `-//W3C//DTD HTML 4.01 Transitional//`
168 | *
169 | * [LimitedQuirksMode]
170 | * startsWith publicId
171 | * `-//W3C//DTD XHTML 1.0 Frameset//`
172 | * `-//W3C//DTD XHTML 1.0 Transitional//`
173 | *
174 | * !systemId.isEmpty() && publicId.startsWith
175 | * `-//W3C//DTD HTML 4.01 Frameset//`
176 | * `-//W3C//DTD HTML 4.01 Transitional//`
177 | */
protected function setCompatibilityModeFromDoctype($name, $publicId, $systemId) {
    // A DOCTYPE whose name is not exactly "html" always selects quirks mode.
    if ($name !== 'html') {
        return static::QuirksMode;
    }

    // All identifier comparisons are ASCII case-insensitive: Blink calls
    // publicId.startsWith(prefix, false) / equalIgnoringCase(), and the HTML Standard
    // requires case-insensitive matching. Hence the `i` modifier on every pattern below.

    // Public-identifier prefixes that select quirks mode.
    $quirksPublicIdPrefix = "/^(?:-\/\/(?:S(?:oftQuad(?: Software\/\/DTD HoTMetaL PRO 6\.0::19990601|\/\/DTD HoTMetaL PRO 4\.0::19971010)::extensions to HTML 4\.0|un Microsystems Corp\.\/\/DTD HotJava(?: Strict)? HTML|Q\/\/DTD HTML 2\.0 HoTMetaL \+ extensions|pyglass\/\/DTD HTML 2\.0 Extended)|W(?:3(?:C\/\/DTD (?:HTML (?:3(?:\.2(?: (?:Draft|Final)|S Draft)?| 1995-03-24)|Experimental (?:19960712|970421)|4\.0 (?:Transitional|Frameset))|W3 HTML)|O\/\/DTD W3 HTML 3\.0)|ebTechs\/\/DTD Mozilla HTML(?: 2\.0)?)|IETF\/\/DTD HTML(?: (?:2\.(?:0(?: (?:Strict(?: Level [12])?|Level [12]))?|1E)|3(?:\.(?:2(?: Final)?|0))?|Strict(?: Level [0123])?|Level [0123]))?|M(?:icrosoft\/\/DTD Internet Explorer [23]\.0 (?:HTML(?: Strict)?|Tables)|etrius\/\/DTD Metrius Presentational)|O'Reilly and Associates\/\/DTD HTML (?:Extend(?:ed Relax)?ed 1|2)\.0|A(?:dvaSoft Ltd|S)\/\/DTD HTML 3\.0 asWedit \+ extensions|Netscape Comm\. Corp\.\/\/DTD(?: Strict)? HTML)|\+\/\/Silmaril\/\/dtd html Pro v0r11 19970101)\/\//i";
    // Public identifiers that select quirks mode on an exact match.
    $quirksPublicIdExact = '/^(?:-\/(?:\/W3O\/\/DTD W3 HTML Strict 3\.0\/\/EN\/\/|W3C\/DTD HTML 4\.0 Transitional\/EN)|HTML)$/i';
    // System identifier that selects quirks mode on an exact match.
    $quirksSystemIdExact = '/^http:\/\/www\.ibm\.com\/data\/dtd\/v11\/ibmxhtml1-transitional\.dtd$/i';
    // HTML 4.01 Transitional/Frameset: quirks without a system id, limited quirks with one.
    $html401PublicIdPrefix = '/^-\/\/W3C\/\/DTD HTML 4\.01 (?:Transitional|Frameset)\/\//i';
    // XHTML 1.0 Transitional/Frameset always selects limited quirks.
    $xhtml10PublicIdPrefix = '/^-\/\/W3C\/\/DTD XHTML 1\.0 (?:Transitional|Frameset)\/\//i';

    if (preg_match($quirksPublicIdPrefix, $publicId)
        || preg_match($quirksPublicIdExact, $publicId)
        || preg_match($quirksSystemIdExact, $systemId)
    ) {
        return static::QuirksMode;
    }

    // Evaluated once and reused for both the quirks and the limited-quirks decision.
    $isHtml401 = (bool)preg_match($html401PublicIdPrefix, $publicId);

    if ($systemId === '' && $isHtml401) {
        return static::QuirksMode;
    }

    if (preg_match($xhtml10PublicIdPrefix, $publicId) || ($systemId !== '' && $isHtml401)) {
        return static::LimitedQuirksMode;
    }

    return static::NoQuirksMode;
}
204 |
/**
 * Remove the `_currentAttribute` property.
 *
 * addNewAttribute() binds `_currentAttribute` by reference into `_attributes`; unsetting
 * the property drops that binding on this side.
 */
public function clean() {
    unset($this->_currentAttribute);
}
208 |
/**
 * @return mixed the current token type (one of the type constants of this class)
 */
public function getType() {
    return $this->_type;
}
212 |
/**
 * Return the data buffer; for tag and DOCTYPE tokens this buffer holds the name.
 *
 * @return string
 */
public function getName() {
    return $this->_data;
}
216 |
/**
 * Overwrite the token type.
 *
 * @param mixed $type one of the type constants of this class
 */
public function setType($type) {
    $this->_type = $type;
}
220 |
/**
 * @return string the markup previously stored with setHtmlOrigin() ('' by default)
 */
public function getHtmlOrigin() {
    return $this->_html;
}
224 |
/**
 * Store the markup this token was produced from.
 *
 * @param string $html
 */
public function setHtmlOrigin($html) {
    $this->_html = $html;
}
228 |
/**
 * @return array the value previously stored with setState() (empty array by default)
 */
public function getState() {
    return $this->_state;
}
232 |
/**
 * Store state information for this token.
 *
 * @param array $states
 */
public function setState($states) {
    $this->_state = $states;
}
236 |
/**
 * Return the tag name of a StartTag or EndTag token.
 *
 * @return string|false the name, or false when the token is neither a start nor an end tag
 */
public function getTagName() {
    $kind = $this->getType();
    $isTag = ($kind === static::StartTag || $kind === static::EndTag);

    return $isTag ? $this->getName() : false;
}
244 |
/**
 * Replace the token's data buffer.
 *
 * @param string $data
 */
public function setData($data) {
    $this->_data = $data;
}
248 |
/**
 * @return string the token's data buffer
 */
public function getData() {
    return $this->_data;
}
252 |
/**
 * @return array the attribute list; entries created by addNewAttribute() have the keys
 *               name, value and quoted
 */
public function getAttributes() {
    return $this->_attributes;
}
256 |
/**
 * Replace the whole attribute list.
 *
 * @param array $attributes
 */
public function setAttributes($attributes) {
    $this->_attributes = $attributes;
}
260 |
/**
 * @return array the DOCTYPE data (identifier flags, identifiers and forceQuirks)
 */
public function getDoctypeData() {
    return $this->_doctypeData;
}
264 |
/**
 * @return bool the self-closing flag (same value selfClosing() returns)
 */
public function hasSelfClosing() {
    return $this->_selfClosing;
}
268 |
/**
 * @return bool the parse-error flag, which parseError() sets to true
 */
public function hasParseError() {
    return $this->_parseError;
}
272 |
/**
 * Flag this token as having a parse error.
 */
public function parseError() {
    $this->_parseError = true;
}
276 |
/**
 * Reset the token type to Uninitialized and empty the data buffer.
 *
 * Only these two fields are reset; attributes, flags and DOCTYPE data are left as they are.
 */
public function clear() {
    $this->_data = '';
    $this->_type = static::Uninitialized;
}
281 |
/**
 * Mark this token as a Character token.
 */
public function ensureIsCharacterToken() {
    $this->_type = static::Character;
}
285 |
/**
 * Mark this token as an EndOfFile token.
 */
public function makeEndOfFile() {
    $this->_type = static::EndOfFile;
}
289 |
/**
 * Append text to the data buffer of a character token.
 *
 * @param string $character
 */
public function appendToCharacter($character) {
    $this->_data = $this->_data . $character;
}
293 |
/**
 * Mark this token as a Comment token.
 */
public function beginComment() {
    $this->_type = static::Comment;
}
297 |
/**
 * Append text to the data buffer of a comment token.
 *
 * @param string $character
 */
public function appendToComment($character) {
    $this->_data = $this->_data . $character;
}
301 |
/**
 * Append text to the name held in the data buffer.
 *
 * @param string $character
 */
public function appendToName($character) {
    $this->_data = $this->_data . $character;
}
305 |
/**
 * Record on the current attribute that its value is double-quoted.
 */
public function setDoubleQuoted() {
    $this->_currentAttribute['quoted'] = static::DoubleQuoted;
}
309 |
/**
 * Record on the current attribute that its value is single-quoted.
 */
public function setSingleQuoted() {
    $this->_currentAttribute['quoted'] = static::SingleQuoted;
}
313 |
314 | /* Start/End Tag Tokens */
315 |
/**
 * @return bool the self-closing flag (same value hasSelfClosing() returns)
 */
public function selfClosing() {
    return $this->_selfClosing;
}
319 |
/**
 * Turn the self-closing flag on.
 */
public function setSelfClosing() {
    $this->_selfClosing = true;
}
323 |
/**
 * Turn this token into a StartTag and begin its name.
 *
 * Resets the self-closing flag, the current attribute and the attribute list, then appends
 * the given character to the data buffer.
 *
 * @param string $character first character of the tag name
 */
public function beginStartTag($character) {
    $this->setType(static::StartTag);

    // Reset per-tag state before the name is accumulated.
    $this->_selfClosing = false;
    $this->_currentAttribute = 0;
    $this->_attributes = array();

    $this->_data = $this->_data . $character;
}
331 |
/**
 * Turn this token into an EndTag and begin its name.
 *
 * Resets the self-closing flag, the current attribute and the attribute list, then appends
 * the given character to the data buffer.
 *
 * @param string $character first character of the tag name
 */
public function beginEndTag($character) {
    $this->setType(static::EndTag);

    // Reset per-tag state before the name is accumulated.
    $this->_selfClosing = false;
    $this->_currentAttribute = 0;
    $this->_attributes = array();

    $this->_data = $this->_data . $character;
}
339 |
/**
 * Start a new, empty attribute and make it the current one.
 *
 * Blink equivalent:
 *   m_attributes.grow(m_attributes.size() + 1);
 *   m_currentAttribute = &m_attributes.last();
 */
public function addNewAttribute() {
    // Drop any existing reference binding first so the previous attribute is not overwritten.
    unset($this->_currentAttribute);

    $this->_currentAttribute = array(
        'name' => '',
        'value' => '',
        'quoted' => false,
    );

    // Append by reference: later writes to _currentAttribute show up in the list entry.
    $this->_attributes[] = &$this->_currentAttribute;
}
352 |
/**
 * No-op: the range bookkeeping of the Blink original is commented out, so nothing is recorded.
 *
 * Blink: m_currentAttribute->nameRange.start = offset - m_baseOffset;
 *
 * @param int $offset unused
 */
public function beginAttributeName($offset) {
    // Disabled: $this->_currentAttribute['nameRange']['start'] = $offset;
}
357 |
/**
 * No-op: the range bookkeeping of the Blink original is commented out, so nothing is recorded.
 *
 * Blink:
 *   int index = offset - m_baseOffset;
 *   m_currentAttribute->nameRange.end = index;
 *   m_currentAttribute->valueRange.start = index;
 *   m_currentAttribute->valueRange.end = index;
 *
 * @param int $offset unused
 */
public function endAttributeName($offset) {
    // Disabled:
    // $this->_currentAttribute['nameRange']['end'] = $offset;
    // $this->_currentAttribute['valueRange']['start'] = $offset;
    // $this->_currentAttribute['valueRange']['end'] = $offset;
}
367 |
/**
 * No-op: the range bookkeeping of the Blink original is commented out, so nothing is recorded.
 *
 * Blink:
 *   m_currentAttribute->valueRange.start = offset - m_baseOffset;
 *   #ifndef NDEBUG
 *   m_currentAttribute->valueRange.end = 0;
 *   #endif
 *
 * @param int $offset unused
 */
public function beginAttributeValue($offset) {
    // Disabled: $this->_currentAttribute['valueRange']['start'] = $offset;
}
375 |
/**
 * No-op: the range bookkeeping of the Blink original is commented out, so nothing is recorded.
 *
 * Blink: m_currentAttribute->valueRange.end = offset - m_baseOffset;
 *
 * @param int $offset unused
 */
public function endAttributeValue($offset) {
    // Disabled: $this->_currentAttribute['valueRange']['end'] = $offset;
}
380 |
/**
 * Append text to the name of the current attribute.
 *
 * Blink: m_currentAttribute->name.append(character);
 * (Blink carries a FIXME about asserting nameRange.start here once
 * https://bugs.webkit.org/show_bug.cgi?id=62971 is fixed.)
 *
 * @param string $character
 */
public function appendToAttributeName($character) {
    $this->_currentAttribute['name'] = $this->_currentAttribute['name'] . $character;
}
388 |
/**
 * Append text to the value of the current attribute.
 *
 * Blink: m_currentAttribute->value.append(character);
 *
 * @param string $character
 */
public function appendToAttributeValue($character) {
    $this->_currentAttribute['value'] = $this->_currentAttribute['value'] . $character;
}
394 |
395 | /* DOCTYPE Tokens */
396 |
/**
 * Blink: return m_doctypeData->m_forceQuirks;
 *
 * @return bool the force-quirks flag of the DOCTYPE data
 */
public function forceQuirks() {
    return $this->_doctypeData['forceQuirks'];
}
401 |
/**
 * Turn the force-quirks flag of the DOCTYPE data on.
 *
 * Blink: m_doctypeData->m_forceQuirks = true;
 */
public function setForceQuirks() {
    $this->_doctypeData['forceQuirks'] = true;
}
406 |
/**
 * Mark this token as a DOCTYPE token.
 *
 * Blink additionally allocates the data holder here
 * (m_doctypeData = adoptPtr(new DoctypeData)); this port only sets the type.
 */
protected function _beginDOCTYPE() {
    $this->_type = static::DOCTYPE;
}
411 |
/**
 * Turn this token into a DOCTYPE token and optionally start its name.
 *
 * @param string|null $character first character of the DOCTYPE name, or null for none
 */
public function beginDOCTYPE($character = null) {
    $this->_beginDOCTYPE();

    // Compare against null rather than testing truthiness: the string '0' is falsy in PHP
    // and would otherwise be dropped from the name. Appending '' (or false) is harmless.
    if ($character !== null) {
        $this->_data .= $character;
    }
}
418 |
/**
 * Record that a public identifier is present and reset it to the empty string.
 *
 * Blink:
 *   m_doctypeData->m_hasPublicIdentifier = true;
 *   m_doctypeData->m_publicIdentifier.clear();
 */
public function setPublicIdentifierToEmptyString() {
    $this->_doctypeData['publicIdentifier'] = '';
    $this->_doctypeData['hasPublicIdentifier'] = true;
}
425 |
/**
 * Record that a system identifier is present and reset it to the empty string.
 *
 * Blink:
 *   m_doctypeData->m_hasSystemIdentifier = true;
 *   m_doctypeData->m_systemIdentifier.clear();
 */
public function setSystemIdentifierToEmptyString() {
    $this->_doctypeData['systemIdentifier'] = '';
    $this->_doctypeData['hasSystemIdentifier'] = true;
}
432 |
433 |
/**
 * Append text to the DOCTYPE public identifier.
 *
 * Blink: m_doctypeData->m_publicIdentifier.append(character);
 *
 * @param string $character
 */
public function appendToPublicIdentifier($character) {
    $this->_doctypeData['publicIdentifier'] = $this->_doctypeData['publicIdentifier'] . $character;
}
438 |
/**
 * Append text to the DOCTYPE system identifier.
 *
 * Blink: m_doctypeData->m_systemIdentifier.append(character);
 *
 * @param string $character
 */
public function appendToSystemIdentifier($character) {
    $this->_doctypeData['systemIdentifier'] = $this->_doctypeData['systemIdentifier'] . $character;
}
443 |
444 | }
--------------------------------------------------------------------------------
/src/zz/Html/HTMLTokenizer.php:
--------------------------------------------------------------------------------
1 | _SegmentedString = $SegmentedString;
156 | $this->_Token = new HTMLToken();
157 | $this->_state = static::DataState;
158 | $this->_startState = static::DataState;
159 | $this->_option = $option + array('debug' => false);
160 | $this->_debug = !!$this->_option['debug'];
161 | }
162 |
/**
 * Set the tokenizer state that nextToken() dispatches on.
 *
 * @param string $state one of the *State constants of this class
 */
public function setState($state) {
    $this->_state = $state;
}
169 |
/**
 * Return the current tokenizer state.
 *
 * @return string
 */
public function getState() {
    return $this->_state;
}
176 |
/**
 * Tokenize the remaining input and return every token collected so far.
 *
 * Each pass runs nextToken() from the current position, stores the resulting token and
 * picks the state for the next pass. When a Character token produced in RAWTEXT, RCDATA or
 * script-data state also swallowed a buffered end tag, only the character part is kept and
 * the input is rewound so the end tag is tokenized again on the next pass.
 *
 * @throws \InvalidArgumentException when nextToken() returns false without consuming input
 * @return HTMLToken[] empty array when the input is already exhausted on entry
 */
public function tokenizer() {
    if ($this->_SegmentedString->eos()) {
        return array();
    }

    do {
        $begin = $this->_SegmentedString->tell();
        $this->_startPos = $begin;
        $succeeded = $this->nextToken($this->_SegmentedString);
        $this->_state = static::DataState;
        $finish = $this->_SegmentedString->tell();

        // No progress and no token: bail out instead of looping forever.
        if ($succeeded === false && ($finish - $begin) === 0) {
            throw new \InvalidArgumentException('Given invalid string or invalid statement.');
        }

        $enteredIn = $this->_startState;
        $type = $this->_Token->getType();
        $enteredInTextState = $enteredIn === static::RAWTEXTState
            || $enteredIn === static::RCDATAState
            || $enteredIn === static::ScriptDataState;

        if ($type === HTMLToken::Character && $this->_bufferedEndTagName !== '' && $enteredInTextState) {
            // Outside DataState the Character token also covers the following end tag.
            // Keep only the character part and rewind to just after it.
            $textLength = strlen($this->_Token->getData());

            $this->_buffer = array_slice($this->_buffer, 0, $textLength);
            $this->_compactBuffer($begin, $begin + $textLength, $type);
            $this->_tokens[] = $this->_Token;

            // Re-run from the end of the text, in the same state, to pick up the EndTag.
            $this->_SegmentedString->seek($begin + $textLength);
            $this->_state = $enteredIn;
        } else {
            $this->_compactBuffer($begin, $finish, $type);
            $emitted = $this->_Token;
            $this->_tokens[] = $emitted;

            // FIXME: The tokenizer should do this work for us.
            if ($type === HTMLToken::StartTag) {
                $this->_updateStateFor($emitted->getTagName());
            } else {
                $this->_state = static::DataState;
            }
        }
        $this->_startState = $this->_state;

        // Fresh scratch state for the next token.
        $this->_buffer = array();
        $this->_bufferedEndTagName = '';
        $this->_temporaryBuffer = '';
        $this->_Token = new HTMLToken();
    } while (!$this->_SegmentedString->eos());

    return $this->_tokens;
}
235 |
/**
 * Return every collected token converted with HTMLToken::toArray().
 *
 * @return array[] zero-based list, in the order the tokens were collected
 */
public function getTokensAsArray() {
    // array_values() keeps the result re-indexed from zero, like appending with [] would.
    return array_map(
        function ($token) {
            return $token->toArray();
        },
        array_values($this->_tokens)
    );
}
243 |
/**
 * Finalize the current token from the input slice it was produced from.
 *
 * - Uninitialized, EndOfFile, Character and Comment tokens get the raw slice as their data.
 * - In debug mode the slice is stored as the token's HTML origin, together with the state
 *   buffer reduced to the entries where the state changes (original keys kept).
 * - Outside debug mode only DOCTYPE tokens keep their HTML origin.
 * - Finally the token's current-attribute reference is dropped via clean().
 *
 * @param int $startPos offset where the token starts
 * @param int $endPos offset just past the token
 * @param mixed $type token type as returned by HTMLToken::getType()
 */
protected function _compactBuffer($startPos, $endPos, $type) {
    $html = $this->_SegmentedString->substr($startPos, $endPos - $startPos);

    switch ($type) {
        case HTMLToken::Uninitialized:
        case HTMLToken::EndOfFile:
        case HTMLToken::Character:
        case HTMLToken::Comment:
            $this->_Token->setData($html);
            break;
    }

    if ($this->_debug) {
        // The compacted state list is only consumed in debug mode, so build it only here
        // instead of walking the whole buffer for every token.
        $compactBuffer = array();
        $before = static::kEndOfFileMarker;
        foreach ($this->_buffer as $i => $state) {
            if ($before !== $state) {
                $before = $compactBuffer[$i] = $state;
            }
        }
        $this->_Token->setHtmlOrigin($html);
        $this->_Token->setState($compactBuffer);
    } else if ($type === HTMLToken::DOCTYPE) {
        $this->_Token->setHtmlOrigin($html);
    }

    $this->_Token->clean();
}
270 |
/**
 * Choose the tokenizer state for the content that follows a start tag.
 *
 * - textarea, title: RCDATA
 * - plaintext: PLAINTEXT
 * - script: script data
 * - style, iframe, xmp, noframes, noembed (plugins enabled), noscript (scripting enabled): RAWTEXT
 *
 * For any other tag the state is left untouched.
 *
 * @param string $tagName tag name of the start tag just emitted
 */
protected function _updateStateFor($tagName) {
    if ($tagName === HTMLNames::textareaTag || $tagName === HTMLNames::titleTag) {
        $this->_state = static::RCDATAState;
        return;
    }

    if ($tagName === HTMLNames::plaintextTag) {
        $this->_state = static::PLAINTEXTState;
        return;
    }

    if ($tagName === HTMLNames::scriptTag) {
        $this->_state = static::ScriptDataState;
        return;
    }

    $switchesToRawText = $tagName === HTMLNames::styleTag
        || $tagName === HTMLNames::iframeTag
        || $tagName === HTMLNames::xmpTag
        || ($tagName === HTMLNames::noembedTag && $this->_pluginsEnabled)
        || $tagName === HTMLNames::noframesTag
        || ($tagName === HTMLNames::noscriptTag && $this->_scriptEnabled);

    if ($switchesToRawText) {
        $this->_state = static::RAWTEXTState;
    }
}
282 |
283 | // http://www.whatwg.org/specs/web-apps/current-work/#tokenization
284 | protected function nextToken(SegmentedString $source) {
285 | while (true) {
286 | $char = $this->_SegmentedString->getCurrentChar();
287 | switch ($this->_state) {
288 | case static::DataState:
289 | if ($char === '&') {
290 | $this->_HTML_ADVANCE_TO(static::CharacterReferenceInDataState);
291 | } else if ($char === '<') {
292 | if ($this->_Token->getType() === HTMLToken::Character) {
293 | // We have a bunch of character tokens queued up that we
294 | // are emitting lazily here.
295 | return true;
296 | }
297 | $this->_HTML_ADVANCE_TO(static::TagOpenState);
298 | } else if ($char === static::kEndOfFileMarker) {
299 | return $this->_emitEndOfFile();
300 | } else {
301 | $this->_bufferCharacter($char);
302 | $this->_HTML_ADVANCE_TO(static::DataState);
303 | }
304 | break;
305 |
306 | case static::CharacterReferenceInDataState:
307 | // TODO Do not expand the reference, so skip parse Character references.
308 | $this->_HTML_SWITCH_TO(static::DataState);
309 | break;
310 |
311 | case static::RCDATAState:
312 | if ($char === '&') {
313 | $this->_HTML_ADVANCE_TO(static::CharacterReferenceInRCDATAState);
314 | } else if ($char === '<') {
315 | $this->_HTML_ADVANCE_TO(static::RCDATALessThanSignState);
316 | } else if ($char === static::kEndOfFileMarker) {
317 | return $this->_emitEndOfFile();
318 | } else {
319 | $this->_bufferCharacter($char);
320 | $this->_HTML_ADVANCE_TO(static::RCDATAState);
321 | }
322 | break;
323 |
324 | case static::CharacterReferenceInRCDATAState:
325 | // TODO: Character references are not expanded, so character reference parsing is skipped here.
326 | $this->_HTML_SWITCH_TO(static::RCDATAState);
327 | break;
328 |
329 | case static::RAWTEXTState:
330 | if ($char === '<') {
331 | $this->_HTML_ADVANCE_TO(static::RAWTEXTLessThanSignState);
332 | } else if ($char === static::kEndOfFileMarker) {
333 | return $this->_emitEndOfFile();
334 | } else {
335 | $this->_bufferCharacter($char);
336 | $this->_HTML_ADVANCE_TO(static::RAWTEXTState);
337 | }
338 | break;
339 |
340 | case static::ScriptDataState:
341 | if ($char === '<') {
342 | $this->_HTML_ADVANCE_TO(static::ScriptDataLessThanSignState);
343 | } else if ($char === static::kEndOfFileMarker) {
344 | return $this->_emitEndOfFile();
345 | } else {
346 | $this->_bufferCharacter($char);
347 | $this->_HTML_ADVANCE_TO(static::ScriptDataState);
348 | }
349 | break;
350 |
351 | case static::PLAINTEXTState:
352 | if ($char === static::kEndOfFileMarker) {
353 | return $this->_emitEndOfFile();
354 | } else {
355 | $this->_bufferCharacter($char);
356 | $this->_HTML_ADVANCE_TO(static::PLAINTEXTState);
357 | }
358 | break;
359 |
360 | case static::TagOpenState:
361 | if ($char === '!') {
362 | $this->_HTML_ADVANCE_TO(static::MarkupDeclarationOpenState);
363 | } else if ($char === '/') {
364 | $this->_HTML_ADVANCE_TO(static::EndTagOpenState);
365 | } else if (ctype_upper($char)) {
366 | $this->_Token->beginStartTag(strtolower($char));
367 | $this->_HTML_ADVANCE_TO(static::TagNameState);
368 | } else if (ctype_lower($char)) {
369 | $this->_Token->beginStartTag(strtolower($char));
370 | $this->_HTML_ADVANCE_TO(static::TagNameState);
371 | } else if ($char === '?') {
372 | $this->_parseError();
373 | // The spec consumes the current character before switching
374 | // to the bogus comment state, but it's easier to implement
375 | // if we reconsume the current character.
376 | $this->_HTML_RECONSUME_IN(static::BogusCommentState);
377 | } else {
378 | $this->_parseError();
379 | $this->_bufferCharacter('<');
380 | $this->_HTML_RECONSUME_IN(static::DataState);
381 | }
382 | break;
383 |
384 | case static::EndTagOpenState:
385 | if (ctype_upper($char)) {
386 | $this->_Token->beginEndTag(strtolower($char));
387 | $this->_HTML_ADVANCE_TO(static::TagNameState);
388 | } else if (ctype_lower($char)) {
389 | $this->_Token->beginEndTag(strtolower($char));
390 | $this->_HTML_ADVANCE_TO(static::TagNameState);
391 | } else if ($char === '>') {
392 | $this->_parseError();
393 | $this->_HTML_ADVANCE_TO(static::DataState);
394 | } else if ($char === static::kEndOfFileMarker) {
395 | $this->_parseError();
396 | $this->_bufferCharacter('<');
397 | $this->_bufferCharacter('/');
398 | $this->_HTML_RECONSUME_IN(static::DataState);
399 | } else {
400 | $this->_parseError();
401 | $this->_HTML_RECONSUME_IN(static::BogusCommentState);
402 | }
403 | break;
404 |
405 | case static::TagNameState:
406 | if ($this->_isTokenizerWhitespace($char)) {
407 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeNameState);
408 | } else if ($char === '/') {
409 | $this->_HTML_ADVANCE_TO(static::SelfClosingStartTagState);
410 | } else if ($char === '>') {
411 | return $this->_emitAndResumeIn();
412 | } else if (ctype_upper($char)) {
413 | $this->_Token->appendToName(strtolower($char));
414 | $this->_HTML_ADVANCE_TO(static::TagNameState);
415 | } else if ($char === static::kEndOfFileMarker) {
416 | $this->_parseError();
417 | $this->_HTML_RECONSUME_IN(static::DataState);
418 | } else {
419 | $this->_Token->appendToName($char);
420 | $this->_HTML_ADVANCE_TO(static::TagNameState);
421 | }
422 | break;
423 |
424 | case static::RCDATALessThanSignState:
425 | if ($char === '/') {
426 | $this->_temporaryBuffer = '';
427 | $this->_HTML_ADVANCE_TO(static::RCDATAEndTagOpenState);
428 | } else {
429 | $this->_bufferCharacter('<');
430 | $this->_HTML_RECONSUME_IN(static::RCDATAState);
431 | }
432 | break;
433 |
434 | case static::RCDATAEndTagOpenState:
435 | if (ctype_upper($char)) {
436 | $this->_temporaryBuffer .= $char;
437 | $this->_bufferedEndTagName .= strtolower($char);
438 | $this->_HTML_ADVANCE_TO(static::RCDATAEndTagNameState);
439 | } else if (ctype_lower($char)) {
440 | $this->_temporaryBuffer .= $char;
441 | $this->_bufferedEndTagName .= $char;
442 | $this->_HTML_ADVANCE_TO(static::RCDATAEndTagNameState);
443 | } else {
444 | $this->_bufferCharacter('<');
445 | $this->_bufferCharacter('/');
446 | $this->_HTML_RECONSUME_IN(static::RCDATAState);
447 | }
448 | break;
449 |
450 | case static::RCDATAEndTagNameState:
451 | if (ctype_upper($char)) {
452 | $this->_temporaryBuffer .= $char;
453 | $this->_bufferedEndTagName .= strtolower($char);
454 | $this->_HTML_ADVANCE_TO(static::RCDATAEndTagNameState);
455 | } else if (ctype_lower($char)) {
456 | $this->_temporaryBuffer .= $char;
457 | $this->_bufferedEndTagName .= $char;
458 | $this->_HTML_ADVANCE_TO(static::RCDATAEndTagNameState);
459 | } else {
460 | if ($this->_isTokenizerWhitespace($char)) {
461 | if ($this->_isAppropriateEndTag()) {
462 | $this->_temporaryBuffer .= $char;
463 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::BeforeAttributeNameState);
464 | if ($result !== null) {
465 | return $result;
466 | }
467 | break;
468 | }
469 | } else if ($char === '/') {
470 | if ($this->_isAppropriateEndTag()) {
471 | $this->_temporaryBuffer .= $char;
472 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::SelfClosingStartTagState);
473 | if ($result !== null) {
474 | return $result;
475 | }
476 | break;
477 | }
478 | } else if ($char === '>') {
479 | if ($this->_isAppropriateEndTag()) {
480 | $this->_temporaryBuffer .= $char;
481 | return $this->_flushEmitAndResumeIn($source, HTMLTokenizer::DataState);
482 | }
483 | }
484 | $this->_bufferCharacter('<');
485 | $this->_bufferCharacter('/');
486 | $this->_Token->appendToCharacter($this->_temporaryBuffer);
487 | $this->_bufferedEndTagName = '';
488 | $this->_temporaryBuffer = '';
489 | $this->_HTML_RECONSUME_IN(static::RCDATAState);
490 | }
491 | break;
492 |
493 | case static::RAWTEXTLessThanSignState:
494 | if ($char === '/') {
495 | $this->_temporaryBuffer = '';
496 | $this->_HTML_ADVANCE_TO(static::RAWTEXTEndTagOpenState);
497 | } else {
498 | $this->_bufferCharacter('<');
499 | $this->_HTML_RECONSUME_IN(static::RAWTEXTState);
500 | }
501 | break;
502 |
503 | case static::RAWTEXTEndTagOpenState:
504 | if (ctype_upper($char)) {
505 | $this->_temporaryBuffer .= $char;
506 | $this->_bufferedEndTagName .= strtolower($char);
507 | $this->_HTML_ADVANCE_TO(static::RAWTEXTEndTagNameState);
508 | } else if (ctype_lower($char)) {
509 | $this->_temporaryBuffer .= $char;
510 | $this->_bufferedEndTagName .= $char;
511 | $this->_HTML_ADVANCE_TO(static::RAWTEXTEndTagNameState);
512 | } else {
513 | $this->_bufferCharacter('<');
514 | $this->_bufferCharacter('/');
515 | $this->_HTML_RECONSUME_IN(static::RAWTEXTState);
516 | }
517 | break;
518 |
519 | case static::RAWTEXTEndTagNameState:
520 | if (ctype_upper($char)) {
521 | $this->_temporaryBuffer .= $char;
522 | $this->_bufferedEndTagName .= strtolower($char);
523 | $this->_HTML_ADVANCE_TO(static::RAWTEXTEndTagNameState);
524 | } else if (ctype_lower($char)) {
525 | $this->_temporaryBuffer .= $char;
526 | $this->_bufferedEndTagName .= $char;
527 | $this->_HTML_ADVANCE_TO(static::RAWTEXTEndTagNameState);
528 | } else {
529 | if ($this->_isTokenizerWhitespace($char)) {
530 | if ($this->_isAppropriateEndTag()) {
531 | $this->_temporaryBuffer .= $char;
532 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::BeforeAttributeNameState);
533 | if ($result !== null) {
534 | return $result;
535 | }
536 | break;
537 | }
538 | } else if ($char === '/') {
539 | if ($this->_isAppropriateEndTag()) {
540 | $this->_temporaryBuffer .= $char;
541 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::SelfClosingStartTagState);
542 | if ($result !== null) {
543 | return $result;
544 | }
545 | break;
546 | }
547 | } else if ($char === '>') {
548 | if ($this->_isAppropriateEndTag()) {
549 | $this->_temporaryBuffer .= $char;
550 | return $this->_flushEmitAndResumeIn($source, HTMLTokenizer::DataState);
551 | }
552 | }
553 | $this->_bufferCharacter('<');
554 | $this->_bufferCharacter('/');
555 | $this->_Token->appendToCharacter($this->_temporaryBuffer);
556 | $this->_bufferedEndTagName = '';
557 | $this->_temporaryBuffer = '';
558 | $this->_HTML_RECONSUME_IN(static::RAWTEXTState);
559 | }
560 | break;
561 |
562 | case static::ScriptDataLessThanSignState:
563 | if ($char === '/') {
564 | $this->_temporaryBuffer = '';
565 | $this->_HTML_ADVANCE_TO(static::ScriptDataEndTagOpenState);
566 | } else if ($char === '!') {
567 | $this->_bufferCharacter('<');
568 | $this->_bufferCharacter('!');
569 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapeStartState);
570 | } else {
571 | $this->_bufferCharacter('<');
572 | $this->_HTML_RECONSUME_IN(static::ScriptDataState);
573 | }
574 | break;
575 |
576 | case static::ScriptDataEndTagOpenState:
577 | if (ctype_upper($char)) {
578 | $this->_temporaryBuffer .= $char;
579 | $this->_bufferedEndTagName .= strtolower($char);
580 | $this->_HTML_ADVANCE_TO(static::ScriptDataEndTagNameState);
581 | } else if (ctype_lower($char)) {
582 | $this->_temporaryBuffer .= $char;
583 | $this->_bufferedEndTagName .= $char;
584 | $this->_HTML_ADVANCE_TO(static::ScriptDataEndTagNameState);
585 | } else {
586 | $this->_bufferCharacter('<');
587 | $this->_bufferCharacter('/');
588 | $this->_HTML_RECONSUME_IN(static::ScriptDataState);
589 | }
590 | break;
591 |
592 | case static::ScriptDataEndTagNameState:
593 | if (ctype_upper($char)) {
594 | $this->_temporaryBuffer .= $char;
595 | $this->_bufferedEndTagName .= strtolower($char);
596 | $this->_HTML_ADVANCE_TO(static::ScriptDataEndTagNameState);
597 | } else if (ctype_lower($char)) {
598 | $this->_temporaryBuffer .= $char;
599 | $this->_bufferedEndTagName .= $char;
600 | $this->_HTML_ADVANCE_TO(static::ScriptDataEndTagNameState);
601 | } else {
602 | if ($this->_isTokenizerWhitespace($char)) {
603 | if ($this->_isAppropriateEndTag()) {
604 | $this->_temporaryBuffer .= $char;
605 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::BeforeAttributeNameState);
606 | if ($result !== null) {
607 | return $result;
608 | }
609 | break;
610 | }
611 | } else if ($char === '/') {
612 | if ($this->_isAppropriateEndTag()) {
613 | $this->_temporaryBuffer .= $char;
614 | $result = $this->_FLUSH_AND_ADVANCE_TO(static::SelfClosingStartTagState);
615 | if ($result !== null) {
616 | return $result;
617 | }
618 | break;
619 | }
620 | } else if ($char === '>') {
621 | if ($this->_isAppropriateEndTag()) {
622 | $this->_temporaryBuffer .= $char;
623 | return $this->_flushEmitAndResumeIn($source, HTMLTokenizer::DataState);
624 | }
625 | }
626 | $this->_bufferCharacter('<');
627 | $this->_bufferCharacter('/');
628 | $this->_Token->appendToCharacter($this->_temporaryBuffer);
629 | $this->_bufferedEndTagName = '';
630 | $this->_temporaryBuffer = '';
631 | $this->_HTML_RECONSUME_IN(static::ScriptDataState);
632 | }
633 | break;
634 |
635 | case static::ScriptDataEscapeStartState:
636 | if ($char === '-') {
637 | $this->_bufferCharacter($char);
638 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapeStartDashState);
639 | } else {
640 | $this->_HTML_RECONSUME_IN(static::ScriptDataState);
641 | }
642 | break;
643 |
644 | case static::ScriptDataEscapeStartDashState:
645 | if ($char === '-') {
646 | $this->_bufferCharacter($char);
647 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedDashDashState);
648 | } else {
649 | $this->_HTML_RECONSUME_IN(static::ScriptDataState);
650 | }
651 | break;
652 |
653 | case static::ScriptDataEscapedState:
654 | if ($char === '-') {
655 | $this->_bufferCharacter($char);
656 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedDashState);
657 | } else if ($char === '<') {
658 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedLessThanSignState);
659 | } else if ($char === static::kEndOfFileMarker) {
660 | $this->_parseError();
661 | $this->_HTML_RECONSUME_IN(static::DataState);
662 | } else {
663 | $this->_bufferCharacter($char);
664 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedState);
665 | }
666 | break;
667 |
668 | case static::ScriptDataEscapedDashState:
669 | if ($char === '-') {
670 | $this->_bufferCharacter($char);
671 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedDashDashState);
672 | } else if ($char === '<') {
673 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedLessThanSignState);
674 | } else if ($char === static::kEndOfFileMarker) {
675 | $this->_parseError();
676 | $this->_HTML_RECONSUME_IN(static::DataState);
677 | } else {
678 | $this->_bufferCharacter($char);
679 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedState);
680 | }
681 | break;
682 |
683 | case static::ScriptDataEscapedDashDashState:
684 | if ($char === '-') {
685 | $this->_bufferCharacter($char);
686 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedDashDashState);
687 | } else if ($char === '<') {
688 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedLessThanSignState);
689 | } else if ($char === '>') {
690 | $this->_bufferCharacter($char);
691 | $this->_HTML_ADVANCE_TO(static::ScriptDataState);
692 | } else if ($char === static::kEndOfFileMarker) {
693 | $this->_parseError();
694 | $this->_HTML_RECONSUME_IN(static::DataState);
695 | } else {
696 | $this->_bufferCharacter($char);
697 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedState);
698 | }
699 | break;
700 |
701 | case static::ScriptDataEscapedLessThanSignState:
702 | if ($char === '/') {
703 | $this->_temporaryBuffer = '';
704 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedEndTagOpenState);
705 | } else if (ctype_upper($char)) {
706 | $this->_bufferCharacter('<');
707 | $this->_bufferCharacter($char);
708 | $this->_temporaryBuffer = '';
709 | $this->_temporaryBuffer = strtolower($char);
710 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeStartState);
711 | } else if (ctype_lower($char)) {
712 | $this->_bufferCharacter('<');
713 | $this->_bufferCharacter($char);
714 | $this->_temporaryBuffer = '';
715 | $this->_temporaryBuffer .= $char;
716 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeStartState);
717 | } else {
718 | $this->_bufferCharacter('<');
719 | $this->_HTML_RECONSUME_IN(static::ScriptDataEscapedState);
720 | }
721 | break;
722 |
723 | case static::ScriptDataEscapedEndTagOpenState:
724 | if (ctype_upper($char)) {
725 | $this->_temporaryBuffer .= $char;
726 | $this->_bufferedEndTagName .= strtolower($char);
727 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedEndTagNameState);
728 | } else if (ctype_lower($char)) {
729 | $this->_temporaryBuffer .= $char;
730 | $this->_bufferedEndTagName .= $char;
731 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedEndTagNameState);
732 | } else {
733 | $this->_bufferCharacter('<');
734 | $this->_bufferCharacter('/');
735 | $this->_HTML_RECONSUME_IN(static::ScriptDataEscapedState);
736 | }
737 | break;
738 |
739 | case static::ScriptDataEscapedEndTagNameState:
740 | if (ctype_upper($char)) {
741 | $this->_temporaryBuffer .= $char;
742 | $this->_bufferedEndTagName .= strtolower($char);
743 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedEndTagNameState);
744 | } else if (ctype_lower($char)) {
745 | $this->_temporaryBuffer .= $char;
746 | $this->_bufferedEndTagName .= $char;
747 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedEndTagNameState);
748 | } else {
749 | if ($this->_isTokenizerWhitespace($char)) {
750 | if ($this->_isAppropriateEndTag()) {
751 | $this->_temporaryBuffer .= $char;
752 | // ScriptDataEscapeStartState called bufferCharacter, so `_FLUSH_AND_ADVANCE_TO` always returns true.
753 | return $this->_FLUSH_AND_ADVANCE_TO(static::BeforeAttributeNameState);
754 | }
755 | } else if ($char === '/') {
756 | if ($this->_isAppropriateEndTag()) {
757 | $this->_temporaryBuffer .= $char;
758 | // ScriptDataEscapeStartState called bufferCharacter, so `_FLUSH_AND_ADVANCE_TO` always returns true.
759 | return $this->_FLUSH_AND_ADVANCE_TO(static::SelfClosingStartTagState);
760 | }
761 | } else if ($char === '>') {
762 | if ($this->_isAppropriateEndTag()) {
763 | $this->_temporaryBuffer .= $char;
764 | $this->_temporaryBuffer .= $char;
765 | return $this->_flushEmitAndResumeIn($source, HTMLTokenizer::DataState);
766 | }
767 | }
768 | $this->_bufferCharacter('<');
769 | $this->_bufferCharacter('/');
770 | $this->_Token->appendToCharacter($this->_temporaryBuffer);
771 | $this->_bufferedEndTagName = '';
772 | $this->_temporaryBuffer = '';
773 | $this->_HTML_RECONSUME_IN(static::ScriptDataEscapedState);
774 | }
775 | break;
776 |
777 | case static::ScriptDataDoubleEscapeStartState:
778 | if ($this->_isTokenizerWhitespace($char) || $char === '/' || $char === '>') {
779 | $this->_bufferCharacter($char);
780 | if ($this->_temporaryBufferIs(HTMLNames::scriptTag)) {
781 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedState);
782 | } else {
783 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedState);
784 | }
785 | } else if (ctype_upper($char)) {
786 | $this->_bufferCharacter($char);
787 | $this->_temporaryBuffer .= strtolower($char);
788 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeStartState);
789 | } else if (ctype_lower($char)) {
790 | $this->_bufferCharacter($char);
791 | $this->_temporaryBuffer .= $char;
792 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeStartState);
793 | } else {
794 | $this->_HTML_RECONSUME_IN(static::ScriptDataEscapedState);
795 | }
796 | break;
797 |
798 | case static::ScriptDataDoubleEscapedState:
799 | if ($char === '-') {
800 | $this->_bufferCharacter($char);
801 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedDashState);
802 | } else if ($char === '<') {
803 | $this->_bufferCharacter($char);
804 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedLessThanSignState);
805 | } else if ($char === static::kEndOfFileMarker) {
806 | $this->_parseError();
807 | $this->_HTML_RECONSUME_IN(static::DataState);
808 | } else {
809 | $this->_bufferCharacter($char);
810 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedState);
811 | }
812 | break;
813 |
814 | case static::ScriptDataDoubleEscapedDashState:
815 | if ($char === '-') {
816 | $this->_bufferCharacter($char);
817 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedDashDashState);
818 | } else if ($char === '<') {
819 | $this->_bufferCharacter($char);
820 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedLessThanSignState);
821 | } else if ($char === static::kEndOfFileMarker) {
822 | $this->_parseError();
823 | $this->_HTML_RECONSUME_IN(static::DataState);
824 | } else {
825 | $this->_bufferCharacter($char);
826 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedState);
827 | }
828 | break;
829 |
830 | case static::ScriptDataDoubleEscapedDashDashState:
831 | if ($char === '-') {
832 | $this->_bufferCharacter($char);
833 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedDashDashState);
834 | } else if ($char === '<') {
835 | $this->_bufferCharacter($char);
836 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedLessThanSignState);
837 | } else if ($char === '>') {
838 | $this->_bufferCharacter($char);
839 | $this->_HTML_ADVANCE_TO(static::ScriptDataState);
840 | } else if ($char === static::kEndOfFileMarker) {
841 | $this->_parseError();
842 | $this->_HTML_RECONSUME_IN(static::DataState);
843 | } else {
844 | $this->_bufferCharacter($char);
845 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedState);
846 | }
847 | break;
848 |
849 | case static::ScriptDataDoubleEscapedLessThanSignState:
850 | if ($char === '/') {
851 | $this->_bufferCharacter($char);
852 | $this->_temporaryBuffer = '';
853 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeEndState);
854 | } else
855 | $this->_HTML_RECONSUME_IN(static::ScriptDataDoubleEscapedState);
856 | break;
857 |
858 | case static::ScriptDataDoubleEscapeEndState:
859 | if ($this->_isTokenizerWhitespace($char) || $char === '/' || $char === '>') {
860 | $this->_bufferCharacter($char);
861 | if ($this->_temporaryBufferIs(HTMLNames::scriptTag)) {
862 | $this->_HTML_ADVANCE_TO(static::ScriptDataEscapedState);
863 | } else {
864 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapedState);
865 | }
866 | } else if (ctype_upper($char)) {
867 | $this->_bufferCharacter($char);
868 | $this->_temporaryBuffer .= strtolower($char);
869 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeEndState);
870 | } else if (ctype_lower($char)) {
871 | $this->_bufferCharacter($char);
872 | $this->_temporaryBuffer .= $char;
873 | $this->_HTML_ADVANCE_TO(static::ScriptDataDoubleEscapeEndState);
874 | } else {
875 | $this->_HTML_RECONSUME_IN(static::ScriptDataDoubleEscapedState);
876 | }
877 | break;
878 |
879 | case static::BeforeAttributeNameState:
880 | if ($this->_isTokenizerWhitespace($char)) {
881 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeNameState);
882 | } else if ($char === '/') {
883 | $this->_HTML_ADVANCE_TO(static::SelfClosingStartTagState);
884 | } else if ($char === '>') {
885 | return $this->_emitAndResumeIn();
886 | } else if (ctype_upper($char)) {
887 | $this->_Token->addNewAttribute();
888 | $this->_Token->beginAttributeName($source->numberOfCharactersConsumed());
889 | $this->_Token->appendToAttributeName(strtolower($char));
890 | $this->_HTML_ADVANCE_TO(static::AttributeNameState);
891 | } else if ($char === static::kEndOfFileMarker) {
892 | $this->_parseError();
893 | $this->_HTML_RECONSUME_IN(static::DataState);
894 | } else {
895 | if ($char === '"' || $char === '\'' || $char === '<' || $char === '=') {
896 | $this->_parseError();
897 | }
898 | $this->_Token->addNewAttribute();
899 | $this->_Token->beginAttributeName($source->numberOfCharactersConsumed());
900 | $this->_Token->appendToAttributeName($char);
901 | $this->_HTML_ADVANCE_TO(static::AttributeNameState);
902 | }
903 | break;
904 |
905 | case static::AttributeNameState:
906 | if ($this->_isTokenizerWhitespace($char)) {
907 | $this->_Token->endAttributeName($source->numberOfCharactersConsumed());
908 | $this->_HTML_ADVANCE_TO(static::AfterAttributeNameState);
909 | } else if ($char === '/') {
910 | $this->_Token->endAttributeName($source->numberOfCharactersConsumed());
911 | $this->_HTML_ADVANCE_TO(static::SelfClosingStartTagState);
912 | } else if ($char === '=') {
913 | $this->_Token->endAttributeName($source->numberOfCharactersConsumed());
914 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeValueState);
915 | } else if ($char === '>') {
916 | $this->_Token->endAttributeName($source->numberOfCharactersConsumed());
917 | return $this->_emitAndResumeIn();
918 | } else if (ctype_upper($char)) {
919 | $this->_Token->appendToAttributeName(strtolower($char));
920 | $this->_HTML_ADVANCE_TO(static::AttributeNameState);
921 | } else if ($char === static::kEndOfFileMarker) {
922 | $this->_parseError();
923 | $this->_Token->endAttributeName($source->numberOfCharactersConsumed());
924 | $this->_HTML_RECONSUME_IN(static::DataState);
925 | } else {
926 | if ($char === '"' || $char === '\'' || $char === '<' || $char === '=') {
927 | $this->_parseError();
928 | }
929 | $this->_Token->appendToAttributeName($char);
930 | $this->_HTML_ADVANCE_TO(static::AttributeNameState);
931 | }
932 | break;
933 |
934 | case static::AfterAttributeNameState:
935 | if ($this->_isTokenizerWhitespace($char)) {
936 | $this->_HTML_ADVANCE_TO(static::AfterAttributeNameState);
937 | } else if ($char === '/') {
938 | $this->_HTML_ADVANCE_TO(static::SelfClosingStartTagState);
939 | } else if ($char === '=') {
940 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeValueState);
941 | } else if ($char === '>') {
942 | return $this->_emitAndResumeIn();
943 | } else if (ctype_upper($char)) {
944 | $this->_Token->addNewAttribute();
945 | $this->_Token->beginAttributeName($source->numberOfCharactersConsumed());
946 | $this->_Token->appendToAttributeName(strtolower($char));
947 | $this->_HTML_ADVANCE_TO(static::AttributeNameState);
948 | } else if ($char === static::kEndOfFileMarker) {
949 | $this->_parseError();
950 | $this->_HTML_RECONSUME_IN(static::DataState);
951 | } else {
952 | if ($char === '"' || $char === '\'' || $char === '<') {
953 | $this->_parseError();
954 | }
955 | $this->_Token->addNewAttribute();
956 | $this->_Token->beginAttributeName($source->numberOfCharactersConsumed());
957 | $this->_Token->appendToAttributeName($char);
958 | $this->_HTML_ADVANCE_TO(static::AttributeNameState);
959 | }
960 | break;
961 |
962 | case static::BeforeAttributeValueState:
963 | if ($this->_isTokenizerWhitespace($char)) {
964 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeValueState);
965 | } else if ($char === '"') {
966 | $this->_Token->beginAttributeValue($source->numberOfCharactersConsumed() + 1);
967 | $this->_HTML_ADVANCE_TO(static::AttributeValueDoubleQuotedState);
968 | } else if ($char === '&') {
969 | $this->_Token->beginAttributeValue($source->numberOfCharactersConsumed());
970 | $this->_HTML_RECONSUME_IN(static::AttributeValueUnquotedState);
971 | } else if ($char === '\'') {
972 | $this->_Token->beginAttributeValue($source->numberOfCharactersConsumed() + 1);
973 | $this->_HTML_ADVANCE_TO(static::AttributeValueSingleQuotedState);
974 | } else if ($char === '>') {
975 | $this->_parseError();
976 | return $this->_emitAndResumeIn();
977 | } else if ($char === static::kEndOfFileMarker) {
978 | $this->_parseError();
979 | $this->_HTML_RECONSUME_IN(static::DataState);
980 | } else {
981 | if ($char === '<' || $char === '=' || $char === '`') {
982 | $this->_parseError();
983 | }
984 | $this->_Token->beginAttributeValue($source->numberOfCharactersConsumed());
985 | $this->_Token->appendToAttributeValue($char);
986 | $this->_HTML_ADVANCE_TO(static::AttributeValueUnquotedState);
987 | }
988 | break;
989 |
990 | case static::AttributeValueDoubleQuotedState:
991 | if ($char === '"') {
992 | $this->_Token->setDoubleQuoted();
993 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed());
994 | $this->_HTML_ADVANCE_TO(static::AfterAttributeValueQuotedState);
995 | } else if ($char === '&') {
996 | $this->_additionalAllowedCharacter = '"';
997 | $this->_HTML_ADVANCE_TO(static::CharacterReferenceInAttributeValueState);
998 | } else if ($char === static::kEndOfFileMarker) {
999 | $this->_parseError();
1000 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed());
1001 | $this->_HTML_RECONSUME_IN(static::DataState);
1002 | } else {
1003 | $this->_Token->appendToAttributeValue($char);
1004 | $this->_HTML_ADVANCE_TO(static::AttributeValueDoubleQuotedState);
1005 | }
1006 | break;
1007 |
1008 | case static::AttributeValueSingleQuotedState:
1009 | if ($char === '\'') {
1010 | $this->_Token->setSingleQuoted();
1011 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed());
1012 | $this->_HTML_ADVANCE_TO(static::AfterAttributeValueQuotedState);
1013 | } else if ($char === '&') {
1014 | $this->_additionalAllowedCharacter = '\'';
1015 | $this->_HTML_ADVANCE_TO(static::CharacterReferenceInAttributeValueState);
1016 | } else if ($char === static::kEndOfFileMarker) {
1017 | $this->_parseError();
1018 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed());
1019 | $this->_HTML_RECONSUME_IN(static::DataState);
1020 | } else {
1021 | $this->_Token->appendToAttributeValue($char);
1022 | $this->_HTML_ADVANCE_TO(static::AttributeValueSingleQuotedState);
1023 | }
1024 | break;
1025 |
1026 | case static::AttributeValueUnquotedState:
1027 | if ($this->_isTokenizerWhitespace($char)) {
1028 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed());
1029 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeNameState);
1030 | } else if ($char === '&') {
1031 | $this->_additionalAllowedCharacter = '>';
1032 | $this->_HTML_ADVANCE_TO(static::CharacterReferenceInAttributeValueState);
1033 | } else if ($char === '>') {
1034 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed());
1035 | return $this->_emitAndResumeIn();
1036 | } else if ($char === static::kEndOfFileMarker) {
1037 | $this->_parseError();
1038 | $this->_Token->endAttributeValue($source->numberOfCharactersConsumed());
1039 | $this->_HTML_RECONSUME_IN(static::DataState);
1040 | } else {
1041 | if ($char === '"' || $char === '\'' || $char === '<' || $char === '=' || $char === '`') {
1042 | $this->_parseError();
1043 | }
1044 | $this->_Token->appendToAttributeValue($char);
1045 | $this->_HTML_ADVANCE_TO(static::AttributeValueUnquotedState);
1046 | }
1047 | break;
1048 |
1049 | case static::CharacterReferenceInAttributeValueState:
1050 | // TODO: Character references are not expanded, so character reference parsing is skipped here.
1051 | $this->_Token->appendToAttributeValue('&');
1052 | // We're supposed to switch back to the attribute value state that
1053 | // we were in when we were switched into this state. Rather than
1054 | // keeping track of this explicitly, we observe that the previous
1055 | // state can be determined by $this->_additionalAllowedCharacter.
1056 | if ($this->_additionalAllowedCharacter === '"') {
1057 | $this->_HTML_SWITCH_TO(static::AttributeValueDoubleQuotedState);
1058 | } else if ($this->_additionalAllowedCharacter === '\'') {
1059 | $this->_HTML_SWITCH_TO(static::AttributeValueSingleQuotedState);
1060 | } else if ($this->_additionalAllowedCharacter === '>') {
1061 | $this->_HTML_SWITCH_TO(static::AttributeValueUnquotedState);
1062 | } else {
1063 | // ASSERT_NOT_REACHED();
1064 | }
1065 | break;
1066 |
1067 | case static::AfterAttributeValueQuotedState:
1068 | if ($this->_isTokenizerWhitespace($char)) {
1069 | $this->_HTML_ADVANCE_TO(static::BeforeAttributeNameState);
1070 | } else if ($char === '/') {
1071 | $this->_HTML_ADVANCE_TO(static::SelfClosingStartTagState);
1072 | } else if ($char === '>') {
1073 | return $this->_emitAndResumeIn();
1074 | } else if ($char === static::kEndOfFileMarker) {
1075 | $this->_parseError();
1076 | $this->_HTML_RECONSUME_IN(static::DataState);
1077 | } else {
1078 | $this->_parseError();
1079 | $this->_HTML_RECONSUME_IN(static::BeforeAttributeNameState);
1080 | }
1081 | break;
1082 |
1083 | case static::SelfClosingStartTagState:
1084 | if ($char === '>') {
1085 | $this->_Token->setSelfClosing();
1086 | return $this->_emitAndResumeIn();
1087 | } else if ($char === static::kEndOfFileMarker) {
1088 | $this->_parseError();
1089 | $this->_HTML_RECONSUME_IN(static::DataState);
1090 | } else {
1091 | $this->_parseError();
1092 | $this->_HTML_RECONSUME_IN(static::BeforeAttributeNameState);
1093 | }
1094 | break;
1095 |
1096 | case static::BogusCommentState:
1097 | $this->_Token->beginComment();
1098 | $this->_HTML_RECONSUME_IN(static::ContinueBogusCommentState);
1099 | break;
1100 |
1101 | case static::ContinueBogusCommentState:
1102 | if ($char === '>') {
1103 | return $this->_emitAndResumeIn();
1104 | } else if ($char === static::kEndOfFileMarker) {
1105 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1106 | } else {
1107 | $this->_Token->appendToComment($char);
1108 | $this->_HTML_ADVANCE_TO(static::ContinueBogusCommentState);
1109 | }
1110 | break;
1111 |
1112 | case static::MarkupDeclarationOpenState:
1113 | $dashDashString = '--';
1114 | $doctypeString = 'doctype';
1115 | $cdataString = '[CDATA[';
1116 | if ($char === '-') {
1117 | $result = $source->lookAhead($dashDashString);
1118 | if ($result === SegmentedString::DidMatch) {
1119 | $this->addState();
1120 | $this->_SegmentedString->read(strlen('--'));
1121 | $this->_Token->beginComment();
1122 | $this->_HTML_SWITCH_TO(static::CommentStartState);
1123 | continue;
1124 | } else if ($result === SegmentedString::NotEnoughCharacters) {
1125 | $this->addState();
1126 | return $this->_haveBufferedCharacterToken();
1127 | }
1128 | } else if ($char === 'D' || $char === 'd') {
1129 | $result = $this->_SegmentedString->lookAheadIgnoringCase($doctypeString);
1130 | if ($result === SegmentedString::DidMatch) {
1131 | $this->addState();
1132 | $this->_SegmentedString->read(strlen($doctypeString));
1133 | $this->_HTML_SWITCH_TO(static::DOCTYPEState);
1134 | continue;
1135 | } else if ($result === SegmentedString::NotEnoughCharacters) {
1136 | $this->addState();
1137 | return $this->_haveBufferedCharacterToken();
1138 | }
1139 | } else if ($char === '[' && $this->_shouldAllowCDATA()) {
1140 | $result = $source->lookAhead($cdataString);
1141 | if ($result === SegmentedString::DidMatch) {
1142 | $this->addState();
1143 | $this->_SegmentedString->read(strlen($cdataString));
1144 | $this->_HTML_SWITCH_TO(static::CDATASectionState);
1145 | continue;
1146 | } else if ($result === SegmentedString::NotEnoughCharacters) {
1147 | $this->addState();
1148 | return $this->_haveBufferedCharacterToken();
1149 | }
1150 | }
1151 | $this->_parseError();
1152 | $this->_HTML_RECONSUME_IN(static::BogusCommentState);
1153 | break;
1154 |
1155 | case static::CommentStartState:
1156 | if ($char === '-') {
1157 | $this->_HTML_ADVANCE_TO(static::CommentStartDashState);
1158 | } else if ($char === '>') {
1159 | $this->_parseError();
1160 | return $this->_emitAndResumeIn();
1161 | } else if ($char === static::kEndOfFileMarker) {
1162 | $this->_parseError();
1163 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1164 | } else {
1165 | $this->_Token->appendToComment($char);
1166 | $this->_HTML_ADVANCE_TO(static::CommentState);
1167 | }
1168 | break;
1169 |
1170 | case static::CommentStartDashState:
1171 | if ($char === '-') {
1172 | $this->_HTML_ADVANCE_TO(static::CommentEndState);
1173 | } else if ($char === '>') {
1174 | $this->_parseError();
1175 | return $this->_emitAndResumeIn();
1176 | } else if ($char === static::kEndOfFileMarker) {
1177 | $this->_parseError();
1178 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1179 | } else {
1180 | $this->_Token->appendToComment('-');
1181 | $this->_Token->appendToComment($char);
1182 | $this->_HTML_ADVANCE_TO(static::CommentState);
1183 | }
1184 | break;
1185 |
1186 | case static::CommentState:
1187 | if ($char === '-') {
1188 | $this->_HTML_ADVANCE_TO(static::CommentEndDashState);
1189 | } else if ($char === static::kEndOfFileMarker) {
1190 | $this->_parseError();
1191 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1192 | } else {
1193 | $this->_Token->appendToComment($char);
1194 | $this->_HTML_ADVANCE_TO(static::CommentState);
1195 | }
1196 | break;
1197 |
1198 | case static::CommentEndDashState:
1199 | if ($char === '-') {
1200 | $this->_HTML_ADVANCE_TO(static::CommentEndState);
1201 | } else if ($char === static::kEndOfFileMarker) {
1202 | $this->_parseError();
1203 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1204 | } else {
1205 | $this->_Token->appendToComment('-');
1206 | $this->_Token->appendToComment($char);
1207 | $this->_HTML_ADVANCE_TO(static::CommentState);
1208 | }
1209 | break;
1210 |
1211 | case static::CommentEndState:
1212 | if ($char === '>') {
1213 | return $this->_emitAndResumeIn();
1214 | } else if ($char === '!') {
1215 | $this->_parseError();
1216 | $this->_HTML_ADVANCE_TO(static::CommentEndBangState);
1217 | } else if ($char === '-') {
1218 | $this->_parseError();
1219 | $this->_Token->appendToComment('-');
1220 | $this->_HTML_ADVANCE_TO(static::CommentEndState);
1221 | } else if ($char === static::kEndOfFileMarker) {
1222 | $this->_parseError(true);
1223 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1224 | } else {
1225 | $this->_parseError();
1226 | $this->_Token->appendToComment('-');
1227 | $this->_Token->appendToComment('-');
1228 | $this->_Token->appendToComment($char);
1229 | $this->_HTML_ADVANCE_TO(static::CommentState);
1230 | }
1231 | break;
1232 |
1233 | case static::CommentEndBangState:
1234 | if ($char === '-') {
1235 | $this->_Token->appendToComment('-');
1236 | $this->_Token->appendToComment('-');
1237 | $this->_Token->appendToComment('!');
1238 | $this->_HTML_ADVANCE_TO(static::CommentEndDashState);
1239 | } else if ($char === '>') {
1240 | return $this->_emitAndResumeIn();
1241 | } else if ($char === static::kEndOfFileMarker) {
1242 | $this->_parseError(true);
1243 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1244 | } else {
1245 | $this->_Token->appendToComment('-');
1246 | $this->_Token->appendToComment('-');
1247 | $this->_Token->appendToComment('!');
1248 | $this->_Token->appendToComment($char);
1249 | $this->_HTML_ADVANCE_TO(static::CommentState);
1250 | }
1251 | break;
1252 |
1253 | case static::DOCTYPEState:
1254 | if ($this->_isTokenizerWhitespace($char)) {
1255 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPENameState);
1256 | } else if ($char === static::kEndOfFileMarker) {
1257 | $this->_parseError();
1258 | $this->_Token->beginDOCTYPE();
1259 | $this->_Token->setForceQuirks();
1260 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1261 | } else {
1262 | $this->_parseError();
1263 | $this->_HTML_RECONSUME_IN(static::BeforeDOCTYPENameState);
1264 | }
1265 | break;
1266 |
1267 | case static::BeforeDOCTYPENameState:
1268 | if ($this->_isTokenizerWhitespace($char)) {
1269 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPENameState);
1270 | } else if (ctype_upper($char)) {
1271 | $this->_Token->beginDOCTYPE(strtolower($char));
1272 | $this->_HTML_ADVANCE_TO(static::DOCTYPENameState);
1273 | } else if ($char === '>') {
1274 | $this->_parseError();
1275 | $this->_Token->beginDOCTYPE();
1276 | $this->_Token->setForceQuirks();
1277 | return $this->_emitAndResumeIn();
1278 | } else if ($char === static::kEndOfFileMarker) {
1279 | $this->_parseError(true);
1280 | $this->_Token->beginDOCTYPE();
1281 | $this->_Token->setForceQuirks();
1282 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1283 | } else {
1284 | $this->_Token->beginDOCTYPE($char);
1285 | $this->_HTML_ADVANCE_TO(static::DOCTYPENameState);
1286 | }
1287 | break;
1288 |
1289 | case static::DOCTYPENameState:
1290 | if ($this->_isTokenizerWhitespace($char)) {
1291 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPENameState);
1292 | } else if ($char === '>') {
1293 | return $this->_emitAndResumeIn();
1294 | } else if (ctype_upper($char)) {
1295 | $this->_Token->appendToName(strtolower($char));
1296 | $this->_HTML_ADVANCE_TO(static::DOCTYPENameState);
1297 | } else if ($char === static::kEndOfFileMarker) {
1298 | $this->_parseError(true);
1299 | $this->_Token->setForceQuirks();
1300 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1301 | } else {
1302 | $this->_Token->appendToName($char);
1303 | $this->_HTML_ADVANCE_TO(static::DOCTYPENameState);
1304 | }
1305 | break;
1306 |
1307 | case static::AfterDOCTYPENameState:
1308 | if ($this->_isTokenizerWhitespace($char)) {
1309 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPENameState);
1310 | } else if ($char === '>') {
1311 | return $this->_emitAndResumeIn();
1312 | } else if ($char === static::kEndOfFileMarker) {
1313 | $this->_parseError(true);
1314 | $this->_Token->setForceQuirks();
1315 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1316 | } else {
1317 | // DEFINE_STATIC_LOCAL(String, publicString, (ASCIILiteral("public")));
1318 | $publicString = 'public';
1319 | // DEFINE_STATIC_LOCAL(String, systemString, (ASCIILiteral("system")));
1320 | $systemString = 'system';
1321 | if ($char === 'P' || $char === 'p') {
1322 | $result = $source->lookAheadIgnoringCase($publicString);
1323 | if ($result === SegmentedString::DidMatch) {
1324 | $this->addState();
1325 | $this->_HTML_SWITCH_TO(static::AfterDOCTYPEPublicKeywordState);
1326 | $this->_SegmentedString->read(strlen($publicString));
1327 | continue;
1328 | }
1329 | // @todo
1330 | // else if ($result === SegmentedString::NotEnoughCharacters) {
1331 | // $this->addState();
1332 | // return $this->_haveBufferedCharacterToken();
1333 | // }
1334 | } else if ($char === 'S' || $char === 's') {
1335 | $result = $source->lookAheadIgnoringCase($systemString);
1336 | if ($result === SegmentedString::DidMatch) {
1337 | $this->addState();
1338 | $this->_HTML_SWITCH_TO(static::AfterDOCTYPESystemKeywordState);
1339 | $this->_SegmentedString->read(strlen($systemString));
1340 | continue;
1341 | }
1342 | // @todo
1343 | // else if ($result === SegmentedString::NotEnoughCharacters) {
1344 | // $this->addState();
1345 | // return $this->_haveBufferedCharacterToken();
1346 | // }
1347 | }
1348 | $this->_parseError();
1349 | $this->_Token->setForceQuirks();
1350 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState);
1351 | }
1352 | break;
1353 |
1354 | case static::AfterDOCTYPEPublicKeywordState:
1355 | if ($this->_isTokenizerWhitespace($char)) {
1356 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPEPublicIdentifierState);
1357 | } else if ($char === '"') {
1358 | $this->_parseError();
1359 | $this->_Token->setPublicIdentifierToEmptyString();
1360 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierDoubleQuotedState);
1361 | } else if ($char === '\'') {
1362 | $this->_parseError();
1363 | $this->_Token->setPublicIdentifierToEmptyString();
1364 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierSingleQuotedState);
1365 | } else if ($char === '>') {
1366 | $this->_parseError();
1367 | $this->_Token->setForceQuirks();
1368 | return $this->_emitAndResumeIn();
1369 | } else if ($char === static::kEndOfFileMarker) {
1370 | $this->_parseError(true);
1371 | $this->_Token->setForceQuirks();
1372 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1373 | } else {
1374 | $this->_parseError();
1375 | $this->_Token->setForceQuirks();
1376 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState);
1377 | }
1378 | break;
1379 |
1380 | case static::BeforeDOCTYPEPublicIdentifierState:
1381 | if ($this->_isTokenizerWhitespace($char)) {
1382 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPEPublicIdentifierState);
1383 | } else if ($char === '"') {
1384 | $this->_Token->setPublicIdentifierToEmptyString();
1385 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierDoubleQuotedState);
1386 | } else if ($char === '\'') {
1387 | $this->_Token->setPublicIdentifierToEmptyString();
1388 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierSingleQuotedState);
1389 | } else if ($char === '>') {
1390 | $this->_parseError();
1391 | $this->_Token->setForceQuirks();
1392 | return $this->_emitAndResumeIn();
1393 | } else if ($char === static::kEndOfFileMarker) {
1394 | $this->_parseError(true);
1395 | $this->_Token->setForceQuirks();
1396 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1397 | } else {
1398 | $this->_parseError();
1399 | $this->_Token->setForceQuirks();
1400 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState);
1401 | }
1402 | break;
1403 |
1404 | case static::DOCTYPEPublicIdentifierDoubleQuotedState:
1405 | if ($char === '"') {
1406 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPEPublicIdentifierState);
1407 | } else if ($char === '>') {
1408 | $this->_parseError();
1409 | $this->_Token->setForceQuirks();
1410 | return $this->_emitAndResumeIn();
1411 | } else if ($char === static::kEndOfFileMarker) {
1412 | $this->_parseError();
1413 | $this->_Token->setForceQuirks();
1414 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1415 | } else {
1416 | $this->_Token->appendToPublicIdentifier($char);
1417 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierDoubleQuotedState);
1418 | }
1419 | break;
1420 |
1421 | case static::DOCTYPEPublicIdentifierSingleQuotedState:
1422 | if ($char === '\'') {
1423 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPEPublicIdentifierState);
1424 | } else if ($char === '>') {
1425 | $this->_parseError();
1426 | $this->_Token->setForceQuirks();
1427 | return $this->_emitAndResumeIn();
1428 | } else if ($char === static::kEndOfFileMarker) {
1429 | $this->_parseError();
1430 | $this->_Token->setForceQuirks();
1431 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1432 | } else {
1433 | $this->_Token->appendToPublicIdentifier($char);
1434 | $this->_HTML_ADVANCE_TO(static::DOCTYPEPublicIdentifierSingleQuotedState);
1435 | }
1436 | break;
1437 |
1438 | case static::AfterDOCTYPEPublicIdentifierState:
1439 | if ($this->_isTokenizerWhitespace($char)) {
1440 | $this->_HTML_ADVANCE_TO(static::BetweenDOCTYPEPublicAndSystemIdentifiersState);
1441 | } else if ($char === '>') {
1442 | return $this->_emitAndResumeIn();
1443 | } else if ($char === '"') {
1444 | $this->_parseError();
1445 | $this->_Token->setSystemIdentifierToEmptyString();
1446 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierDoubleQuotedState);
1447 | } else if ($char === '\'') {
1448 | $this->_parseError();
1449 | $this->_Token->setSystemIdentifierToEmptyString();
1450 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierSingleQuotedState);
1451 | } else if ($char === static::kEndOfFileMarker) {
1452 | $this->_parseError();
1453 | $this->_Token->setForceQuirks();
1454 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1455 | } else {
1456 | $this->_parseError();
1457 | $this->_Token->setForceQuirks();
1458 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState);
1459 | }
1460 | break;
1461 |
1462 | case static::BetweenDOCTYPEPublicAndSystemIdentifiersState:
1463 | if ($this->_isTokenizerWhitespace($char)) {
1464 | $this->_HTML_ADVANCE_TO(static::BetweenDOCTYPEPublicAndSystemIdentifiersState);
1465 | } else if ($char === '>') {
1466 | return $this->_emitAndResumeIn();
1467 | } else if ($char === '"') {
1468 | $this->_Token->setSystemIdentifierToEmptyString();
1469 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierDoubleQuotedState);
1470 | } else if ($char === '\'') {
1471 | $this->_Token->setSystemIdentifierToEmptyString();
1472 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierSingleQuotedState);
1473 | } else if ($char === static::kEndOfFileMarker) {
1474 | $this->_parseError();
1475 | $this->_Token->setForceQuirks();
1476 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1477 | } else {
1478 | $this->_parseError();
1479 | $this->_Token->setForceQuirks();
1480 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState);
1481 | }
1482 | break;
1483 |
1484 | case static::AfterDOCTYPESystemKeywordState:
1485 | if ($this->_isTokenizerWhitespace($char)) {
1486 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPESystemIdentifierState);
1487 | } else if ($char === '"') {
1488 | $this->_parseError();
1489 | $this->_Token->setSystemIdentifierToEmptyString();
1490 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierDoubleQuotedState);
1491 | } else if ($char === '\'') {
1492 | $this->_parseError();
1493 | $this->_Token->setSystemIdentifierToEmptyString();
1494 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierSingleQuotedState);
1495 | } else if ($char === '>') {
1496 | $this->_parseError();
1497 | $this->_Token->setForceQuirks();
1498 | return $this->_emitAndResumeIn();
1499 | } else if ($char === static::kEndOfFileMarker) {
1500 | $this->_parseError();
1501 | $this->_Token->setForceQuirks();
1502 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1503 | } else {
1504 | $this->_parseError();
1505 | $this->_Token->setForceQuirks();
1506 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState);
1507 | }
1508 | break;
1509 |
1510 | case static::BeforeDOCTYPESystemIdentifierState:
1511 | if ($this->_isTokenizerWhitespace($char)) {
1512 | $this->_HTML_ADVANCE_TO(static::BeforeDOCTYPESystemIdentifierState);
1513 | continue;
1514 | }
1515 | if ($char === '"') {
1516 | $this->_Token->setSystemIdentifierToEmptyString();
1517 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierDoubleQuotedState);
1518 | } else if ($char === '\'') {
1519 | $this->_Token->setSystemIdentifierToEmptyString();
1520 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierSingleQuotedState);
1521 | } else if ($char === '>') {
1522 | $this->_parseError();
1523 | $this->_Token->setForceQuirks();
1524 | return $this->_emitAndResumeIn();
1525 | } else if ($char === static::kEndOfFileMarker) {
1526 | $this->_parseError();
1527 | $this->_Token->setForceQuirks();
1528 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1529 | } else {
1530 | $this->_parseError();
1531 | $this->_Token->setForceQuirks();
1532 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState);
1533 | }
1534 | break;
1535 |
1536 | case static::DOCTYPESystemIdentifierDoubleQuotedState:
1537 | if ($char === '"') {
1538 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPESystemIdentifierState);
1539 | } else if ($char === '>') {
1540 | $this->_parseError();
1541 | $this->_Token->setForceQuirks();
1542 | return $this->_emitAndResumeIn();
1543 | } else if ($char === static::kEndOfFileMarker) {
1544 | $this->_parseError();
1545 | $this->_Token->setForceQuirks();
1546 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1547 | } else {
1548 | $this->_Token->appendToSystemIdentifier($char);
1549 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierDoubleQuotedState);
1550 | }
1551 | break;
1552 |
1553 | case static::DOCTYPESystemIdentifierSingleQuotedState:
1554 | if ($char === '\'') {
1555 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPESystemIdentifierState);
1556 | } else if ($char === '>') {
1557 | $this->_parseError();
1558 | $this->_Token->setForceQuirks();
1559 | return $this->_emitAndResumeIn();
1560 | } else if ($char === static::kEndOfFileMarker) {
1561 | $this->_parseError();
1562 | $this->_Token->setForceQuirks();
1563 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1564 | } else {
1565 | $this->_Token->appendToSystemIdentifier($char);
1566 | $this->_HTML_ADVANCE_TO(static::DOCTYPESystemIdentifierSingleQuotedState);
1567 | }
1568 | break;
1569 |
1570 | case static::AfterDOCTYPESystemIdentifierState:
1571 | if ($this->_isTokenizerWhitespace($char)) {
1572 | $this->_HTML_ADVANCE_TO(static::AfterDOCTYPESystemIdentifierState);
1573 | } else if ($char === '>') {
1574 | return $this->_emitAndResumeIn();
1575 | } else if ($char === static::kEndOfFileMarker) {
1576 | $this->_parseError();
1577 | $this->_Token->setForceQuirks();
1578 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1579 | } else {
1580 | $this->_parseError();
1581 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState);
1582 | }
1583 | break;
1584 |
1585 | case static::BogusDOCTYPEState:
1586 | if ($char === '>') {
1587 | return $this->_emitAndResumeIn();
1588 | } else if ($char === static::kEndOfFileMarker) {
1589 | return $this->_emitAndReconsumeIn($source, HTMLTokenizer::DataState);
1590 | }
1591 | $this->_HTML_ADVANCE_TO(static::BogusDOCTYPEState);
1592 | break;
1593 |
1594 | case static::CDATASectionState:
1595 | if ($char === ']') {
1596 | $this->_HTML_ADVANCE_TO(static::CDATASectionRightSquareBracketState);
1597 | } else if ($char === static::kEndOfFileMarker) {
1598 | $this->_HTML_RECONSUME_IN(static::DataState);
1599 | } else {
1600 | $this->_bufferCharacter($char);
1601 | $this->_HTML_ADVANCE_TO(static::CDATASectionState);
1602 | }
1603 | break;
1604 |
1605 | case static::CDATASectionRightSquareBracketState:
1606 | if ($char === ']') {
1607 | $this->_HTML_ADVANCE_TO(static::CDATASectionDoubleRightSquareBracketState);
1608 | } else {
1609 | $this->_bufferCharacter(']');
1610 | $this->_HTML_RECONSUME_IN(static::CDATASectionState);
1611 | }
1612 | break;
1613 |
1614 | case static::CDATASectionDoubleRightSquareBracketState:
1615 | if ($char === '>') {
1616 | $this->_HTML_ADVANCE_TO(static::DataState);
1617 | } else {
1618 | $this->_bufferCharacter(']');
1619 | $this->_bufferCharacter(']');
1620 | $this->_HTML_RECONSUME_IN(static::CDATASectionState);
1621 | }
1622 | break;
1623 | default:
1624 | break 2;
1625 | }
1626 | }
1627 | // ASSERT_NOT_REACHED
1628 | return false;
1629 | }
1630 |
/**
 * Forwards a parse error to the token under construction and then calls
 * the (currently empty) not-implemented hook.
 *
 * No parameter is declared; an argument supplied by a caller is ignored.
 */
protected function _parseError() {
    $token = $this->_Token;
    $token->parseError();

    $this->_notImplemented();
}
1635 |
/**
 * Placeholder for the logging hook declared in
 * Source/core/platform/NotImplemented.h; deliberately does nothing.
 */
protected function _notImplemented() {
    return;
}
1640 |
/**
 * Tells whether the temporary buffer currently holds exactly the given text.
 *
 * @param string $expectedString text to compare the temporary buffer against
 * @return bool result of _vectorEqualsString() for the buffer and $expectedString
 */
protected function _temporaryBufferIs($expectedString) {
    $buffered = $this->_temporaryBuffer;

    return $this->_vectorEqualsString($buffered, $expectedString);
}
1644 |
/**
 * Strict (type and value) comparison of the two arguments.
 *
 * @param mixed $vector first operand
 * @param mixed $string second operand
 * @return bool true only when both operands are identical under ===
 */
protected function _vectorEqualsString($vector, $string) {
    $identical = ($string === $vector);

    return $identical;
}
1648 |
/**
 * Checks whether the buffered end-tag name is identical to the name
 * remembered as the appropriate end tag.
 *
 * @return bool true when both names are strictly equal
 */
protected function _isAppropriateEndTag() {
    $expectedName = $this->_appropriateEndTagName;

    return $expectedName === $this->_bufferedEndTagName;
}
1652 |
/**
 * Finishes the current token and selects the state to continue in,
 * without moving the input cursor.
 *
 * @param SegmentedString $source accepted for signature parity; not used here
 * @param mixed $state state to store in $this->_state
 * @return bool always true
 */
protected function _emitAndReconsumeIn(SegmentedString $source, $state) {
    // Remember the start-tag name (if any) before the state is switched.
    $this->_saveEndTagNameIfNeeded();

    $this->_state = $state;

    return true;
}
1658 |
/**
 * When the current token is a start tag, stores its name as the
 * appropriate end-tag name; any other token type leaves it untouched.
 */
protected function _saveEndTagNameIfNeeded() {
    if ($this->_Token->getType() !== HTMLToken::StartTag) {
        return;
    }

    $this->_appropriateEndTagName = $this->_Token->getName();
}
1664 |
/**
 * Handles the end of input.
 *
 * If a character token is buffered, nothing is changed so that the
 * buffered token can be delivered first. Otherwise the state is reset to
 * DataState and the current token is turned into an end-of-file token.
 *
 * @return bool always true
 */
protected function _emitEndOfFile() {
    if (!$this->_haveBufferedCharacterToken()) {
        $this->_state = HTMLTokenizer::DataState;
        $this->_Token->makeEndOfFile();
    }

    return true;
}
1676 |
/**
 * Finishes the current token, consumes the current character and resumes
 * in DataState (the state is fixed; no state parameter is taken).
 *
 * @return bool always true
 */
protected function _emitAndResumeIn() {
    // Trace first: addState() reads the state and cursor before they change.
    $this->addState();
    $this->_saveEndTagNameIfNeeded();

    $this->_state = static::DataState;

    $input = $this->_SegmentedString;
    $input->advance();

    return true;
}
1686 |
/**
 * Switches to the given state and flushes the buffered end tag.
 *
 * @param SegmentedString $source input handed on to _flushBufferedEndTag()
 * @param mixed $state state to store in $this->_state
 * @return bool always true; the result of the flush is not propagated
 */
protected function _flushEmitAndResumeIn(SegmentedString $source, $state) {
    $this->_state = $state;

    // Return value intentionally discarded.
    $this->_flushBufferedEndTag($source);

    return true;
}
1693 |
/**
 * Advances the input by one character and, unless a character token is
 * pending, starts an end-tag token from the buffered end-tag name.
 *
 * @param SegmentedString $source input whose cursor is advanced
 * @return bool true when the current token is a character token (nothing
 *              else is touched); false after the end tag was begun and the
 *              end-tag name buffers and temporary buffer were cleared
 */
protected function _flushBufferedEndTag(SegmentedString $source) {
    $source->advance();

    $characterPending = $this->_Token->getType() === HTMLToken::Character;
    if (!$characterPending) {
        $this->_Token->beginEndTag($this->_bufferedEndTagName);

        // The buffered name has been handed to the token; reset the scratch state.
        $this->_bufferedEndTagName = '';
        $this->_appropriateEndTagName = '';
        $this->_temporaryBuffer = '';
    }

    return $characterPending;
}
1705 |
/**
 * Tells whether the token under construction is a character token.
 *
 * @return bool true when the token type is HTMLToken::Character
 */
protected function _haveBufferedCharacterToken() {
    $type = $this->_Token->getType();

    return HTMLToken::Character === $type;
}
1709 |
/**
 * Appends one character to the current token after asking the token to
 * make sure it is a character token.
 *
 * @param string $char character to append
 */
protected function _bufferCharacter($char) {
    $token = $this->_Token;

    $token->ensureIsCharacterToken();
    $token->appendToCharacter($char);
}
1714 |
/**
 * Whether "[CDATA[" sections are recognised in markup declarations.
 *
 * TODO: currently hard-wired to true.
 *
 * @return bool always true
 */
protected function _shouldAllowCDATA() {
    return true;
}
1719 |
/**
 * Tests a character against the tokenizer's whitespace set:
 * space, line feed (0x0A), tab (0x09) and form feed (0x0C).
 * Carriage return (0x0D) is not part of the set.
 *
 * @param mixed $char value to test; compared strictly, so non-strings never match
 * @return bool true when $char is one of the four whitespace characters
 */
protected function _isTokenizerWhitespace($char) {
    if ($char === ' ' || $char === "\x0A") {
        return true;
    }

    return $char === "\x09" || $char === "\x0C";
}
1723 |
/**
 * Records the debug trace, switches to the given state and flushes the
 * buffered end tag against the tokenizer's own input.
 *
 * @param mixed $state state to store in $this->_state
 * @return bool|null true when the flush reported a pending character
 *                   token, null otherwise
 */
protected function _FLUSH_AND_ADVANCE_TO($state) {
    // Trace first: addState() reads the state before it is replaced.
    $this->addState();
    $this->_state = $state;

    $flushed = $this->_flushBufferedEndTag($this->_SegmentedString);

    return $flushed ? true : null;
}
1733 |
/**
 * Selects the next state without consuming the current character, so the
 * same character is examined again in that state.
 *
 * @param mixed $state state to store in $this->_state
 */
protected function _HTML_RECONSUME_IN($state) {
    $this->_state = $state;
}
1737 |
/**
 * Stores the given state as the current one; the input cursor is not
 * touched here.
 *
 * @param mixed $state state to store in $this->_state
 */
protected function _HTML_SWITCH_TO($state) {
    $this->_state = $state;
}
1741 |
/**
 * Records the debug trace, switches to the given state and consumes the
 * current character.
 *
 * @param mixed $state state to store in $this->_state
 */
protected function _HTML_ADVANCE_TO($state) {
    // Trace first: addState() reads the state and cursor before they change.
    $this->addState();

    $this->_state = $state;

    $input = $this->_SegmentedString;
    $input->advance();
}
1747 |
/**
 * In debug mode, records the current state in the trace buffer, keyed by
 * the cursor position relative to the recorded start position.
 * Does nothing when debugging is off.
 */
protected function addState() {
    if ($this->_debug) {
        $relativeOffset = $this->_SegmentedString->tell() - $this->_startPos;
        $this->_buffer[$relativeOffset] = $this->_state;
    }
}
1754 |
1755 | }
--------------------------------------------------------------------------------
/src/zz/Html/SegmentedString.php:
--------------------------------------------------------------------------------
1 | str = $str;
44 | $this->len = strlen($str);
45 | }
46 |
/**
 * Returns the character under the cursor without moving it.
 *
 * @return bool|string the single character at the cursor, or false when
 *                     the cursor is at or beyond the end of the string
 */
public function getCurrentChar() {
    return $this->i < $this->len ? $this->str[$this->i] : false;
}
57 |
/**
 * Moves the cursor forward by one position; no upper bound is enforced.
 */
public function advance() {
    $this->i = $this->i + 1;
}
61 |
/**
 * Consumes $i characters starting at the cursor and returns them.
 *
 * The cursor always moves by the full $i, even when fewer characters
 * remain; the returned text is whatever substr() yields for that range.
 *
 * @param int $i number of characters to consume
 * @return bool|string the consumed text, or false when a positive count
 *                     is requested while the cursor is already at the end
 */
public function read($i) {
    $exhausted = $this->eos();
    if ($exhausted && $i > 0) {
        return false;
    }

    $start = $this->i;
    $this->i = $start + $i;

    return substr($this->str, $start, $i);
}
73 |
/**
 * Returns a slice of the underlying string without touching the cursor.
 *
 * @param int $startPos offset of the first character of the slice
 * @param int $length length of the slice
 * @return string result of PHP's substr() for the given range
 */
public function substr($startPos, $length) {
    $haystack = $this->str;

    return substr($haystack, $startPos, $length);
}
82 |
/**
 * Moves the cursor to a new position.
 *
 * With static::begin, $offset is the absolute target position; with
 * static::current it is added to the present cursor position. In both
 * modes the target must lie within 0..len (inclusive); otherwise the
 * cursor is left untouched and false is returned.
 *
 * @param int $offset absolute position or relative displacement
 * @param int $whence static::begin (default) or static::current
 * @throws \InvalidArgumentException when $whence is neither of the two modes
 * @return bool true when the cursor was moved, false when the target is out of range
 */
public function seek($offset, $whence = self::begin) {
    switch ($whence) {
        case static::begin:
            $target = $offset;
            break;
        case static::current:
            $target = $this->i + $offset;
            break;
        default:
            throw new \InvalidArgumentException('Unsupported $whence value passed to seek()');
    }

    // One bounds check for both modes: a negative cursor would make
    // $this->str[$i] read from the end of the string on PHP >= 7.1.
    if ($target < 0 || $target > $this->len) {
        return false;
    }

    $this->i = $target;
    return true;
}
110 |
/**
 * Reports the current cursor position.
 *
 * @return int the cursor offset
 */
public function tell() {
    $position = $this->i;

    return $position;
}
117 |
/**
 * Tells whether the cursor has reached (or passed) the end of the string.
 *
 * @return bool true when no character is left at the cursor
 */
public function eos() {
    return $this->i >= $this->len;
}
124 |
/**
 * Returns the complete underlying string, regardless of the cursor.
 *
 * @return string the stored string
 */
public function get() {
    $whole = $this->str;

    return $whole;
}
128 |
/**
 * Returns the stored length of the underlying string.
 *
 * @return int the value of the len property
 */
public function len() {
    $length = $this->len;

    return $length;
}
132 |
/**
 * Consumes strlen($str) characters and compares them with $str.
 *
 * The characters are consumed via read() whether or not they match.
 *
 * @param string $str text expected at the cursor
 * @param bool $caseSensitive compare exactly (true) or after lower-casing both sides (false)
 * @return bool|string on a case-sensitive match $str itself, on a
 *                     case-insensitive match the text as read from the
 *                     input, and false when there is no match
 */
public function token($str, $caseSensitive = true) {
    $candidate = $this->read(strlen($str));

    if (!$caseSensitive) {
        return strtolower($candidate) === strtolower($str) ? $candidate : false;
    }

    return $candidate === $str ? $str : false;
}
141 |
/**
 * Case-insensitive look-ahead for $str at the cursor.
 *
 * @param string $str text to look for
 * @return mixed the result of _lookAhead() in case-insensitive mode
 */
public function lookAheadIgnoringCase($str) {
    $caseSensitive = false;

    return $this->_lookAhead($str, $caseSensitive);
}
145 |
/**
 * Case-sensitive look-ahead for $str at the cursor.
 *
 * @param string $str text to look for
 * @return mixed the result of _lookAhead() in case-sensitive mode
 */
public function lookAhead($str) {
    $caseSensitive = true;

    return $this->_lookAhead($str, $caseSensitive);
}
149 |
/**
 * Checks whether $str occurs at the cursor and seeks back to the
 * starting position afterwards.
 *
 * @param string $str text to look for
 * @param bool $caseSensitive compare exactly (true) or ignoring case (false)
 * @return mixed static::NotEnoughCharacters when fewer than strlen($str)
 *               characters remain from the cursor; otherwise
 *               static::DidMatch or static::DidNotMatch
 */
protected function _lookAhead($str, $caseSensitive = true) {
    $origin = $this->i;

    // token() consumes input, so seek back to where we started.
    $matched = false !== $this->token($str, $caseSensitive);
    $this->seek($origin);

    if ($origin + strlen($str) > $this->len) {
        return static::NotEnoughCharacters;
    }

    return $matched ? static::DidMatch : static::DidNotMatch;
}
162 |
/**
 * Number of characters consumed so far, which here is simply the cursor
 * position.
 *
 * The C++ original this was ported from also accounted for pushed-back
 * characters and previously consumed sub-strings; this port does neither.
 *
 * @return int the cursor offset
 */
public function numberOfCharactersConsumed() {
    $consumed = $this->i;

    return $consumed;
}
174 |
175 | }
176 |
--------------------------------------------------------------------------------