*/
172 | if (stack[length - 1].tokenType === lexer_1.TOKEN_CONTENT_TEXT /*contentText*/) {
173 | if ((lexer.sourceCode[0] === "<" &&
174 | lexer_1.regexName.test(lexer.sourceCode[1])) /*contentText
*/ ||
175 | lexer.sourceCode.slice(0, 2) === " || */) {
177 | return false;
178 | }
179 | else {
180 | /*contentText
*/
181 | if ((lexer.sourceCode[0] === "<"
182 | && lexer_1.regexName.test(lexer.sourceCode[1]))) {
183 | let parseRes = Html_1.parseHtml(lexer);
184 | if (parseRes.selfClose) {
185 | return false;
186 | }
187 | }
188 | return true;
189 | }
190 | }
191 | // return true
192 | throw new Error(`not find contentEnd! at line ${lexer.GetLineNum()} ${lexer.sourceCode.slice(0, 100)}`);
193 | }
// Parses a run of raw text characters into a Node of type "text".
// Consumes characters one (or two, for CRLF/LFCR pairs) at a time until
// contentEnd() reports the end of the text run or the source is exhausted,
// incrementing lexer.lineNum for every newline encountered.
194 | function parseText(lexer) {
// Invalidate the lexer's one-token lookahead cache: we read sourceCode directly.
195 | lexer.hasCache = false;
196 | let node = new Node();
197 | if (!lexer.check) {
198 | node.nextSibling = null;
199 | }
200 | // lexer.isIgnored();
201 | node.LineNum = lexer.GetLineNum();
202 | let content = "";
// Main accumulation loop: copy characters verbatim, tracking line numbers.
203 | while (contentEnd(lexer) && !lexer.isEmpty()) {
204 | if (lexer.nextSourceCodeIs("\r\n") || lexer.nextSourceCodeIs("\n\r")) {
205 | lexer.lineNum += 1;
206 | content += lexer.sourceCode.slice(0, 2);
207 | lexer.skipSourceCode(2);
208 | }
209 | else {
210 | if (lexer.isNewLine(lexer.sourceCode[0])) {
211 | lexer.lineNum += 1;
212 | content += lexer.sourceCode[0];
213 | lexer.skipSourceCode(1);
214 | }
215 | else {
216 | content += lexer.sourceCode[0];
217 | lexer.skipSourceCode(1);
218 | }
219 | }
220 | }
// Special-tag handling (e.g. raw-text elements): when the tag three entries
// down the token stack is "special", look for its literal close sequence.
221 | if (lexer.stack.length >= 3 &&
222 | tagClose_1.isSpecialTag({ tag: lexer.stack[lexer.stack.length - 3].token })) {
223 | let token = lexer.stack[lexer.stack.length - 3].token;
224 | let tokenLen = `${token}>`.length;
225 | if (lexer.sourceCode.slice(0, tokenLen) === `${token}>`) {
// NOTE(review): only 2 characters are skipped although the matched close
// sequence is tokenLen characters long — verify against the lexer cursor
// handling in parseClose/GetNextToken before changing.
226 | lexer.skipSourceCode(2);
227 | let res = { lineNum: lexer.lineNum, tokenType: lexer_1.TOKEN_CLOSE, token: "" };
228 | lexer.stack.push(res);
229 | tagClose_1.parseClose(lexer);
230 | lexer.GetNextToken();
// Second accumulation loop, identical in shape to the first: continue
// collecting text that follows the special tag's close sequence.
231 | while (contentEnd(lexer) && !lexer.isEmpty()) {
232 | if (lexer.nextSourceCodeIs("\r\n") || lexer.nextSourceCodeIs("\n\r")) {
233 | lexer.lineNum += 1;
234 | content += lexer.sourceCode.slice(0, 2);
235 | lexer.skipSourceCode(2);
236 | }
237 | else {
238 | if (lexer.isNewLine(lexer.sourceCode[0])) {
239 | lexer.lineNum += 1;
240 | content += lexer.sourceCode[0];
241 | lexer.skipSourceCode(1);
242 | }
243 | else {
244 | content += lexer.sourceCode[0];
245 | lexer.skipSourceCode(1);
246 | }
247 | }
248 | }
// NOTE(review): Array.prototype.splice's 2nd argument is a delete COUNT, not
// an end index — `lexer.stack.length - 1` here deletes almost the whole tail
// starting at length-4. Confirm this is the intended stack cleanup.
249 | lexer.stack.splice(lexer.stack.length - 4, lexer.stack.length - 1);
250 | }
251 | }
252 | // lexer.isIgnored();
253 | node.content = content;
254 | node.type = "text";
255 | return node;
256 | }
257 | exports.parseText = parseText;
258 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HTML Parser
2 |
3 | ## 解析HTML
4 |
5 | [](https://github.com/liulinboyi/HTMLParser/actions/workflows/tests.yml)
6 |
7 | ## HTML
8 |
9 | ```html
10 | <!DOCTYPE html>
11 | <html lang="en">
12 | <head>
13 |     <meta charset="UTF-8">
14 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
15 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
16 |     <title>Document</title>
17 | </head>
18 | <body>
19 |     <div>
20 |         <h1 v-if="res.value" name="11" @click="tes">11{{res.value}}</h1>
21 |     </div>
22 |     <a href="http://github.com/"></a>
23 | </body>
24 | </html>
25 | ```
26 |
27 | ## AST
28 |
29 | 点击查看详情(Click to view details)
30 |
31 | {
32 | "type": "root",
33 | "children": [
34 | {
35 | "type": "DTD",
36 | "LineNum": 1,
37 | "content": "DOCTYPE html"
38 | },
39 | {
40 | "content": "\r\n",
41 | "LineNum": 1,
42 | "type": "text"
43 | },
44 | {
45 | "children": [
46 | {
47 | "content": "\r\n",
48 | "LineNum": 2,
49 | "type": "text"
50 | },
51 | {
52 | "children": [
53 | {
54 | "content": "\r\n ",
55 | "LineNum": 3,
56 | "type": "text"
57 | },
58 | {
59 | "children": [],
60 | "attr": [
61 | {
62 | "name": "charset",
63 | "value": "UTF-8"
64 | }
65 | ],
66 | "LineNum": 4,
67 | "type": "tag",
68 | "tag": "meta"
69 | },
70 | {
71 | "content": "\r\n ",
72 | "LineNum": 4,
73 | "type": "text"
74 | },
75 | {
76 | "children": [],
77 | "attr": [
78 | {
79 | "name": "http-equiv",
80 | "value": "X-UA-Compatible"
81 | },
82 | {
83 | "name": "content",
84 | "value": "IE=edge"
85 | }
86 | ],
87 | "LineNum": 5,
88 | "type": "tag",
89 | "tag": "meta"
90 | },
91 | {
92 | "content": "\r\n ",
93 | "LineNum": 5,
94 | "type": "text"
95 | },
96 | {
97 | "children": [],
98 | "attr": [
99 | {
100 | "name": "name",
101 | "value": "viewport"
102 | },
103 | {
104 | "name": "content",
105 | "value": "width=device-width, initial-scale=1.0"
106 | }
107 | ],
108 | "LineNum": 6,
109 | "type": "tag",
110 | "tag": "meta"
111 | },
112 | {
113 | "content": "\r\n ",
114 | "LineNum": 6,
115 | "type": "text"
116 | },
117 | {
118 | "children": [
119 | {
120 | "content": "Document",
121 | "LineNum": 7,
122 | "type": "text"
123 | }
124 | ],
125 | "attr": [],
126 | "LineNum": 7,
127 | "type": "tag",
128 | "tag": "title"
129 | },
130 | {
131 | "content": "\r\n",
132 | "LineNum": 7,
133 | "type": "text"
134 | }
135 | ],
136 | "attr": [],
137 | "LineNum": 3,
138 | "type": "tag",
139 | "tag": "head"
140 | },
141 | {
142 | "content": "\r\n",
143 | "LineNum": 8,
144 | "type": "text"
145 | },
146 | {
147 | "children": [
148 | {
149 | "content": "\r\n ",
150 | "LineNum": 9,
151 | "type": "text"
152 | },
153 | {
154 | "children": [
155 | {
156 | "content": "\r\n ",
157 | "LineNum": 10,
158 | "type": "text"
159 | },
160 | {
161 | "children": [
162 | {
163 | "content": "11{{res.value}}",
164 | "LineNum": 11,
165 | "type": "text"
166 | }
167 | ],
168 | "attr": [
169 | {
170 | "name": "v-if",
171 | "value": "res.value"
172 | },
173 | {
174 | "name": "name",
175 | "value": "11"
176 | },
177 | {
178 | "name": "@click",
179 | "value": "tes"
180 | }
181 | ],
182 | "LineNum": 11,
183 | "type": "tag",
184 | "tag": "h1"
185 | },
186 | {
187 | "content": "\r\n ",
188 | "LineNum": 11,
189 | "type": "text"
190 | }
191 | ],
192 | "attr": [],
193 | "LineNum": 10,
194 | "type": "tag",
195 | "tag": "div"
196 | },
197 | {
198 | "content": "\r\n ",
199 | "LineNum": 12,
200 | "type": "text"
201 | },
202 | {
203 | "children": [],
204 | "attr": [
205 | {
206 | "name": "href",
207 | "value": "http://github.com/"
208 | }
209 | ],
210 | "LineNum": 13,
211 | "type": "tag",
212 | "tag": "a"
213 | },
214 | {
215 | "content": "\r\n",
216 | "LineNum": 13,
217 | "type": "text"
218 | }
219 | ],
220 | "attr": [],
221 | "LineNum": 9,
222 | "type": "tag",
223 | "tag": "body"
224 | },
225 | {
226 | "content": "\r\n",
227 | "LineNum": 14,
228 | "type": "text"
229 | }
230 | ],
231 | "attr": [
232 | {
233 | "name": "lang",
234 | "value": "en"
235 | }
236 | ],
237 | "LineNum": 2,
238 | "type": "tag",
239 | "tag": "html"
240 | }
241 | ],
242 | "LineNum": 1
243 | }
244 |
245 |
246 |
247 | ## 添加应用
248 | [查找节点](https://github.com/liulinboyi/HTMLParser-App/tree/main/platform)
249 |
250 | ## TIPS
251 |
252 | > 无运行时依赖
253 |
254 | 没有做到浏览器那样兼容性巨好,HTML写成啥样都不报错都会解析,我只解析了一部分奇葩写法~有的HTML写法太奇葩了,要兼容就需要更多的分支和处理,需要更多的精力就算了。
255 |
256 | ## 注意
257 |
258 | #### ~~tsc编译后无法加上.js后缀,导致无法使用module,所以在所有ts文件导入加上了js后缀~~
259 | #### ~~https://segmentfault.com/q/1010000038671707~~
260 | #### ~~[社区讨论](https://github.com/microsoft/TypeScript/issues/16577)~~
261 |
262 | #### 已解决,写了个[脚本](./script/addSuffixJs.js),将所有编译后的ES modules的导入导出部分加上了js后缀
263 |
264 | ## [测试](./test)
265 | #### 使用[playwright](https://github.com/microsoft/playwright.git)和浏览器生成的DOM结构做了对比,除了一些奇葩写法,其他基本没问题。
266 |
--------------------------------------------------------------------------------
/dist/parser.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | Object.defineProperty(exports, "__esModule", { value: true });
3 | exports.parse = exports.Program = void 0;
4 | const lexer_1 = require("./lexer");
5 | const Comment_1 = require("./parser/Comment");
6 | const Directive_1 = require("./parser/Directive");
7 | const DTD_1 = require("./parser/DTD");
8 | const Html_1 = require("./parser/Html");
9 | const parseText_1 = require("./parser/parseText");
10 | const tagClose_1 = require("./parser/tagClose");
// Root AST node produced by the parser: type "root" with an (initially empty)
// list of child statement nodes.
11 | class Program {
12 | constructor() {
13 | this.type = 'root';
14 | this.children = [];
15 | }
16 | }
17 | exports.Program = Program;
18 | // SourceCode ::= Statement+
// Parses the whole source into a root node, stamping it with the line number
// at which parsing started (normally 1).
19 | function parseSourceCode(lexer, check) {
20 | let LineNum = lexer.GetLineNum();
21 | let root = parseStatements(lexer, check);
22 | root.LineNum = LineNum;
23 | return root;
24 | }
25 | /**
26 | * 将children中的多余的text节点去除
27 | * @param children
28 | * @returns
29 | */
// Removes redundant text nodes: whenever two adjacent children are both text,
// one of them is marked `delete` (the whitespace-only one if possible,
// otherwise the first) and all marked nodes are filtered out.
30 | function filterText(children) {
31 | for (let start = 0; start < children.length; start++) {
32 | if (children[start].type === "text") {
33 | // 从实践中知道,如果有去除body后多余的text节点,则最多是两个取一个,所以有下面代码
34 | let i = start + 1;
35 | if (i < children.length && children[i].type === "text") {
36 | // 其中重要的特征就是,里面是只有\r\n和空格
37 | // 只要当前标签和下一个标签这两个标签,则一定会删除一个"空标签(只包含\r\n和空格)"
// Empty after stripping newlines and trimming => whitespace-only text node.
38 | if (!children[i].content.replace(/[\r\n]+/g, "").trim()) {
39 | children[i].delete = true; // 添加上delete属性,后面好处理
40 | }
41 | else {
42 | children[start].delete = true; // 添加上delete属性,后面好处理
43 | }
44 | }
45 | }
46 | }
47 | // 删除delete为true的标签
48 | return children.filter((item) => !item.delete);
49 | }
50 | // Statement
// Core statement loop: repeatedly parses statements and maintains an open-tag
// stack (`statements`, seeded with the root node) to build the tree. Handles
// browser-like recovery: mismatched close tags warn instead of throwing,
// elements that cannot nest in themselves (notInSelf) are auto-closed, and
// duplicate <body> elements are merged into the first one.
// NOTE(review): this is compiled output of /src/parser.ts; apply fixes there.
51 | function parseStatements(lexer, check) {
52 | if (check) {
53 | lexer.check = true;
54 | }
55 | let root = {
56 | type: "root",
57 | children: [],
58 | LineNum: 1
59 | };
60 | let statements = [root];
61 | let Block_level_elements = [
62 | "address",
63 | "article",
64 | "aside",
65 | "audio",
66 | "blockquote",
67 | "canvas",
68 | "dd",
69 | // "div",
70 | "dl",
71 | "fieldset",
72 | "figcaption",
73 | "figure",
74 | "figcaption",
75 | "footer",
76 | "form",
77 | "header",
78 | "hgroup",
79 | "hr",
80 | "noscript",
81 | "ol",
82 | "output",
83 | "p",
84 | "pre",
85 | "section",
86 | "table",
87 | "tfoot",
88 | "ul",
89 | "video"
90 | ];
// NOTE(review): this list is never read in this function (name is also
// scrambled; presumably "inline_elements").
91 | let inlInline_elementsine = [
92 | "b",
93 | "big",
94 | "i",
95 | "small",
96 | "tt",
97 | "abbr",
98 | "acronym",
99 | "cite",
100 | "code",
101 | "dfn",
102 | "em",
103 | "kbd",
104 | "strong",
105 | "samp",
106 | "var",
107 | "a",
108 | "bdo",
109 | "br",
110 | "img",
111 | "map",
112 | "object",
113 | "q",
114 | "script",
115 | "span",
116 | "sub",
117 | "sup",
118 | "button",
119 | "input",
120 | "label",
121 | "select",
122 | "textarea"
123 | ];
124 | let notInSelf = [
125 | "a",
126 | "br",
127 | "img",
128 | "script",
129 | "button",
130 | "input",
131 | ];
// NOTE(review): the two gutter-less lines below are remnants of comments whose
// <select>/<textarea> examples were stripped by the text extraction that
// produced this dump; they are not valid JavaScript here.
132 | // select
里面的select会消失
133 | // textarea
会解析成
134 | let body = null;
135 | let mainBodyFinished = false;
136 | let uniqueStack = [];
137 | // let mainBodyFinishedIsText = false
138 | // 先调用LookAhead一次,将GetNextToken的结果缓存
139 | while (!isSourceCodeEnd(lexer.LookAhead().tokenType)) {
140 | // if (lexer.GetLineNum() === 20) {
141 | // debugger
142 | // }
143 | let statement = {};
144 | statement = parseStatement(lexer);
145 | // console.log(`at line ${lexer.GetLineNum()} ${lexer.sourceCode.slice(0, 30)}`)
146 | if (!statement)
147 | continue;
148 | let stack = statements;
149 | let s = statement;
150 | const length = stack.length - 1;
151 | if (s.type === "tag") {
152 | s.tag = s.tag.toLocaleLowerCase();
153 | }
154 | if (!s.closeTag) {
155 | uniqueStack = [];
156 | if (notInSelf.includes(s.tag) && s.tag === stack[length].tag) { // 不能包含自己的元素
157 | stack.pop();
158 | stack[stack.length - 1].children.push(s);
159 | stack.push(s);
160 | if (check) {
161 | s.parent = stack[stack.length - 1];
162 | }
163 | continue;
164 | }
165 | // 处理多个body标签的问题
166 | // 如果mainBodyFinished位false,表示还未出现第一个body,并且当前起始标签是body,则寻找他的父节点,并将其赋值给body变量
167 | if (!mainBodyFinished && s.tag === "body" && !body) {
168 | // 寻找父节点
169 | let i = stack.length - 1;
170 | let parent = null;
// NOTE(review): the condition reads stack[i] BEFORE checking i >= 0; if no
// "tag" ancestor exists (stack[0] is the root, type "root"), stack[-1] is
// undefined and `.type` throws. Should be `i >= 0 && stack[i].type !== "tag"`.
// Fixed in /src/parser.ts.
171 | while (stack[i].type !== "tag" && i >= 0) {
172 | i--;
173 | }
174 | parent = i >= 0 ? stack[i] : null;
175 | // 找到的节点,赋值给body
176 | body = s;
177 | // 找到的父节点赋值给上面节点的parent属性,方便后续处理
178 | body.parent = parent;
179 | }
180 | stack[length].children.push(s); // 栈顶就是levalElement层级元素
181 | if (check) {
182 | s.parent = stack[length];
183 | }
184 | if (s.type === "tag" && !s.selfClose && !tagClose_1.isSpecialTag(s)) {
185 | stack.push(s);
186 | // 处理多个body标签的问题
187 | // 如果已经出现过一个body标签并且现在这个起始标签还是body,则将其从栈中弹出,并且将其从栈顶的children中弹出
188 | if (mainBodyFinished && s.tag === "body") {
189 | stack.pop();
190 | stack[length].children.pop();
191 | if (check) {
192 | s.parent = null;
193 | }
194 | }
195 | }
196 | // 处理多个body标签的问题
197 | // 如果出现第一个body起始标签,则将mainBodyFinished置为true,方便在第一个body标签中再次出现body起始标签时将其忽略
198 | if (!mainBodyFinished && s.tag === "body") {
199 | mainBodyFinished = true;
200 | }
201 | }
202 | else {
203 | if (stack[length].tag !== s.tag) {
204 | uniqueStack.push(s);
205 | // 处理多个body标签的问题
206 | // 如果当前第一个body标签解析完成(mainBodyFinished),并且当前结束标签是body,则直接进行下次循环
207 | if (mainBodyFinished && s.tag === "body") {
208 | continue;
209 | }
210 | if (Block_level_elements.includes(s.tag)) { // 如果是块级元素会加入到levalElement层级元素当child
211 | stack[length].children.push(s);
212 | if (check) {
213 | s.parent = stack[length];
214 | }
215 | }
216 | // 学习浏览器HTML解析,即使匹配不上也不报错,直接添加到levalElement层级元素当child
// NOTE(review): "is not math" is a typo for "match" in this warning message
// (fixed in /src/parser.ts).
217 | console.warn(`${stack[length].tag} and ${s.tag} is not math! at line ${lexer.GetLineNum()} ${lexer.sourceCode.slice(0, 100)}`);
218 | // throw new Error(`${stack[length].tag} and ${s.tag} is not math! at line ${lexer.GetLineNum()} ${lexer.sourceCode.slice(0, 100)}`)
219 | }
220 | else {
221 | // 处理多个body标签的问题
222 | // 如果第一个body标签没有解析完成(mainBodyFinished),并且当前结束标签是body,则mainBodyFinished置为true
223 | if (!mainBodyFinished && s.tag === "body") {
224 | mainBodyFinished = true;
225 | }
226 | stack.pop();
227 | if (uniqueStack.length > 0 && uniqueStack[uniqueStack.length - 1].tag === stack[stack.length - 1].tag) {
228 | uniqueStack.pop();
229 | stack.pop();
230 | }
231 | }
232 | }
233 | }
234 | // 处理多个body标签的问题
235 | // 找出body在父节点的索引
// NOTE(review): body.parent may be null (see the parent lookup above), in
// which case `.children` throws here. Guarded in /src/parser.ts.
236 | let index = body && body.parent.children.findIndex((item) => item === body);
237 | // 从父节点下一个索引开始添加到第一个body中
238 | let real = index + 1;
239 | if (body) {
240 | for (let i = real; i < body.parent.children.length; i++) {
241 | if (body.parent.children[i].type === "tag") {
242 | body.parent.children[i].children = filterText(body.parent.children[i].children);
243 | }
244 | body.children.push(body.parent.children[i]);
245 | }
246 | let childrenLength = body.parent.children.length;
247 | for (let i = real; i < childrenLength; i++) {
248 | body.parent.children.pop();
249 | }
250 | body.children = filterText(body.children);
251 | body.parent = null;
252 | }
// Drop whitespace-only text nodes immediately preceding the DTD or <html>.
253 | for (let i = 0; i < root.children.length; i++) {
254 | if (root.children[i].type === "DTD") {
255 | if (i - 1 >= 0 && root.children[i - 1].type === "text" && !root.children[i - 1].content.replace(/[\r\n]+/g, "").trim()) {
256 | root.children[i - 1].delete = true;
257 | }
258 | }
259 | if (root.children[i].tag === "html") {
260 | if (i - 1 >= 0 && root.children[i - 1].type === "text" && !root.children[i - 1].content.replace(/[\r\n]+/g, "").trim()) {
261 | root.children[i - 1].delete = true;
262 | }
263 | }
264 | if (check) {
265 | root.children[i].parent = null;
266 | }
267 | }
268 | root.children = root.children.filter((item) => !item.delete);
269 | return root;
270 | }
// Dispatches on the lookahead token: when the token stack's top is content
// text, parses a text node; otherwise routes to the tag / close-tag / DTD /
// comment / directive parser. Throws on an unrecognized token.
271 | function parseStatement(lexer) {
272 | // 向前看一个token并跳过
273 | lexer.LookAheadAndSkip(lexer_1.TOKEN_IGNORED); // skip if source code start with ignored token
274 | let look = lexer.LookAhead().tokenType;
275 | let flag = false;
276 | let top = lexer.stack[lexer.stack.length - 1];
277 | if (top.tokenType === lexer_1.TOKEN_CONTENT_TEXT
278 | // isClose(lexer) &&
279 | // top.tokenType !== TOKEN_LEFT_PAREN /*<*/ &&
280 | // top.tokenType !== TOKEN_CLOSE /**/ &&
281 | // top.tokenType !== TOKEN_DTD /*DTD*/ &&
282 | // top.tokenType !== COMMENT /*COMMENT*/
283 | ) {
284 | flag = true;
285 | }
286 | else {
287 | flag = false;
288 | }
289 | if (flag) {
290 | return parseText_1.parseText(lexer);
291 | }
292 | else {
293 | switch (look) {
294 | case lexer_1.TOKEN_LEFT_PAREN: // <
295 | return Html_1.parseHtml(lexer);
296 | case lexer_1.TOKEN_CLOSE: //
297 | return tagClose_1.parseClose(lexer);
298 | case lexer_1.TOKEN_DTD: // dtd
299 | return DTD_1.parseDtd(lexer);
300 | case lexer_1.COMMENT:
301 | return Comment_1.paseComment(lexer);
302 | case lexer_1.DIRECTIVE:
303 | return Directive_1.paseDirective(lexer);
304 | default:
305 | throw new Error(`parseStatement(): unknown Statement. at line ${lexer.GetLineNum()} ${lexer.sourceCode.slice(0, 50)}`);
306 | }
307 | }
308 | }
// True when the given token type is the end-of-file marker.
309 | function isSourceCodeEnd(token) {
310 | return token === lexer_1.TOKEN_EOF;
311 | }
// Public entry point: lexes + parses `code` into a root AST node.
// `check` enables parent back-references during parsing (stripped before
// returning, see parseStatements). Asserts the lexer ends on EOF.
312 | function parse(code, check) {
313 | let lexer = lexer_1.NewLexer(code);
314 | let sourceCode = parseSourceCode(lexer, check);
315 | lexer.NextTokenIs(lexer_1.TOKEN_EOF);
316 | return sourceCode;
317 | }
318 | exports.parse = parse;
319 |
--------------------------------------------------------------------------------
/src/parser.ts:
--------------------------------------------------------------------------------
1 | import { COMMENT, DIRECTIVE, Lexer, NewLexer, TOKEN_CLOSE, TOKEN_CONTENT_TEXT, TOKEN_DTD, TOKEN_EOF, TOKEN_IGNORED, TOKEN_LEFT_PAREN } from "./lexer"
2 | import { paseComment } from "./parser/Comment"
3 | import { paseDirective } from "./parser/Directive"
4 | import { parseDtd } from "./parser/DTD"
5 | import { parseHtml } from "./parser/Html"
6 | import { parseText } from "./parser/parseText"
7 | import { isSpecialTag, parseClose } from "./parser/tagClose"
8 |
// Root AST node shape + class (declaration-merged with the interface above).
// NOTE(review): `Array,` below almost certainly read `Array<any>,` in the real
// source — the generic parameter was stripped as an HTML tag by the extraction
// that produced this dump. Confirm against the repository.
9 | export interface Program {
10 | type?: string,
11 | LineNum?: number,
12 | children: Array,
13 | }
14 |
15 | export class Program {
16 | constructor() {
17 | this.type = 'root'
18 | this.children = []
19 | }
20 | }
21 |
22 |
23 | // SourceCode ::= Statement+
// Parses the whole source into a root node, stamping it with the line number
// at which parsing started (normally 1).
24 | function parseSourceCode(lexer: Lexer, check: boolean) {
25 | let LineNum = lexer.GetLineNum()
26 | let root = parseStatements(lexer, check)
27 | root.LineNum = LineNum
28 | return root
29 | }
30 |
31 | /**
32 | * 将children中的多余的text节点去除
33 | * @param children
34 | * @returns
35 | */
// Removes redundant text nodes: whenever two adjacent children are both text,
// one is marked `delete` (the whitespace-only one if possible, otherwise the
// first) and all marked nodes are filtered out.
36 | function filterText(children: any) {
37 | for (let start = 0; start < children.length; start++) {
38 | if (children[start].type === "text") {
39 | // 从实践中知道,如果有去除body后多余的text节点,则最多是两个取一个,所以有下面代码
40 | let i = start + 1
41 | if (i < children.length && children[i].type === "text") {
42 | // 其中重要的特征就是,里面是只有\r\n和空格
43 | // 只要当前标签和下一个标签这两个标签,则一定会删除一个"空标签(只包含\r\n和空格)"
// Empty after stripping newlines and trimming => whitespace-only text node.
44 | if (!children[i].content.replace(/[\r\n]+/g, "").trim()) {
45 | children[i].delete = true // 添加上delete属性,后面好处理
46 | } else {
47 | children[start].delete = true // 添加上delete属性,后面好处理
48 | }
49 | }
50 | }
51 | }
52 | // 删除delete为true的标签
53 | return children.filter((item: any) => !item.delete)
54 | }
55 |
56 | // Statement
/**
 * Core statement loop: repeatedly parses statements and maintains an open-tag
 * stack (`statements`, seeded with the root node) to build the tree.
 * Browser-like recovery: mismatched close tags warn instead of throwing,
 * elements that cannot nest in themselves (notInSelf) are auto-closed, and
 * duplicate <body> elements are merged into the first one.
 *
 * Fixes vs. previous revision:
 *  - parent-lookup loop checked `stack[i].type` before `i >= 0`, reading
 *    stack[-1] (undefined) and throwing when no "tag" ancestor exists;
 *  - `body.parent` was dereferenced without a null check (the lookup above can
 *    legitimately produce null);
 *  - warn message typo "is not math" -> "do not match".
 *
 * @param lexer  token source
 * @param check  when true, attach parent back-references while building
 *               (stripped from root children before returning)
 * @returns the root AST node
 */
57 | function parseStatements(lexer: Lexer, check: boolean) {
58 |
59 |     if (check) {
60 |         lexer.check = true
61 |     }
62 |
63 |     let root: any = {
64 |         type: "root",
65 |         children: [],
66 |         LineNum: 1
67 |     }
68 |
69 |     let statements: Array = [root]
70 |
71 |     let Block_level_elements = [ // 块级元素
72 |         "address",
73 |         "article",
74 |         "aside",
75 |         "audio",
76 |         "blockquote",
77 |         "canvas",
78 |         "dd",
79 |         // "div",
80 |         "dl",
81 |         "fieldset",
82 |         "figcaption",
83 |         "figure",
84 |         "figcaption",
85 |         "footer",
86 |         "form",
87 |         "header",
88 |         "hgroup",
89 |         "hr",
90 |         "noscript",
91 |         "ol",
92 |         "output",
93 |         "p",
94 |         "pre",
95 |         "section",
96 |         "table",
97 |         "tfoot",
98 |         "ul",
99 |         "video"
100 |     ]
101 |
102 |     let inlInline_elementsine = [ // 行内元素 (currently unused reference list)
103 |         "b",
104 |         "big",
105 |         "i",
106 |         "small",
107 |         "tt",
108 |         "abbr",
109 |         "acronym",
110 |         "cite",
111 |         "code",
112 |         "dfn",
113 |         "em",
114 |         "kbd",
115 |         "strong",
116 |         "samp",
117 |         "var",
118 |         "a",
119 |         "bdo",
120 |         "br",
121 |         "img",
122 |         "map",
123 |         "object",
124 |         "q",
125 |         "script",
126 |         "span",
127 |         "sub",
128 |         "sup",
129 |         "button",
130 |         "input",
131 |         "label",
132 |         "select",
133 |         "textarea"
134 |     ]
135 |
136 |     let notInSelf = [ // 不能包含自己的元素
137 |         "a",
138 |         "br",
139 |         "img",
140 |         "script",
141 |         "button",
142 |         "input",
143 |     ]
144 |     // select 里面的select会消失
145 |     // textarea 会解析成
146 |
147 |
148 |     let body: any = null
149 |     let mainBodyFinished = false
150 |     let uniqueStack = []
151 |     // let mainBodyFinishedIsText = false
152 |     // 先调用LookAhead一次,将GetNextToken的结果缓存
153 |     while (!isSourceCodeEnd(lexer.LookAhead().tokenType)) {
154 |         // if (lexer.GetLineNum() === 20) {
155 |         //     debugger
156 |         // }
157 |         let statement: any = {}
158 |         statement = parseStatement(lexer)
159 |         // console.log(`at line ${lexer.GetLineNum()} ${lexer.sourceCode.slice(0, 30)}`)
160 |         if (!statement) continue
161 |         let stack = statements;
162 |         let s = statement;
163 |         const length = stack.length - 1
164 |         if (s.type === "tag") {
165 |             s.tag = s.tag.toLocaleLowerCase()
166 |         }
167 |         if (!s.closeTag) {
168 |
169 |             uniqueStack = []
170 |             if (notInSelf.includes(s.tag) && s.tag === stack[length].tag) { // 不能包含自己的元素
171 |                 stack.pop()
172 |                 stack[stack.length - 1].children.push(s)
173 |                 stack.push(s)
174 |                 if (check) {
175 |                     s.parent = stack[stack.length - 1]
176 |                 }
177 |                 continue
178 |             }
179 |
180 |             // 处理多个body标签的问题
181 |             // 如果mainBodyFinished为false,表示还未出现第一个body,并且当前起始标签是body,则寻找他的父节点,并将其赋值给body变量
182 |             if (!mainBodyFinished && s.tag === "body" && !body) {
183 |                 // 寻找父节点
184 |                 let i = stack.length - 1
185 |                 let parent = null
                      // Bound check must come first: stack[0] is the root
                      // (type "root"), so i can reach -1 when no "tag"
                      // ancestor exists; reading stack[-1].type would throw.
186 |                 while (i >= 0 && stack[i].type !== "tag") {
187 |                     i--;
188 |                 }
189 |                 parent = i >= 0 ? stack[i] : null;
190 |                 // 找到的节点,赋值给body
191 |                 body = s
192 |                 // 找到的父节点赋值给上面节点的parent属性,方便后续处理
193 |                 body.parent = parent
194 |             }
195 |
196 |             stack[length].children.push(s) // 栈顶就是levalElement层级元素
197 |             if (check) {
198 |                 s.parent = stack[length]
199 |             }
200 |             if (s.type === "tag" && !s.selfClose && !isSpecialTag(s)) {
201 |                 stack.push(s)
202 |                 // 处理多个body标签的问题
203 |                 // 如果已经出现过一个body标签并且现在这个起始标签还是body,则将其从栈中弹出,并且将其从栈顶的children中弹出
204 |                 if (mainBodyFinished && s.tag === "body") {
205 |                     stack.pop()
206 |                     stack[length].children.pop()
207 |                     if (check) {
208 |                         s.parent = null
209 |                     }
210 |                 }
211 |             }
212 |             // 处理多个body标签的问题
213 |             // 如果出现第一个body起始标签,则将mainBodyFinished置为true,方便在第一个body标签中再次出现body起始标签时将其忽略
214 |             if (!mainBodyFinished && s.tag === "body") {
215 |                 mainBodyFinished = true
216 |             }
217 |         } else {
218 |             if (stack[length].tag !== s.tag) {
219 |                 uniqueStack.push(s)
220 |                 // 处理多个body标签的问题
221 |                 // 如果当前第一个body标签解析完成(mainBodyFinished),并且当前结束标签是body,则直接进行下次循环
222 |                 if (mainBodyFinished && s.tag === "body") {
223 |                     continue
224 |                 }
225 |                 if (Block_level_elements.includes(s.tag)) { // 如果是块级元素会加入到levalElement层级元素当child
226 |                     stack[length].children.push(s)
227 |                     if (check) {
228 |                         s.parent = stack[length]
229 |                     }
230 |                 }
231 |                 // 学习浏览器HTML解析,即使匹配不上也不报错,直接添加到levalElement层级元素当child
232 |                 console.warn(`${stack[length].tag} and ${s.tag} do not match! at line ${lexer.GetLineNum()} ${lexer.sourceCode.slice(0, 100)}`)
233 |                 // throw new Error(`${stack[length].tag} and ${s.tag} do not match! at line ${lexer.GetLineNum()} ${lexer.sourceCode.slice(0, 100)}`)
234 |             } else {
235 |                 // 处理多个body标签的问题
236 |                 // 如果第一个body标签没有解析完成(mainBodyFinished),并且当前结束标签是body,则mainBodyFinished置为true
237 |                 if (!mainBodyFinished && s.tag === "body") {
238 |                     mainBodyFinished = true
239 |                 }
240 |                 stack.pop()
241 |                 if (uniqueStack.length > 0 && uniqueStack[uniqueStack.length - 1].tag === stack[stack.length - 1].tag) {
242 |                     uniqueStack.pop()
243 |                     stack.pop()
244 |                 }
245 |             }
246 |         }
247 |     }
248 |
249 |     // 处理多个body标签的问题
250 |     // 找出body在父节点的索引
      // body.parent can be null (lookup above); guard before dereferencing.
251 |     let index = (body && body.parent) ? body.parent.children.findIndex((item: any) => item === body) : -1
252 |     // 从父节点下一个索引开始添加到第一个body中
253 |     let real = index + 1
254 |
255 |     if (body && body.parent) {
256 |
257 |         for (let i = real; i < body.parent.children.length; i++) {
258 |             if (body.parent.children[i].type === "tag") {
259 |                 body.parent.children[i].children = filterText(body.parent.children[i].children)
260 |             }
261 |             body.children.push(body.parent.children[i])
262 |         }
263 |
264 |         let childrenLength = body.parent.children.length
265 |         for (let i = real; i < childrenLength; i++) {
266 |             body.parent.children.pop()
267 |         }
268 |
269 |         body.children = filterText(body.children)
270 |
271 |         body.parent = null
272 |     }
273 |
274 |
      // Drop whitespace-only text nodes immediately preceding the DTD or <html>.
275 |     for (let i = 0; i < root.children.length; i++) {
276 |         if (root.children[i].type === "DTD") {
277 |             if (i - 1 >= 0 && root.children[i - 1].type === "text" && !root.children[i - 1].content.replace(/[\r\n]+/g, "").trim()) {
278 |                 root.children[i - 1].delete = true
279 |             }
280 |         }
281 |         if (root.children[i].tag === "html") {
282 |             if (i - 1 >= 0 && root.children[i - 1].type === "text" && !root.children[i - 1].content.replace(/[\r\n]+/g, "").trim()) {
283 |                 root.children[i - 1].delete = true
284 |             }
285 |         }
286 |         if (check) {
287 |             root.children[i].parent = null
288 |         }
289 |     }
290 |
291 |     root.children = root.children.filter((item: any) => !item.delete)
292 |
293 |     return root
294 | }
295 |
// Dispatches on the lookahead token: when the token stack's top is content
// text, parses a text node; otherwise routes to the tag / close-tag / DTD /
// comment / directive parser. Throws on an unrecognized token.
296 | function parseStatement(lexer: Lexer) {
297 |     // 向前看一个token并跳过
298 |     lexer.LookAheadAndSkip(TOKEN_IGNORED) // skip if source code start with ignored token
299 |     let look = lexer.LookAhead().tokenType
300 |     let flag = false
301 |     let top = lexer.stack[lexer.stack.length - 1]
302 |     if (
303 |         top.tokenType === TOKEN_CONTENT_TEXT
304 |         // isClose(lexer) &&
305 |         // top.tokenType !== TOKEN_LEFT_PAREN /*<*/ &&
306 |         // top.tokenType !== TOKEN_CLOSE /**/ &&
307 |         // top.tokenType !== TOKEN_DTD /*DTD*/ &&
308 |         // top.tokenType !== COMMENT /*COMMENT*/
309 |     ) {
310 |         flag = true
311 |     } else {
312 |         flag = false
313 |     }
314 |
315 |     if (flag) {
316 |         return parseText(lexer)
317 |     } else {
318 |         switch (look) {
319 |             case TOKEN_LEFT_PAREN: // <
320 |                 return parseHtml(lexer)
321 |             case TOKEN_CLOSE: //
322 |                 return parseClose(lexer)
323 |             case TOKEN_DTD: // dtd
324 |                 return parseDtd(lexer)
325 |             case COMMENT:
326 |                 return paseComment(lexer)
327 |             case DIRECTIVE:
328 |                 return paseDirective(lexer)
329 |             default:
330 |                 throw new Error(`parseStatement(): unknown Statement. at line ${lexer.GetLineNum()} ${lexer.sourceCode.slice(0, 50)}`)
331 |         }
332 |     }
333 |
334 |
335 | }
336 |
// True when the given token type is the end-of-file marker.
337 | function isSourceCodeEnd(token: number): boolean {
338 |     return token === TOKEN_EOF
339 | }
340 |
// Public entry point: lexes + parses `code` into a root AST node.
// `check` enables parent back-references during parsing (stripped before
// returning, see parseStatements). Asserts the lexer ends on EOF.
341 | export function parse(code: string, check: boolean) {
342 |
343 |     let lexer = NewLexer(code)
344 |     let sourceCode = parseSourceCode(lexer, check);
345 |
346 |     lexer.NextTokenIs(TOKEN_EOF)
347 |     return sourceCode
348 | }
349 |
--------------------------------------------------------------------------------
/demo/test26.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Document
8 |
9 |
10 |
385 |
386 |
--------------------------------------------------------------------------------
/dist/lexer.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | Object.defineProperty(exports, "__esModule", { value: true });
3 | exports.NewLexer = exports.Lexer = exports.tokenNameMap = exports.keywords = exports.regexName = exports.SourceCharacter = exports.DIRECTIVE = exports.COMMENT = exports.INTERGER = exports.TOKEN_IGNORED = exports.TOKEN_NAME = exports.TOKEN_SELF_CLOSE = exports.TOKEN_DTD = exports.TOKEN_CLOSE = exports.TOKEN_CONTENT_TEXT = exports.TOKEN_DUOQUOTE = exports.TOKEN_LEFT_LINE = exports.TOKEN_SINGLE_QUOTE = exports.TOKEN_QUOTE = exports.TOKEN_EQUAL = exports.TOKEN_RIGHT_PAREN = exports.TOKEN_TAG_NAME = exports.TOKEN_LEFT_PAREN = exports.TOKEN_EOF = exports.Tokens = void 0;
4 | // token const
// TypeScript-compiled numeric enum of all token kinds produced by the lexer
// (0 = EOF .. 18 = SourceCharacter); forward and reverse mappings.
5 | var Tokens;
6 | (function (Tokens) {
7 | Tokens[Tokens["TOKEN_EOF"] = 0] = "TOKEN_EOF";
8 | Tokens[Tokens["TOKEN_LEFT_PAREN"] = 1] = "TOKEN_LEFT_PAREN";
9 | Tokens[Tokens["TOKEN_TAG_NAME"] = 2] = "TOKEN_TAG_NAME";
10 | Tokens[Tokens["TOKEN_RIGHT_PAREN"] = 3] = "TOKEN_RIGHT_PAREN";
11 | Tokens[Tokens["TOKEN_EQUAL"] = 4] = "TOKEN_EQUAL";
12 | Tokens[Tokens["TOKEN_QUOTE"] = 5] = "TOKEN_QUOTE";
13 | Tokens[Tokens["TOKEN_SINGLE_QUOTE"] = 6] = "TOKEN_SINGLE_QUOTE";
14 | Tokens[Tokens["TOKEN_LEFT_LINE"] = 7] = "TOKEN_LEFT_LINE";
15 | Tokens[Tokens["TOKEN_DUOQUOTE"] = 8] = "TOKEN_DUOQUOTE";
16 | Tokens[Tokens["TOKEN_CONTENT_TEXT"] = 9] = "TOKEN_CONTENT_TEXT";
17 | Tokens[Tokens["TOKEN_CLOSE"] = 10] = "TOKEN_CLOSE";
18 | Tokens[Tokens["TOKEN_DTD"] = 11] = "TOKEN_DTD";
19 | Tokens[Tokens["TOKEN_SELF_CLOSE"] = 12] = "TOKEN_SELF_CLOSE";
20 | Tokens[Tokens["TOKEN_NAME"] = 13] = "TOKEN_NAME";
21 | Tokens[Tokens["TOKEN_IGNORED"] = 14] = "TOKEN_IGNORED";
22 | Tokens[Tokens["INTERGER"] = 15] = "INTERGER";
23 | Tokens[Tokens["COMMENT"] = 16] = "COMMENT";
24 | Tokens[Tokens["DIRECTIVE"] = 17] = "DIRECTIVE";
25 | Tokens[Tokens["SourceCharacter"] = 18] = "SourceCharacter";
26 | })(Tokens = exports.Tokens || (exports.Tokens = {}));
27 | exports.TOKEN_EOF = Tokens.TOKEN_EOF, exports.TOKEN_LEFT_PAREN = Tokens.TOKEN_LEFT_PAREN, exports.TOKEN_TAG_NAME = Tokens.TOKEN_TAG_NAME, exports.TOKEN_RIGHT_PAREN = Tokens.TOKEN_RIGHT_PAREN, exports.TOKEN_EQUAL = Tokens.TOKEN_EQUAL, exports.TOKEN_QUOTE = Tokens.TOKEN_QUOTE, exports.TOKEN_SINGLE_QUOTE = Tokens.TOKEN_SINGLE_QUOTE, exports.TOKEN_LEFT_LINE = Tokens.TOKEN_LEFT_LINE, exports.TOKEN_DUOQUOTE = Tokens.TOKEN_DUOQUOTE, exports.TOKEN_CONTENT_TEXT = Tokens.TOKEN_CONTENT_TEXT, exports.TOKEN_CLOSE = Tokens.TOKEN_CLOSE, exports.TOKEN_DTD = Tokens.TOKEN_DTD, exports.TOKEN_SELF_CLOSE = Tokens.TOKEN_SELF_CLOSE, exports.TOKEN_NAME = Tokens.TOKEN_NAME, exports.TOKEN_IGNORED = Tokens.TOKEN_IGNORED, exports.INTERGER = Tokens.INTERGER, exports.COMMENT = Tokens.COMMENT, exports.DIRECTIVE = Tokens.DIRECTIVE, exports.SourceCharacter = Tokens.SourceCharacter;
28 | // regex match patterns
// Tag/attribute name pattern.
// NOTE(review): `[a-zA-z]` (lowercase final z) is almost certainly a typo for
// `[a-zA-Z]` — in both places. The range A-z also matches the characters
// [ \ ] ^ _ and backtick, so e.g. "_foo" would be accepted as a name start.
// Fix belongs in /src/lexer.ts; this file is compiled output.
29 | exports.regexName = /^[a-zA-z]+[0-9]*([-_:']*[a-zA-z0-9]*)*/;
30 | // 关键字
31 | exports.keywords = {};
// Human-readable token names, used in error messages.
// NOTE(review): "tagNmae" is a (harmless) typo for "tagName" in diagnostics.
32 | exports.tokenNameMap = {
33 | [exports.TOKEN_EOF]: "EOF",
34 | [exports.TOKEN_LEFT_PAREN]: "<",
35 | [exports.TOKEN_TAG_NAME]: "tagNmae",
36 | [exports.TOKEN_RIGHT_PAREN]: ">",
37 | [exports.TOKEN_EQUAL]: "=",
38 | [exports.TOKEN_QUOTE]: "\"",
39 | [exports.TOKEN_SINGLE_QUOTE]: "'",
40 | [exports.TOKEN_LEFT_LINE]: "/",
41 | [exports.TOKEN_DUOQUOTE]: "\"\"",
42 | [exports.TOKEN_CONTENT_TEXT]: "ContentText",
43 | [exports.TOKEN_CLOSE]: "close",
44 | [exports.TOKEN_DTD]: "dtd",
45 | [exports.TOKEN_SELF_CLOSE]: "self-close",
46 | [exports.TOKEN_NAME]: "Name",
47 | [exports.TOKEN_IGNORED]: "Ignored",
48 | [exports.INTERGER]: "INTERGER",
49 | [exports.COMMENT]: "COMMENT",
50 | [exports.DIRECTIVE]: "DIRECTIVE",
51 | [exports.SourceCharacter]: "SourceCharacter",
52 | };
53 | class Lexer {
// Lexer state: remaining source text, current line, a one-token lookahead
// cache (hasCache + nextToken*), and a history stack of emitted tokens.
54 | constructor(sourceCode, lineNum, nextToken, nextTokenType, nextTokenLineNum) {
55 | this.sourceCode = sourceCode;
56 | this.lineNum = lineNum;
57 | this.nextToken = nextToken;
58 | this.nextTokenType = nextTokenType;
59 | this.nextTokenLineNum = nextTokenLineNum;
60 | this.hasCache = false;
61 | this.stack = [];
62 | }
// True when the most recently emitted token can be followed by text content
// (after ">", "/>", a DTD, a comment, or more content text).
63 | get judgeIsContent() {
64 | const length = this.stack.length - 1;
65 | return this.stack[length].tokenType === exports.TOKEN_RIGHT_PAREN /*>*/ ||
66 | this.stack[length].tokenType === exports.TOKEN_SELF_CLOSE /*/> */ ||
67 | this.stack[length].tokenType === exports.TOKEN_DTD /*dtd*/ ||
68 | this.stack[length].tokenType === exports.COMMENT /**/ ||
69 | this.stack[length].tokenType === exports.TOKEN_CONTENT_TEXT; /*ContentText*/
70 | }
// True when the cursor is positioned on text content: token history allows
// content (judgeIsContent) and the next character is not "<". With an empty
// history, anything but "<" counts as content.
71 | get isContentText() {
72 | if (this.stack.length < 1) {
73 | if (this.sourceCode[0] === "<") {
74 | return false;
75 | }
76 | return true;
77 | }
// `origin` snapshot restored before every return so this getter never
// consumes input (the restores below are no-ops today but keep that intent).
78 | let origin = this.sourceCode;
79 | // while (this.stack.length > 10) {
80 | // this.stack.shift()
81 | // }
82 | if (this.judgeIsContent) {
83 | // this.isIgnored()
84 | //
85 | // if (this.stack.length > 2 && this.stack[this.stack.length - 2].token === "noscript") {
86 | // return true
87 | // }
88 | if (this.sourceCode[0] === "<") {
89 | this.sourceCode = origin;
90 | return false;
91 | }
92 | else {
93 | this.sourceCode = origin;
94 | return true;
95 | }
96 | }
97 | else {
98 | return false;
99 | }
100 | }
101 | /**
102 | * LookAhead (向前看) 一个 Token, 告诉我们下一个 Token 是什么
103 | * @returns
104 | */
// Peeks at the next token without consuming it: fetches via GetNextToken()
// and stores the result in the one-slot cache for the next consumer.
105 | LookAhead() {
106 | // lexer.nextToken already setted
107 | if (this.hasCache) {
108 | return { tokenType: this.nextTokenType, lineNum: this.lineNum, token: this.nextToken };
109 | }
110 | // set it
111 | // 当前行
112 | let { lineNum, tokenType, token } = this.GetNextToken();
113 | // *
114 | // 下一行
115 | this.hasCache = true;
116 | this.lineNum = lineNum;
117 | this.nextTokenType = tokenType;
118 | this.nextToken = token;
119 | return { tokenType, lineNum, token };
120 | }
// Consumes the next token only if it matches expectedType; otherwise stashes
// it back into the lookahead cache (cursor effectively rewound).
121 | LookAheadAndSkip(expectedType) {
122 | // get next token
123 | // 查看看下一个Token信息
124 | let { lineNum, tokenType, token } = this.GetNextToken();
125 | // not is expected type, reverse cursor
126 | if (tokenType != expectedType) {
127 | this.hasCache = true;
128 | this.lineNum = lineNum;
129 | this.nextTokenType = tokenType;
130 | this.nextToken = token;
131 | }
132 | }
133 | /**
134 | * 断言下一个 Token 是什么
135 | */
// Consumes the next token and throws a descriptive syntax error unless its
// type equals the expected tokenType.
136 | NextTokenIs(tokenType) {
137 | const { lineNum: nowLineNum, tokenType: nowTokenType, token: nowToken } = this.GetNextToken();
138 | // syntax error
139 | if (tokenType != nowTokenType) {
140 | throw new Error(`NextTokenIs(): syntax error near '${exports.tokenNameMap[nowTokenType]}', expected token: {${exports.tokenNameMap[tokenType]}} but got {${exports.tokenNameMap[nowTokenType]}}. at line ${this.GetLineNum()} ${this.sourceCode.slice(0, 100)}`);
141 | }
142 | return { nowLineNum, nowToken, nowTokenType };
143 | }
144 | // MatchToken() 的封装,每一次调用,都会吃掉相应Token
// Consuming token fetch: drains the lookahead cache when present, otherwise
// delegates to MatchToken().
145 | GetNextToken() {
146 | // next token already loaded
147 | if (this.hasCache) {
148 | // 在LookAhead和LookAheadSkip处对nextTokenLineNum进行了赋值操作
149 | let lineNum = this.lineNum;
150 | let tokenType = this.nextTokenType;
151 | let token = this.nextToken;
152 | this.hasCache = false;
153 | return {
154 | lineNum,
155 | tokenType,
156 | token
157 | };
158 | }
159 | return this.MatchToken();
160 | }
// Rejects characters outside the SourceCharacter set (tab, LF, CR, and
// U+0020..U+FFFF); throws on anything else.
161 | checkCode(c) {
162 | // 确保源代码,不包含非法字符,对应着SourceCharacter的EBNF
163 | if (!/\u0009|\u000A|\u000D|[\u0020-\uFFFF]/.test(c)) {
164 | throw new Error('The source code contains characters that cannot be parsed.');
165 | }
166 | }
167 | // 直接跳过几个字符,返回被跳过的字符
// Skips `skip` characters and returns the FIRST character that was at the
// cursor (not all skipped characters), after validating it.
168 | next(skip) {
169 | this.checkCode(this.sourceCode[0]);
170 | const code = this.sourceCode[0];
171 | this.skipSourceCode(skip);
172 | return code;
173 | }
// Speculative check (name is a typo for isTagName, kept for compatibility):
// after skipping one char, does a name follow that is NOT an attribute
// assignment (i.e. not followed by "=")? Always restores the source cursor.
174 | isTagNmae() {
175 | let origin = this.sourceCode;
176 | this.skipSourceCode(1);
177 | // if (this.sourceCode[0] === "/") {
178 | // this.sourceCode = origin
179 | // return false
180 | // }
181 | let tag_name = exports.regexName.exec(this.sourceCode);
182 | if (tag_name) {
183 | let tag = tag_name[0];
184 | this.skipSourceCode(tag.length);
185 | this.isIgnored();
// Drop any lookahead cached while probing — the cursor is about to be reset.
186 | this.hasCache = false;
187 | if (this.sourceCode[0] === "=") {
188 | this.sourceCode = origin;
189 | return false;
190 | }
191 | else {
192 | this.sourceCode = origin;
193 | return true;
194 | }
195 | }
196 | else {
197 | this.sourceCode = origin;
198 | return false;
199 | }
200 | }
201 | // 匹配Token并跳过匹配的Token
202 | MatchToken() {
203 | this.checkCode(this.sourceCode[0]); // 只做检查,不吃字符
204 | // if(this.lineNum === 12) {
205 | // debugger
206 | // }
207 | // finish
208 | if (this.sourceCode.length == 0) {
209 | let res = { lineNum: this.lineNum, tokenType: exports.TOKEN_EOF, token: exports.tokenNameMap[exports.TOKEN_EOF] };
210 | this.stack.push(res);
211 | return res;
212 | }
213 | if (this.isContentText) {
214 | let contentText = /[\s\S]+/.exec(this.sourceCode[0]);
215 | if (contentText) {
216 | let res = { lineNum: this.lineNum, tokenType: exports.TOKEN_CONTENT_TEXT /*ContentText*/, token: contentText[0] };
217 | this.stack.push(res);
218 | return res;
219 | }
220 | }
221 | else {
222 | // check ignored
223 | if (this.isIgnored()) {
224 | let res = { lineNum: this.lineNum, tokenType: exports.TOKEN_IGNORED, token: "Ignored" };
225 | this.stack.push(res);
226 | return res;
227 | }
228 | switch (this.sourceCode[0]) {
229 | case '<':
230 | //
231 | if (this.sourceCode.slice(0, 4) === "