├── .github └── workflows │ └── tests.yml ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── LICENSE ├── README.md ├── README_HTML.md ├── compare.js ├── demo ├── 005055fd7e2625aba5e8d2d370ea4914a152fe50d16620f896cdf4b1a68ba741-origin.html ├── 005055fd7e2625aba5e8d2d370ea4914a152fe50d16620f896cdf4b1a68ba741.html ├── 039c4b966d1f2a0c589ac0aad211fe65500ad1cb58c7f45b34251db7056803ec-origin.html ├── 039c4b966d1f2a0c589ac0aad211fe65500ad1cb58c7f45b34251db7056803ec.html ├── 0475e5eeadaaca857eea3f36d0eda01937fe672d48be7f98ba6bc7f25ecd63d0.html ├── 06ed0a833361190536a4f61888354e07dccaa501bd9a1c0f1c545533bde1650b.html ├── 078cdb456d1beb698aeed86e0f2161e442e9431c4580295f1ba4ece22741068c.html ├── 0e55dcdbeb54c88ee87942b9fef7ea5398fa9a1e83493d55844b479506a80fd8-origin.html ├── 0e55dcdbeb54c88ee87942b9fef7ea5398fa9a1e83493d55844b479506a80fd8.html ├── CSDN.html ├── CSDN_SPM.html ├── MDN_HTML.html ├── MDN_JavaScript.html ├── demo copy.html ├── demo.html ├── demo1.html ├── demo2.html ├── demo3.html ├── demo4.html ├── demo5.html ├── google.html ├── qidian.html ├── qidian1.html ├── test.html ├── test1.html ├── test10.html ├── test11.html ├── test12.html ├── test13.html ├── test14.html ├── test15.html ├── test16.html ├── test17.html ├── test18.html ├── test19.html ├── test2.html ├── test20.html ├── test21.html ├── test22.html ├── test23.html ├── test24.html ├── test25.html ├── test26.html ├── test27.html ├── test28.html ├── test29.html ├── test30.html ├── test31.html ├── test32.html ├── test33.html ├── test34.html ├── test35.html ├── test36.html ├── test37.html ├── test38.html ├── test39.html ├── test4.html ├── test40.html ├── test5.html ├── test6.html ├── test7.html ├── test8.html └── test9.html ├── dist ├── definition.js ├── index.js ├── lexer.js ├── parser.js └── parser │ ├── Comment.js │ ├── DTD.js │ ├── Directive.js │ ├── Html.js │ ├── parseText.js │ └── tagClose.js ├── mycheck ├── check-dist.js ├── check.ts └── checkfile.js ├── package-lock.json ├── package.json 
├── script └── addSuffixJs.js ├── server.js ├── src ├── definition.ts ├── index.ts ├── lexer.ts ├── parser.ts └── parser │ ├── Comment.ts │ ├── DTD.ts │ ├── Directive.ts │ ├── Html.ts │ ├── parseText.ts │ └── tagClose.ts ├── test-server.js ├── test ├── test24.spec.ts ├── test25.spec.ts ├── test26.spec.ts └── testall.spec.ts ├── tsconfig-esmodule.json ├── tsconfig.json └── yarn.lock /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | strategy: 10 | matrix: 11 | node-version: [10.x, 12.x] 12 | 13 | steps: 14 | 15 | - uses: actions/checkout@v2 16 | 17 | - name: Use Node.js ${{ matrix.node-version }} 18 | uses: actions/setup-node@v1 19 | with: 20 | node-version: ${{ matrix.node-version }} 21 | 22 | - run: npm install 23 | 24 | - run: npm run test-all -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules/ 2 | /src/*.js.map 3 | /dist/*.js.map 4 | /dist/examples/*/*.js.map 5 | /.cache/ 6 | /dist/src/*.js.map 7 | /dist/vm/*.js.map 8 | /dist/test/*.js.map 9 | /dist/src/*/*.js.map 10 | /dist/vm/*/*.js.map 11 | /out/*.json 12 | /files/* 13 | /test/testfile.js 14 | /app/* 15 | /dist-esmodule/* 16 | /copy/* 17 | /matchtest/* 18 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // 使用 IntelliSense 了解相关属性。 3 | // 悬停以查看现有属性的描述。 4 | // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [{ 7 | "name": "ts-node", 8 | "type": "pwa-node", 9 | "request": "launch", 10 | "args": [ 11 | "${relativeFile}" // 入口文件 12 | ], 13 | "runtimeArgs": [ 14 | "--nolazy", 15 | "-r", 16 | "ts-node/register" 
17 | ], 18 | "sourceMaps": true, 19 | "cwd": "${workspaceRoot}", 20 | "protocol": "inspector", 21 | // "console": "integratedTerminal", 22 | "internalConsoleOptions": "openOnSessionStart" 23 | }] 24 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "typescript.tsdk": "node_modules\\typescript\\lib" 3 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 liulinboy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HTML Parser 2 | 3 | ## 解析HTML 4 | 5 | [![Tests](https://github.com/liulinboyi/HTMLParser/actions/workflows/tests.yml/badge.svg)](https://github.com/liulinboyi/HTMLParser/actions/workflows/tests.yml) 6 | 7 | ## HTML 8 | 9 | ```html 10 | 11 | 12 |
13 | 14 | 15 | 16 |
31 | {
32 | "type": "root",
33 | "children": [
34 | {
35 | "type": "DTD",
36 | "LineNum": 1,
37 | "content": "DOCTYPE html"
38 | },
39 | {
40 | "content": "\r\n",
41 | "LineNum": 1,
42 | "type": "text"
43 | },
44 | {
45 | "children": [
46 | {
47 | "content": "\r\n",
48 | "LineNum": 2,
49 | "type": "text"
50 | },
51 | {
52 | "children": [
53 | {
54 | "content": "\r\n ",
55 | "LineNum": 3,
56 | "type": "text"
57 | },
58 | {
59 | "children": [],
60 | "attr": [
61 | {
62 | "name": "charset",
63 | "value": "UTF-8"
64 | }
65 | ],
66 | "LineNum": 4,
67 | "type": "tag",
68 | "tag": "meta"
69 | },
70 | {
71 | "content": "\r\n ",
72 | "LineNum": 4,
73 | "type": "text"
74 | },
75 | {
76 | "children": [],
77 | "attr": [
78 | {
79 | "name": "http-equiv",
80 | "value": "X-UA-Compatible"
81 | },
82 | {
83 | "name": "content",
84 | "value": "IE=edge"
85 | }
86 | ],
87 | "LineNum": 5,
88 | "type": "tag",
89 | "tag": "meta"
90 | },
91 | {
92 | "content": "\r\n ",
93 | "LineNum": 5,
94 | "type": "text"
95 | },
96 | {
97 | "children": [],
98 | "attr": [
99 | {
100 | "name": "name",
101 | "value": "viewport"
102 | },
103 | {
104 | "name": "content",
105 | "value": "width=device-width, initial-scale=1.0"
106 | }
107 | ],
108 | "LineNum": 6,
109 | "type": "tag",
110 | "tag": "meta"
111 | },
112 | {
113 | "content": "\r\n ",
114 | "LineNum": 6,
115 | "type": "text"
116 | },
117 | {
118 | "children": [
119 | {
120 | "content": "Document",
121 | "LineNum": 7,
122 | "type": "text"
123 | }
124 | ],
125 | "attr": [],
126 | "LineNum": 7,
127 | "type": "tag",
128 | "tag": "title"
129 | },
130 | {
131 | "content": "\r\n",
132 | "LineNum": 7,
133 | "type": "text"
134 | }
135 | ],
136 | "attr": [],
137 | "LineNum": 3,
138 | "type": "tag",
139 | "tag": "head"
140 | },
141 | {
142 | "content": "\r\n",
143 | "LineNum": 8,
144 | "type": "text"
145 | },
146 | {
147 | "children": [
148 | {
149 | "content": "\r\n ",
150 | "LineNum": 9,
151 | "type": "text"
152 | },
153 | {
154 | "children": [
155 | {
156 | "content": "\r\n ",
157 | "LineNum": 10,
158 | "type": "text"
159 | },
160 | {
161 | "children": [
162 | {
163 | "content": "11{{res.value}}",
164 | "LineNum": 11,
165 | "type": "text"
166 | }
167 | ],
168 | "attr": [
169 | {
170 | "name": "v-if",
171 | "value": "res.value"
172 | },
173 | {
174 | "name": "name",
175 | "value": "11"
176 | },
177 | {
178 | "name": "@click",
179 | "value": "tes"
180 | }
181 | ],
182 | "LineNum": 11,
183 | "type": "tag",
184 | "tag": "h1"
185 | },
186 | {
187 | "content": "\r\n ",
188 | "LineNum": 11,
189 | "type": "text"
190 | }
191 | ],
192 | "attr": [],
193 | "LineNum": 10,
194 | "type": "tag",
195 | "tag": "div"
196 | },
197 | {
198 | "content": "\r\n ",
199 | "LineNum": 12,
200 | "type": "text"
201 | },
202 | {
203 | "children": [],
204 | "attr": [
205 | {
206 | "name": "href",
207 | "value": "http://github.com/"
208 | }
209 | ],
210 | "LineNum": 13,
211 | "type": "tag",
212 | "tag": "a"
213 | },
214 | {
215 | "content": "\r\n",
216 | "LineNum": 13,
217 | "type": "text"
218 | }
219 | ],
220 | "attr": [],
221 | "LineNum": 9,
222 | "type": "tag",
223 | "tag": "body"
224 | },
225 | {
226 | "content": "\r\n",
227 | "LineNum": 14,
228 | "type": "text"
229 | }
230 | ],
231 | "attr": [
232 | {
233 | "name": "lang",
234 | "value": "en"
235 | }
236 | ],
237 | "LineNum": 2,
238 | "type": "tag",
239 | "tag": "html"
240 | }
241 | ],
242 | "LineNum": 1
243 | }
244 |
245 | >A federal appeals court has dealt the Obama administration yet another blow in its quest to keep at least 13 | some age restrictions on the sale of emergency contraceptive pills.
14 |In a three-paragraph 16 | order, a three-judge panel for the United States Court of Appeals for the 2nd Circuit ruled that 17 | although the government's appeal of a lower 19 | court decision removing all age restrictions on morning-after pills is still pending, at least some 20 | medications must be made available over the counter immediately.
21 |Specifically, the panel said that while the requirement for one-pill versions of the morning-after pill to be 22 | made available without age restrictions can be delayed while the appeal is considered, that is not the case 23 | for "two-pill variants," which include generic products Next Choice and other levonorgestrel tablets.
26 |Ironically, the FDA had sought to produce a compromise by approving 28 | in late April an over-the-counter version of Plan B 29 | One-Step, a one-pill version that would be available on pharmacy shelves but only to those 15 and 30 | over who are able to produce proper identification.
31 |But Plan B One-Step costs 33 | in the neighborhood of $50, while the generic two-pill formulations cost about $20 to $35.
35 |The saga of trying to move emergency contraception from a prescription-only to an over-the-counter product 36 | has been ongoing for more than a decade through two successive presidential administrations.
37 |U.S. District Court Judge Edward Korman, who has overseen the case since 2005, has made it clear that he 38 | thinks the government has dragged its feet to the point of violating the law.
39 |But few expected the New York-based appeals court to agree with Korman, even in part, by denying the 40 | government's request to stay his April 6 order while the appeal is being heard.
41 |The government — via the Departments of Justice and Health and Human Services — had no immediate comment on 42 | the ruling. Representatives would say only that they were "reviewing the order" from the appeals court.
43 |Those who have been pursuing the case, however, had a bit more to say.
44 |"Today's decision from the 2nd Circuit marks an historic day for women's health," said Nancy Northup, 45 | president and CEO of the Center for Reproductive Rights, which 46 | has represented some of the plaintiffs in the lawsuit. "Finally, after more than a decade of politically 47 | motivated delays, women will no longer have to endure intrusive, onerous and medically unnecessary 48 | restrictions to get emergency contraception."
49 |What happens next remains unclear. Some lawyers say the government might be able to appeal to the full 2nd
50 | Circuit. But more likely, if they insist on fighting, government attorneys would have to seek relief from
51 | the Supreme Court justice who oversees the 2nd Circuit — Ruth Bader Ginsburg.
48 | |
50 | |
![]() |
53 |
54 | |
56 |
![]() |
59 |
60 | |
62 |
![]() |
65 |
66 | |
68 |
![]() |
71 |
72 | |
74 |
![]() |
77 |
78 | |
80 |
17 | 天元浪子
18 | 20 | 23 |总经理
40 |153篇
41 |44 | 生于1968年,程序员,使用python超过10年。长期从事数据处理工作,先后参与过风云系列卫星、碳卫星、海洋卫星、嫦娥探测器等卫星数据处理。
45 | 46 | 47 | 48 |11 | 本部分 12 | APIs 13 | ,请参考 14 | Web APIs 15 | 以及 16 | DOM 17 | 。 18 |
19 | 20 | -------------------------------------------------------------------------------- /demo/test7.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |===
和非严格的比较操作符 ==
,以及 Object.is()
12 | 方法。
13 | 14 | JavaScript ( 函数优先的轻量级非浏览器环境 17 |
18 | 19 | -------------------------------------------------------------------------------- /demo/test9.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |