├── .dockerignore
├── .gitattributes
├── .github
│   └── workflows
│       └── build.yml
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── Dockerfile
├── LICENSE
├── README.md
├── build.sh
├── ci-build
│   ├── Dockerfile
│   ├── README.md
│   ├── docker-build.sh
│   ├── docker-run.sh
│   └── inside-container.sh
├── entities
│   ├── README.md
│   ├── build.sh
│   ├── entities-legacy.inc
│   ├── entity-processor-json.py
│   ├── entity-processor.py
│   ├── entity-to-dtd.pl
│   ├── json-entities-legacy.inc
│   └── out
│       ├── entities-dtd.url
│       ├── entities.inc
│       └── entities.json
├── lint.sh
└── src
    ├── annotate_attributes.rs
    ├── boilerplate.rs
    ├── dom_utils.rs
    ├── interface_index.rs
    ├── io_utils.rs
    ├── main.rs
    ├── parser.rs
    ├── rcdom_with_line_numbers.rs
    ├── represents.rs
    └── tag_omission.rs
/.dockerignore:
--------------------------------------------------------------------------------
1 | *
2 | !entities/out
3 | !quotes/out
4 | !build.sh
5 | !lint.sh
6 | !Cargo.lock
7 | !Cargo.toml
8 | !src
9 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto eol=lf
2 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | # CI for html-build: lint the shell scripts, build the Docker image, run it
2 | # against whatwg/html, and (on pushes to main only) publish the image.
3 | name: html-build CI
4 | on:
5 |   pull_request:
6 |     branches: ['main']
7 |   push:
8 |     branches: ['main']
9 |
10 | env:
11 |   REGISTRY: ghcr.io
12 |   IMAGE_NAME: ${{ github.repository }}
13 |
14 |
15 | jobs:
16 |   build:
17 |     name: Build
18 |     runs-on: ubuntu-latest
19 |     permissions:
20 |       contents: read
21 |       # Needed by the "Docker push" step to publish to the registry.
22 |       packages: write
23 |     steps:
24 |       - name: Checkout whatwg/html-build
25 |         uses: actions/checkout@v4
26 |         with:
27 |           # 0 fetches the complete history instead of a shallow clone.
28 |           fetch-depth: 0
29 |       - name: Shellcheck
30 |         run: |
31 |           shellcheck *.sh
32 |           shellcheck ci-build/*.sh
33 |       - name: Docker build
34 |         run: ci-build/docker-build.sh
35 |       - name: Checkout whatwg/html
36 |         uses: actions/checkout@v4
37 |         with:
38 |           repository: whatwg/html
39 |           # Checked out into ./html, which the test step below passes to docker-run.sh.
40 |           path: html
41 |           fetch-depth: 2
42 |       - name: Test against whatwg/html
43 |         run: |
44 |           mkdir output
45 |           bash ci-build/docker-run.sh "$GITHUB_WORKSPACE/html" output
46 |       # The two publishing steps run only for pushes to main, never for pull requests.
47 |       - name: Docker login
48 |         if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
49 |         uses: docker/login-action@v3
50 |         with:
51 |           registry: ${{ env.REGISTRY }}
52 |           username: ${{ github.actor }}
53 |           password: ${{ secrets.GITHUB_TOKEN }}
54 |       # NOTE(review): assumes ci-build/docker-build.sh already tagged the image
55 |       # as $REGISTRY/$IMAGE_NAME - confirm in that script.
56 |       - name: Docker push
57 |         if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
58 |         run: |
59 |           docker tag "$REGISTRY/$IMAGE_NAME" "$REGISTRY/$IMAGE_NAME:$GITHUB_SHA"
60 |           docker tag "$REGISTRY/$IMAGE_NAME" "$REGISTRY/$IMAGE_NAME:latest"
61 |           docker push "$REGISTRY/$IMAGE_NAME:$GITHUB_SHA"
62 |           docker push "$REGISTRY/$IMAGE_NAME:latest"
63 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .cache/
2 | .temp/
3 | html/
4 | output/
5 | mdn/.id-list
6 | mdn/developer.mozilla.org/
7 | highlighter/
8 |
9 |
10 | # Added by cargo
11 |
12 | /target
13 |
--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "aho-corasick"
7 | version = "1.0.2"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
10 | dependencies = [
11 | "memchr",
12 | ]
13 |
14 | [[package]]
15 | name = "autocfg"
16 | version = "1.1.0"
17 | source = "registry+https://github.com/rust-lang/crates.io-index"
18 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
19 |
20 | [[package]]
21 | name = "bitflags"
22 | version = "1.3.2"
23 | source = "registry+https://github.com/rust-lang/crates.io-index"
24 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
25 |
26 | [[package]]
27 | name = "bytes"
28 | version = "1.4.0"
29 | source = "registry+https://github.com/rust-lang/crates.io-index"
30 | checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be"
31 |
32 | [[package]]
33 | name = "cc"
34 | version = "1.0.79"
35 | source = "registry+https://github.com/rust-lang/crates.io-index"
36 | checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
37 |
38 | [[package]]
39 | name = "cfg-if"
40 | version = "1.0.0"
41 | source = "registry+https://github.com/rust-lang/crates.io-index"
42 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
43 |
44 | [[package]]
45 | name = "delegate"
46 | version = "0.12.0"
47 | source = "registry+https://github.com/rust-lang/crates.io-index"
48 | checksum = "4e018fccbeeb50ff26562ece792ed06659b9c2dae79ece77c4456bb10d9bf79b"
49 | dependencies = [
50 | "proc-macro2",
51 | "quote",
52 | "syn 2.0.18",
53 | ]
54 |
55 | [[package]]
56 | name = "errno"
57 | version = "0.3.1"
58 | source = "registry+https://github.com/rust-lang/crates.io-index"
59 | checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
60 | dependencies = [
61 | "errno-dragonfly",
62 | "libc",
63 | "windows-sys",
64 | ]
65 |
66 | [[package]]
67 | name = "errno-dragonfly"
68 | version = "0.1.2"
69 | source = "registry+https://github.com/rust-lang/crates.io-index"
70 | checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
71 | dependencies = [
72 | "cc",
73 | "libc",
74 | ]
75 |
76 | [[package]]
77 | name = "fastrand"
78 | version = "1.9.0"
79 | source = "registry+https://github.com/rust-lang/crates.io-index"
80 | checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be"
81 | dependencies = [
82 | "instant",
83 | ]
84 |
85 | [[package]]
86 | name = "futf"
87 | version = "0.1.5"
88 | source = "registry+https://github.com/rust-lang/crates.io-index"
89 | checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
90 | dependencies = [
91 | "mac",
92 | "new_debug_unreachable",
93 | ]
94 |
95 | [[package]]
96 | name = "getrandom"
97 | version = "0.2.10"
98 | source = "registry+https://github.com/rust-lang/crates.io-index"
99 | checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
100 | dependencies = [
101 | "cfg-if",
102 | "libc",
103 | "wasi",
104 | ]
105 |
106 | [[package]]
107 | name = "hermit-abi"
108 | version = "0.2.6"
109 | source = "registry+https://github.com/rust-lang/crates.io-index"
110 | checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
111 | dependencies = [
112 | "libc",
113 | ]
114 |
115 | [[package]]
116 | name = "hermit-abi"
117 | version = "0.3.1"
118 | source = "registry+https://github.com/rust-lang/crates.io-index"
119 | checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
120 |
121 | [[package]]
122 | name = "html-build"
123 | version = "0.0.0"
124 | dependencies = [
125 | "delegate",
126 | "html5ever",
127 | "markup5ever_rcdom",
128 | "regex",
129 | "tempfile",
130 | "tokio",
131 | ]
132 |
133 | [[package]]
134 | name = "html5ever"
135 | version = "0.26.0"
136 | source = "registry+https://github.com/rust-lang/crates.io-index"
137 | checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
138 | dependencies = [
139 | "log",
140 | "mac",
141 | "markup5ever",
142 | "proc-macro2",
143 | "quote",
144 | "syn 1.0.109",
145 | ]
146 |
147 | [[package]]
148 | name = "instant"
149 | version = "0.1.12"
150 | source = "registry+https://github.com/rust-lang/crates.io-index"
151 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
152 | dependencies = [
153 | "cfg-if",
154 | ]
155 |
156 | [[package]]
157 | name = "io-lifetimes"
158 | version = "1.0.11"
159 | source = "registry+https://github.com/rust-lang/crates.io-index"
160 | checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2"
161 | dependencies = [
162 | "hermit-abi 0.3.1",
163 | "libc",
164 | "windows-sys",
165 | ]
166 |
167 | [[package]]
168 | name = "libc"
169 | version = "0.2.146"
170 | source = "registry+https://github.com/rust-lang/crates.io-index"
171 | checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b"
172 |
173 | [[package]]
174 | name = "linux-raw-sys"
175 | version = "0.3.8"
176 | source = "registry+https://github.com/rust-lang/crates.io-index"
177 | checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
178 |
179 | [[package]]
180 | name = "lock_api"
181 | version = "0.4.10"
182 | source = "registry+https://github.com/rust-lang/crates.io-index"
183 | checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16"
184 | dependencies = [
185 | "autocfg",
186 | "scopeguard",
187 | ]
188 |
189 | [[package]]
190 | name = "log"
191 | version = "0.4.19"
192 | source = "registry+https://github.com/rust-lang/crates.io-index"
193 | checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
194 |
195 | [[package]]
196 | name = "mac"
197 | version = "0.1.1"
198 | source = "registry+https://github.com/rust-lang/crates.io-index"
199 | checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
200 |
201 | [[package]]
202 | name = "markup5ever"
203 | version = "0.11.0"
204 | source = "registry+https://github.com/rust-lang/crates.io-index"
205 | checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
206 | dependencies = [
207 | "log",
208 | "phf",
209 | "phf_codegen",
210 | "string_cache",
211 | "string_cache_codegen",
212 | "tendril",
213 | ]
214 |
215 | [[package]]
216 | name = "markup5ever_rcdom"
217 | version = "0.2.0"
218 | source = "registry+https://github.com/rust-lang/crates.io-index"
219 | checksum = "b9521dd6750f8e80ee6c53d65e2e4656d7de37064f3a7a5d2d11d05df93839c2"
220 | dependencies = [
221 | "html5ever",
222 | "markup5ever",
223 | "tendril",
224 | "xml5ever",
225 | ]
226 |
227 | [[package]]
228 | name = "memchr"
229 | version = "2.5.0"
230 | source = "registry+https://github.com/rust-lang/crates.io-index"
231 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
232 |
233 | [[package]]
234 | name = "mio"
235 | version = "0.8.8"
236 | source = "registry+https://github.com/rust-lang/crates.io-index"
237 | checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2"
238 | dependencies = [
239 | "libc",
240 | "wasi",
241 | "windows-sys",
242 | ]
243 |
244 | [[package]]
245 | name = "new_debug_unreachable"
246 | version = "1.0.4"
247 | source = "registry+https://github.com/rust-lang/crates.io-index"
248 | checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
249 |
250 | [[package]]
251 | name = "num_cpus"
252 | version = "1.15.0"
253 | source = "registry+https://github.com/rust-lang/crates.io-index"
254 | checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
255 | dependencies = [
256 | "hermit-abi 0.2.6",
257 | "libc",
258 | ]
259 |
260 | [[package]]
261 | name = "once_cell"
262 | version = "1.18.0"
263 | source = "registry+https://github.com/rust-lang/crates.io-index"
264 | checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
265 |
266 | [[package]]
267 | name = "parking_lot"
268 | version = "0.12.1"
269 | source = "registry+https://github.com/rust-lang/crates.io-index"
270 | checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
271 | dependencies = [
272 | "lock_api",
273 | "parking_lot_core",
274 | ]
275 |
276 | [[package]]
277 | name = "parking_lot_core"
278 | version = "0.9.8"
279 | source = "registry+https://github.com/rust-lang/crates.io-index"
280 | checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447"
281 | dependencies = [
282 | "cfg-if",
283 | "libc",
284 | "redox_syscall",
285 | "smallvec",
286 | "windows-targets",
287 | ]
288 |
289 | [[package]]
290 | name = "phf"
291 | version = "0.10.1"
292 | source = "registry+https://github.com/rust-lang/crates.io-index"
293 | checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
294 | dependencies = [
295 | "phf_shared",
296 | ]
297 |
298 | [[package]]
299 | name = "phf_codegen"
300 | version = "0.10.0"
301 | source = "registry+https://github.com/rust-lang/crates.io-index"
302 | checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
303 | dependencies = [
304 | "phf_generator",
305 | "phf_shared",
306 | ]
307 |
308 | [[package]]
309 | name = "phf_generator"
310 | version = "0.10.0"
311 | source = "registry+https://github.com/rust-lang/crates.io-index"
312 | checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
313 | dependencies = [
314 | "phf_shared",
315 | "rand",
316 | ]
317 |
318 | [[package]]
319 | name = "phf_shared"
320 | version = "0.10.0"
321 | source = "registry+https://github.com/rust-lang/crates.io-index"
322 | checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
323 | dependencies = [
324 | "siphasher",
325 | ]
326 |
327 | [[package]]
328 | name = "pin-project-lite"
329 | version = "0.2.9"
330 | source = "registry+https://github.com/rust-lang/crates.io-index"
331 | checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
332 |
333 | [[package]]
334 | name = "ppv-lite86"
335 | version = "0.2.17"
336 | source = "registry+https://github.com/rust-lang/crates.io-index"
337 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
338 |
339 | [[package]]
340 | name = "precomputed-hash"
341 | version = "0.1.1"
342 | source = "registry+https://github.com/rust-lang/crates.io-index"
343 | checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
344 |
345 | [[package]]
346 | name = "proc-macro2"
347 | version = "1.0.60"
348 | source = "registry+https://github.com/rust-lang/crates.io-index"
349 | checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406"
350 | dependencies = [
351 | "unicode-ident",
352 | ]
353 |
354 | [[package]]
355 | name = "quote"
356 | version = "1.0.28"
357 | source = "registry+https://github.com/rust-lang/crates.io-index"
358 | checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
359 | dependencies = [
360 | "proc-macro2",
361 | ]
362 |
363 | [[package]]
364 | name = "rand"
365 | version = "0.8.5"
366 | source = "registry+https://github.com/rust-lang/crates.io-index"
367 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
368 | dependencies = [
369 | "libc",
370 | "rand_chacha",
371 | "rand_core",
372 | ]
373 |
374 | [[package]]
375 | name = "rand_chacha"
376 | version = "0.3.1"
377 | source = "registry+https://github.com/rust-lang/crates.io-index"
378 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
379 | dependencies = [
380 | "ppv-lite86",
381 | "rand_core",
382 | ]
383 |
384 | [[package]]
385 | name = "rand_core"
386 | version = "0.6.4"
387 | source = "registry+https://github.com/rust-lang/crates.io-index"
388 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
389 | dependencies = [
390 | "getrandom",
391 | ]
392 |
393 | [[package]]
394 | name = "redox_syscall"
395 | version = "0.3.5"
396 | source = "registry+https://github.com/rust-lang/crates.io-index"
397 | checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
398 | dependencies = [
399 | "bitflags",
400 | ]
401 |
402 | [[package]]
403 | name = "regex"
404 | version = "1.8.4"
405 | source = "registry+https://github.com/rust-lang/crates.io-index"
406 | checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f"
407 | dependencies = [
408 | "aho-corasick",
409 | "memchr",
410 | "regex-syntax",
411 | ]
412 |
413 | [[package]]
414 | name = "regex-syntax"
415 | version = "0.7.2"
416 | source = "registry+https://github.com/rust-lang/crates.io-index"
417 | checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
418 |
419 | [[package]]
420 | name = "rustix"
421 | version = "0.37.20"
422 | source = "registry+https://github.com/rust-lang/crates.io-index"
423 | checksum = "b96e891d04aa506a6d1f318d2771bcb1c7dfda84e126660ace067c9b474bb2c0"
424 | dependencies = [
425 | "bitflags",
426 | "errno",
427 | "io-lifetimes",
428 | "libc",
429 | "linux-raw-sys",
430 | "windows-sys",
431 | ]
432 |
433 | [[package]]
434 | name = "scopeguard"
435 | version = "1.1.0"
436 | source = "registry+https://github.com/rust-lang/crates.io-index"
437 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
438 |
439 | [[package]]
440 | name = "serde"
441 | version = "1.0.164"
442 | source = "registry+https://github.com/rust-lang/crates.io-index"
443 | checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"
444 |
445 | [[package]]
446 | name = "signal-hook-registry"
447 | version = "1.4.1"
448 | source = "registry+https://github.com/rust-lang/crates.io-index"
449 | checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1"
450 | dependencies = [
451 | "libc",
452 | ]
453 |
454 | [[package]]
455 | name = "siphasher"
456 | version = "0.3.10"
457 | source = "registry+https://github.com/rust-lang/crates.io-index"
458 | checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
459 |
460 | [[package]]
461 | name = "smallvec"
462 | version = "1.10.0"
463 | source = "registry+https://github.com/rust-lang/crates.io-index"
464 | checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
465 |
466 | [[package]]
467 | name = "socket2"
468 | version = "0.4.9"
469 | source = "registry+https://github.com/rust-lang/crates.io-index"
470 | checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662"
471 | dependencies = [
472 | "libc",
473 | "winapi",
474 | ]
475 |
476 | [[package]]
477 | name = "string_cache"
478 | version = "0.8.7"
479 | source = "registry+https://github.com/rust-lang/crates.io-index"
480 | checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
481 | dependencies = [
482 | "new_debug_unreachable",
483 | "once_cell",
484 | "parking_lot",
485 | "phf_shared",
486 | "precomputed-hash",
487 | "serde",
488 | ]
489 |
490 | [[package]]
491 | name = "string_cache_codegen"
492 | version = "0.5.2"
493 | source = "registry+https://github.com/rust-lang/crates.io-index"
494 | checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
495 | dependencies = [
496 | "phf_generator",
497 | "phf_shared",
498 | "proc-macro2",
499 | "quote",
500 | ]
501 |
502 | [[package]]
503 | name = "syn"
504 | version = "1.0.109"
505 | source = "registry+https://github.com/rust-lang/crates.io-index"
506 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
507 | dependencies = [
508 | "proc-macro2",
509 | "quote",
510 | "unicode-ident",
511 | ]
512 |
513 | [[package]]
514 | name = "syn"
515 | version = "2.0.18"
516 | source = "registry+https://github.com/rust-lang/crates.io-index"
517 | checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
518 | dependencies = [
519 | "proc-macro2",
520 | "quote",
521 | "unicode-ident",
522 | ]
523 |
524 | [[package]]
525 | name = "tempfile"
526 | version = "3.6.0"
527 | source = "registry+https://github.com/rust-lang/crates.io-index"
528 | checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6"
529 | dependencies = [
530 | "autocfg",
531 | "cfg-if",
532 | "fastrand",
533 | "redox_syscall",
534 | "rustix",
535 | "windows-sys",
536 | ]
537 |
538 | [[package]]
539 | name = "tendril"
540 | version = "0.4.3"
541 | source = "registry+https://github.com/rust-lang/crates.io-index"
542 | checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
543 | dependencies = [
544 | "futf",
545 | "mac",
546 | "utf-8",
547 | ]
548 |
549 | [[package]]
550 | name = "tokio"
551 | version = "1.28.2"
552 | source = "registry+https://github.com/rust-lang/crates.io-index"
553 | checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2"
554 | dependencies = [
555 | "autocfg",
556 | "bytes",
557 | "libc",
558 | "mio",
559 | "num_cpus",
560 | "parking_lot",
561 | "pin-project-lite",
562 | "signal-hook-registry",
563 | "socket2",
564 | "tokio-macros",
565 | "windows-sys",
566 | ]
567 |
568 | [[package]]
569 | name = "tokio-macros"
570 | version = "2.1.0"
571 | source = "registry+https://github.com/rust-lang/crates.io-index"
572 | checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
573 | dependencies = [
574 | "proc-macro2",
575 | "quote",
576 | "syn 2.0.18",
577 | ]
578 |
579 | [[package]]
580 | name = "unicode-ident"
581 | version = "1.0.9"
582 | source = "registry+https://github.com/rust-lang/crates.io-index"
583 | checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
584 |
585 | [[package]]
586 | name = "utf-8"
587 | version = "0.7.6"
588 | source = "registry+https://github.com/rust-lang/crates.io-index"
589 | checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
590 |
591 | [[package]]
592 | name = "wasi"
593 | version = "0.11.0+wasi-snapshot-preview1"
594 | source = "registry+https://github.com/rust-lang/crates.io-index"
595 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
596 |
597 | [[package]]
598 | name = "winapi"
599 | version = "0.3.9"
600 | source = "registry+https://github.com/rust-lang/crates.io-index"
601 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
602 | dependencies = [
603 | "winapi-i686-pc-windows-gnu",
604 | "winapi-x86_64-pc-windows-gnu",
605 | ]
606 |
607 | [[package]]
608 | name = "winapi-i686-pc-windows-gnu"
609 | version = "0.4.0"
610 | source = "registry+https://github.com/rust-lang/crates.io-index"
611 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
612 |
613 | [[package]]
614 | name = "winapi-x86_64-pc-windows-gnu"
615 | version = "0.4.0"
616 | source = "registry+https://github.com/rust-lang/crates.io-index"
617 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
618 |
619 | [[package]]
620 | name = "windows-sys"
621 | version = "0.48.0"
622 | source = "registry+https://github.com/rust-lang/crates.io-index"
623 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
624 | dependencies = [
625 | "windows-targets",
626 | ]
627 |
628 | [[package]]
629 | name = "windows-targets"
630 | version = "0.48.0"
631 | source = "registry+https://github.com/rust-lang/crates.io-index"
632 | checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5"
633 | dependencies = [
634 | "windows_aarch64_gnullvm",
635 | "windows_aarch64_msvc",
636 | "windows_i686_gnu",
637 | "windows_i686_msvc",
638 | "windows_x86_64_gnu",
639 | "windows_x86_64_gnullvm",
640 | "windows_x86_64_msvc",
641 | ]
642 |
643 | [[package]]
644 | name = "windows_aarch64_gnullvm"
645 | version = "0.48.0"
646 | source = "registry+https://github.com/rust-lang/crates.io-index"
647 | checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
648 |
649 | [[package]]
650 | name = "windows_aarch64_msvc"
651 | version = "0.48.0"
652 | source = "registry+https://github.com/rust-lang/crates.io-index"
653 | checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
654 |
655 | [[package]]
656 | name = "windows_i686_gnu"
657 | version = "0.48.0"
658 | source = "registry+https://github.com/rust-lang/crates.io-index"
659 | checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
660 |
661 | [[package]]
662 | name = "windows_i686_msvc"
663 | version = "0.48.0"
664 | source = "registry+https://github.com/rust-lang/crates.io-index"
665 | checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
666 |
667 | [[package]]
668 | name = "windows_x86_64_gnu"
669 | version = "0.48.0"
670 | source = "registry+https://github.com/rust-lang/crates.io-index"
671 | checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
672 |
673 | [[package]]
674 | name = "windows_x86_64_gnullvm"
675 | version = "0.48.0"
676 | source = "registry+https://github.com/rust-lang/crates.io-index"
677 | checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
678 |
679 | [[package]]
680 | name = "windows_x86_64_msvc"
681 | version = "0.48.0"
682 | source = "registry+https://github.com/rust-lang/crates.io-index"
683 | checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
684 |
685 | [[package]]
686 | name = "xml5ever"
687 | version = "0.17.0"
688 | source = "registry+https://github.com/rust-lang/crates.io-index"
689 | checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650"
690 | dependencies = [
691 | "log",
692 | "mac",
693 | "markup5ever",
694 | ]
695 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "html-build"
3 | version = "0.0.0"
4 | publish = false
5 | edition = "2021"
6 |
7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
8 |
9 | [dependencies]
10 | tokio = { version = "1", features = ["full"] }
11 | html5ever = "0.26.0"
12 | markup5ever_rcdom = "0.2.0"
13 | regex = "1"
14 | delegate = "0.12.0"
15 |
16 | [dev-dependencies]
17 | tempfile = "3"
18 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Stage 1: compile the html-build Rust binary from Cargo.toml/Cargo.lock and src/.
2 | FROM rust:1.73-slim AS builder
3 | WORKDIR /whatwg/html-build
4 | COPY Cargo.lock Cargo.toml ./
5 | COPY src ./src/
6 | RUN cargo install --path .
7 |
8 | # Stage 2: runtime image with the tools and scripts used by build.sh.
9 | FROM debian:stable-slim
10 | RUN apt-get update && \
11 |     apt-get install --yes --no-install-recommends ca-certificates curl git python3 python3-pip pipx && \
12 |     rm -rf /var/lib/apt/lists/*
13 |
14 | # Binary produced by the builder stage above.
15 | COPY --from=builder /usr/local/cargo/bin/html-build /bin/html-build
16 |
17 | # Wattsi binary taken from the published whatwg/wattsi image.
18 | COPY --from=ghcr.io/whatwg/wattsi:latest /whatwg/wattsi/bin/wattsi /bin/wattsi
19 |
20 | # pipx keeps its environments under PIPX_HOME and links executables into PIPX_BIN_DIR.
21 | ENV PIPX_HOME=/opt/pipx
22 | ENV PIPX_BIN_DIR=/usr/bin
23 | RUN pipx install bs-highlighter
24 |
25 | # Copies the build context; what is included is limited by .dockerignore.
26 | COPY . /whatwg/html-build/
27 |
28 | # NOTE(review): SKIP_BUILD_UPDATE_CHECK is presumably read by build.sh - confirm there.
29 | ENV SKIP_BUILD_UPDATE_CHECK=true
30 | ENTRYPOINT ["bash", "/whatwg/html-build/build.sh"]
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright © WHATWG (Apple, Google, Mozilla, Microsoft).
2 |
3 | This work is licensed under a Creative Commons Attribution 4.0 International
4 | License. To the extent portions of it are incorporated into source code,
5 | such portions in the source code are licensed under the BSD 3-Clause License instead.
6 |
7 | - - - -
8 |
9 | Creative Commons Attribution 4.0 International Public License
10 |
11 | By exercising the Licensed Rights (defined below), You accept and agree
12 | to be bound by the terms and conditions of this Creative Commons
13 | Attribution 4.0 International Public License ("Public License"). To the
14 | extent this Public License may be interpreted as a contract, You are
15 | granted the Licensed Rights in consideration of Your acceptance of
16 | these terms and conditions, and the Licensor grants You such rights in
17 | consideration of benefits the Licensor receives from making the
18 | Licensed Material available under these terms and conditions.
19 |
20 |
21 | Section 1 -- Definitions.
22 |
23 | a. Adapted Material means material subject to Copyright and Similar
24 | Rights that is derived from or based upon the Licensed Material
25 | and in which the Licensed Material is translated, altered,
26 | arranged, transformed, or otherwise modified in a manner requiring
27 | permission under the Copyright and Similar Rights held by the
28 | Licensor. For purposes of this Public License, where the Licensed
29 | Material is a musical work, performance, or sound recording,
30 | Adapted Material is always produced where the Licensed Material is
31 | synched in timed relation with a moving image.
32 |
33 | b. Adapter's License means the license You apply to Your Copyright
34 | and Similar Rights in Your contributions to Adapted Material in
35 | accordance with the terms and conditions of this Public License.
36 |
37 | c. Copyright and Similar Rights means copyright and/or similar rights
38 | closely related to copyright including, without limitation,
39 | performance, broadcast, sound recording, and Sui Generis Database
40 | Rights, without regard to how the rights are labeled or
41 | categorized. For purposes of this Public License, the rights
42 | specified in Section 2(b)(1)-(2) are not Copyright and Similar
43 | Rights.
44 |
45 | d. Effective Technological Measures means those measures that, in the
46 | absence of proper authority, may not be circumvented under laws
47 | fulfilling obligations under Article 11 of the WIPO Copyright
48 | Treaty adopted on December 20, 1996, and/or similar international
49 | agreements.
50 |
51 | e. Exceptions and Limitations means fair use, fair dealing, and/or
52 | any other exception or limitation to Copyright and Similar Rights
53 | that applies to Your use of the Licensed Material.
54 |
55 | f. Licensed Material means the artistic or literary work, database,
56 | or other material to which the Licensor applied this Public
57 | License.
58 |
59 | g. Licensed Rights means the rights granted to You subject to the
60 | terms and conditions of this Public License, which are limited to
61 | all Copyright and Similar Rights that apply to Your use of the
62 | Licensed Material and that the Licensor has authority to license.
63 |
64 | h. Licensor means the individual(s) or entity(ies) granting rights
65 | under this Public License.
66 |
67 | i. Share means to provide material to the public by any means or
68 | process that requires permission under the Licensed Rights, such
69 | as reproduction, public display, public performance, distribution,
70 | dissemination, communication, or importation, and to make material
71 | available to the public including in ways that members of the
72 | public may access the material from a place and at a time
73 | individually chosen by them.
74 |
75 | j. Sui Generis Database Rights means rights other than copyright
76 | resulting from Directive 96/9/EC of the European Parliament and of
77 | the Council of 11 March 1996 on the legal protection of databases,
78 | as amended and/or succeeded, as well as other essentially
79 | equivalent rights anywhere in the world.
80 |
81 | k. You means the individual or entity exercising the Licensed Rights
82 | under this Public License. Your has a corresponding meaning.
83 |
84 |
85 | Section 2 -- Scope.
86 |
87 | a. License grant.
88 |
89 | 1. Subject to the terms and conditions of this Public License,
90 | the Licensor hereby grants You a worldwide, royalty-free,
91 | non-sublicensable, non-exclusive, irrevocable license to
92 | exercise the Licensed Rights in the Licensed Material to:
93 |
94 | a. reproduce and Share the Licensed Material, in whole or
95 | in part; and
96 |
97 | b. produce, reproduce, and Share Adapted Material.
98 |
99 | 2. Exceptions and Limitations. For the avoidance of doubt, where
100 | Exceptions and Limitations apply to Your use, this Public
101 | License does not apply, and You do not need to comply with
102 | its terms and conditions.
103 |
104 | 3. Term. The term of this Public License is specified in Section
105 | 6(a).
106 |
107 | 4. Media and formats; technical modifications allowed. The
108 | Licensor authorizes You to exercise the Licensed Rights in
109 | all media and formats whether now known or hereafter created,
110 | and to make technical modifications necessary to do so. The
111 | Licensor waives and/or agrees not to assert any right or
112 | authority to forbid You from making technical modifications
113 | necessary to exercise the Licensed Rights, including
114 | technical modifications necessary to circumvent Effective
115 | Technological Measures. For purposes of this Public License,
116 | simply making modifications authorized by this Section 2(a)
117 | (4) never produces Adapted Material.
118 |
119 | 5. Downstream recipients.
120 |
121 | a. Offer from the Licensor -- Licensed Material. Every
122 | recipient of the Licensed Material automatically
123 | receives an offer from the Licensor to exercise the
124 | Licensed Rights under the terms and conditions of this
125 | Public License.
126 |
127 | b. No downstream restrictions. You may not offer or impose
128 | any additional or different terms or conditions on, or
129 | apply any Effective Technological Measures to, the
130 | Licensed Material if doing so restricts exercise of the
131 | Licensed Rights by any recipient of the Licensed
132 | Material.
133 |
134 | 6. No endorsement. Nothing in this Public License constitutes or
135 | may be construed as permission to assert or imply that You
136 | are, or that Your use of the Licensed Material is, connected
137 | with, or sponsored, endorsed, or granted official status by,
138 | the Licensor or others designated to receive attribution as
139 | provided in Section 3(a)(1)(A)(i).
140 |
141 | b. Other rights.
142 |
143 | 1. Moral rights, such as the right of integrity, are not
144 | licensed under this Public License, nor are publicity,
145 | privacy, and/or other similar personality rights; however, to
146 | the extent possible, the Licensor waives and/or agrees not to
147 | assert any such rights held by the Licensor to the limited
148 | extent necessary to allow You to exercise the Licensed
149 | Rights, but not otherwise.
150 |
151 | 2. Patent and trademark rights are not licensed under this
152 | Public License.
153 |
154 | 3. To the extent possible, the Licensor waives any right to
155 | collect royalties from You for the exercise of the Licensed
156 | Rights, whether directly or through a collecting society
157 | under any voluntary or waivable statutory or compulsory
158 | licensing scheme. In all other cases the Licensor expressly
159 | reserves any right to collect such royalties.
160 |
161 |
162 | Section 3 -- License Conditions.
163 |
164 | Your exercise of the Licensed Rights is expressly made subject to the
165 | following conditions.
166 |
167 | a. Attribution.
168 |
169 | 1. If You Share the Licensed Material (including in modified
170 | form), You must:
171 |
172 | a. retain the following if it is supplied by the Licensor
173 | with the Licensed Material:
174 |
175 | i. identification of the creator(s) of the Licensed
176 | Material and any others designated to receive
177 | attribution, in any reasonable manner requested by
178 | the Licensor (including by pseudonym if
179 | designated);
180 |
181 | ii. a copyright notice;
182 |
183 | iii. a notice that refers to this Public License;
184 |
185 | iv. a notice that refers to the disclaimer of
186 | warranties;
187 |
188 | v. a URI or hyperlink to the Licensed Material to the
189 | extent reasonably practicable;
190 |
191 | b. indicate if You modified the Licensed Material and
192 | retain an indication of any previous modifications; and
193 |
194 | c. indicate the Licensed Material is licensed under this
195 | Public License, and include the text of, or the URI or
196 | hyperlink to, this Public License.
197 |
198 | 2. You may satisfy the conditions in Section 3(a)(1) in any
199 | reasonable manner based on the medium, means, and context in
200 | which You Share the Licensed Material. For example, it may be
201 | reasonable to satisfy the conditions by providing a URI or
202 | hyperlink to a resource that includes the required
203 | information.
204 |
205 | 3. If requested by the Licensor, You must remove any of the
206 | information required by Section 3(a)(1)(A) to the extent
207 | reasonably practicable.
208 |
209 | 4. If You Share Adapted Material You produce, the Adapter's
210 | License You apply must not prevent recipients of the Adapted
211 | Material from complying with this Public License.
212 |
213 |
214 | Section 4 -- Sui Generis Database Rights.
215 |
216 | Where the Licensed Rights include Sui Generis Database Rights that
217 | apply to Your use of the Licensed Material:
218 |
219 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right
220 | to extract, reuse, reproduce, and Share all or a substantial
221 | portion of the contents of the database;
222 |
223 | b. if You include all or a substantial portion of the database
224 | contents in a database in which You have Sui Generis Database
225 | Rights, then the database in which You have Sui Generis Database
226 | Rights (but not its individual contents) is Adapted Material; and
227 |
228 | c. You must comply with the conditions in Section 3(a) if You Share
229 | all or a substantial portion of the contents of the database.
230 |
231 | For the avoidance of doubt, this Section 4 supplements and does not
232 | replace Your obligations under this Public License where the Licensed
233 | Rights include other Copyright and Similar Rights.
234 |
235 |
236 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
237 |
238 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
239 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
240 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
241 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
242 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
243 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
244 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
245 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
246 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
247 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
248 |
249 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
250 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
251 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
252 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
253 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
254 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
255 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
256 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
257 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
258 |
259 | c. The disclaimer of warranties and limitation of liability provided
260 | above shall be interpreted in a manner that, to the extent
261 | possible, most closely approximates an absolute disclaimer and
262 | waiver of all liability.
263 |
264 |
265 | Section 6 -- Term and Termination.
266 |
267 | a. This Public License applies for the term of the Copyright and
268 | Similar Rights licensed here. However, if You fail to comply with
269 | this Public License, then Your rights under this Public License
270 | terminate automatically.
271 |
272 | b. Where Your right to use the Licensed Material has terminated under
273 | Section 6(a), it reinstates:
274 |
275 | 1. automatically as of the date the violation is cured, provided
276 | it is cured within 30 days of Your discovery of the
277 | violation; or
278 |
279 | 2. upon express reinstatement by the Licensor.
280 |
281 | For the avoidance of doubt, this Section 6(b) does not affect any
282 | right the Licensor may have to seek remedies for Your violations
283 | of this Public License.
284 |
285 | c. For the avoidance of doubt, the Licensor may also offer the
286 | Licensed Material under separate terms or conditions or stop
287 | distributing the Licensed Material at any time; however, doing so
288 | will not terminate this Public License.
289 |
290 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
291 | License.
292 |
293 |
294 | Section 7 -- Other Terms and Conditions.
295 |
296 | a. The Licensor shall not be bound by any additional or different
297 | terms or conditions communicated by You unless expressly agreed.
298 |
299 | b. Any arrangements, understandings, or agreements regarding the
300 | Licensed Material not stated herein are separate from and
301 | independent of the terms and conditions of this Public License.
302 |
303 |
304 | Section 8 -- Interpretation.
305 |
306 | a. For the avoidance of doubt, this Public License does not, and
307 | shall not be interpreted to, reduce, limit, restrict, or impose
308 | conditions on any use of the Licensed Material that could lawfully
309 | be made without permission under this Public License.
310 |
311 | b. To the extent possible, if any provision of this Public License is
312 | deemed unenforceable, it shall be automatically reformed to the
313 | minimum extent necessary to make it enforceable. If the provision
314 | cannot be reformed, it shall be severed from this Public License
315 | without affecting the enforceability of the remaining terms and
316 | conditions.
317 |
318 | c. No term or condition of this Public License will be waived and no
319 | failure to comply consented to unless expressly agreed to by the
320 | Licensor.
321 |
322 | d. Nothing in this Public License constitutes or may be interpreted
323 | as a limitation upon, or waiver of, any privileges and immunities
324 | that apply to the Licensor or You, including from the legal
325 | processes of any jurisdiction or authority.
326 |
327 | - - - -
328 |
329 | BSD 3-Clause License
330 |
331 | Redistribution and use in source and binary forms, with or without
332 | modification, are permitted provided that the following conditions are met:
333 |
334 | 1. Redistributions of source code must retain the above copyright notice, this
335 | list of conditions and the following disclaimer.
336 |
337 | 2. Redistributions in binary form must reproduce the above copyright notice,
338 | this list of conditions and the following disclaimer in the documentation
339 | and/or other materials provided with the distribution.
340 |
341 | 3. Neither the name of the copyright holder nor the names of its
342 | contributors may be used to endorse or promote products derived from
343 | this software without specific prior written permission.
344 |
345 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
346 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
347 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
348 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
349 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
350 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
351 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
352 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
353 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
354 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
355 |
356 | - - - -
357 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HTML Build Tools
2 |
3 | This repository contains the tools and instructions necessary for building the [HTML Standard](https://html.spec.whatwg.org/multipage/) from its [source](https://github.com/whatwg/html).
4 |
5 | ## Getting set up
6 |
7 | Make sure you have `git` installed on your system, and you are using a Bash shell. (On Windows, `cmd.exe` will not work, but the Git Bash shell that comes with [Git for Windows](https://git-for-windows.github.io/) works nicely.)
8 |
9 | Then, clone this ([html-build](https://github.com/whatwg/html-build)) repo:
10 |
11 | ```bash
12 | git clone https://github.com/whatwg/html-build.git && cd html-build
13 | ```
14 |
15 | ## Performing a build
16 |
17 | You have a decision to make as to how you want to do your builds:
18 |
19 | - Locally on your computer
20 | - Remotely, using the [build server](https://github.com/whatwg/build.whatwg.org)
21 | - Using a [Docker](https://www.docker.com/) container
22 |
23 | Local builds will be fastest, but require installing a lot of prerequisites. Using the build server is easiest, but slowest. Docker has speed close to a local build, and only requires Docker as a prerequisite.
24 |
25 | ### Building locally
26 |
27 | #### Prerequisites
28 |
29 | To build locally, you'll need the following commands installed on your system:
30 |
31 | - `curl`, `grep`, `perl`, `unzip`, `cargo`
32 |
33 | Optionally, for faster builds, you can install [Wattsi](https://github.com/whatwg/wattsi). If you don't bother with that, we will use the [build server](https://github.com/whatwg/build.whatwg.org), which requires an internet connection.
34 |
35 | If you're using a local install of Wattsi, then optionally, you can install Python 3.7+ with [pipx](https://pypa.github.io/pipx/), to enable syntax highlighting of `pre` contents.
36 |
37 | #### Running the build
38 |
39 | Run the `build.sh` script from inside your `html-build` working directory, like this:
40 |
41 | ```bash
42 | ./build.sh
43 | ```
44 |
45 | The first time this runs, it will look for the HTML source in a `../html` folder, if it exists. Otherwise, it may ask for your input on where to clone the HTML source from, or where on your system to find it if you've already done that. If you're working to submit a pull request to [whatwg/html](https://github.com/whatwg/html), be sure to give it the URL of your fork.
46 |
47 | You may also set the environment variable `$HTML_SOURCE` to use a custom location for the HTML source. For example:
48 |
49 | ```bash
50 | HTML_SOURCE=~/hacks/dhtml ./build.sh
51 | ```
52 |
53 | ### Building using the build server
54 |
55 | To use the build server, use the `--remote` flag:
56 |
57 | ```bash
58 | ./build.sh --remote
59 | ```
60 |
61 | This will ZIP up most of the files in the `html/` directory, send them to the build server, get back another ZIP file with the output, and unzip those into the output folder.
62 |
63 | You will need `zip` and `unzip` commands available in your `$PATH`.
64 |
65 | ### Building using a Docker container
66 |
67 | The Dockerized version of the build allows you to run the build entirely inside a "container" (lightweight virtual machine). This includes tricky dependencies like a local copy of Wattsi and Python.
68 |
69 | To perform a Dockerized build, use the `--docker` flag:
70 |
71 | ```bash
72 | ./build.sh --docker
73 | ```
74 |
75 | The first time you do this, Docker will download a bunch of stuff to set up the container properly, but subsequent runs will simply build the standard and be very fast.
76 |
77 | If you get permissions errors on Windows, you need to first [configure](https://docs.docker.com/docker-for-windows/#file-sharing) your `html-build/` and `html/` directories to be shareable with Docker.
78 |
79 | ## Output
80 |
81 | After you complete the build steps above, the build will run and generate the single-page version of the spec, the multipage version, and more. If all goes well, you should very soon have an `output/` directory containing important files like `index.html`, `multipage/`, and `dev/`.
82 |
83 | You can also use the `--serve` option to `build.sh` to automatically serve the results on `http://localhost:8080/` after building (as long as you have Python 3.7+ installed).
84 |
85 | Now you're ready to edit the `html/source` file—and after you make your changes, you can run the `build.sh` script again to see the new output.
86 |
87 | ## Fast local iteration
88 |
89 | There are a number of options to disable certain parts of the build process to speed up local iteration. Run `./build.sh help` to see them all, or just use the `--fast` flag to get maximally-fast builds.
90 |
91 | ## A note on Git history
92 |
93 | Your clone doesn't need the HTML standard's complete revision history just for you to build the spec and contribute patches. So, if you use `build.sh` to create the clone, we don't start you out with a clone of the history. That makes your first build finish much faster. And if later you decide you do want to clone the complete history, you can still get it, by doing this:
94 |
95 | ```bash
96 | cd ./html && git fetch --unshallow
97 | ```
98 |
99 | That said, if you really do want to *start out* with the complete history of the repo, then run the build script for the first time like this:
100 |
101 | ```bash
102 | HTML_GIT_CLONE_OPTIONS="" ./build.sh
103 | ```
104 |
105 | That will clone the complete history for you. But be warned: It'll make your first build take *dramatically* longer to finish!
106 |
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -o errexit # abort the script when a command fails
3 | set -o nounset # treat expansion of an unset variable as an error
4 | set -o pipefail # a pipeline fails if any command in it fails
5 |
6 | # cd to the directory containing this script
7 | cd "$(dirname "$0")"
8 | DIR=$(pwd)
9 |
10 | # The latest required version of Wattsi. Update this if you change how ./build.sh invokes Wattsi;
11 | # it will cause a warning if Wattsi's self-reported version is lower. Note that there's no need to
12 | # update this on every revision of Wattsi; only do so when a warning is justified.
13 | declare -r WATTSI_LATEST=140
14 |
15 | # Shared state variables throughout this script
16 | LOCAL_WATTSI=true
17 | WATTSI_RESULT=0
18 | DO_UPDATE=true # cleared by --no-update and --fast
19 | DO_LINT=true # cleared by --no-lint and --fast
20 | DO_HIGHLIGHT=true # cleared by --no-highlight and --fast, or when pipx cannot be found
21 | SINGLE_PAGE_ONLY=false # set by --single-page and --fast
22 | USE_DOCKER=false # set by --docker
23 | USE_SERVER=false # set by --remote
24 | VERBOSE=false # set by --verbose
25 | QUIET=false # set by --quiet
26 | SERVE=false # set by --serve
27 | HTML_SHA="" # filled in by main: $SHA_OVERRIDE if set, otherwise the HEAD commit of the HTML source
28 | HIGHLIGHT_SERVER_PID=""
29 |
30 | # Can be set from the outside to customize the script, but the defaults are usually fine. (Only
31 | # $HTML_SOURCE is documented.) $HTML_SOURCE will be determined inside the main function.
32 | HTML_SOURCE=${HTML_SOURCE:-}
33 | HTML_CACHE=${HTML_CACHE:-$DIR/.cache}
34 | HTML_TEMP=${HTML_TEMP:-$DIR/.temp}
35 | HTML_OUTPUT=${HTML_OUTPUT:-$DIR/output}
36 | HTML_GIT_CLONE_OPTIONS=${HTML_GIT_CLONE_OPTIONS:-"--depth=2"} # NOTE(review): ":-" also replaces an explicitly empty value, so HTML_GIT_CLONE_OPTIONS="" still yields --depth=2
37 |
38 | # This is used by child scripts, and so we export it
39 | export HTML_CACHE
40 |
41 | # Used specifically when the Dockerfile calls this script
42 | SKIP_BUILD_UPDATE_CHECK=${SKIP_BUILD_UPDATE_CHECK:-false}
43 | SHA_OVERRIDE=${SHA_OVERRIDE:-}
44 | BUILD_SHA_OVERRIDE=${BUILD_SHA_OVERRIDE:-}
45 |
46 | # This needs to be coordinated with the bs-highlighter package
47 | declare -r HIGHLIGHT_SERVER_URL="http://127.0.0.1:8080"
48 | # Port for --serve. It is the same port as $HIGHLIGHT_SERVER_URL; main stops the highlight server before serving.
49 | declare -r SERVE_PORT=8080
50 |
51 | function main { # Entry point of the script. Arguments: all command-line arguments.
52 |   processCommandLineArgs "$@"
53 |
54 |   # $SKIP_BUILD_UPDATE_CHECK is set inside the Dockerfile so that we don't check for updates both inside and outside
55 |   # the Docker container.
56 |   if [[ $DO_UPDATE == "true" && $SKIP_BUILD_UPDATE_CHECK != "true" ]]; then
57 |     checkHTMLBuildIsUpToDate
58 |   fi
59 |
60 |   findHTMLSource
61 |
62 |   clearDir "$HTML_OUTPUT"
63 |   # Set these up so rsync will not complain about either being missing
64 |   mkdir -p "$HTML_OUTPUT/commit-snapshots"
65 |   mkdir -p "$HTML_OUTPUT/review-drafts"
66 |
67 |   clearCacheIfNecessary
68 |
69 |   local html_git_dir="$HTML_SOURCE/.git/"
70 |   HTML_SHA=${SHA_OVERRIDE:-$(git --git-dir="$html_git_dir" rev-parse HEAD)}
71 |   # The Docker and build-server paths do the rest of the work themselves, so the script exits after them.
72 |   if [[ $USE_DOCKER == "true" ]]; then
73 |     doDockerBuild
74 |     exit 0
75 |   fi
76 |
77 |   if [[ $USE_SERVER == "true" ]]; then
78 |     doServerBuild
79 |
80 |     if [[ $SERVE == "true" ]]; then
81 |       cd "$HTML_OUTPUT"
82 |       python3 -m http.server "$SERVE_PORT"
83 |     fi
84 |
85 |     exit 0
86 |   fi
87 |   # Local build from here on.
88 |   checkWattsi
89 |   ensureHighlighterInstalled
90 |
91 |   doLint
92 |
93 |   updateRemoteDataFiles
94 |
95 |   startHighlightServer
96 |
97 |   processSource "source" "default"
98 |
99 |   if [[ -e "$html_git_dir" ]]; then
100 |     # This is based on https://github.com/whatwg/whatwg.org/pull/201 and should be kept synchronized
101 |     # with that.
102 |     local changed_files
103 |     changed_files=$(git --git-dir="$html_git_dir" show --format="format:" --name-only HEAD)
104 |     # Build a review draft for every review-drafts/*.wattsi file touched by the HEAD commit.
105 |     local changed
106 |     for changed in $changed_files; do # Omit quotes around variable to split on whitespace
107 |       if ! [[ "$changed" =~ ^review-drafts/.*\.wattsi$ ]]; then # the dot before the extension is escaped so it only matches a literal "."
108 |         continue
109 |       fi
110 |       processSource "$changed" "review"
111 |     done
112 |   else
113 |     echo ""
114 |     echo "Skipping review draft production as the .git directory is not present"
115 |     echo "(This always happens if you use the --docker or --remote options.)"
116 |   fi
117 |
118 |   $QUIET || echo
119 |   $QUIET || echo "Success!"
120 |
121 |   if [[ $SERVE == "true" ]]; then
122 |     stopHighlightServer
123 |     cd "$HTML_OUTPUT"
124 |     python3 -m http.server "$SERVE_PORT"
125 |   fi
126 | }
127 |
128 | # Processes incoming command-line arguments; unrecognized arguments are silently ignored
129 | # Arguments: all arguments to this shell script
130 | # Output:
131 | # - If the clean or help commands are given, perform them and exit with status 0
132 | # - Otherwise, sets $DO_UPDATE, $DO_LINT, $DO_HIGHLIGHT, $SINGLE_PAGE_ONLY, $USE_DOCKER, $USE_SERVER, $QUIET, $VERBOSE, and $SERVE appropriately; exits with status 1 if --docker and --remote are combined
133 | function processCommandLineArgs {
134 | local arg
135 | for arg in "$@"
136 | do
137 | case $arg in
138 | clean)
139 | clearDir "$HTML_CACHE"
140 | exit 0
141 | ;;
142 | --help|help)
143 | echo "Commands:"
144 | echo " $0 Build the HTML Standard."
145 | echo " $0 clean Remove downloaded dependencies and generated files (then stop)."
146 | echo " $0 help Show this usage statement."
147 | echo
148 | echo "Build options:"
149 | echo " -d|--docker Use Docker to build in a container."
150 | echo " -r|--remote Use the build server."
151 | echo " -s|--serve After building, serve the results on http://localhost:$SERVE_PORT."
152 | echo " -n|--no-update Don't update before building; just build."
153 | echo " -l|--no-lint Don't lint before building; just build."
154 | echo " -h|--no-highlight Don't syntax-highlight the output."
155 | echo " -p|--single-page Only build the single-page variant of the spec."
156 | echo " -f|--fast Alias for --no-update --no-lint --no-highlight --single-page."
157 | echo " -q|--quiet Don't emit any messages except errors/warnings."
158 | echo " -v|--verbose Show verbose output from every build step."
159 | exit 0
160 | ;;
161 | -n|--no-update|--no-updates)
162 | DO_UPDATE=false
163 | ;;
164 | -l|--no-lint)
165 | DO_LINT=false
166 | ;;
167 | -h|--no-highlight) # note: -h means --no-highlight here, not help
168 | DO_HIGHLIGHT=false
169 | ;;
170 | -p|--single-page)
171 | SINGLE_PAGE_ONLY=true
172 | ;;
173 | -f|--fast)
174 | DO_UPDATE=false
175 | DO_LINT=false
176 | DO_HIGHLIGHT=false
177 | SINGLE_PAGE_ONLY=true
178 | ;;
179 | -d|--docker)
180 | USE_DOCKER=true
181 | ;;
182 | -r|--remote)
183 | USE_SERVER=true
184 | ;;
185 | -q|--quiet) # --quiet and --verbose cancel each other; the one given last wins
186 | QUIET=true
187 | VERBOSE=false
188 | ;;
189 | -v|--verbose)
190 | VERBOSE=true
191 | QUIET=false
192 | set -vx # also trace the script itself: print lines as read and commands as executed
193 | ;;
194 | -s|--serve)
195 | SERVE=true
196 | ;;
197 | *) # anything else is ignored
198 | ;;
199 | esac
200 | done
201 |
202 | if [[ $USE_DOCKER == "true" && $USE_SERVER == "true" ]]; then
203 | echo "Error: --docker and --remote are mutually exclusive."
204 | exit 1
205 | fi
206 | }
207 |
208 | # Checks if the html-build repository is up to date with the main branch of its origin remote
209 | # Arguments: none
210 | # Output: will tell the user and exit the script with code 1 if not up to date
211 | function checkHTMLBuildIsUpToDate {
212 | $QUIET || echo "Checking if html-build is up to date..."
213 |
214 | # TODO: `git remote get-url origin` is nicer, but new in Git 2.7.
215 | local origin_url
216 | origin_url=$(git config --get remote.origin.url)
217 |
218 | local git_fetch_args=()
219 | if ! $VERBOSE ; then
220 | git_fetch_args+=( --quiet )
221 | fi
222 | git_fetch_args+=( "$origin_url" main) # fetch main by URL; the result is only recorded in FETCH_HEAD
223 | git fetch "${git_fetch_args[@]}"
224 |
225 | local new_commits
226 | new_commits=$(git rev-list --count HEAD..FETCH_HEAD) # number of fetched commits that HEAD does not contain
227 | if [[ $new_commits != "0" ]]; then
228 | $QUIET || echo
229 | echo -n "Your local branch is $new_commits "
230 | [[ $new_commits == "1" ]] && echo -n "commit" || echo -n "commits"
231 | echo " behind $origin_url:"
232 | git --no-pager log --oneline HEAD..FETCH_HEAD
233 | echo
234 | echo "To update, run this command:"
235 | echo
236 | echo " git pull --rebase origin main"
237 | echo
238 | echo "This check can be bypassed with the --no-update option."
239 | exit 1
240 | fi
241 | }
242 |
243 | # Tries to install the bs-highlighter Python package (via pipx) if necessary
244 | # - Arguments: none
245 | # - Output:
246 | # - Either bs-highlighter-server will be in the $PATH, or $DO_HIGHLIGHT will be set to false and
247 | # a warning will be echoed.
248 | function ensureHighlighterInstalled {
249 | # If we're not using local Wattsi then we won't use the local highlighter.
250 | if [[ $LOCAL_WATTSI == "true" && $DO_HIGHLIGHT == "true" ]]; then
251 | if hash pipx 2>/dev/null; then # hash succeeds only when the command can be found
252 | if ! hash bs-highlighter-server 2>/dev/null; then
253 | pipx install bs-highlighter
254 | fi
255 | else
256 | echo
257 | echo "Warning: could not find pipx in your PATH. Disabling syntax highlighting."
258 | echo
259 | DO_HIGHLIGHT="false"
260 | fi
261 | fi
262 | }
263 |
264 | # Runs the lint.sh script on $HTML_SOURCE/source, unless linting was disabled ($DO_LINT is "false")
265 | # - Arguments: none
266 | # - Output:
267 | # - Will echo any errors and exit the script with error code 1 if lint fails.
268 | function doLint {
269 | if [[ $DO_LINT == "false" ]]; then
270 | return
271 | fi
272 |
273 | $QUIET || echo "Linting the source file..."
274 | ./lint.sh "$HTML_SOURCE/source" || { # relative path: the script has already cd'ed to its own directory
275 | echo
276 | echo "There were lint errors. Stopping."
277 | exit 1
278 | }
279 | }
280 |
281 | # Finds the location of the HTML Standard, and stores it in the HTML_SOURCE variable.
282 | # A preset $HTML_SOURCE is used if it contains a source file; otherwise html/source is looked for alongside and then inside html-build. If nothing is found, the user is prompted interactively.
283 | # - Arguments: none
284 | # - Output:
285 | # - Sets and exports $HTML_SOURCE
286 | function findHTMLSource {
287 | $QUIET || echo "Looking for the HTML source (set HTML_SOURCE to override)..."
288 | if [[ $HTML_SOURCE == "" ]]; then
289 | local parent_dir
290 | parent_dir=$(dirname "$DIR")
291 |
292 | if [[ -f "$parent_dir/html/source" ]]; then
293 | HTML_SOURCE="$parent_dir/html"
294 | $QUIET || echo "Found $HTML_SOURCE (alongside html-build)..."
295 | else
296 | if [[ -f "$DIR/html/source" ]]; then
297 | HTML_SOURCE="$DIR/html"
298 | $QUIET || echo "Found $HTML_SOURCE (inside html-build)..."
299 | else
300 | $QUIET || echo "Didn't find the HTML source on your system..."
301 | chooseRepo
302 | fi
303 | fi
304 | else
305 | if [[ -f "$HTML_SOURCE/source" ]]; then
306 | $QUIET || echo "Found $HTML_SOURCE (from HTML_SOURCE)..."
307 | else
308 | $QUIET || echo "Looked in the $HTML_SOURCE directory but didn't find HTML source there..."
309 | HTML_SOURCE="" # discard the unusable preset so the prompt starts from a clean state
310 | chooseRepo
311 | fi
312 | fi
313 |
314 | export HTML_SOURCE
315 | }
316 |
317 | # Interactively prompts the user for where their HTML source is, re-prompting on empty or unrecognized input.
318 | # - Arguments: none
319 | # - Output:
320 | #   - Sets $HTML_SOURCE (through confirmRepo), or exits the script if the user quits or has no fork yet
321 | function chooseRepo {
322 |   echo
323 |   echo "What HTML source would you like to build from?"
324 |   echo
325 |   echo "1) Use an existing clone on my local filesystem."
326 |   echo "2) Create a clone from https://github.com/whatwg/html."
327 |   echo "3) Create a clone from an existing fork, by GitHub username."
328 |   echo "4) Create a clone from an existing fork, by custom URL."
329 |   echo "5) Quit"
330 |   echo
331 |   local choice gh_username yn
332 |   read -r -e -p "Choose 1-5: " choice
333 |   if [[ $choice == "1" ]]; then
334 |     read -r -e -p "Path to your existing clone: "
335 |     HTML_SOURCE=$(echo "$REPLY" | xargs) # trims leading/trailing space
336 |     if [[ $HTML_SOURCE = "" ]]; then
337 |       chooseRepo
338 |       return # the nested prompt already chose and confirmed a source; do not confirm a second time
339 |     fi
340 |     confirmRepo
341 |   elif [[ $choice == "2" ]]; then
342 |     HTML_REPO="https://github.com/whatwg/html.git"
343 |     confirmRepo
344 |   elif [[ $choice == "3" ]]; then
345 |     echo
346 |     read -r -e -p "GitHub username of fork owner: " gh_username
347 |     gh_username=$(echo "$gh_username" | xargs) # trims leading/trailing space
348 |     if [[ $gh_username == "" ]]; then
349 |       chooseRepo
350 |       return # do not go on to ask about a fork for an empty username
351 |     fi
352 |     echo
353 |     echo "Does a fork already exist at https://github.com/$gh_username/html?"
354 |     echo
355 |     read -r -e -p "Y or N? " yn
356 |     if [[ $yn == "y" || $yn == "Y" ]]; then
357 |       HTML_REPO="https://github.com/$gh_username/html.git"
358 |       confirmRepo
359 |     else
360 |       echo
361 |       echo "Before proceeding, first go to https://github.com/whatwg/html and create a fork."
362 |       exit
363 |     fi
364 |   elif [[ $choice == "4" ]]; then
365 |     echo
366 |     read -r -e -p "URL: "
367 |     REPLY=$(echo "$REPLY" | xargs) # trims leading/trailing space
368 |     if [[ $REPLY == "" ]]; then
369 |       chooseRepo
370 |       return # do not overwrite $HTML_REPO with an empty URL after the nested prompt
371 |     fi
372 |     HTML_REPO=$REPLY
373 |     confirmRepo
374 |   elif [[ $choice == "5" || $choice == "q" || $choice == "Q" ]]; then
375 |     echo
376 |     echo "Can't build without a source repo to build from. Quitting..."
377 |     exit
378 |   else
379 |     chooseRepo
380 |   fi
381 | }
382 |
383 | # Confirms the currently-set HTML_SOURCE with the user, or (when it is empty) clones HTML_REPO into $DIR/html
384 | # - Arguments: none; reads $HTML_SOURCE, $HTML_REPO, and a Y/N answer typed by the user
385 | # - Output:
386 | #   - $HTML_SOURCE will now point to a folder containing the HTML Standard; any answer other than y/Y goes back to chooseRepo
387 | function confirmRepo {
388 |   if [[ $HTML_SOURCE != "" ]]; then
389 |     if [[ -f "$HTML_SOURCE/source" ]]; then
390 |       echo
391 |       echo "OK, build from the $HTML_SOURCE/source file?"
392 |       echo
393 |       # The answer must be read into the same variable that is tested below.
394 |       local build_yn
395 |       read -r -e -p "Y or N? " build_yn
396 |       if [[ $build_yn == "y" || $build_yn == "Y" ]]; then
397 |         return
398 |       else
399 |         HTML_SOURCE=""
400 |         chooseRepo
401 |       fi
402 |     else
403 |       echo
404 |       echo "$HTML_SOURCE/source file doesn't exist. Please choose another option."
405 |       HTML_SOURCE=""
406 |       chooseRepo
407 |     fi
408 |     return
409 |   fi
410 |   HTML_SOURCE=${HTML_SOURCE:-$DIR/html}
411 |   echo
412 |   echo "OK, clone from $HTML_REPO?"
413 |   echo
414 |
415 |   local clone_yn
416 |   read -r -e -p "Y or N? " clone_yn
417 |
418 |   local git_clone_args=( "$HTML_GIT_CLONE_OPTIONS" ) # passed to git as one single argument
419 |   $QUIET && git_clone_args+=( --quiet )
420 |   $VERBOSE && git_clone_args+=( --verbose )
421 |   git_clone_args+=( "$HTML_REPO" "$HTML_SOURCE" )
422 |   if [[ $clone_yn == "y" || $clone_yn == "Y" ]]; then
423 |     git clone "${git_clone_args[@]}"
424 |   else
425 |     HTML_SOURCE=""
426 |     chooseRepo
427 |   fi
428 | }
429 |
430 | # Gives the relative path to $2 from $1
431 | # From http://stackoverflow.com/a/12498485
432 | # - Arguments:
433 | # - $1: absolute path beginning with /
434 | # - $2: absolute path beginning with /
435 | # - Output:
436 | # - Echoes the relative path
437 | function relativePath {
438 | local source=$1
439 | local target=$2
440 |
441 | local commonPart=$source
442 | local result=""
443 |
444 | while [[ "${target#"$commonPart"}" == "${target}" ]]; do
445 | # stripping $commonPart as a prefix changed nothing, so the candidate common part is not correct
446 | # go up one level (reduce common part)
447 | commonPart=$(dirname "$commonPart")
448 | # and record that we went back, with correct / handling
449 | if [[ $result == "" ]]; then
450 | result=".."
451 | else
452 | result="../$result"
453 | fi
454 | done
455 |
456 | if [[ $commonPart == "/" ]]; then
457 | # special case for root (no common path)
458 | result="$result/"
459 | fi
460 |
461 | # since we now have identified the common part,
462 | # compute the non-common part
463 | local forwardPart="${target#"$commonPart"}"
464 |
465 | # and now stick all parts together
466 | if [[ $result != "" ]] && [[ $forwardPart != "" ]]; then
467 | result="$result$forwardPart"
468 | elif [[ $forwardPart != "" ]]; then
469 | # no "..": drop the first character of the remainder (the separating slash when source is an ancestor directory of target)
470 | result="${forwardPart:1}"
471 | fi
472 |
473 | echo "$result"
474 | }
475 |
# Runs the build inside Docker: refreshes the Wattsi base image, builds the whatwg-html image from
# the current directory, and runs it with the source, cache and output directories bind-mounted.
# The command-line switches of this run are forwarded to the script inside the container.
# Arguments: none
# Output: the container runs the build; when $SERVE is true, $SERVE_PORT is published and --serve
#         is passed along
function doDockerBuild {
  # Always pull ghcr.io/whatwg/wattsi:latest first. Otherwise the locally cached copy would be
  # reused forever once downloaded. The pull transfers nothing when the cache is already current.
  local pull_cmd=( docker pull )
  if $QUIET; then
    pull_cmd+=( --quiet )
  fi
  pull_cmd+=( ghcr.io/whatwg/wattsi:latest )
  "${pull_cmd[@]}"

  local build_cmd=( docker build --tag whatwg-html )
  if $QUIET; then
    build_cmd+=( --quiet )
  fi
  "${build_cmd[@]}" .

  # Everything before the image name is an option to `docker run`; everything after it is handed
  # to the container's entry point.
  local container_args=()
  if $SERVE; then
    container_args+=( --publish "$SERVE_PORT:$SERVE_PORT" )
  fi
  container_args+=( whatwg-html )
  if $QUIET; then
    container_args+=( --quiet )
  fi
  if $VERBOSE; then
    container_args+=( --verbose )
  fi
  if ! $DO_UPDATE; then
    container_args+=( --no-update )
  fi
  if ! $DO_LINT; then
    container_args+=( --no-lint )
  fi
  if ! $DO_HIGHLIGHT; then
    container_args+=( --no-highlight )
  fi
  if $SINGLE_PAGE_ONLY; then
    container_args+=( --single-page )
  fi
  if $SERVE; then
    container_args+=( --serve )
  fi

  # The container has no .git directory, so hand it the html-build SHA explicitly
  docker run --rm --interactive --tty \
    --env "BUILD_SHA_OVERRIDE=$(git rev-parse HEAD)" \
    --mount "type=bind,source=$HTML_SOURCE,destination=/whatwg/html-build/html,readonly=1" \
    --mount "type=bind,source=$HTML_CACHE,destination=/whatwg/html-build/.cache" \
    --mount "type=bind,source=$HTML_OUTPUT,destination=/whatwg/html-build/output" \
    "${container_args[@]}"
}
511 |
# Performs the build using the build server, zipping up the input, sending it to the server, and
# unzipping the output.
# Arguments: none
# Output: the $HTML_OUTPUT directory will contain the built files. If the server reports a
#         non-zero Exit-Code header (or none at all), the response body is printed and the script
#         exits with that code (1 when the header is missing).
function doServerBuild {
  clearDir "$HTML_TEMP"

  local input_zip="build-server-input.zip"
  local build_server_output="build-server-output"
  local build_server_headers="build-server-headers.txt"

  # Keep include list in sync with `processSource`
  #
  # We use an allowlist (--include) instead of a blocklist (--exclude) to avoid accidentally
  # sending files that the user might not anticipate sending to a remote server, e.g. their
  # private-notes-on-current-pull-request.txt.
  #
  # The contents of fonts/, images/, and dev/ are not round-tripped to the server, but instead
  # copied below in this function. (We still send the directories to avoid the build script on the
  # server getting confused about their absence.) demos/ needs to be sent in full for inlining.
  local zip_args=(
    --recurse-paths "$HTML_TEMP/$input_zip" . \
    --include ./source ./404.html ./link-fixup.js ./html-dfn.js ./styles.css \
    ./fonts/ ./images/ ./dev/ ./demos/\*
  )
  $QUIET && zip_args+=( --quiet )
  (cd "$HTML_SOURCE" && zip "${zip_args[@]}")

  local query_params=()
  $QUIET && query_params+=( quiet )
  $VERBOSE && query_params+=( verbose )
  $DO_UPDATE || query_params+=( no-update )
  $DO_LINT || query_params+=( no-lint )
  $DO_HIGHLIGHT || query_params+=( no-highlight )
  $SINGLE_PAGE_ONLY && query_params+=( single-page )

  $QUIET || echo
  $QUIET || echo "Sending files to the build server..."

  # The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array (and is safe under
  # nounset), so no parameters yields an empty query string rather than a literal pair of quotes.
  local query_string
  query_string=$(joinBy "\&" ${query_params[@]+"${query_params[@]}"})
  local curl_url="https://build.whatwg.org/html-build?${query_string}"
  local curl_args=( "$curl_url" \
    --form "html=@$HTML_TEMP/$input_zip" \
    --form "sha=$HTML_SHA" \
    --dump-header "$HTML_TEMP/$build_server_headers" \
    --output "$HTML_TEMP/$build_server_output" )
  $QUIET && curl_args+=( --silent )
  $VERBOSE && curl_args+=( --verbose )
  curl "${curl_args[@]}"

  # Read exit code from the Exit-Code header and assume failure if not found.
  # Header names are matched case-insensitively. nocasematch is switched on around the whole loop
  # and always switched off afterwards, so leaving the loop early cannot leak the option into the
  # rest of the script.
  local build_server_result=1
  local name value
  shopt -s nocasematch
  while IFS=":" read -r name value; do
    if [[ $name == "Exit-Code" ]]; then
      build_server_result=$(echo "$value" | tr -d ' \r\n')
      break
    fi
  done < "$HTML_TEMP/$build_server_headers"
  shopt -u nocasematch

  if [[ $build_server_result != "0" ]]; then
    # On failure the response body is printed as-is instead of being unzipped
    cat "$HTML_TEMP/$build_server_output"
    exit "$build_server_result"
  else
    local unzip_args=()
    # Note: Don't use the -v flag; it doesn't work in combination with -d
    if [[ "$VERBOSE" == "false" ]]; then
      unzip_args+=( -qq )
    fi
    unzip_args+=( "$HTML_TEMP/$build_server_output" -d "$HTML_OUTPUT" )
    unzip "${unzip_args[@]}"

    # These directories were sent empty (see above), so copy their contents locally
    cp -pR "$HTML_SOURCE/fonts" "$HTML_OUTPUT"
    cp -pR "$HTML_SOURCE/images" "$HTML_OUTPUT"

    if [[ "$SINGLE_PAGE_ONLY" == "false" ]]; then
      cp -pR "$HTML_SOURCE/dev" "$HTML_OUTPUT"
    fi

    $QUIET || echo
    $QUIET || echo "Build server output:"
    cat "$HTML_OUTPUT/output.txt"
    rm "$HTML_OUTPUT/output.txt"
  fi
}
598 |
# Clears the $HTML_CACHE directory if the build tools have been updated since last run.
# The html-build commit of the last run is remembered in $HTML_CACHE/last-build-sha.txt and
# compared against $BUILD_SHA_OVERRIDE (if set) or the current `git rev-parse HEAD`.
# Arguments: none
# Output:
# - $HTML_CACHE will be usable (possibly empty)
# - $DO_UPDATE is set to true when the cache had to be cleared
function clearCacheIfNecessary {
  local sha_file="$HTML_CACHE/last-build-sha.txt"

  if [[ -d "$HTML_CACHE" ]]; then
    local prev_build_sha
    # A missing or unreadable marker reads as an empty SHA, which never matches
    prev_build_sha=$( cat "$sha_file" 2>/dev/null || echo )

    local current_build_sha
    current_build_sha=${BUILD_SHA_OVERRIDE:-$(git rev-parse HEAD)}

    if [[ "$prev_build_sha" != "$current_build_sha" ]]; then
      $QUIET || echo "Build tools have been updated since last run; clearing the cache..."
      DO_UPDATE=true
      clearDir "$HTML_CACHE"
      echo "$current_build_sha" > "$sha_file"
    fi
  else
    mkdir -p "$HTML_CACHE"

    # Record the SHA for a brand-new cache too; otherwise the next run finds no marker and wipes
    # the cache this run is about to fill. This is best effort: outside a git checkout (and
    # without $BUILD_SHA_OVERRIDE) there is nothing to record, and that must not abort the build.
    local initial_build_sha
    initial_build_sha=${BUILD_SHA_OVERRIDE:-$(git rev-parse HEAD 2>/dev/null || true)}
    if [[ -n "$initial_build_sha" ]]; then
      echo "$initial_build_sha" > "$sha_file"
    fi
  fi
}
621 |
# Updates the mdn-spec-links-html.json file, if either $DO_UPDATE is true
# or it is not yet cached.
# Arguments: none
# Output:
# - $HTML_CACHE will contain a usable mdn-spec-links-html.json file
function updateRemoteDataFiles {
  if [[ $DO_UPDATE == "true" || ! -f "$HTML_CACHE/mdn-spec-links-html.json" ]]; then
    # Remove the old copy first so a failed download cannot leave stale data behind
    rm -f "$HTML_CACHE/mdn-spec-links-html.json"
    $QUIET || echo "Downloading mdn-spec-links/html.json..."

    # --fail makes curl exit non-zero on an HTTP error status instead of saving the server's
    # error page as if it were the JSON data file.
    local curl_args=( "https://raw.githubusercontent.com/w3c/mdn-spec-links/master/html.json" \
      --output "$HTML_CACHE/mdn-spec-links-html.json" \
      --fail \
      --retry 2 )
    if ! $VERBOSE; then
      # Hide the progress meter but still report why a transfer failed
      curl_args+=( --silent --show-error )
    fi
    curl "${curl_args[@]}"
  fi
}
641 |
# Performs a build of the HTML source file into the resulting output
# - Arguments:
#   - $1: the filename of the source file within HTML_SOURCE (e.g. "source")
#   - $2: the build type, either "default" or "review"
# - Output:
#   - $HTML_OUTPUT will contain the built files
#   - On a Wattsi failure the script exits with the exit code of the failing Wattsi run
function processSource {
  local source_location="$1"
  # Note: runWattsi reads $build_type from this function's scope
  local build_type="$2"

  clearDir "$HTML_TEMP"

  $QUIET || echo "Pre-processing the source..."
  cp -p entities/out/entities.inc "$HTML_CACHE"
  cp -p entities/out/entities-dtd.url "$HTML_CACHE"

  # Prefer an installed html-build binary; otherwise build and run it through cargo
  if hash html-build 2>/dev/null; then
    html-build <"$HTML_SOURCE/$source_location" >"$HTML_TEMP/source-whatwg-complete"
  else
    local cargo_args=( --release )
    $VERBOSE && cargo_args+=( --verbose )
    $QUIET && cargo_args+=( --quiet )
    cargo run "${cargo_args[@]}" <"$HTML_SOURCE/$source_location" >"$HTML_TEMP/source-whatwg-complete"
  fi

  runWattsi "$HTML_TEMP/source-whatwg-complete" "$HTML_TEMP/wattsi-output"

  # A local Wattsi prints straight to the terminal. The build-server variant only leaves its log
  # in wattsi-output.txt, so echo that here on both success and failure. `|| true` keeps grep's
  # "no lines selected" status (an empty log) from aborting the script.
  if [[ $LOCAL_WATTSI != "true" ]]; then
    $QUIET || grep -v '^$' "$HTML_TEMP/wattsi-output.txt" || true # trim blank lines
  fi

  if [[ $WATTSI_RESULT != "0" ]]; then
    # Remember the real failure: the diagnostic re-run below overwrites $WATTSI_RESULT
    local failure_code=$WATTSI_RESULT

    if [[ $WATTSI_RESULT == "65" ]]; then
      echo
      echo "There were errors. Running again to show the original line numbers."
      echo
      runWattsi "$HTML_SOURCE/$source_location" "$HTML_TEMP/wattsi-raw-source-output"
      if [[ $LOCAL_WATTSI != "true" ]]; then
        grep -v '^$' "$HTML_TEMP/wattsi-output.txt" || true # trim blank lines
      fi
    fi
    echo
    echo "There were errors. Stopping."
    exit "$failure_code"
  fi

  # Keep the list of files copied from $HTML_SOURCE in sync with `doServerBuild`

  if [[ $build_type == "default" ]]; then
    # Singlepage HTML
    mv "$HTML_TEMP/wattsi-output/index-html" "$HTML_OUTPUT/index.html"

    if [[ $SINGLE_PAGE_ONLY == "false" ]]; then
      # Singlepage Commit Snapshot
      local commit_dir="$HTML_OUTPUT/commit-snapshots/$HTML_SHA"
      mkdir -p "$commit_dir"
      mv "$HTML_TEMP/wattsi-output/index-snap" "$commit_dir/index.html"

      # Multipage HTML and Dev Edition
      mv "$HTML_TEMP/wattsi-output/multipage-html" "$HTML_OUTPUT/multipage"
      mv "$HTML_TEMP/wattsi-output/multipage-dev" "$HTML_OUTPUT/dev"

      # Static dev-edition assets from the source tree are copied into the generated dev/ directory
      cp -pR "$HTML_SOURCE/dev" "$HTML_OUTPUT"
    fi

    cp -p entities/out/entities.json "$HTML_OUTPUT"
    cp -p "$HTML_TEMP/wattsi-output/xrefs.json" "$HTML_OUTPUT"

    clearDir "$HTML_TEMP"

    echo "User-agent: *
Disallow: /commit-snapshots/
Disallow: /review-drafts/" > "$HTML_OUTPUT/robots.txt"
    cp -p "$HTML_SOURCE/404.html" "$HTML_OUTPUT"
    cp -p "$HTML_SOURCE/link-fixup.js" "$HTML_OUTPUT"
    cp -p "$HTML_SOURCE/html-dfn.js" "$HTML_OUTPUT"
    cp -p "$HTML_SOURCE/styles.css" "$HTML_OUTPUT"
    cp -pR "$HTML_SOURCE/fonts" "$HTML_OUTPUT"
    cp -pR "$HTML_SOURCE/images" "$HTML_OUTPUT"
    cp -pR "$HTML_SOURCE/demos" "$HTML_OUTPUT"
  else
    # Singlepage Review Draft
    # The output directory is named after the source file, minus its .wattsi extension
    local year_month
    year_month=$(basename "$source_location" .wattsi)

    local new_dir="$HTML_OUTPUT/review-drafts/$year_month"
    mkdir -p "$new_dir"
    mv "$HTML_TEMP/wattsi-output/index-review" "$new_dir/index.html"
  fi
}
733 |
# Checks if Wattsi is available and up to date
# - Arguments: none
# - Output:
#   - Sets $LOCAL_WATTSI to true or false
#   - Echoes a warning if Wattsi is out of date according to $WATTSI_LATEST
function checkWattsi {
  if hash wattsi 2>/dev/null; then
    # The second space-separated field of `wattsi --version` is compared numerically against
    # $WATTSI_LATEST. An outdated Wattsi only triggers a warning; it is still used.
    if [[ "$(wattsi --version | cut -d' ' -f2)" -lt "$WATTSI_LATEST" ]]; then
      echo
      echo "Warning: Your wattsi version is out of date. You should rebuild an"
      echo "up-to-date wattsi binary from the wattsi sources."
      echo
    fi
    LOCAL_WATTSI=true
  else
    LOCAL_WATTSI=false
  fi
}
752 |
# Runs Wattsi on the given file, either locally or using the web service
# - Arguments:
#   - $1: the file to run Wattsi on
#   - $2: the directory for Wattsi to write output to
# - Also reads $build_type, which is not a parameter: it must be set by the caller
#   (processSource declares it as a local before calling this function)
# - Output:
#   - Sets $WATTSI_RESULT to the exit code
#   - The directory given in $2 will contain the output from Wattsi on success
#   - When the web service is used, $HTML_TEMP/wattsi-output.txt will contain the output from
#     Wattsi, on both success and failure
function runWattsi {
  local source_file="$1"
  local output_dir="$2"

  clearDir "$output_dir"

  if [[ "$LOCAL_WATTSI" == "true" ]]; then
    local wattsi_args=()
    $QUIET && wattsi_args+=( --quiet )
    $SINGLE_PAGE_ONLY && wattsi_args+=( --single-page-only )
    wattsi_args+=( "$source_file" "$HTML_SHA" "$output_dir" "$build_type" "$HTML_CACHE/mdn-spec-links-html.json" )
    # The highlighter URL is an optional trailing argument, passed only when highlighting is on
    if [[ "$DO_HIGHLIGHT" == "true" ]]; then
      wattsi_args+=( "$HIGHLIGHT_SERVER_URL" )
    fi

    # `|| WATTSI_RESULT=$?` records a failure without letting it abort the script
    WATTSI_RESULT="0"
    wattsi "${wattsi_args[@]}" || WATTSI_RESULT=$?
  else
    $QUIET || echo
    $QUIET || echo "Local wattsi not present; trying the build server..."

    local query_params=()
    $QUIET && query_params+=( quiet )
    $SINGLE_PAGE_ONLY && query_params+=( single-page-only )

    # The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array (and is safe under
    # nounset), so no parameters yields an empty query string rather than a literal pair of quotes.
    local query_string
    query_string=$(joinBy "\&" ${query_params[@]+"${query_params[@]}"})
    local curl_url="https://build.whatwg.org/wattsi?${query_string}"

    local curl_args=( "$curl_url" \
      --form "source=@$source_file" \
      --form "sha=$HTML_SHA" \
      --form "build=$build_type" \
      --form "mdn=@$HTML_CACHE/mdn-spec-links-html.json" \
      --dump-header "$HTML_TEMP/wattsi-headers.txt" \
      --output "$HTML_TEMP/wattsi-output.zip" )
    $QUIET && curl_args+=( --silent )
    $VERBOSE && curl_args+=( --verbose )
    curl "${curl_args[@]}"

    # Read exit code from the Exit-Code header and assume failure if not found.
    # Header names are matched case-insensitively. nocasematch is switched on around the whole
    # loop and always switched off afterwards, so leaving the loop early cannot leak the option
    # into the rest of the script.
    WATTSI_RESULT="1"
    local name value
    shopt -s nocasematch
    while IFS=":" read -r name value; do
      if [[ $name == "Exit-Code" ]]; then
        WATTSI_RESULT=$(echo "$value" | tr -d ' \r\n')
        break
      fi
    done < "$HTML_TEMP/wattsi-headers.txt"
    shopt -u nocasematch

    if [[ $WATTSI_RESULT != "0" ]]; then
      # On failure the response body is treated as the text log rather than as a zip
      mv "$HTML_TEMP/wattsi-output.zip" "$HTML_TEMP/wattsi-output.txt"
    else
      local unzip_args=()
      # Note: Don't use the -v flag; it doesn't work in combination with -d
      if ! $VERBOSE; then
        unzip_args+=( -qq )
      fi
      unzip_args+=( "$HTML_TEMP/wattsi-output.zip" -d "$output_dir" )
      unzip "${unzip_args[@]}"
      mv "$output_dir/output.txt" "$HTML_TEMP/wattsi-output.txt"
    fi
  fi
}
829 |
# Launches the syntax-highlighting server in the background, but only when it will be used:
# Wattsi runs locally and highlighting is enabled.
# Arguments: none
# Output: when the server is needed,
# - bs-highlighter-server is running as a background job
# - $HIGHLIGHT_SERVER_PID holds its process ID, for stopHighlightServer
# - stopHighlightServer is registered to run when the script exits
function startHighlightServer {
  if [[ "$LOCAL_WATTSI" != "true" || "$DO_HIGHLIGHT" != "true" ]]; then
    return 0
  fi

  local server_flags=()
  if $QUIET; then
    server_flags+=( --quiet )
  fi

  # The ${arr[@]+...} form expands to nothing for an empty array, even under nounset
  bs-highlighter-server ${server_flags[@]+"${server_flags[@]}"} &
  HIGHLIGHT_SERVER_PID=$!

  trap stopHighlightServer EXIT
}
845 |
# Shuts down the syntax-highlighting server started by startHighlightServer
# Arguments: none
# Output: the process recorded in $HIGHLIGHT_SERVER_PID is killed, if one was recorded. Errors
#         from kill and wait are ignored.
function stopHighlightServer {
  # Nothing was started, so there is nothing to stop
  [[ -n $HIGHLIGHT_SERVER_PID ]] || return 0

  kill "$HIGHLIGHT_SERVER_PID" 2>/dev/null || true

  # Reaping the job here suppresses a 'Terminated: 15 "$DIR/highlighter/server.py"' message
  wait "$HIGHLIGHT_SERVER_PID" 2>/dev/null || true
}
857 |
# Makes sure the given directory exists and has nothing in it
# Arguments:
#   - $1: the directory to clear
# Output: the directory exists and is empty
function clearDir {
  local dir=$1

  # The directory itself is kept and only its contents are deleted. Removing the directory with
  # `rm -rf` can fail on permissions, e.g. while another program has it open, or under Docker
  # where we may write into the directory but not delete it.
  mkdir -p "$dir"
  find "$dir" -mindepth 1 -delete
}
870 |
# Concatenates the strings in $2 onward, placing the separator from $1 between them
# Arguments:
#   - $1: the separator string
#   - $2...: the strings to join
# Output: echoes the joined string (no trailing newline); echoes nothing when fewer than two
#         arguments are given
function joinBy {
  local separator=${1-}
  local first=${2-}

  # With fewer than two arguments there is nothing to join
  shift 2 || return 0

  # Each remaining argument gets the separator prepended by substituting it for the empty match
  # at the start of the string. The separator is used as a pattern-substitution replacement,
  # so callers that escape characters in it depend on this exact form.
  printf %s "$first" "${@/#/$separator}"
}
882 |
883 | main "$@"
884 |
--------------------------------------------------------------------------------
/ci-build/Dockerfile:
--------------------------------------------------------------------------------
# This Dockerfile is used by the CI build (see ci-build/docker-build.sh) to get an environment in
# which our build dependencies can be installed easily and repeatably.

# Stage 1: compile the html-build preprocessor from the Rust sources.
FROM rust:1.73-slim AS builder
WORKDIR /whatwg/html-build
COPY Cargo.lock Cargo.toml ./
COPY src ./src/
RUN cargo install --path .

# Stage 2: the image that actually runs the build.
FROM debian:stable

# The package index is removed in the same layer, since nothing is installed with apt afterwards.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends \
      ca-certificates curl rsync git \
      default-jre \
      python3 python3-pip pipx \
      libbrotli1 libexpat1 libfontconfig1 libfreetype6 libpng16-16 \
      fonts-dejavu fonts-droid-fallback fonts-liberation fonts-symbola fonts-unfonts-core && \
    rm -rf /var/lib/apt/lists/*

# Dependency lines above are:
# - General
# - validator
# - Highlighter
# - Prince
# - fonts, for when Prince renders to PDF

COPY --from=builder /usr/local/cargo/bin/html-build /bin/html-build

COPY --from=ghcr.io/whatwg/wattsi:latest /whatwg/wattsi/bin/wattsi /bin/wattsi

# Install the highlighter with pipx, putting its executable in /usr/bin.
ENV PIPX_HOME=/opt/pipx
ENV PIPX_BIN_DIR=/usr/bin
RUN pipx install bs-highlighter

# The DockerHub container for the validator only contains the server version, so we get the .jar
# from GitHub:
ADD https://github.com/validator/validator/releases/download/latest/vnu.jar /whatwg/

# Trying to copy Prince from its DockerHub container like the others does not work; it has too many
# shared library dependencies. Probably this is a job for Docker Compose... we should learn how that
# works one day.
# Prince also hasn't been updated for Debian 12 and is no longer installable from its deb file.
ADD https://www.princexml.com/download/prince-15.1-linux-generic-x86_64.tar.gz /whatwg/prince.tar.gz
# The installer prompts for an install prefix; answer it with /usr. The final `prince --version`
# makes the image build fail if the installation did not produce a working binary.
RUN cd /whatwg && \
    tar xvzf prince.tar.gz && \
    ( cd prince-* && echo /usr | ./install.sh ) && \
    echo '@font-face { font-family: serif; src: local("Symbola") }' >> /usr/lib/prince/style/fonts.css && \
    rm -rf prince* && \
    prince --version

# Plain local files need none of ADD's extras (URL fetching, archive extraction), so use COPY.
COPY . /whatwg/html-build

ENTRYPOINT ["bash", "/whatwg/html-build/ci-build/inside-container.sh"]
53 |
--------------------------------------------------------------------------------
/ci-build/README.md:
--------------------------------------------------------------------------------
1 | # HTML Standard CI Build
2 |
3 | This directory contains the infrastructure for building and running a Docker container, [whatwg/html-build](https://hub.docker.com/r/whatwg/html-build) (the scripts in this directory tag and run the image as `ghcr.io/whatwg/html-build`), which performs a "full" build of the HTML Standard, producing artifacts ready for deployment.
4 |
5 | The relevant entrypoints are:
6 |
7 | - `docker-build.sh` will build the Docker container
8 | - `docker-run.sh $INPUT $OUTPUT` will run the Docker container to do such a full build.
9 | - `$INPUT` should contain a checkout of the [whatwg/html](https://github.com/whatwg/html) repository
10 | - `$OUTPUT` should be an empty directory
11 |
--------------------------------------------------------------------------------
/ci-build/docker-build.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Builds the ghcr.io/whatwg/html-build Docker image from a temporary copy of the html-build
# checkout, using this directory's Dockerfile.
set -o errexit
set -o nounset
set -o pipefail
# extglob is needed for the !(...) pattern used in main
shopt -s extglob

TMP_DIR=$(mktemp -d)

# Assembles the Docker build context in $TMP_DIR and builds the image.
# Arguments: none
# Output: a local image tagged ghcr.io/whatwg/html-build
function main {
  # Register the cleanup before anything is copied, so that a failing copy (which aborts the
  # script because of errexit) does not leave the temporary directory behind.
  trap cleanTemp EXIT

  local here
  here=$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )

  # We want the image to contain:
  # * All of the important stuff from the top-level (html-build) directory
  # * But, the Dockerfile from this (ci-build) directory
  # And in particular it should *not* contain the top-level Dockerfile, dotfiles, .git/, and
  # any html/ and output/ directories that might be hanging around from local testing.
  cp "$here/Dockerfile" "$TMP_DIR"
  cd "$here/.."
  cp -r !(.*|html|output|Dockerfile) "$TMP_DIR"
  cd "$TMP_DIR"

  local ghcr_repo="ghcr.io/whatwg/html-build"

  # Build the Docker image, using GHCR as a cache. (This will be fast if nothing has changed
  # in html-build or its dependencies). Failing to pull the previous image only costs the cache,
  # so that failure is tolerated.
  docker pull ghcr.io/whatwg/wattsi
  docker pull "$ghcr_repo" || true
  docker build --cache-from "$ghcr_repo" --tag "$ghcr_repo" .
}

# Removes the temporary build context. Runs from the EXIT trap set in main.
function cleanTemp {
  rm -rf "$TMP_DIR"
}

main "$@"
38 |
--------------------------------------------------------------------------------
/ci-build/docker-run.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Runs the ghcr.io/whatwg/html-build image against a source checkout.
# Arguments:
#   - $1: the input directory, mounted read-only into the container
#   - $2: the output directory, mounted writable into the container
set -o errexit
set -o nounset
set -o pipefail
shopt -s extglob

# Without this check, a missing argument would only surface as an "unbound variable" error
if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <html-source-dir> <output-dir>" >&2
  exit 1
fi

# Resolve both directories to absolute paths for the bind mounts
HTML_SOURCE=$(realpath "$1")
HTML_OUTPUT=$(realpath "$2")

# The container is told where the mounts live through the HTML_SOURCE and HTML_OUTPUT
# environment variables.
docker run --rm --mount "type=bind,source=$HTML_SOURCE,destination=/whatwg/html,readonly=1" \
  --env "HTML_SOURCE=/whatwg/html" \
  --mount "type=bind,source=$HTML_OUTPUT,destination=/whatwg/output" \
  --env "HTML_OUTPUT=/whatwg/output" \
  ghcr.io/whatwg/html-build
15 |
--------------------------------------------------------------------------------
/ci-build/inside-container.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -o errexit
3 | set -o nounset
4 | set -o pipefail
5 | cd "$(dirname "$0")/../.."
6 |
7 | PDF_SERVE_PORT=8080
8 |
9 | SKIP_BUILD_UPDATE_CHECK=true ./html-build/build.sh
10 |
11 | echo ""
12 | echo "Running conformance checker..."
13 | # the -Xmx1g argument sets the size of the Java heap space to 1 gigabyte
14 | java -Xmx1g -jar ./vnu.jar --skip-non-html "$HTML_OUTPUT"
15 | echo ""
16 |
17 | # The build output contains some relative links, which will end up pointing to
18 | # "https://0.0.0.0:$PDF_SERVE_PORT/" in the built PDF. That's undesirable; see
19 | # https://github.com/whatwg/html/issues/9097. Our hack is to replace such
20 | # relative links like so. Note: we can't just insert a
AElig
AMP
Aacute
Acirc
Agrave
Aring
Atilde
Auml
COPY
Ccedil
ETH
Eacute
Ecirc
Egrave
Euml
GT
Iacute
Icirc
Igrave
Iuml
LT
Ntilde
Oacute
Ocirc
Ograve
Oslash
Otilde
Ouml
QUOT
REG
THORN
Uacute
Ucirc
Ugrave
Uuml
Yacute
aacute
acirc
acute
aelig
agrave
amp
aring
atilde
auml
brvbar
ccedil
cedil
cent
copy
curren
deg
divide
eacute
ecirc
egrave
eth
euml
frac12
frac14
frac34
gt
iacute
icirc
iexcl
igrave
iquest
iuml
laquo
lt
macr
micro
middot
nbsp
not
ntilde
oacute
ocirc
ograve
ordf
ordm
oslash
otilde
ouml
para
plusmn
pound
quot
raquo
reg
sect
shy
sup1
sup2
sup3
szlig
thorn
times
uacute
ucirc
ugrave
uml
uuml
yacute
yen
yuml
' + name + ';
' + name + ';
(.+?);
(.+?);