├── .github └── workflows │ ├── semgrep.yml │ └── test.yml ├── .gitignore ├── .prettierrc ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── README.md ├── ava.config.js ├── build.sh ├── html_rewriter.js.patch ├── package-lock.json ├── package.json ├── src ├── asyncify.js ├── comment.rs ├── doctype.rs ├── document_end.rs ├── element.rs ├── end_tag.rs ├── handlers.rs ├── html_rewriter.d.ts ├── html_rewriter.rs ├── lib.rs └── text_chunk.rs ├── test ├── comments.spec.ts ├── doctype.spec.ts ├── document_end.spec.ts ├── element.spec.ts ├── index.ts ├── misc.spec.ts ├── selectors.spec.ts └── text_chunk.spec.ts └── tsconfig.json /.github/workflows/semgrep.yml: -------------------------------------------------------------------------------- 1 | 2 | on: 3 | pull_request: {} 4 | workflow_dispatch: {} 5 | push: 6 | branches: 7 | - main 8 | - master 9 | schedule: 10 | - cron: '0 0 * * *' 11 | name: Semgrep config 12 | jobs: 13 | semgrep: 14 | name: semgrep/ci 15 | runs-on: ubuntu-20.04 16 | env: 17 | SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }} 18 | SEMGREP_URL: https://cloudflare.semgrep.dev 19 | SEMGREP_APP_URL: https://cloudflare.semgrep.dev 20 | SEMGREP_VERSION_CHECK_URL: https://cloudflare.semgrep.dev/api/check-version 21 | container: 22 | image: returntocorp/semgrep 23 | steps: 24 | - uses: actions/checkout@v3 25 | - run: semgrep ci 26 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Use Rust stable 16 | uses: actions-rs/toolchain@v1 17 | with: 18 | profile: minimal 19 | toolchain: stable 20 | - uses: Swatinem/rust-cache@v1 21 | with: 22 | cache-on-failure: true 23 | - name: Use Node.js LTS 24 | uses: 
actions/setup-node@v2 25 | with: 26 | node-version: lts/* 27 | - name: Install wasm-pack 28 | run: cargo install --git https://github.com/mrbbot/wasm-pack 29 | - run: npm ci 30 | - run: npm run build 31 | - run: npm test 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | dist 4 | node_modules 5 | target -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "proseWrap": "always" 3 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 🚧 Changelog 2 | 3 | ## 0.4.1 4 | 5 | - Throw `TypeError`s instead of `string`s on parser errors 6 | 7 | ## 0.4.0 8 | 9 | - Add support for `Element#onEndTag` 10 | - Add support for the 11 | [`html_rewriter_treats_esi_include_as_void_tag`](https://developers.cloudflare.com/workers/platform/compatibility-dates#htmlrewriter-handling-of-esiinclude) 12 | compatibility flag 13 | - Throw a `TypeError` instead of a `string` when a content token is used outside 14 | the relevant content handler 15 | 16 | ## 0.3.3 17 | 18 | - Make `Promise` detection for async handlers stricter 19 | 20 | ## 0.3.2 21 | 22 | - Fix `Promise` detection for async handlers. `Promise`s in different realms 23 | will now be detected. Closes 24 | [issue #1](https://github.com/mrbbot/html-rewriter-wasm/issues/1). 
25 | 26 | ## 0.3.1 27 | 28 | - Change the return type of `Element#attributes` to `IterableIterator` 29 | - Bind handlers' `this` in Rust 30 | 31 | ## 0.3.0 32 | 33 | Initial Release 34 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "ahash" 7 | version = "0.7.6" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" 10 | dependencies = [ 11 | "getrandom 0.2.4", 12 | "once_cell", 13 | "version_check", 14 | ] 15 | 16 | [[package]] 17 | name = "bitflags" 18 | version = "1.2.1" 19 | source = "registry+https://github.com/rust-lang/crates.io-index" 20 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 21 | 22 | [[package]] 23 | name = "bumpalo" 24 | version = "3.7.0" 25 | source = "registry+https://github.com/rust-lang/crates.io-index" 26 | checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631" 27 | 28 | [[package]] 29 | name = "byteorder" 30 | version = "1.3.2" 31 | source = "registry+https://github.com/rust-lang/crates.io-index" 32 | checksum = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" 33 | 34 | [[package]] 35 | name = "cfg-if" 36 | version = "0.1.10" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 39 | 40 | [[package]] 41 | name = "cfg-if" 42 | version = "1.0.0" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 45 | 46 | [[package]] 47 | name = "convert_case" 48 | version = "0.4.0" 49 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" 51 | 52 | [[package]] 53 | name = "cssparser" 54 | version = "0.27.2" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" 57 | dependencies = [ 58 | "cssparser-macros", 59 | "dtoa-short", 60 | "itoa", 61 | "matches", 62 | "phf", 63 | "proc-macro2", 64 | "quote", 65 | "smallvec", 66 | "syn", 67 | ] 68 | 69 | [[package]] 70 | name = "cssparser-macros" 71 | version = "0.6.0" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" 74 | dependencies = [ 75 | "quote", 76 | "syn", 77 | ] 78 | 79 | [[package]] 80 | name = "derive_more" 81 | version = "0.99.17" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" 84 | dependencies = [ 85 | "convert_case", 86 | "proc-macro2", 87 | "quote", 88 | "rustc_version", 89 | "syn", 90 | ] 91 | 92 | [[package]] 93 | name = "dtoa" 94 | version = "0.4.4" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e" 97 | 98 | [[package]] 99 | name = "dtoa-short" 100 | version = "0.3.2" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2" 103 | dependencies = [ 104 | "dtoa", 105 | ] 106 | 107 | [[package]] 108 | name = "encoding_rs" 109 | version = "0.8.22" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "cd8d03faa7fe0c1431609dfad7bbe827af30f82e1e2ae6f7ee4fca6bd764bc28" 112 | dependencies = [ 113 | "cfg-if 0.1.10", 114 | ] 115 | 116 | [[package]] 117 | name = "fnv" 118 | version = 
"1.0.6" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" 121 | 122 | [[package]] 123 | name = "fxhash" 124 | version = "0.2.1" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" 127 | dependencies = [ 128 | "byteorder", 129 | ] 130 | 131 | [[package]] 132 | name = "getrandom" 133 | version = "0.1.16" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" 136 | dependencies = [ 137 | "cfg-if 1.0.0", 138 | "libc", 139 | "wasi 0.9.0+wasi-snapshot-preview1", 140 | ] 141 | 142 | [[package]] 143 | name = "getrandom" 144 | version = "0.2.4" 145 | source = "registry+https://github.com/rust-lang/crates.io-index" 146 | checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" 147 | dependencies = [ 148 | "cfg-if 1.0.0", 149 | "libc", 150 | "wasi 0.10.2+wasi-snapshot-preview1", 151 | ] 152 | 153 | [[package]] 154 | name = "hashbrown" 155 | version = "0.11.2" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" 158 | dependencies = [ 159 | "ahash", 160 | ] 161 | 162 | [[package]] 163 | name = "html-rewriter" 164 | version = "0.3.0" 165 | dependencies = [ 166 | "js-sys", 167 | "lol_html", 168 | "serde", 169 | "serde-wasm-bindgen", 170 | "thiserror", 171 | "wasm-bindgen", 172 | ] 173 | 174 | [[package]] 175 | name = "itoa" 176 | version = "0.4.4" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" 179 | 180 | [[package]] 181 | name = "js-sys" 182 | version = "0.3.33" 183 | source = "registry+https://github.com/rust-lang/crates.io-index" 184 | checksum = 
"367647c532db6f1555d7151e619540ec5f713328235b8c062c6b4f63e84adfe3" 185 | dependencies = [ 186 | "wasm-bindgen", 187 | ] 188 | 189 | [[package]] 190 | name = "lazy_static" 191 | version = "1.4.0" 192 | source = "registry+https://github.com/rust-lang/crates.io-index" 193 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 194 | 195 | [[package]] 196 | name = "lazycell" 197 | version = "1.3.0" 198 | source = "registry+https://github.com/rust-lang/crates.io-index" 199 | checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" 200 | 201 | [[package]] 202 | name = "libc" 203 | version = "0.2.66" 204 | source = "registry+https://github.com/rust-lang/crates.io-index" 205 | checksum = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" 206 | 207 | [[package]] 208 | name = "log" 209 | version = "0.4.8" 210 | source = "registry+https://github.com/rust-lang/crates.io-index" 211 | checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" 212 | dependencies = [ 213 | "cfg-if 0.1.10", 214 | ] 215 | 216 | [[package]] 217 | name = "lol_html" 218 | version = "0.3.0" 219 | source = "git+https://github.com/cloudflare/lol-html?rev=f32bd14#f32bd14b229ed1088c25725cce242817ea2fe43a" 220 | dependencies = [ 221 | "bitflags", 222 | "cfg-if 1.0.0", 223 | "cssparser", 224 | "encoding_rs", 225 | "hashbrown", 226 | "lazy_static", 227 | "lazycell", 228 | "memchr", 229 | "safemem", 230 | "selectors", 231 | "thiserror", 232 | ] 233 | 234 | [[package]] 235 | name = "matches" 236 | version = "0.1.8" 237 | source = "registry+https://github.com/rust-lang/crates.io-index" 238 | checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" 239 | 240 | [[package]] 241 | name = "memchr" 242 | version = "2.2.1" 243 | source = "registry+https://github.com/rust-lang/crates.io-index" 244 | checksum = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" 245 | 246 | [[package]] 247 | name = "nodrop" 
248 | version = "0.1.14" 249 | source = "registry+https://github.com/rust-lang/crates.io-index" 250 | checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" 251 | 252 | [[package]] 253 | name = "once_cell" 254 | version = "1.9.0" 255 | source = "registry+https://github.com/rust-lang/crates.io-index" 256 | checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" 257 | 258 | [[package]] 259 | name = "phf" 260 | version = "0.8.0" 261 | source = "registry+https://github.com/rust-lang/crates.io-index" 262 | checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" 263 | dependencies = [ 264 | "phf_macros", 265 | "phf_shared", 266 | "proc-macro-hack", 267 | ] 268 | 269 | [[package]] 270 | name = "phf_codegen" 271 | version = "0.8.0" 272 | source = "registry+https://github.com/rust-lang/crates.io-index" 273 | checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" 274 | dependencies = [ 275 | "phf_generator", 276 | "phf_shared", 277 | ] 278 | 279 | [[package]] 280 | name = "phf_generator" 281 | version = "0.8.0" 282 | source = "registry+https://github.com/rust-lang/crates.io-index" 283 | checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" 284 | dependencies = [ 285 | "phf_shared", 286 | "rand", 287 | ] 288 | 289 | [[package]] 290 | name = "phf_macros" 291 | version = "0.8.0" 292 | source = "registry+https://github.com/rust-lang/crates.io-index" 293 | checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" 294 | dependencies = [ 295 | "phf_generator", 296 | "phf_shared", 297 | "proc-macro-hack", 298 | "proc-macro2", 299 | "quote", 300 | "syn", 301 | ] 302 | 303 | [[package]] 304 | name = "phf_shared" 305 | version = "0.8.0" 306 | source = "registry+https://github.com/rust-lang/crates.io-index" 307 | checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" 308 | dependencies = [ 309 | "siphasher", 310 | ] 311 | 312 | 
[[package]] 313 | name = "ppv-lite86" 314 | version = "0.2.16" 315 | source = "registry+https://github.com/rust-lang/crates.io-index" 316 | checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" 317 | 318 | [[package]] 319 | name = "precomputed-hash" 320 | version = "0.1.1" 321 | source = "registry+https://github.com/rust-lang/crates.io-index" 322 | checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" 323 | 324 | [[package]] 325 | name = "proc-macro-hack" 326 | version = "0.5.19" 327 | source = "registry+https://github.com/rust-lang/crates.io-index" 328 | checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" 329 | 330 | [[package]] 331 | name = "proc-macro2" 332 | version = "1.0.27" 333 | source = "registry+https://github.com/rust-lang/crates.io-index" 334 | checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" 335 | dependencies = [ 336 | "unicode-xid", 337 | ] 338 | 339 | [[package]] 340 | name = "quote" 341 | version = "1.0.2" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" 344 | dependencies = [ 345 | "proc-macro2", 346 | ] 347 | 348 | [[package]] 349 | name = "rand" 350 | version = "0.7.3" 351 | source = "registry+https://github.com/rust-lang/crates.io-index" 352 | checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" 353 | dependencies = [ 354 | "getrandom 0.1.16", 355 | "libc", 356 | "rand_chacha", 357 | "rand_core", 358 | "rand_hc", 359 | "rand_pcg", 360 | ] 361 | 362 | [[package]] 363 | name = "rand_chacha" 364 | version = "0.2.2" 365 | source = "registry+https://github.com/rust-lang/crates.io-index" 366 | checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" 367 | dependencies = [ 368 | "ppv-lite86", 369 | "rand_core", 370 | ] 371 | 372 | [[package]] 373 | name = "rand_core" 374 | version = "0.5.1" 375 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 376 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 377 | dependencies = [ 378 | "getrandom 0.1.16", 379 | ] 380 | 381 | [[package]] 382 | name = "rand_hc" 383 | version = "0.2.0" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 386 | dependencies = [ 387 | "rand_core", 388 | ] 389 | 390 | [[package]] 391 | name = "rand_pcg" 392 | version = "0.2.1" 393 | source = "registry+https://github.com/rust-lang/crates.io-index" 394 | checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" 395 | dependencies = [ 396 | "rand_core", 397 | ] 398 | 399 | [[package]] 400 | name = "rustc_version" 401 | version = "0.4.0" 402 | source = "registry+https://github.com/rust-lang/crates.io-index" 403 | checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" 404 | dependencies = [ 405 | "semver", 406 | ] 407 | 408 | [[package]] 409 | name = "safemem" 410 | version = "0.3.3" 411 | source = "registry+https://github.com/rust-lang/crates.io-index" 412 | checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" 413 | 414 | [[package]] 415 | name = "selectors" 416 | version = "0.22.0" 417 | source = "registry+https://github.com/rust-lang/crates.io-index" 418 | checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" 419 | dependencies = [ 420 | "bitflags", 421 | "cssparser", 422 | "derive_more", 423 | "fxhash", 424 | "log", 425 | "matches", 426 | "phf", 427 | "phf_codegen", 428 | "precomputed-hash", 429 | "servo_arc", 430 | "smallvec", 431 | "thin-slice", 432 | ] 433 | 434 | [[package]] 435 | name = "semver" 436 | version = "1.0.4" 437 | source = "registry+https://github.com/rust-lang/crates.io-index" 438 | checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" 439 | 440 | [[package]] 441 | 
name = "serde" 442 | version = "1.0.104" 443 | source = "registry+https://github.com/rust-lang/crates.io-index" 444 | checksum = "414115f25f818d7dfccec8ee535d76949ae78584fc4f79a6f45a904bf8ab4449" 445 | dependencies = [ 446 | "serde_derive", 447 | ] 448 | 449 | [[package]] 450 | name = "serde-wasm-bindgen" 451 | version = "0.1.3" 452 | source = "registry+https://github.com/rust-lang/crates.io-index" 453 | checksum = "7ee6f12f7ed0e7ad2e55200da37dbabc2cadeb942355c5b629aa3771f5ac5636" 454 | dependencies = [ 455 | "fnv", 456 | "js-sys", 457 | "serde", 458 | "wasm-bindgen", 459 | ] 460 | 461 | [[package]] 462 | name = "serde_derive" 463 | version = "1.0.104" 464 | source = "registry+https://github.com/rust-lang/crates.io-index" 465 | checksum = "128f9e303a5a29922045a830221b8f78ec74a5f544944f3d5984f8ec3895ef64" 466 | dependencies = [ 467 | "proc-macro2", 468 | "quote", 469 | "syn", 470 | ] 471 | 472 | [[package]] 473 | name = "servo_arc" 474 | version = "0.1.1" 475 | source = "registry+https://github.com/rust-lang/crates.io-index" 476 | checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" 477 | dependencies = [ 478 | "nodrop", 479 | "stable_deref_trait", 480 | ] 481 | 482 | [[package]] 483 | name = "siphasher" 484 | version = "0.3.9" 485 | source = "registry+https://github.com/rust-lang/crates.io-index" 486 | checksum = "a86232ab60fa71287d7f2ddae4a7073f6b7aac33631c3015abb556f08c6d0a3e" 487 | 488 | [[package]] 489 | name = "smallvec" 490 | version = "1.8.0" 491 | source = "registry+https://github.com/rust-lang/crates.io-index" 492 | checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" 493 | 494 | [[package]] 495 | name = "stable_deref_trait" 496 | version = "1.1.1" 497 | source = "registry+https://github.com/rust-lang/crates.io-index" 498 | checksum = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" 499 | 500 | [[package]] 501 | name = "syn" 502 | version = "1.0.73" 503 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 504 | checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7" 505 | dependencies = [ 506 | "proc-macro2", 507 | "quote", 508 | "unicode-xid", 509 | ] 510 | 511 | [[package]] 512 | name = "thin-slice" 513 | version = "0.1.1" 514 | source = "registry+https://github.com/rust-lang/crates.io-index" 515 | checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" 516 | 517 | [[package]] 518 | name = "thiserror" 519 | version = "1.0.9" 520 | source = "registry+https://github.com/rust-lang/crates.io-index" 521 | checksum = "6f357d1814b33bc2dc221243f8424104bfe72dbe911d5b71b3816a2dff1c977e" 522 | dependencies = [ 523 | "thiserror-impl", 524 | ] 525 | 526 | [[package]] 527 | name = "thiserror-impl" 528 | version = "1.0.9" 529 | source = "registry+https://github.com/rust-lang/crates.io-index" 530 | checksum = "eb2e25d25307eb8436894f727aba8f65d07adf02e5b35a13cebed48bd282bfef" 531 | dependencies = [ 532 | "proc-macro2", 533 | "quote", 534 | "syn", 535 | ] 536 | 537 | [[package]] 538 | name = "unicode-xid" 539 | version = "0.2.0" 540 | source = "registry+https://github.com/rust-lang/crates.io-index" 541 | checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" 542 | 543 | [[package]] 544 | name = "version_check" 545 | version = "0.9.4" 546 | source = "registry+https://github.com/rust-lang/crates.io-index" 547 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 548 | 549 | [[package]] 550 | name = "wasi" 551 | version = "0.9.0+wasi-snapshot-preview1" 552 | source = "registry+https://github.com/rust-lang/crates.io-index" 553 | checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" 554 | 555 | [[package]] 556 | name = "wasi" 557 | version = "0.10.2+wasi-snapshot-preview1" 558 | source = "registry+https://github.com/rust-lang/crates.io-index" 559 | checksum = 
"fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" 560 | 561 | [[package]] 562 | name = "wasm-bindgen" 563 | version = "0.2.74" 564 | source = "registry+https://github.com/rust-lang/crates.io-index" 565 | checksum = "d54ee1d4ed486f78874278e63e4069fc1ab9f6a18ca492076ffb90c5eb2997fd" 566 | dependencies = [ 567 | "cfg-if 1.0.0", 568 | "wasm-bindgen-macro", 569 | ] 570 | 571 | [[package]] 572 | name = "wasm-bindgen-backend" 573 | version = "0.2.74" 574 | source = "registry+https://github.com/rust-lang/crates.io-index" 575 | checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900" 576 | dependencies = [ 577 | "bumpalo", 578 | "lazy_static", 579 | "log", 580 | "proc-macro2", 581 | "quote", 582 | "syn", 583 | "wasm-bindgen-shared", 584 | ] 585 | 586 | [[package]] 587 | name = "wasm-bindgen-macro" 588 | version = "0.2.74" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | checksum = "088169ca61430fe1e58b8096c24975251700e7b1f6fd91cc9d59b04fb9b18bd4" 591 | dependencies = [ 592 | "quote", 593 | "wasm-bindgen-macro-support", 594 | ] 595 | 596 | [[package]] 597 | name = "wasm-bindgen-macro-support" 598 | version = "0.2.74" 599 | source = "registry+https://github.com/rust-lang/crates.io-index" 600 | checksum = "be2241542ff3d9f241f5e2cb6dd09b37efe786df8851c54957683a49f0987a97" 601 | dependencies = [ 602 | "proc-macro2", 603 | "quote", 604 | "syn", 605 | "wasm-bindgen-backend", 606 | "wasm-bindgen-shared", 607 | ] 608 | 609 | [[package]] 610 | name = "wasm-bindgen-shared" 611 | version = "0.2.74" 612 | source = "registry+https://github.com/rust-lang/crates.io-index" 613 | checksum = "d7cff876b8f18eed75a66cf49b65e7f967cb354a7aa16003fb55dbfd25b44b4f" 614 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "html-rewriter" 3 | version = "0.3.0" 4 | authors = ["Ivan Nikulin "] 
5 | edition = "2018" 6 | 7 | [dependencies] 8 | js-sys = "0.3.33" 9 | lol_html = { git = "https://github.com/cloudflare/lol-html", rev = "f32bd14" } 10 | serde = { version = "1.0.104", features = ["derive"] } 11 | serde-wasm-bindgen = "0.1.3" 12 | wasm-bindgen = "0.2.74" 13 | thiserror = "1.0.2" 14 | 15 | [lib] 16 | crate-type = ["cdylib", "rlib"] 17 | 18 | [package.metadata.wasm-pack.profile.dev] 19 | wasm-opt = ["--asyncify"] 20 | [package.metadata.wasm-pack.profile.release] 21 | wasm-opt = ["-Os", "--asyncify"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `html-rewriter-wasm` 2 | 3 | An implementation of 4 | [HTMLRewriter](https://developers.cloudflare.com/workers/runtime-apis/html-rewriter) 5 | using a WebAssembly version of 6 | [lol-html](https://github.com/cloudflare/lol-html/). This was primarily written 7 | for [🔥 Miniflare](https://github.com/mrbbot/miniflare), but may be useful for 8 | other projects too. Many thanks to [@inikulin](https://github.com/inikulin) for 9 | their work on 10 | [lol-html's JavaScript API](https://github.com/cloudflare/lol-html/tree/master/js-api) 11 | which this package's Rust code is based on. 12 | 13 | ## Features 14 | 15 | - 🔋 Supports all handler types, properties and methods 16 | - ⏰ Supports synchronous and asynchronous handlers 17 | - 📌 Supports class handlers with correctly bound methods 18 | 19 | ## Usage 20 | 21 | ```js 22 | import { HTMLRewriter } from "html-rewriter-wasm"; 23 | 24 | const encoder = new TextEncoder(); 25 | const decoder = new TextDecoder(); 26 | 27 | let output = ""; 28 | const rewriter = new HTMLRewriter((outputChunk) => { 29 | output += decoder.decode(outputChunk); 30 | }); 31 | 32 | rewriter.on("p", { 33 | element(element) { 34 | element.setInnerContent("new"); 35 | }, 36 | }); 37 | 38 | try { 39 | await rewriter.write(encoder.encode("

<p>old</p>

")); 40 | await rewriter.end(); 41 | console.log(output); //

<p>new</p>

42 | } finally { 43 | rewriter.free(); // Remember to free memory 44 | } 45 | ``` 46 | 47 | See [test/index.ts](./test/index.ts) for a more traditional `HTMLRewriter` 48 | implementation that doesn't have the caveats listed below, but restricts input 49 | and output to strings. 50 | 51 | To enable the 52 | [`html_rewriter_treats_esi_include_as_void_tag`](https://developers.cloudflare.com/workers/platform/compatibility-dates#htmlrewriter-handling-of-esiinclude) 53 | compatibility flag, set `enableEsiTags` when constructing the `HTMLRewriter`: 54 | 55 | ```js 56 | const rewriter = new HTMLRewriter((outputChunk) => { ... }, { 57 | enableEsiTags: true, 58 | }); 59 | ``` 60 | 61 | ## Caveats 62 | 63 | - Once `write` or `end` has been called, you cannot add any more handlers. You 64 | must register all handlers before you start transforming: 65 | 66 | ```js 67 | const rewriter = new HTMLRewriter(...); 68 | 69 | // ❌ 70 | rewriter.on("h1", { ... }); 71 | await rewriter.write(encoder.encode("

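<h1>1</h1>")); 72 | rewriter.on("p", { ... }); // not allowed 73 | await rewriter.write(encoder.encode("<p>2</p>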

")); 74 | 75 | // ✅ 76 | rewriter.on("h1", { ... }); 77 | rewriter.on("p", { ... }); 78 | await rewriter.write(encoder.encode("

<h1>1</h1>")); 79 | await rewriter.write(encoder.encode("<p>2</p>

")); 80 | ``` 81 | 82 | - `end` may only be called once per `HTMLRewriter` instance. This means you must 83 | create a new `HTMLRewriter` instance for each transformation: 84 | 85 | ```js 86 | // ❌ 87 | const rewriter = new HTMLRewriter(...); 88 | await rewriter.end(); 89 | await rewriter.end(); // not allowed 90 | 91 | // ✅ 92 | const rewriter1 = new HTMLRewriter(...); 93 | await rewriter1.end(); 94 | const rewriter2 = new HTMLRewriter(...); 95 | await rewriter2.end(); 96 | ``` 97 | 98 | - When using `async` handlers, you must always `await` calls to `write` and 99 | `end` before calling them again. In other words, you cannot have concurrent 100 | `write` and `end` calls: 101 | 102 | ```js 103 | const rewriter = new HTMLRewriter(...).on("p", { 104 | async element(element) { 105 | await fetch(...); 106 | element.setInnerContent("new"); 107 | } 108 | }); 109 | 110 | // ❌ 111 | rewriter.write(encoder.encode("

<p>1</p>

")); 112 | rewriter.write(encoder.encode("

<p>2</p>

")); // not allowed 113 | 114 | // ❌ 115 | const promise1 = rewriter.write(encoder.encode("

<p>1</p>

")); 116 | const promise2 = rewriter.write(encoder.encode("

<p>2</p>

")); 117 | await Promise.all([promise1, promise2]); // not allowed 118 | 119 | // ✅ 120 | await rewriter.write(encoder.encode("

<p>1</p>

")); 121 | await rewriter.write(encoder.encode("

<p>2</p>

")); 122 | ``` 123 | 124 | ## Internals 125 | 126 | `lol-html` doesn't natively support asynchronous handlers. Instead, whenever a 127 | handler returns a `Promise`, we have to unwind the WebAssembly stack into 128 | temporary storage, wait for the promise to resolve, then rewind the stack and 129 | continue parsing. This temporary storage is per `HTMLRewriter` instance, hence 130 | we cannot have concurrent `write` and `end` calls. We use the 131 | [Asyncify](https://github.com/WebAssembly/binaryen/blob/main/src/passes/Asyncify.cpp) 132 | feature of [Binaryen](https://github.com/WebAssembly/binaryen) to implement 133 | this. See 134 | [this article](https://kripken.github.io/blog/wasm/2019/07/16/asyncify.html) for 135 | more details. 136 | 137 | ## Building 138 | 139 | You can build the package by running `npm run build`. You must do this prior to 140 | running tests with `npm test`. 141 | 142 | You **must** have mrbbot's fork of wasm-pack installed. This upgrades binaryen 143 | (wasm-opt) to version_92 which exports `asyncify_get_state`: 144 | 145 | ```shell 146 | $ cargo install --git https://github.com/mrbbot/wasm-pack 147 | $ npm run build 148 | $ npm test 149 | ``` 150 | 151 | ## License 152 | 153 | `html-rewriter-wasm` uses [lol-html](https://github.com/cloudflare/lol-html/) 154 | which is BSD 3-Clause licensed: 155 | 156 | ``` 157 | Copyright (C) 2019, Cloudflare, Inc. 158 | All rights reserved. 159 | 160 | Redistribution and use in source and binary forms, with or without modification, 161 | are permitted provided that the following conditions are met: 162 | 163 | 1. Redistributions of source code must retain the above copyright notice, this 164 | list of conditions and the following disclaimer. 165 | 166 | 2. Redistributions in binary form must reproduce the above copyright notice, 167 | this list of conditions and the following disclaimer in the documentation and/or 168 | other materials provided with the distribution. 169 | 170 | 3. 
Neither the name of the copyright holder nor the names of its contributors 171 | may be used to endorse or promote products derived from this software without 172 | specific prior written permission. 173 | 174 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 175 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 176 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 177 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 178 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 179 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 180 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 181 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 182 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 183 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 184 | ``` 185 | -------------------------------------------------------------------------------- /ava.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | files: ["test/**/*.spec.ts"], 3 | extensions: ["ts"], 4 | require: ["ts-node/register"], 5 | }; 6 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | echo "---> Checking wasm-pack version..." 5 | # We need to make sure the version of wasm-pack uses Binaryen version_92, 6 | # which exports asyncify_get_state 7 | WASM_PACK_VERSION=$(wasm-pack --version) 8 | if [[ ! 
$WASM_PACK_VERSION =~ -asyncify$ ]]; then 9 | echo "$WASM_PACK_VERSION installed, please install mrbbot's fork:" 10 | echo "cargo install --git https://github.com/mrbbot/wasm-pack" 11 | exit 1 12 | fi 13 | 14 | echo "---> Building WebAssembly with wasm-pack..." 15 | wasm-pack build --target nodejs 16 | 17 | echo "---> Patching JavaScript glue code..." 18 | # Wraps write/end with asyncify magic and adds this returns for chaining 19 | # diff -uN pkg/html_rewriter.js pkg2/html_rewriter.js > html_rewriter.js.patch 20 | patch -uN pkg/html_rewriter.js < html_rewriter.js.patch 21 | 22 | echo "---> Copying required files to dist..." 23 | mkdir -p dist 24 | cp pkg/html_rewriter.js dist/html_rewriter.js 25 | cp pkg/html_rewriter_bg.wasm dist/html_rewriter_bg.wasm 26 | cp src/asyncify.js dist/asyncify.js 27 | cp src/html_rewriter.d.ts dist/html_rewriter.d.ts 28 | -------------------------------------------------------------------------------- /html_rewriter.js.patch: -------------------------------------------------------------------------------- 1 | --- pkg/html_rewriter.js 2022-01-18 17:37:39.000000000 +0000 2 | +++ pkg2/html_rewriter.js 2022-01-18 17:37:19.000000000 +0000 3 | @@ -1,7 +1,7 @@ 4 | let imports = {}; 5 | imports['__wbindgen_placeholder__'] = module.exports; 6 | let wasm; 7 | -const { awaitPromise } = require(String.raw`./asyncify.js`); 8 | +const { awaitPromise, setWasmExports, wrap } = require(String.raw`./asyncify.js`); 9 | const { TextDecoder, TextEncoder } = require(String.raw`util`); 10 | 11 | const heap = new Array(32).fill(undefined); 12 | @@ -233,6 +233,7 @@ 13 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 14 | var len0 = WASM_VECTOR_LEN; 15 | wasm.comment_before(this.ptr, ptr0, len0, isLikeNone(content_type) ? 
0 : addHeapObject(content_type)); 16 | + return this; 17 | } 18 | /** 19 | * @param {string} content 20 | @@ -242,6 +243,7 @@ 21 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 22 | var len0 = WASM_VECTOR_LEN; 23 | wasm.comment_after(this.ptr, ptr0, len0, isLikeNone(content_type) ? 0 : addHeapObject(content_type)); 24 | + return this; 25 | } 26 | /** 27 | * @param {string} content 28 | @@ -251,11 +253,13 @@ 29 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 30 | var len0 = WASM_VECTOR_LEN; 31 | wasm.comment_replace(this.ptr, ptr0, len0, isLikeNone(content_type) ? 0 : addHeapObject(content_type)); 32 | + return this; 33 | } 34 | /** 35 | */ 36 | remove() { 37 | wasm.comment_remove(this.ptr); 38 | + return this; 39 | } 40 | /** 41 | * @returns {boolean} 42 | @@ -364,6 +368,7 @@ 43 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 44 | var len0 = WASM_VECTOR_LEN; 45 | wasm.documentend_append(this.ptr, ptr0, len0, isLikeNone(content_type) ? 0 : addHeapObject(content_type)); 46 | + return this; 47 | } 48 | } 49 | module.exports.DocumentEnd = DocumentEnd; 50 | @@ -397,6 +402,7 @@ 51 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 52 | var len0 = WASM_VECTOR_LEN; 53 | wasm.element_before(this.ptr, ptr0, len0, isLikeNone(content_type) ? 0 : addHeapObject(content_type)); 54 | + return this; 55 | } 56 | /** 57 | * @param {string} content 58 | @@ -406,6 +412,7 @@ 59 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 60 | var len0 = WASM_VECTOR_LEN; 61 | wasm.element_after(this.ptr, ptr0, len0, isLikeNone(content_type) ? 
0 : addHeapObject(content_type)); 62 | + return this; 63 | } 64 | /** 65 | * @param {string} content 66 | @@ -415,11 +422,13 @@ 67 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 68 | var len0 = WASM_VECTOR_LEN; 69 | wasm.element_replace(this.ptr, ptr0, len0, isLikeNone(content_type) ? 0 : addHeapObject(content_type)); 70 | + return this; 71 | } 72 | /** 73 | */ 74 | remove() { 75 | wasm.element_remove(this.ptr); 76 | + return this; 77 | } 78 | /** 79 | * @returns {boolean} 80 | @@ -463,7 +472,7 @@ 81 | */ 82 | get attributes() { 83 | var ret = wasm.element_attributes(this.ptr); 84 | - return takeObject(ret); 85 | + return takeObject(ret)[Symbol.iterator](); 86 | } 87 | /** 88 | * @param {string} name 89 | @@ -495,6 +504,7 @@ 90 | var ptr1 = passStringToWasm0(value, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 91 | var len1 = WASM_VECTOR_LEN; 92 | wasm.element_setAttribute(this.ptr, ptr0, len0, ptr1, len1); 93 | + return this; 94 | } 95 | /** 96 | * @param {string} name 97 | @@ -503,6 +513,7 @@ 98 | var ptr0 = passStringToWasm0(name, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 99 | var len0 = WASM_VECTOR_LEN; 100 | wasm.element_removeAttribute(this.ptr, ptr0, len0); 101 | + return this; 102 | } 103 | /** 104 | * @param {string} content 105 | @@ -512,6 +523,7 @@ 106 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 107 | var len0 = WASM_VECTOR_LEN; 108 | wasm.element_prepend(this.ptr, ptr0, len0, isLikeNone(content_type) ? 0 : addHeapObject(content_type)); 109 | + return this; 110 | } 111 | /** 112 | * @param {string} content 113 | @@ -521,6 +533,7 @@ 114 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 115 | var len0 = WASM_VECTOR_LEN; 116 | wasm.element_append(this.ptr, ptr0, len0, isLikeNone(content_type) ? 
0 : addHeapObject(content_type)); 117 | + return this; 118 | } 119 | /** 120 | * @param {string} content 121 | @@ -530,17 +543,19 @@ 122 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 123 | var len0 = WASM_VECTOR_LEN; 124 | wasm.element_setInnerContent(this.ptr, ptr0, len0, isLikeNone(content_type) ? 0 : addHeapObject(content_type)); 125 | + return this; 126 | } 127 | /** 128 | */ 129 | removeAndKeepContent() { 130 | wasm.element_removeAndKeepContent(this.ptr); 131 | + return this; 132 | } 133 | /** 134 | * @param {any} handler 135 | */ 136 | onEndTag(handler) { 137 | - wasm.element_onEndTag(this.ptr, addHeapObject(handler)); 138 | + wasm.element_onEndTag(this.ptr, addHeapObject(handler.bind(this))); 139 | } 140 | } 141 | module.exports.Element = Element; 142 | @@ -597,6 +612,7 @@ 143 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 144 | var len0 = WASM_VECTOR_LEN; 145 | wasm.endtag_before(this.ptr, ptr0, len0, isLikeNone(content_type) ? 0 : addHeapObject(content_type)); 146 | + return this; 147 | } 148 | /** 149 | * @param {string} content 150 | @@ -606,11 +622,13 @@ 151 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 152 | var len0 = WASM_VECTOR_LEN; 153 | wasm.endtag_after(this.ptr, ptr0, len0, isLikeNone(content_type) ? 
0 : addHeapObject(content_type)); 154 | + return this; 155 | } 156 | /** 157 | */ 158 | remove() { 159 | wasm.endtag_remove(this.ptr); 160 | + return this; 161 | } 162 | } 163 | module.exports.EndTag = EndTag; 164 | @@ -656,25 +674,27 @@ 165 | var ptr0 = passStringToWasm0(selector, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 166 | var len0 = WASM_VECTOR_LEN; 167 | wasm.htmlrewriter_on(this.ptr, ptr0, len0, addHeapObject(handlers)); 168 | + return this; 169 | } 170 | /** 171 | * @param {any} handlers 172 | */ 173 | onDocument(handlers) { 174 | wasm.htmlrewriter_onDocument(this.ptr, addHeapObject(handlers)); 175 | + return this; 176 | } 177 | /** 178 | * @param {Uint8Array} chunk 179 | */ 180 | - write(chunk) { 181 | + async write(chunk) { 182 | var ptr0 = passArray8ToWasm0(chunk, wasm.__wbindgen_malloc); 183 | var len0 = WASM_VECTOR_LEN; 184 | - wasm.htmlrewriter_write(this.ptr, ptr0, len0); 185 | + await wrap(this, wasm.htmlrewriter_write, this.ptr, ptr0, len0); 186 | } 187 | /** 188 | */ 189 | - end() { 190 | - wasm.htmlrewriter_end(this.ptr); 191 | + async end() { 192 | + await wrap(this, wasm.htmlrewriter_end, this.ptr); 193 | } 194 | /** 195 | * @returns {number} 196 | @@ -715,6 +735,7 @@ 197 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 198 | var len0 = WASM_VECTOR_LEN; 199 | wasm.textchunk_before(this.ptr, ptr0, len0, isLikeNone(content_type) ? 0 : addHeapObject(content_type)); 200 | + return this; 201 | } 202 | /** 203 | * @param {string} content 204 | @@ -724,6 +745,7 @@ 205 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 206 | var len0 = WASM_VECTOR_LEN; 207 | wasm.textchunk_after(this.ptr, ptr0, len0, isLikeNone(content_type) ? 
0 : addHeapObject(content_type)); 208 | + return this; 209 | } 210 | /** 211 | * @param {string} content 212 | @@ -733,11 +755,13 @@ 213 | var ptr0 = passStringToWasm0(content, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); 214 | var len0 = WASM_VECTOR_LEN; 215 | wasm.textchunk_replace(this.ptr, ptr0, len0, isLikeNone(content_type) ? 0 : addHeapObject(content_type)); 216 | + return this; 217 | } 218 | /** 219 | */ 220 | remove() { 221 | wasm.textchunk_remove(this.ptr); 222 | + return this; 223 | } 224 | /** 225 | * @returns {boolean} 226 | @@ -893,7 +917,8 @@ 227 | }; 228 | 229 | module.exports.__wbg_instanceof_Promise_c6535fc791fcc4d2 = function(arg0) { 230 | - var ret = getObject(arg0) instanceof Promise; 231 | + var obj = getObject(arg0); 232 | + var ret = (obj instanceof Promise) || (Object.prototype.toString.call(obj) === '[object Promise]'); 233 | return ret; 234 | }; 235 | 236 | @@ -939,5 +964,6 @@ 237 | const wasmModule = new WebAssembly.Module(bytes); 238 | const wasmInstance = new WebAssembly.Instance(wasmModule, imports); 239 | wasm = wasmInstance.exports; 240 | +setWasmExports(wasm); 241 | module.exports.__wasm = wasm; 242 | 243 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "html-rewriter-wasm", 3 | "version": "0.4.1", 4 | "description": "WebAssembly version of HTMLRewriter", 5 | "main": "dist/html_rewriter.js", 6 | "types": "dist/html_rewriter.d.ts", 7 | "files": [ 8 | "dist" 9 | ], 10 | "scripts": { 11 | "prepublishOnly": "npm run build && npm test", 12 | "build": "./build.sh", 13 | "test": "ava" 14 | }, 15 | "repository": { 16 | "type": "git", 17 | "url": "git+https://github.com/mrbbot/html-rewriter-wasm.git" 18 | }, 19 | "keywords": [ 20 | "cloudflare", 21 | "workers", 22 | "worker", 23 | "html", 24 | "rewriter", 25 | "lol" 26 | ], 27 | "author": "MrBBot", 28 | "license": "BSD-3-Clause", 29 | 
"bugs": { 30 | "url": "https://github.com/mrbbot/html-rewriter-wasm/issues" 31 | }, 32 | "homepage": "https://github.com/mrbbot/html-rewriter-wasm#readme", 33 | "devDependencies": { 34 | "@types/node": "^14.17.5", 35 | "ava": "^3.15.0", 36 | "prettier": "^2.3.2", 37 | "ts-node": "^10.1.0", 38 | "typescript": "^4.3.5" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/asyncify.js: -------------------------------------------------------------------------------- 1 | const assert = require("assert"); 2 | 3 | /** 4 | * @typedef {object} WasmExports 5 | * @property {WebAssembly.Memory} memory 6 | * @property {function} asyncify_get_state 7 | * @property {function} asyncify_start_unwind 8 | * @property {function} asyncify_stop_unwind 9 | * @property {function} asyncify_start_rewind 10 | * @property {function} asyncify_stop_rewind 11 | */ 12 | 13 | /** 14 | * @type {WasmExports} 15 | */ 16 | let wasm; 17 | 18 | /** 19 | * @param {WasmExports} wasmExports 20 | */ 21 | function setWasmExports(wasmExports) { 22 | wasm = wasmExports; 23 | } 24 | 25 | /** 26 | * @type {Int32Array} 27 | */ 28 | let cachedInt32Memory = null; 29 | 30 | /** 31 | * @returns {Int32Array} 32 | */ 33 | function getInt32Memory() { 34 | if ( 35 | cachedInt32Memory === null || 36 | cachedInt32Memory.buffer !== wasm.memory.buffer 37 | ) { 38 | cachedInt32Memory = new Int32Array(wasm.memory.buffer); 39 | } 40 | return cachedInt32Memory; 41 | } 42 | 43 | // https://github.com/WebAssembly/binaryen/blob/fb9de9d391a7272548dcc41cd8229076189d7398/src/passes/Asyncify.cpp#L99 44 | const State = { 45 | NONE: 0, 46 | UNWINDING: 1, 47 | REWINDING: 2, 48 | }; 49 | 50 | function assertNoneState() { 51 | assert.strictEqual(wasm.asyncify_get_state(), State.NONE); 52 | } 53 | 54 | /** 55 | * Maps `HTMLRewriter`s (their `asyncifyStackPtr`s) to `Promise`s. 56 | * `asyncifyStackPtr` acts as unique reference to `HTMLRewriter`. 
57 | * Each rewriter MUST have AT MOST ONE pending promise at any time. 58 | * @type {Map} 59 | */ 60 | const promises = new Map(); 61 | 62 | /** 63 | * @param {number} stackPtr 64 | * @param {Promise} promise 65 | */ 66 | function awaitPromise(stackPtr, promise) { 67 | if (wasm.asyncify_get_state() === State.REWINDING) { 68 | wasm.asyncify_stop_rewind(); 69 | return; 70 | } 71 | 72 | assertNoneState(); 73 | 74 | // https://github.com/WebAssembly/binaryen/blob/fb9de9d391a7272548dcc41cd8229076189d7398/src/passes/Asyncify.cpp#L106 75 | assert.strictEqual(stackPtr % 4, 0); 76 | getInt32Memory().set([stackPtr + 8, stackPtr + 1024], stackPtr / 4); 77 | 78 | wasm.asyncify_start_unwind(stackPtr); 79 | 80 | assert(!promises.has(stackPtr)); 81 | promises.set(stackPtr, promise); 82 | } 83 | 84 | /** 85 | * @param {HTMLRewriter} rewriter 86 | * @param {Function} fn 87 | * @param args 88 | */ 89 | async function wrap(rewriter, fn, ...args) { 90 | const stackPtr = rewriter.asyncifyStackPtr; 91 | 92 | assertNoneState(); 93 | let result = fn(...args); 94 | 95 | while (wasm.asyncify_get_state() === State.UNWINDING) { 96 | wasm.asyncify_stop_unwind(); 97 | 98 | assertNoneState(); 99 | assert(promises.has(stackPtr)); 100 | await promises.get(stackPtr); 101 | promises.delete(stackPtr); 102 | 103 | assertNoneState(); 104 | wasm.asyncify_start_rewind(stackPtr); 105 | result = fn(); 106 | } 107 | 108 | assertNoneState(); 109 | return result; 110 | } 111 | 112 | module.exports = { awaitPromise, setWasmExports, wrap }; 113 | -------------------------------------------------------------------------------- /src/comment.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use lol_html::html_content::Comment as NativeComment; 3 | 4 | #[wasm_bindgen] 5 | pub struct Comment(NativeRefWrap>); 6 | 7 | impl_from_native!(NativeComment --> Comment); 8 | impl_mutations!(Comment); 9 | 10 | #[wasm_bindgen] 11 | impl Comment { 12 | 
#[wasm_bindgen(method, getter=text)] 13 | pub fn text(&self) -> JsResult { 14 | self.0.get().map(|c| c.text().into()) 15 | } 16 | 17 | #[wasm_bindgen(method, setter=text)] 18 | pub fn set_text(&mut self, text: &str) -> JsResult<()> { 19 | self.0.get_mut()?.set_text(text).into_js_result() 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/doctype.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use lol_html::html_content::Doctype as NativeDoctype; 3 | 4 | #[wasm_bindgen] 5 | pub struct Doctype(NativeRefWrap>); 6 | 7 | impl_from_native!(NativeDoctype --> Doctype); 8 | 9 | #[wasm_bindgen] 10 | impl Doctype { 11 | #[wasm_bindgen(method, getter)] 12 | pub fn name(&self) -> JsResult { 13 | self.0 14 | .get() 15 | .map(|d| d.name().map(JsValue::from).unwrap_or(JsValue::null())) 16 | } 17 | 18 | #[wasm_bindgen(method, getter=publicId)] 19 | pub fn public_id(&self) -> JsResult { 20 | self.0 21 | .get() 22 | .map(|d| d.public_id().map(JsValue::from).unwrap_or(JsValue::null())) 23 | } 24 | 25 | #[wasm_bindgen(method, getter=systemId)] 26 | pub fn system_id(&self) -> JsResult { 27 | self.0 28 | .get() 29 | .map(|d| d.system_id().map(JsValue::from).unwrap_or(JsValue::null())) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/document_end.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use lol_html::html_content::DocumentEnd as NativeDocumentEnd; 3 | 4 | #[wasm_bindgen] 5 | pub struct DocumentEnd(NativeRefWrap>); 6 | 7 | impl_from_native!(NativeDocumentEnd --> DocumentEnd); 8 | 9 | #[wasm_bindgen] 10 | impl DocumentEnd { 11 | pub fn append( 12 | &mut self, 13 | content: &str, 14 | content_type: Option, 15 | ) -> Result<(), JsValue> { 16 | self.0 17 | .get_mut() 18 | .map(|e| e.append(content, content_type.into_native())) 19 | } 20 | } 21 | 
-------------------------------------------------------------------------------- /src/element.rs: -------------------------------------------------------------------------------- 1 | use super::end_tag::EndTag; 2 | use super::handlers::{await_promise, make_handler, HandlerJsErrorWrap}; 3 | use super::*; 4 | use js_sys::{Function as JsFunction, Promise as JsPromise}; 5 | use lol_html::html_content::Element as NativeElement; 6 | use serde_wasm_bindgen::to_value as to_js_value; 7 | use wasm_bindgen::JsCast; 8 | 9 | #[wasm_bindgen] 10 | pub struct Element(NativeRefWrap>); 11 | 12 | impl_from_native!(NativeElement --> Element); 13 | impl_mutations!(Element); 14 | 15 | #[wasm_bindgen] 16 | impl Element { 17 | #[wasm_bindgen(method, getter=tagName)] 18 | pub fn tag_name(&self) -> JsResult { 19 | self.0.get().map(|e| e.tag_name()) 20 | } 21 | 22 | #[wasm_bindgen(method, setter=tagName)] 23 | pub fn set_tag_name(&mut self, name: &str) -> JsResult<()> { 24 | self.0.get_mut()?.set_tag_name(name).into_js_result() 25 | } 26 | 27 | #[wasm_bindgen(method, getter=namespaceURI)] 28 | pub fn namespace_uri(&self) -> JsResult { 29 | self.0.get().map(|e| e.namespace_uri().into()) 30 | } 31 | 32 | #[wasm_bindgen(method, getter)] 33 | pub fn attributes(&self) -> JsResult { 34 | self.0 35 | .get() 36 | .map(|e| { 37 | e.attributes() 38 | .iter() 39 | .map(|a| vec![a.name(), a.value()]) 40 | .collect::>() 41 | }) 42 | .and_then(|a| to_js_value(&a).into_js_result()) 43 | } 44 | 45 | #[wasm_bindgen(method, js_name=getAttribute)] 46 | pub fn get_attribute(&self, name: &str) -> JsResult { 47 | self.0.get().map(|e| { 48 | e.get_attribute(name) 49 | .map(JsValue::from) 50 | .unwrap_or(JsValue::null()) 51 | }) 52 | } 53 | 54 | #[wasm_bindgen(method, js_name=hasAttribute)] 55 | pub fn has_attribute(&self, name: &str) -> JsResult { 56 | self.0.get().map(|e| e.has_attribute(name)) 57 | } 58 | 59 | #[wasm_bindgen(method, js_name=setAttribute)] 60 | pub fn set_attribute(&mut self, name: &str, value: 
&str) -> JsResult<()> { 61 | self.0 62 | .get_mut()? 63 | .set_attribute(name, value) 64 | .into_js_result() 65 | } 66 | 67 | #[wasm_bindgen(method, js_name=removeAttribute)] 68 | pub fn remove_attribute(&mut self, name: &str) -> JsResult<()> { 69 | self.0.get_mut().map(|e| e.remove_attribute(name)) 70 | } 71 | 72 | pub fn prepend( 73 | &mut self, 74 | content: &str, 75 | content_type: Option, 76 | ) -> Result<(), JsValue> { 77 | self.0 78 | .get_mut() 79 | .map(|e| e.prepend(content, content_type.into_native())) 80 | } 81 | 82 | pub fn append( 83 | &mut self, 84 | content: &str, 85 | content_type: Option, 86 | ) -> Result<(), JsValue> { 87 | self.0 88 | .get_mut() 89 | .map(|e| e.append(content, content_type.into_native())) 90 | } 91 | 92 | #[wasm_bindgen(method, js_name=setInnerContent)] 93 | pub fn set_inner_content( 94 | &mut self, 95 | content: &str, 96 | content_type: Option, 97 | ) -> Result<(), JsValue> { 98 | self.0 99 | .get_mut() 100 | .map(|e| e.set_inner_content(content, content_type.into_native())) 101 | } 102 | 103 | #[wasm_bindgen(method, js_name=removeAndKeepContent)] 104 | pub fn remove_and_keep_content(&mut self) -> Result<(), JsValue> { 105 | self.0.get_mut().map(|e| e.remove_and_keep_content()) 106 | } 107 | 108 | #[wasm_bindgen(method, js_name=onEndTag)] 109 | pub fn on_end_tag(&mut self, handler: JsFunction) -> Result<(), JsValue> { 110 | let this = JsValue::NULL; 111 | let stack_ptr = self.0.stack_ptr; 112 | self.0 113 | .get_mut()? 
114 | .on_end_tag(make_handler!(handler, EndTag, this, stack_ptr)) 115 | .into_js_result() 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/end_tag.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use lol_html::html_content::EndTag as NativeEndTag; 3 | 4 | #[wasm_bindgen] 5 | pub struct EndTag(NativeRefWrap>); 6 | 7 | impl_from_native!(NativeEndTag --> EndTag); 8 | 9 | #[wasm_bindgen] 10 | impl EndTag { 11 | #[wasm_bindgen(method, getter=name)] 12 | pub fn name(&self) -> JsResult { 13 | self.0.get().map(|e| e.name()) 14 | } 15 | 16 | #[wasm_bindgen(method, setter=name)] 17 | pub fn set_name(&mut self, name: &str) -> JsResult<()> { 18 | self.0.get_mut().map(|e| e.set_name_str(String::from(name))) 19 | } 20 | 21 | pub fn before( 22 | &mut self, 23 | content: &str, 24 | content_type: Option, 25 | ) -> JsResult<()> { 26 | self.0 27 | .get_mut() 28 | .map(|e| e.before(content, content_type.into_native())) 29 | } 30 | 31 | pub fn after( 32 | &mut self, 33 | content: &str, 34 | content_type: Option, 35 | ) -> JsResult<()> { 36 | self.0 37 | .get_mut() 38 | .map(|e| e.after(content, content_type.into_native())) 39 | } 40 | 41 | pub fn remove(&mut self) -> JsResult<()> { 42 | self.0.get_mut().map(|e| e.remove()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/handlers.rs: -------------------------------------------------------------------------------- 1 | use super::comment::Comment; 2 | use super::doctype::Doctype; 3 | use super::document_end::DocumentEnd; 4 | use super::element::Element; 5 | use super::text_chunk::TextChunk; 6 | use super::*; 7 | use js_sys::{Function as JsFunction, Promise as JsPromise}; 8 | use lol_html::{ 9 | DocumentContentHandlers as NativeDocumentContentHandlers, 10 | ElementContentHandlers as NativeElementContentHandlers, 11 | }; 12 | use std::mem; 13 | use std::rc::Rc; 14 | 
use thiserror::Error; 15 | use wasm_bindgen::JsCast; 16 | 17 | // NOTE: Display is a no-op, because the JsValue error is unwrapped when it is propagated to 18 | // `write()` or `end()`. 19 | #[derive(Error, Debug)] 20 | #[error("JS handler error")] 21 | pub struct HandlerJsErrorWrap(pub JsValue); 22 | // Probably horribly unsafe, but it works™ 23 | unsafe impl Send for HandlerJsErrorWrap {} 24 | unsafe impl Sync for HandlerJsErrorWrap {} 25 | 26 | #[wasm_bindgen(raw_module = "./asyncify.js")] 27 | extern "C" { 28 | #[wasm_bindgen(js_name = awaitPromise)] 29 | pub(crate) fn await_promise(stack_ptr: *mut u8, promise: &JsPromise); 30 | } 31 | // Builds a closure that wraps the native argument as `$JsArgType`, calls the JS `$handler` with `$this` as receiver, hands a returned promise to `await_promise`, and maps a thrown JS value to `HandlerJsErrorWrap`. 32 | macro_rules! make_handler { 33 | ($handler:ident, $JsArgType:ident, $this:ident, $stack_ptr:ident) => { 34 | move |arg: &mut _| { 35 | let (js_arg, anchor) = $JsArgType::from_native(arg, $stack_ptr); 36 | let js_arg = JsValue::from(js_arg); 37 | 38 | let res = match $handler.call1(&$this, &js_arg) { 39 | Ok(res) => { 40 | if let Some(promise) = res.dyn_ref::() { 41 | await_promise($stack_ptr, promise); 42 | } 43 | Ok(()) 44 | } 45 | Err(e) => Err(HandlerJsErrorWrap(e).into()), 46 | }; 47 | // The anchor returned by `from_native` is dropped only after the handler call has returned. 48 | mem::drop(anchor); 49 | 50 | res 51 | } 52 | }; 53 | } 54 | pub(crate) use make_handler; 55 | // Conversion of a JS handlers object into a native handlers value `T`, given the asyncify stack pointer passed on to each handler. 56 | pub trait IntoNativeHandlers { 57 | fn into_native(self, stack_ptr: *mut u8) -> T; 58 | } 59 | 60 | #[wasm_bindgen] 61 | extern "C" { 62 | pub type ElementContentHandlers; 63 | 64 | #[wasm_bindgen(method, getter)] 65 | fn element(this: &ElementContentHandlers) -> Option; 66 | 67 | #[wasm_bindgen(method, getter)] 68 | fn comments(this: &ElementContentHandlers) -> Option; 69 | 70 | #[wasm_bindgen(method, getter)] 71 | fn text(this: &ElementContentHandlers) -> Option; 72 | } 73 | 74 | impl IntoNativeHandlers> for ElementContentHandlers { 75 | fn into_native(self, stack_ptr: *mut u8) -> NativeElementContentHandlers<'static> { 76 | let handlers: Rc = Rc::new((&self).into()); 77 | let mut native = NativeElementContentHandlers::default(); 78 | 79 
| if let Some(handler) = self.element() { 80 | let this = Rc::clone(&handlers); 81 | native = native.element(make_handler!(handler, Element, this, stack_ptr)); 82 | } 83 | 84 | if let Some(handler) = self.comments() { 85 | let this = Rc::clone(&handlers); 86 | native = native.comments(make_handler!(handler, Comment, this, stack_ptr)); 87 | } 88 | 89 | if let Some(handler) = self.text() { 90 | let this = Rc::clone(&handlers); 91 | native = native.text(make_handler!(handler, TextChunk, this, stack_ptr)); 92 | } 93 | 94 | native 95 | } 96 | } 97 | 98 | #[wasm_bindgen] 99 | extern "C" { 100 | pub type DocumentContentHandlers; 101 | 102 | #[wasm_bindgen(method, getter)] 103 | fn doctype(this: &DocumentContentHandlers) -> Option; 104 | 105 | #[wasm_bindgen(method, getter)] 106 | fn comments(this: &DocumentContentHandlers) -> Option; 107 | 108 | #[wasm_bindgen(method, getter)] 109 | fn text(this: &DocumentContentHandlers) -> Option; 110 | 111 | #[wasm_bindgen(method, getter)] 112 | fn end(this: &DocumentContentHandlers) -> Option; 113 | } 114 | 115 | impl IntoNativeHandlers> for DocumentContentHandlers { 116 | fn into_native(self, stack_ptr: *mut u8) -> NativeDocumentContentHandlers<'static> { 117 | let handlers: Rc = Rc::new((&self).into()); 118 | let mut native = NativeDocumentContentHandlers::default(); 119 | 120 | if let Some(handler) = self.doctype() { 121 | let this = Rc::clone(&handlers); 122 | native = native.doctype(make_handler!(handler, Doctype, this, stack_ptr)); 123 | } 124 | 125 | if let Some(handler) = self.comments() { 126 | let this = Rc::clone(&handlers); 127 | native = native.comments(make_handler!(handler, Comment, this, stack_ptr)); 128 | } 129 | 130 | if let Some(handler) = self.text() { 131 | let this = Rc::clone(&handlers); 132 | native = native.text(make_handler!(handler, TextChunk, this, stack_ptr)); 133 | } 134 | 135 | if let Some(handler) = self.end() { 136 | let this = Rc::clone(&handlers); 137 | native = native.end(make_handler!(handler, 
DocumentEnd, this, stack_ptr)); 138 | } 139 | 140 | native 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/html_rewriter.d.ts: -------------------------------------------------------------------------------- 1 | export interface ContentTypeOptions { 2 | html?: boolean; 3 | } 4 | 5 | export class Element { 6 | before(content: string, options?: ContentTypeOptions): this; 7 | after(content: string, options?: ContentTypeOptions): this; 8 | replace(content: string, options?: ContentTypeOptions): this; 9 | remove(): this; 10 | getAttribute(name: string): string | null; 11 | hasAttribute(name: string): boolean; 12 | setAttribute(name: string, value: string): this; 13 | removeAttribute(name: string): this; 14 | prepend(content: string, options?: ContentTypeOptions): this; 15 | append(content: string, options?: ContentTypeOptions): this; 16 | setInnerContent(content: string, options?: ContentTypeOptions): this; 17 | removeAndKeepContent(): this; 18 | readonly attributes: IterableIterator<[string, string]>; 19 | readonly namespaceURI: string; 20 | readonly removed: boolean; 21 | tagName: string; 22 | onEndTag(handler: (this: this, endTag: EndTag) => void | Promise): void; 23 | } 24 | 25 | export class EndTag { 26 | before(content: string, options?: ContentTypeOptions): this; 27 | after(content: string, options?: ContentTypeOptions): this; 28 | remove(): this; 29 | name: string; 30 | } 31 | 32 | export class Comment { 33 | before(content: string, options?: ContentTypeOptions): this; 34 | after(content: string, options?: ContentTypeOptions): this; 35 | replace(content: string, options?: ContentTypeOptions): this; 36 | remove(): this; 37 | readonly removed: boolean; 38 | text: string; 39 | } 40 | 41 | export class TextChunk { 42 | before(content: string, options?: ContentTypeOptions): this; 43 | after(content: string, options?: ContentTypeOptions): this; 44 | replace(content: string, options?: ContentTypeOptions): 
this; 45 | remove(): this; 46 | readonly lastInTextNode: boolean; 47 | readonly removed: boolean; 48 | readonly text: string; 49 | } 50 | 51 | export class Doctype { 52 | readonly name: string | null; 53 | readonly publicId: string | null; 54 | readonly systemId: string | null; 55 | } 56 | 57 | export class DocumentEnd { 58 | append(content: string, options?: ContentTypeOptions): this; 59 | } 60 | 61 | export interface ElementHandlers { 62 | element?(element: Element): void | Promise; 63 | comments?(comment: Comment): void | Promise; 64 | text?(text: TextChunk): void | Promise; 65 | } 66 | 67 | export interface DocumentHandlers { 68 | doctype?(doctype: Doctype): void | Promise; 69 | comments?(comment: Comment): void | Promise; 70 | text?(text: TextChunk): void | Promise; 71 | end?(end: DocumentEnd): void | Promise; 72 | } 73 | 74 | export interface HTMLRewriterOptions { 75 | enableEsiTags?: boolean; 76 | } 77 | 78 | export class HTMLRewriter { 79 | constructor( 80 | outputSink: (chunk: Uint8Array) => void, 81 | options?: HTMLRewriterOptions 82 | ); 83 | on(selector: string, handlers: ElementHandlers): this; 84 | onDocument(handlers: DocumentHandlers): this; 85 | write(chunk: Uint8Array): Promise; 86 | end(): Promise; 87 | free(): void; 88 | } 89 | -------------------------------------------------------------------------------- /src/html_rewriter.rs: -------------------------------------------------------------------------------- 1 | use super::handlers::{ 2 | DocumentContentHandlers, ElementContentHandlers, HandlerJsErrorWrap, IntoNativeHandlers, 3 | }; 4 | use super::*; 5 | use js_sys::{Function as JsFunction, Uint8Array}; 6 | use lol_html::errors::RewritingError; 7 | use lol_html::{ 8 | DocumentContentHandlers as NativeDocumentContentHandlers, 9 | ElementContentHandlers as NativeElementContentHandlers, HtmlRewriter as NativeHTMLRewriter, 10 | OutputSink, Selector, Settings, 11 | }; 12 | use std::borrow::Cow; 13 | 14 | struct JsOutputSink(JsFunction); 15 | 16 | 
// NOTE(review): flattened listing. The `N |` markers are the listing's own
// per-file line numbers, and generic arguments appear to have been dropped in
// extraction (`Vec,`, `Option>`, `downcast::()`, `parse::()`), so the text
// below is not compilable as shown.
//
// src/html_rewriter.rs (listing lines 16-154)
//   - `JsOutputSink::new` clones the JS function handle it is given.
//     `OutputSink::handle_chunk` copies each output chunk into a `Uint8Array`
//     and calls the function with `this = null`. The call result is
//     `unwrap()`ed, so an `Err` from the call panics here (the inline NOTE
//     says the JS wrapper handles that error).
//   - `rewriting_error_to_js`: for `ContentHandlerError` it downcasts the
//     boxed error and returns its `.0` field; the downcast is `unwrap()`ed, so
//     a payload of any other type panics. Every other `RewritingError` is
//     returned as a JS string built from `to_string()`.
//   - `HTMLRewriter::new`: stores the sink, allocates a 1024-element
//     zero-filled `asyncify_stack`, and reads `enableEsiTags` from the options
//     object, defaulting to `false` when the options or the field are absent.
//   - `assert_not_fully_constructed`: returns the plain string
//     "Handlers can't be added after write." once `inner_constructed` is set.
//   - `inner_mut`: builds the native rewriter lazily on first use. It takes
//     `output_sink` out of its `Option`, drains the queued selectors and
//     handlers into `Settings`, stores the rewriter in `inner`, and sets
//     `inner_constructed`.
//   - `on` / `on_document`: reject calls after construction; otherwise parse
//     the selector (`on` only) and queue the handlers converted via
//     `into_native(stack_ptr)`.
//   - `write`: forwards the chunk to the native rewriter and maps errors with
//     `rewriting_error_to_js`.
//   - `end`: forces construction, then takes the rewriter out of `inner` and
//     calls its `end()`.
//     NOTE(review): after `end()` both `inner` and `output_sink` are `None`,
//     so a later `write()` or `end()` re-enters the `None` arm of `inner_mut`
//     and hits `self.output_sink.take().unwrap()`, which panics instead of
//     returning a JS error. Confirm whether the JS wrapper prevents reuse.
//   - `asyncify_stack_ptr`: exposes `asyncify_stack.as_mut_ptr()` to JS as the
//     `asyncifyStackPtr` getter.
//
// src/lib.rs (listing lines 1-38): imports, the `JsResult` alias, and
// `Anchor`, which holds the shared `poisoned` cell and sets it to `true` in
// `Drop`.
impl JsOutputSink { 17 | fn new(func: &JsFunction) -> Self { 18 | JsOutputSink(func.clone()) 19 | } 20 | } 21 | 22 | impl OutputSink for JsOutputSink { 23 | #[inline] 24 | fn handle_chunk(&mut self, chunk: &[u8]) { 25 | let this = JsValue::NULL; 26 | let chunk = Uint8Array::from(chunk); 27 | 28 | // NOTE: the error is handled in the JS wrapper. 29 | self.0.call1(&this, &chunk).unwrap(); 30 | } 31 | } 32 | 33 | //noinspection RsTypeCheck 34 | fn rewriting_error_to_js(err: RewritingError) -> JsValue { 35 | match err { 36 | RewritingError::ContentHandlerError(err) => err.downcast::().unwrap().0, 37 | _ => JsValue::from(err.to_string()), 38 | } 39 | } 40 | 41 | #[wasm_bindgen] 42 | #[derive(Default)] 43 | pub struct HTMLRewriter { 44 | selectors: Vec, 45 | element_content_handlers: Vec>, 46 | document_content_handlers: Vec>, 47 | output_sink: Option, 48 | inner: Option>, 49 | inner_constructed: bool, 50 | asyncify_stack: Vec, 51 | enable_esi_tags: bool, 52 | } 53 | 54 | #[wasm_bindgen] 55 | extern "C" { 56 | pub type HTMLRewriterOptions; 57 | 58 | #[wasm_bindgen(structural, method, getter, js_name = enableEsiTags)] 59 | pub fn enable_esi_tags(this: &HTMLRewriterOptions) -> Option; 60 | } 61 | 62 | #[wasm_bindgen] 63 | impl HTMLRewriter { 64 | #[wasm_bindgen(constructor)] 65 | pub fn new(output_sink: &JsFunction, options: Option) -> Self { 66 | HTMLRewriter { 67 | output_sink: Some(JsOutputSink::new(output_sink)), 68 | asyncify_stack: vec![0; 1024], 69 | enable_esi_tags: options.and_then(|o| o.enable_esi_tags()).unwrap_or(false), 70 | ..Self::default() 71 | } 72 | } 73 | 74 | fn assert_not_fully_constructed(&self) -> JsResult<()> { 75 | if self.inner_constructed { 76 | Err("Handlers can't be added after write.".into()) 77 | } else { 78 | Ok(()) 79 | } 80 | } 81 | 82 | fn inner_mut(&mut self) -> JsResult<&mut NativeHTMLRewriter<'static, JsOutputSink>> { 83 | Ok(match self.inner { 84 | Some(ref mut inner) => inner, 85 | None => { 86 | let output_sink =
self.output_sink.take().unwrap(); 87 | 88 | let settings = Settings { 89 | element_content_handlers: self 90 | .selectors 91 | .drain(..) 92 | .zip(self.element_content_handlers.drain(..)) 93 | .map(|(selector, h)| (Cow::Owned(selector), h)) 94 | .collect(), 95 | 96 | document_content_handlers: self.document_content_handlers.drain(..).collect(), 97 | enable_esi_tags: self.enable_esi_tags, 98 | ..Settings::default() 99 | }; 100 | 101 | let rewriter = NativeHTMLRewriter::new(settings, output_sink); 102 | 103 | self.inner = Some(rewriter); 104 | self.inner_constructed = true; 105 | 106 | self.inner.as_mut().unwrap() 107 | } 108 | }) 109 | } 110 | 111 | pub fn on(&mut self, selector: &str, handlers: ElementContentHandlers) -> JsResult<()> { 112 | self.assert_not_fully_constructed()?; 113 | 114 | let selector = selector.parse::().into_js_result()?; 115 | 116 | self.selectors.push(selector); 117 | let stack_ptr = self.asyncify_stack_ptr(); 118 | self.element_content_handlers 119 | .push(handlers.into_native(stack_ptr)); 120 | 121 | Ok(()) 122 | } 123 | 124 | #[wasm_bindgen(method, js_name=onDocument)] 125 | pub fn on_document(&mut self, handlers: DocumentContentHandlers) -> JsResult<()> { 126 | self.assert_not_fully_constructed()?; 127 | let stack_ptr = self.asyncify_stack_ptr(); 128 | self.document_content_handlers 129 | .push(handlers.into_native(stack_ptr)); 130 | 131 | Ok(()) 132 | } 133 | 134 | pub fn write(&mut self, chunk: &[u8]) -> JsResult<()> { 135 | self.inner_mut()?
136 | .write(chunk) 137 | .map_err(rewriting_error_to_js) 138 | } 139 | 140 | pub fn end(&mut self) -> JsResult<()> { 141 | self.inner_mut()?; 142 | // Rewriter must be constructed by self.inner_mut() 143 | self.inner 144 | .take() 145 | .unwrap() 146 | .end() 147 | .map_err(rewriting_error_to_js) 148 | } 149 | 150 | #[wasm_bindgen(method, getter=asyncifyStackPtr)] 151 | pub fn asyncify_stack_ptr(&mut self) -> *mut u8 { 152 | self.asyncify_stack.as_mut_ptr() 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use js_sys::TypeError; 2 | use lol_html::html_content::ContentType as NativeContentType; 3 | use std::cell::Cell; 4 | use std::convert::Into; 5 | use std::marker::PhantomData; 6 | use std::mem; 7 | use std::ops::Drop; 8 | use std::rc::Rc; 9 | use wasm_bindgen::prelude::*; 10 | 11 | type JsResult = Result; 12 | 13 | struct Anchor<'r> { 14 | poisoned: Rc>, 15 | lifetime: PhantomData<&'r mut ()>, 16 | } 17 | 18 | impl<'r> Anchor<'r> { 19 | pub fn new(poisoned: Rc>) -> Self { 20 | Anchor { 21 | poisoned, 22 | lifetime: PhantomData, 23 | } 24 | } 25 | } 26 | 27 | impl Drop for Anchor<'_> { 28 | fn drop(&mut self) { 29 | self.poisoned.replace(true); 30 | } 31 | } 32 | 33 | // NOTE: wasm_bindgen doesn't allow structures with lifetimes. To workaround that 34 | // we create a wrapper that erases all the lifetime information from the inner reference 35 | // and provides an anchor object that keeps track of the lifetime in the runtime. 36 | // 37 | // When anchor goes out of scope, wrapper becomes poisoned and any attempt to get inner 38 | // object results in exception.
// NOTE(review): flattened listing. The `N |` markers are the listing's own
// per-file line numbers, and generic arguments appear to have been dropped in
// extraction, so the text below is not compilable as shown.
//
// src/lib.rs (listing lines 39-190)
//   - `NativeRefWrap`: stores a raw pointer (`inner_ptr`), the shared
//     `poisoned` flag and `stack_ptr`. `wrap` transmutes the `&mut` it is
//     given into that pointer and returns the wrapper with an `Anchor` that
//     shares the same flag. `get` / `get_mut` first check the flag and return
//     a `TypeError` ("This content token is no longer valid. ...") when it is
//     set; otherwise they dereference the raw pointer via `as_ref()` /
//     `as_mut()` and `unwrap()` the result.
//   - `IntoJsResult::into_js_result`: maps an error to a `TypeError` whose
//     message is "Parser error: " followed by the error's `to_string()`.
//   - `IntoNative` for the optional `ContentTypeOptions`: yields
//     `NativeContentType::Html` only when `html()` is `Some(true)`; a missing
//     options object, a missing field, or `false` all yield `Text`.
//   - `impl_mutations!`: generates `before`, `after`, `replace`, `remove` and
//     the `removed` getter, each forwarding to the wrapped object obtained via
//     `get_mut()` / `get()`.
//   - `impl_from_native!`: generates `from_native`, which wraps a `&mut`
//     native value with `NativeRefWrap::wrap` and returns the JS-facing type
//     together with the `Anchor`.
//   - `mod` declarations for the crate's submodules.
//
// src/text_chunk.rs: `TextChunk` wraps a native text chunk and gets the
// mutation methods from `impl_mutations!`; adds the `text` and
// `lastInTextNode` getters.
//
// The last physical line ends with the first lines of test/comments.spec.ts
// and stops inside an opening string literal.
// NOTE(review): that literal's content appears to have been lost in
// extraction; it is left exactly as found.
39 | #[derive(Clone)] 40 | struct NativeRefWrap { 41 | inner_ptr: *mut R, 42 | poisoned: Rc>, 43 | stack_ptr: *mut u8, 44 | } 45 | 46 | impl NativeRefWrap { 47 | pub fn wrap(inner: &mut I, stack_ptr: *mut u8) -> (Self, Anchor) { 48 | let wrap = NativeRefWrap { 49 | inner_ptr: unsafe { mem::transmute(inner) }, 50 | poisoned: Rc::new(Cell::new(false)), 51 | stack_ptr, 52 | }; 53 | 54 | let anchor = Anchor::new(Rc::clone(&wrap.poisoned)); 55 | 56 | (wrap, anchor) 57 | } 58 | 59 | fn assert_not_poisoned(&self) -> JsResult<()> { 60 | if self.poisoned.get() { 61 | Err(TypeError::new( 62 | "This content token is no longer valid. Content tokens are only valid during the execution of the relevant content handler.", 63 | ).into()) 64 | } else { 65 | Ok(()) 66 | } 67 | } 68 | 69 | pub fn get(&self) -> JsResult<&R> { 70 | self.assert_not_poisoned()?; 71 | 72 | Ok(unsafe { self.inner_ptr.as_ref() }.unwrap()) 73 | } 74 | 75 | pub fn get_mut(&mut self) -> JsResult<&mut R> { 76 | self.assert_not_poisoned()?; 77 | 78 | Ok(unsafe { self.inner_ptr.as_mut() }.unwrap()) 79 | } 80 | } 81 | 82 | trait IntoJsResult { 83 | fn into_js_result(self) -> JsResult; 84 | } 85 | 86 | impl IntoJsResult for Result { 87 | #[inline] 88 | fn into_js_result(self) -> JsResult { 89 | self.map_err(|e| { 90 | let mut msg = String::from("Parser error: "); 91 | msg.push_str(&e.to_string()); 92 | TypeError::new(&msg).into() 93 | }) 94 | } 95 | } 96 | 97 | trait IntoNative { 98 | fn into_native(self) -> T; 99 | } 100 | 101 | #[wasm_bindgen] 102 | extern "C" { 103 | pub type ContentTypeOptions; 104 | 105 | #[wasm_bindgen(method, getter)] 106 | fn html(this: &ContentTypeOptions) -> Option; 107 | } 108 | 109 | impl IntoNative for Option { 110 | fn into_native(self) -> NativeContentType { 111 | match self { 112 | Some(opts) => match opts.html() { 113 | Some(true) => NativeContentType::Html, 114 | Some(false) => NativeContentType::Text, 115 | None => NativeContentType::Text, 116 | }, 117 | None =>
NativeContentType::Text, 118 | } 119 | } 120 | } 121 | 122 | macro_rules! impl_mutations { 123 | ($Ty:ident) => { 124 | #[wasm_bindgen] 125 | impl $Ty { 126 | pub fn before( 127 | &mut self, 128 | content: &str, 129 | content_type: Option, 130 | ) -> Result<(), JsValue> { 131 | self.0 132 | .get_mut() 133 | .map(|o| o.before(content, content_type.into_native())) 134 | } 135 | 136 | pub fn after( 137 | &mut self, 138 | content: &str, 139 | content_type: Option, 140 | ) -> Result<(), JsValue> { 141 | self.0 142 | .get_mut() 143 | .map(|o| o.after(content, content_type.into_native())) 144 | } 145 | 146 | pub fn replace( 147 | &mut self, 148 | content: &str, 149 | content_type: Option, 150 | ) -> Result<(), JsValue> { 151 | self.0 152 | .get_mut() 153 | .map(|o| o.replace(content, content_type.into_native())) 154 | } 155 | 156 | pub fn remove(&mut self) -> Result<(), JsValue> { 157 | self.0.get_mut().map(|o| o.remove()) 158 | } 159 | 160 | #[wasm_bindgen(method, getter)] 161 | pub fn removed(&self) -> JsResult { 162 | self.0.get().map(|o| o.removed()) 163 | } 164 | } 165 | }; 166 | } 167 | 168 | macro_rules!
impl_from_native { 169 | ($Ty:ident --> $JsTy:ident) => { 170 | impl $JsTy { 171 | pub(crate) fn from_native<'r>( 172 | inner: &'r mut $Ty, 173 | stack_ptr: *mut u8, 174 | ) -> (Self, Anchor<'r>) { 175 | let (ref_wrap, anchor) = NativeRefWrap::wrap(inner, stack_ptr); 176 | 177 | ($JsTy(ref_wrap), anchor) 178 | } 179 | } 180 | }; 181 | } 182 | 183 | mod comment; 184 | mod doctype; 185 | mod document_end; 186 | mod element; 187 | mod end_tag; 188 | mod handlers; 189 | mod html_rewriter; 190 | mod text_chunk; 191 | -------------------------------------------------------------------------------- /src/text_chunk.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use lol_html::html_content::TextChunk as NativeTextChunk; 3 | 4 | #[wasm_bindgen] 5 | pub struct TextChunk(NativeRefWrap>); 6 | 7 | impl_from_native!(NativeTextChunk --> TextChunk); 8 | impl_mutations!(TextChunk); 9 | 10 | #[wasm_bindgen] 11 | impl TextChunk { 12 | #[wasm_bindgen(method, getter)] 13 | pub fn text(&self) -> JsResult { 14 | self.0.get().map(|c| c.as_str().into()) 15 | } 16 | 17 | #[wasm_bindgen(method, getter=lastInTextNode)] 18 | pub fn last_in_text_node(&self) -> JsResult { 19 | self.0.get().map(|c| c.last_in_text_node()) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/comments.spec.ts: -------------------------------------------------------------------------------- 1 | import test, { Macro } from "ava"; 2 | import { Comment } from ".."; 3 | import { HTMLRewriter, mutationsMacro, wait } from "."; 4 | 5 | const commentsMutationsInput = "

"; 6 | const commentsMutationsExpected = { 7 | beforeAfter: [ 8 | "

", 9 | "<span>before</span>", 10 | "before html", 11 | "", 12 | "after html", 13 | "<span>after</span>", 14 | "

", 15 | ].join(""), 16 | replace: "

<span>replace</span>

", 17 | replaceHtml: "

replace

", 18 | remove: "

", 19 | }; 20 | 21 | const commentPropertiesMacro: Macro< 22 | [(rw: HTMLRewriter, comments: (comment: Comment) => void) => HTMLRewriter] 23 | > = async (t, func) => { 24 | t.plan(3); 25 | const res = await func(new HTMLRewriter(), (comment) => { 26 | t.false(comment.removed); 27 | t.is(comment.text, "test"); 28 | comment.text = "new"; 29 | }).transform("

"); 30 | t.is(res, "

"); 31 | }; 32 | test("handles comment properties", commentPropertiesMacro, (rw, comments) => 33 | rw.on("p", { comments }) 34 | ); 35 | test( 36 | "handles comment mutations", 37 | mutationsMacro, 38 | (rw, comments) => rw.on("p", { comments }), 39 | commentsMutationsInput, 40 | commentsMutationsExpected 41 | ); 42 | test("comment allows chaining", async (t) => { 43 | t.plan(4); 44 | await new HTMLRewriter() 45 | .on("p", { 46 | comments(comment) { 47 | t.is(comment.before(""), comment); 48 | t.is(comment.after(""), comment); 49 | t.is(comment.replace(""), comment); 50 | t.is(comment.remove(), comment); 51 | }, 52 | }) 53 | .transform("

"); 54 | }); 55 | const commentAsyncHandlerMacro: Macro< 56 | [(rw: HTMLRewriter, comments: (c: Comment) => Promise) => HTMLRewriter] 57 | > = async (t, func) => { 58 | const res = await func(new HTMLRewriter(), async (comment) => { 59 | await wait(50); 60 | comment.text = "new"; 61 | }).transform("

"); 62 | t.is(res, "

"); 63 | }; 64 | test( 65 | "handles comment async handler", 66 | commentAsyncHandlerMacro, 67 | (rw, comments) => rw.on("p", { comments }) 68 | ); 69 | const commentClassHandlerMacro: Macro< 70 | [(rw: HTMLRewriter, h: { comments: (c: Comment) => void }) => HTMLRewriter] 71 | > = async (t, func) => { 72 | class Handler { 73 | constructor(private content: string) {} 74 | // noinspection JSUnusedGlobalSymbols 75 | comments(comment: Comment) { 76 | comment.text = this.content; 77 | } 78 | } 79 | const res = await func(new HTMLRewriter(), new Handler("new")).transform( 80 | "

" 81 | ); 82 | t.is(res, "

"); 83 | }; 84 | test("handles comment class handler", commentClassHandlerMacro, (rw, handler) => 85 | rw.on("p", handler) 86 | ); 87 | 88 | test( 89 | "handles document comment properties", 90 | commentPropertiesMacro, 91 | (rw, comments) => rw.onDocument({ comments }) 92 | ); 93 | test( 94 | "handles document comment mutations", 95 | mutationsMacro, 96 | (rw, comments) => rw.onDocument({ comments }), 97 | commentsMutationsInput, 98 | commentsMutationsExpected 99 | ); 100 | test( 101 | "handles document comment async handler", 102 | commentAsyncHandlerMacro, 103 | (rw, comments) => rw.onDocument({ comments }) 104 | ); 105 | test( 106 | "handles document comment class handler", 107 | commentClassHandlerMacro, 108 | (rw, handler) => rw.onDocument(handler) 109 | ); 110 | -------------------------------------------------------------------------------- /test/doctype.spec.ts: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { Doctype } from ".."; 3 | import { HTMLRewriter, wait } from "."; 4 | 5 | const doctypeInput = 6 | ''; 7 | test("handles document doctype properties", async (t) => { 8 | t.plan(4); 9 | const res = await new HTMLRewriter() 10 | .onDocument({ 11 | doctype(doctype) { 12 | t.is(doctype.name, "html"); 13 | t.is(doctype.publicId, "-//W3C//DTD HTML 4.01//EN"); 14 | t.is(doctype.systemId, "http://www.w3.org/TR/html4/strict.dtd"); 15 | }, 16 | }) 17 | .transform(doctypeInput); 18 | t.is(res, doctypeInput); 19 | }); 20 | test("handles document doctype properties for empty doctype", async (t) => { 21 | t.plan(3); 22 | await new HTMLRewriter() 23 | .onDocument({ 24 | doctype(doctype) { 25 | t.is(doctype.name, null); 26 | t.is(doctype.publicId, null); 27 | t.is(doctype.systemId, null); 28 | }, 29 | }) 30 | .transform(""); 31 | }); 32 | test("handles document doctype async handler", async (t) => { 33 | const res = await new HTMLRewriter() 34 | .onDocument({ 35 | async doctype(doctype) { 36 | 
await wait(50); 37 | t.is(doctype.name, "html"); 38 | }, 39 | }) 40 | .transform(doctypeInput); 41 | t.is(res, doctypeInput); 42 | }); 43 | test("handles document doctype class handler", async (t) => { 44 | class Handler { 45 | constructor(private content: string) {} 46 | // noinspection JSUnusedGlobalSymbols 47 | doctype(doctype: Doctype) { 48 | t.is(doctype.name, "html"); 49 | t.is(this.content, "new"); 50 | } 51 | } 52 | const res = await new HTMLRewriter() 53 | .onDocument(new Handler("new")) 54 | .transform(doctypeInput); 55 | t.is(res, doctypeInput); 56 | }); 57 | -------------------------------------------------------------------------------- /test/document_end.spec.ts: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { DocumentEnd } from ".."; 3 | import { HTMLRewriter, wait } from "."; 4 | 5 | test("handles document end specific mutations", async (t) => { 6 | // append 7 | const res = await new HTMLRewriter() 8 | .onDocument({ 9 | end(end) { 10 | end.append("append"); 11 | end.append("append html", { html: true }); 12 | }, 13 | }) 14 | .transform("

test

"); 15 | t.is( 16 | res, 17 | [ 18 | "

", 19 | "test", 20 | "

", 21 | "<span>append</span>", 22 | "append html", 23 | ].join("") 24 | ); 25 | }); 26 | test("document end allows chaining", async (t) => { 27 | t.plan(1); 28 | await new HTMLRewriter() 29 | .onDocument({ 30 | end(end) { 31 | t.is(end.append(""), end); 32 | }, 33 | }) 34 | .transform("

test

"); 35 | }); 36 | test("handles document end async handler", async (t) => { 37 | const res = await new HTMLRewriter() 38 | .onDocument({ 39 | async end(end) { 40 | await wait(50); 41 | end.append("append html", { html: true }); 42 | }, 43 | }) 44 | .transform("

test

"); 45 | t.is(res, "

test

append html"); 46 | }); 47 | test("handles document end class handler", async (t) => { 48 | class Handler { 49 | constructor(private content: string) {} 50 | // noinspection JSUnusedGlobalSymbols 51 | end(end: DocumentEnd) { 52 | end.append(this.content, { html: true }); 53 | } 54 | } 55 | const res = await new HTMLRewriter() 56 | .onDocument(new Handler("append html")) 57 | .transform("

test

"); 58 | t.is(res, "

test

append html"); 59 | }); 60 | -------------------------------------------------------------------------------- /test/element.spec.ts: -------------------------------------------------------------------------------- 1 | import test from "ava"; 2 | import { Element } from ".."; 3 | import { HTMLRewriter, mutationsMacro, wait } from "."; 4 | 5 | const elementMutationsInput = "

test

"; 6 | const elementMutationsExpected = { 7 | beforeAfter: [ 8 | "<span>before</span>", 9 | "before html", 10 | "

", 11 | "test", 12 | "

", 13 | "after html", 14 | "<span>after</span>", 15 | ].join(""), 16 | replace: "<span>replace</span>", 17 | replaceHtml: "replace", 18 | remove: "", 19 | }; 20 | 21 | test("handles element properties", async (t) => { 22 | t.plan(6); 23 | const res = await new HTMLRewriter() 24 | .on("p", { 25 | element(element) { 26 | t.is(element.tagName, "p"); 27 | element.tagName = "h1"; 28 | t.false(element.removed); 29 | t.is(element.namespaceURI, "http://www.w3.org/1999/xhtml"); 30 | 31 | // Check element.attributes is an IterableIterator 32 | t.deepEqual(element.attributes.next(), { 33 | value: ["class", "red"], 34 | done: false, 35 | }); 36 | t.deepEqual([...element.attributes], [["class", "red"]]); 37 | }, 38 | }) 39 | .transform('

test

'); 40 | t.is(res, '

test

'); 41 | }); 42 | test("handles element attribute methods", async (t) => { 43 | t.plan(5); 44 | const res = await new HTMLRewriter() 45 | .on("p", { 46 | element(element) { 47 | t.is(element.getAttribute("class"), "red"); 48 | t.is(element.getAttribute("id"), null); 49 | t.true(element.hasAttribute("class")); 50 | t.false(element.hasAttribute("id")); 51 | element.setAttribute("id", "header"); 52 | element.removeAttribute("class"); 53 | }, 54 | }) 55 | .transform('

test

'); 56 | t.is(res, ''); 57 | }); 58 | test( 59 | "handles element mutations", 60 | mutationsMacro, 61 | (rw, element) => rw.on("p", { element }), 62 | elementMutationsInput, 63 | elementMutationsExpected 64 | ); 65 | test("handles element specific mutations", async (t) => { 66 | // prepend/append 67 | let res = await new HTMLRewriter() 68 | .on("p", { 69 | element(element) { 70 | element.prepend("prepend"); 71 | element.prepend("prepend html", { html: true }); 72 | element.append("append"); 73 | element.append("append html", { html: true }); 74 | }, 75 | }) 76 | .transform("

test

"); 77 | t.is( 78 | res, 79 | [ 80 | "

", 81 | "prepend html", 82 | "<span>prepend</span>", 83 | "test", 84 | "<span>append</span>", 85 | "append html", 86 | "

", 87 | ].join("") 88 | ); 89 | 90 | // setInnerContent 91 | res = await new HTMLRewriter() 92 | .on("p", { 93 | element(element) { 94 | element.setInnerContent("replace"); 95 | }, 96 | }) 97 | .transform("

test

"); 98 | t.is(res, "

<span>replace</span>

"); 99 | res = await new HTMLRewriter() 100 | .on("p", { 101 | element(element) { 102 | element.setInnerContent("replace", { html: true }); 103 | }, 104 | }) 105 | .transform("

test

"); 106 | t.is(res, "

replace

"); 107 | 108 | // removeAndKeepContent 109 | res = await new HTMLRewriter() 110 | .on("p", { 111 | element(element) { 112 | element.removeAndKeepContent(); 113 | }, 114 | }) 115 | .transform("

test

"); 116 | t.is(res, "test"); 117 | }); 118 | test("element allows chaining", async (t) => { 119 | t.plan(10); 120 | await new HTMLRewriter() 121 | .on("p", { 122 | element(element) { 123 | t.is(element.before(""), element); 124 | t.is(element.after(""), element); 125 | t.is(element.replace(""), element); 126 | t.is(element.remove(), element); 127 | t.is(element.setAttribute("test", ""), element); 128 | t.is(element.removeAttribute("test"), element); 129 | t.is(element.prepend(""), element); 130 | t.is(element.append(""), element); 131 | t.is(element.setInnerContent(""), element); 132 | t.is(element.removeAndKeepContent(), element); 133 | }, 134 | }) 135 | .transform("

test

"); 136 | }); 137 | test.serial("handles element async handler", async (t) => { 138 | const res = await new HTMLRewriter() 139 | .on("p", { 140 | async element(element) { 141 | await wait(50); 142 | element.setInnerContent("new"); 143 | }, 144 | }) 145 | .transform("

test

"); 146 | t.is(res, "

new

"); 147 | }); 148 | test("handles element class handler", async (t) => { 149 | class Handler { 150 | constructor(private content: string) {} 151 | // noinspection JSUnusedGlobalSymbols 152 | element(element: Element) { 153 | element.setInnerContent(this.content); 154 | } 155 | } 156 | const res = await new HTMLRewriter() 157 | .on("p", new Handler("new")) 158 | .transform("

test

"); 159 | t.is(res, "

new

"); 160 | }); 161 | 162 | test("handles end tag properties", async (t) => { 163 | const res = await new HTMLRewriter() 164 | .on("p", { 165 | element(element) { 166 | element.onEndTag(function (end) { 167 | t.is(this, element); 168 | t.is(end.name, "p"); 169 | end.name = "h1"; 170 | }); 171 | }, 172 | }) 173 | .transform("

test

"); 174 | t.is(res, "

test

"); 175 | }); 176 | test("handles end tag mutations", async (t) => { 177 | const input = "

test

"; 178 | const beforeAfterExpected = [ 179 | "

", 180 | "test", 181 | "<span>before</span>", 182 | "before html", 183 | "

", 184 | "after html", 185 | "<span>after</span>", 186 | ].join(""); 187 | const removeExpected = "

test"; 188 | 189 | // before/after 190 | let res = await new HTMLRewriter() 191 | .on("p", { 192 | element(element) { 193 | const that = this; 194 | element.onEndTag((end) => { 195 | t.is(this, that); 196 | end.before("before"); 197 | end.before("before html", { html: true }); 198 | end.after("after"); 199 | end.after("after html", { html: true }); 200 | }); 201 | }, 202 | }) 203 | .transform(input); 204 | t.is(res, beforeAfterExpected); 205 | 206 | // remove 207 | res = await new HTMLRewriter() 208 | .on("p", { 209 | element(element) { 210 | element.onEndTag((end) => { 211 | end.remove(); 212 | }); 213 | }, 214 | }) 215 | .transform(input); 216 | t.is(res, removeExpected); 217 | }); 218 | test("end tag allows chaining", async (t) => { 219 | t.plan(3); 220 | await new HTMLRewriter() 221 | .on("p", { 222 | element(element) { 223 | element.onEndTag((end) => { 224 | t.is(end.before(""), end); 225 | t.is(end.after(""), end); 226 | t.is(end.remove(), end); 227 | }); 228 | }, 229 | }) 230 | .transform("

test

"); 231 | }); 232 | test.serial("handles end tag async handler", async (t) => { 233 | const res = await new HTMLRewriter() 234 | .on("p", { 235 | element(element) { 236 | element.onEndTag(async (end) => { 237 | await wait(50); 238 | end.before("!"); 239 | }); 240 | }, 241 | }) 242 | .transform("

test

"); 243 | t.is(res, "

test!

"); 244 | }); 245 | test("uses last end tag handler", async (t) => { 246 | const res = await new HTMLRewriter() 247 | .on("p", { 248 | element(element) { 249 | element.onEndTag((end) => { 250 | end.before("1"); 251 | }); 252 | element.onEndTag((end) => { 253 | end.before("2"); 254 | }); 255 | }, 256 | }) 257 | .transform("

test

"); 258 | t.is(res, "

test2

"); 259 | }); 260 | test("throws error on no end tag", async (t) => { 261 | const res = new HTMLRewriter() 262 | .on("img", { 263 | element(element) { 264 | element.onEndTag(() => t.fail()); 265 | }, 266 | }) 267 | .transform(''); 268 | await t.throwsAsync(res, { 269 | instanceOf: TypeError, 270 | message: "Parser error: No end tag.", 271 | }); 272 | }); 273 | -------------------------------------------------------------------------------- /test/index.ts: -------------------------------------------------------------------------------- 1 | import { TextEncoder, TextDecoder } from "util"; 2 | import { Macro } from "ava"; 3 | import { 4 | Comment, 5 | DocumentHandlers, 6 | Element, 7 | ElementHandlers, 8 | HTMLRewriter as RawHTMLRewriter, 9 | HTMLRewriterOptions as RawHTMLRewriterOptions, 10 | TextChunk, 11 | } from ".."; 12 | 13 | const encoder = new TextEncoder(); 14 | const decoder = new TextDecoder(); 15 | 16 | export class HTMLRewriter { 17 | private elementHandlers: [selector: string, handlers: ElementHandlers][] = []; 18 | private documentHandlers: DocumentHandlers[] = []; 19 | 20 | constructor(private readonly options?: RawHTMLRewriterOptions) {} 21 | 22 | on(selector: string, handlers: ElementHandlers): this { 23 | this.elementHandlers.push([selector, handlers]); 24 | return this; 25 | } 26 | 27 | onDocument(handlers: DocumentHandlers): this { 28 | this.documentHandlers.push(handlers); 29 | return this; 30 | } 31 | 32 | async transform(input: string): Promise { 33 | let output = ""; 34 | const rewriter = new RawHTMLRewriter((chunk) => { 35 | output += decoder.decode(chunk); 36 | }, this.options); 37 | for (const [selector, handlers] of this.elementHandlers) { 38 | rewriter.on(selector, handlers); 39 | } 40 | for (const handlers of this.documentHandlers) { 41 | rewriter.onDocument(handlers); 42 | } 43 | try { 44 | await rewriter.write(encoder.encode(input)); 45 | await rewriter.end(); 46 | return output; 47 | } finally { 48 | rewriter.free(); 49 | } 50 | } 51 
| } 52 | 53 | export function wait(t: number): Promise { 54 | return new Promise((resolve) => setTimeout(resolve, t)); 55 | } 56 | 57 | export const mutationsMacro: Macro< 58 | [ 59 | ( 60 | rw: HTMLRewriter, 61 | handler: (token: Element | TextChunk | Comment) => void 62 | ) => HTMLRewriter, 63 | string, 64 | { 65 | beforeAfter: string; 66 | replace: string; 67 | replaceHtml: string; 68 | remove: string; 69 | } 70 | ] 71 | > = async (t, func, input, expected) => { 72 | // In all these tests, only process text chunks containing text. All test 73 | // inputs for text handlers will be single characters, so we'll only process 74 | // text nodes once. 75 | 76 | // before/after 77 | let res = await func(new HTMLRewriter(), (token) => { 78 | if ("text" in token && !token.text) return; 79 | token.before("before"); 80 | token.before("before html", { html: true }); 81 | token.after("after"); 82 | token.after("after html", { html: true }); 83 | }).transform(input); 84 | t.is(res, expected.beforeAfter); 85 | 86 | // replace 87 | res = await func(new HTMLRewriter(), (token) => { 88 | if ("text" in token && !token.text) return; 89 | token.replace("replace"); 90 | }).transform(input); 91 | t.is(res, expected.replace); 92 | res = await func(new HTMLRewriter(), (token) => { 93 | if ("text" in token && !token.text) return; 94 | token.replace("replace", { html: true }); 95 | }).transform(input); 96 | t.is(res, expected.replaceHtml); 97 | 98 | // remove 99 | res = await func(new HTMLRewriter(), (token) => { 100 | if ("text" in token && !token.text) return; 101 | t.false(token.removed); 102 | token.remove(); 103 | t.true(token.removed); 104 | }).transform(input); 105 | t.is(res, expected.remove); 106 | }; 107 | -------------------------------------------------------------------------------- /test/misc.spec.ts: -------------------------------------------------------------------------------- 1 | import { TextEncoder, TextDecoder } from "util"; 2 | import vm from "vm"; 3 | import test 
from "ava"; 4 | import { HTMLRewriter as RawHTMLRewriter, ElementHandlers } from ".."; 5 | import { HTMLRewriter, wait } from "."; 6 | 7 | test("handles multiple element handlers", async (t) => { 8 | const res = await new HTMLRewriter() 9 | .on("h1", { 10 | element(element) { 11 | element.setInnerContent("new h1"); 12 | }, 13 | }) 14 | .on("h2", { 15 | element(element) { 16 | element.setInnerContent("new h2"); 17 | }, 18 | }) 19 | .on("p", { 20 | element(element) { 21 | element.setInnerContent("new p"); 22 | }, 23 | }) 24 | .transform("

old h1

old h2

old p

"); 25 | t.is(res, "

new h1

new h2

new p

"); 26 | }); 27 | 28 | test("handles streaming", async (t) => { 29 | t.plan(8); // 6 for text handler + 2 at the end 30 | const expectedTextChunks = ["te", "st", ""]; 31 | 32 | const outputChunks: string[] = []; 33 | const decoder = new TextDecoder(); 34 | const rewriter = new RawHTMLRewriter((chunk) => 35 | outputChunks.push(decoder.decode(chunk)) 36 | ).on("p", { 37 | text(text) { 38 | t.is(text.text, expectedTextChunks.shift()); 39 | t.is(text.lastInTextNode, text.text === ""); 40 | }, 41 | }); 42 | 43 | const inputChunks = [ 44 | '', 45 | "", 47 | "

", 48 | "te", 49 | "st", 50 | "

", 51 | "", 52 | ]; 53 | const encoder = new TextEncoder(); 54 | for (const chunk of inputChunks) { 55 | await rewriter.write(encoder.encode(chunk)); 56 | await wait(50); 57 | } 58 | await rewriter.end(); 59 | 60 | t.true(outputChunks.length >= 2); 61 | t.is( 62 | outputChunks.join(""), 63 | '

test

' 64 | ); 65 | }); 66 | 67 | test("handles empty chunk", async (t) => { 68 | const res = await new HTMLRewriter().transform(""); 69 | t.is(res, ""); 70 | }); 71 | 72 | test("rethrows error thrown in handler", async (t) => { 73 | const rewriter = new RawHTMLRewriter(() => {}).on("p", { 74 | element() { 75 | throw new Error("Whoops!"); 76 | }, 77 | }); 78 | 79 | const promise = rewriter.write(new TextEncoder().encode("

test

")); 80 | await t.throwsAsync(promise, { message: "Whoops!" }); 81 | }); 82 | 83 | test("rethrows error thrown in async handler", async (t) => { 84 | const rewriter = new RawHTMLRewriter(() => {}).on("p", { 85 | async element() { 86 | throw new Error("Whoops!"); 87 | }, 88 | }); 89 | 90 | const promise = rewriter.write(new TextEncoder().encode("

test

")); 91 | await t.throwsAsync(promise, { message: "Whoops!" }); 92 | }); 93 | 94 | test.serial("handles concurrent rewriters with async handlers", async (t) => { 95 | // Note this test requires the "safe" HTMLRewriter, see comments in 96 | // src/modules/rewriter.ts for more details 97 | const rewriter = (i: number) => 98 | new HTMLRewriter() 99 | .on("p", { 100 | async element(element) { 101 | await wait(50); 102 | element.setInnerContent(`new ${i}`); 103 | }, 104 | }) 105 | .transform(`

old ${i}

`); 106 | 107 | const res1 = rewriter(1); 108 | const res2 = rewriter(2); 109 | t.is(await res1, "

new 1

"); 110 | t.is(await res2, "

new 2

"); 111 | 112 | const res3 = rewriter(3); 113 | const res4 = rewriter(4); 114 | const texts = await Promise.all([res3, res4]); 115 | t.deepEqual(texts, ["

new 3

", "

new 4

"]); 116 | }); 117 | 118 | test.serial("handles many async handlers for single chunk write", async (t) => { 119 | const rewriter = new HTMLRewriter(); 120 | rewriter.on("h1", { 121 | async element(element) { 122 | await wait(50); 123 | element.setInnerContent("new h1"); 124 | }, 125 | }); 126 | rewriter.on("p", { 127 | async element(element) { 128 | await wait(50); 129 | element.setInnerContent("new p"); 130 | }, 131 | }); 132 | const res = await rewriter.transform("

old h1

old p

"); 133 | t.is(res, "

new h1

new p

"); 134 | }); 135 | 136 | test("rewriter allows chaining", (t) => { 137 | const rewriter = new RawHTMLRewriter(() => {}); 138 | t.is(rewriter.on("p", {}), rewriter); 139 | t.is(rewriter.onDocument({}), rewriter); 140 | }); 141 | 142 | test.serial("handles async handler in different realm", async (t) => { 143 | const context = vm.createContext({ HTMLRewriter, wait }); 144 | const res = await vm.runInContext( 145 | ` 146 | const rewriter = new HTMLRewriter(); 147 | rewriter.on("p", { 148 | async element(element) { 149 | await wait(50); 150 | element.setInnerContent("new"); 151 | }, 152 | }); 153 | rewriter.transform("

old

"); 154 | `, 155 | context 156 | ); 157 | t.is(res, "

new

"); 158 | }); 159 | 160 | test("treats esi tags as void tags if option enabled", async (t) => { 161 | const handlers: ElementHandlers = { 162 | element(element) { 163 | element.replace("replacement"); 164 | }, 165 | }; 166 | 167 | const input = ' text'; 168 | 169 | // Check with option disabled 170 | let res = await new HTMLRewriter() 171 | .on("esi\\:include", handlers) 172 | .transform(input); 173 | t.is(res, "replacement"); 174 | 175 | // Check with option enabled 176 | res = await new HTMLRewriter({ enableEsiTags: true }) 177 | .on("esi\\:include", handlers) 178 | .transform(input); 179 | t.is(res, "replacement text"); 180 | }); 181 | -------------------------------------------------------------------------------- /test/selectors.spec.ts: -------------------------------------------------------------------------------- 1 | import test, { Macro } from "ava"; 2 | import { HTMLRewriter } from "."; 3 | 4 | const selectorMacro: Macro< 5 | [selector: string, input: string, expected: string] 6 | > = async (t, selector, input, expected) => { 7 | const res = await new HTMLRewriter() 8 | .on(selector, { 9 | element(element) { 10 | element.setInnerContent("new"); 11 | }, 12 | }) 13 | .transform(input); 14 | t.is(res, expected); 15 | }; 16 | selectorMacro.title = (providedTitle) => `handles ${providedTitle} selector`; 17 | 18 | test("*", selectorMacro, "*", "

1

2

", "

new

new

"); 19 | test("E", selectorMacro, "p", "

1

2

", "

1

new

"); 20 | test( 21 | "E:nth-child(n)", 22 | selectorMacro, 23 | "p:nth-child(2)", 24 | "

1

2

3

", 25 | "

1

new

3

" 26 | ); 27 | test( 28 | "E:first-child", 29 | selectorMacro, 30 | "p:first-child", 31 | "

1

2

3

", 32 | "

new

2

3

" 33 | ); 34 | test( 35 | "E:nth-of-type(n)", 36 | selectorMacro, 37 | "p:nth-of-type(2)", 38 | "

1

2

3

4

5

", 39 | "

1

2

new

4

5

" 40 | ); 41 | test( 42 | "E:first-of-type", 43 | selectorMacro, 44 | "p:first-of-type", 45 | "

1

2

3

", 46 | "

1

new

3

" 47 | ); 48 | test( 49 | "E:not(s)", 50 | selectorMacro, 51 | "p:not(:first-child)", 52 | "

1

2

3

", 53 | "

1

new

new

" 54 | ); 55 | test( 56 | "E.class", 57 | selectorMacro, 58 | "p.red", 59 | '

1

2

', 60 | '

new

2

' 61 | ); 62 | test( 63 | "E#id", 64 | selectorMacro, 65 | "h1#header", 66 | '

1

2

', 67 | '

new

2

' 68 | ); 69 | test( 70 | "E[attr]", 71 | selectorMacro, 72 | "p[data-test]", 73 | "

1

2

", 74 | "

new

2

" 75 | ); 76 | test( 77 | 'E[attr="value"]', 78 | selectorMacro, 79 | 'p[data-test="one"]', 80 | '

1

2

', 81 | '

new

2

' 82 | ); 83 | test( 84 | 'E[attr="value" i]', 85 | selectorMacro, 86 | 'p[data-test="one" i]', 87 | '

1

2

3

', 88 | '

new

new

3

' 89 | ); 90 | test( 91 | 'E[attr="value" s]', 92 | selectorMacro, 93 | 'p[data-test="one" s]', 94 | '

1

2

3

', 95 | '

new

2

3

' 96 | ); 97 | test( 98 | 'E[attr~="value"]', 99 | selectorMacro, 100 | 'p[data-test~="two"]', 101 | '

1

2

3

', 102 | '

new

new

3

' 103 | ); 104 | test( 105 | 'E[attr^="value"]', 106 | selectorMacro, 107 | 'p[data-test^="a"]', 108 | '

1

2

3

', 109 | '

new

new

3

' 110 | ); 111 | test( 112 | 'E[attr$="value"]', 113 | selectorMacro, 114 | 'p[data-test$="1"]', 115 | '

1

2

3

', 116 | '

new

2

new

' 117 | ); 118 | test( 119 | 'E[attr*="value"]', 120 | selectorMacro, 121 | 'p[data-test*="b"]', 122 | '

1

2

3

', 123 | '

new

new

3

' 124 | ); 125 | test( 126 | 'E[attr|="value"]', 127 | selectorMacro, 128 | 'p[data-test|="a"]', 129 | '

1

2

3

', 130 | '

new

new

3

' 131 | ); 132 | test( 133 | "E F", 134 | selectorMacro, 135 | "div span", 136 | "

1

23
", 137 | "

new

new3
" 138 | ); 139 | test( 140 | "E > F", 141 | selectorMacro, 142 | "div > span", 143 | "

1

23
", 144 | "

1

new3
" 145 | ); 146 | 147 | test("throws error on unsupported selector", async (t) => { 148 | t.plan(1); 149 | const res = new HTMLRewriter() 150 | .on("p:last-child", { 151 | element(element) { 152 | element.setInnerContent("new"); 153 | }, 154 | }) 155 | .transform("

old

"); 156 | await t.throwsAsync(res, { 157 | instanceOf: TypeError, 158 | message: 159 | "Parser error: Unsupported pseudo-class or pseudo-element in selector.", 160 | }); 161 | }); 162 | -------------------------------------------------------------------------------- /test/text_chunk.spec.ts: -------------------------------------------------------------------------------- 1 | import test, { Macro } from "ava"; 2 | import { TextChunk } from ".."; 3 | import { HTMLRewriter, mutationsMacro, wait } from "."; 4 | 5 | const textMutationsInput = "

t

"; // Single character will be single chunk 6 | const textMutationsExpected = { 7 | beforeAfter: [ 8 | "

", 9 | "<span>before</span>", 10 | "before html", 11 | "t", 12 | "after html", 13 | "<span>after</span>", 14 | "

", 15 | ].join(""), 16 | replace: "

<span>replace</span>

", 17 | replaceHtml: "

replace

", 18 | remove: "

", 19 | }; 20 | 21 | const textPropertiesMacro: Macro< 22 | [(rw: HTMLRewriter, text: (text: TextChunk) => void) => HTMLRewriter] 23 | > = async (t, func) => { 24 | t.plan(6); 25 | const res = await func(new HTMLRewriter(), (text) => { 26 | // This handler should get called twice, once with lastInTextNode true 27 | t.false(text.removed); 28 | if (text.lastInTextNode) { 29 | t.pass(); 30 | t.is(text.text, ""); 31 | } else { 32 | t.is(text.text, "t"); 33 | } 34 | }).transform("

<p>t</p>

"); 35 | t.is(res, "
<p>t</p>

"); 36 | }; 37 | test("handles text properties", textPropertiesMacro, (rw, text) => 38 | rw.on("p", { text }) 39 | ); 40 | test( 41 | "handles text mutations", 42 | mutationsMacro, 43 | (rw, text) => rw.on("p", { text }), 44 | textMutationsInput, 45 | textMutationsExpected 46 | ); 47 | test("text allows chaining", async (t) => { 48 | t.plan(4); 49 | await new HTMLRewriter() 50 | .on("p", { 51 | text(text) { 52 | if (text.text === "t") { 53 | t.is(text.before(""), text); 54 | t.is(text.after(""), text); 55 | t.is(text.replace(""), text); 56 | t.is(text.remove(), text); 57 | } 58 | }, 59 | }) 60 | .transform("

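<p>t</p>"); 61 | }); 62 | const textAsyncHandlerMacro: Macro< 63 | [(rw: HTMLRewriter, text: (text: TextChunk) => Promise<void>) => HTMLRewriter] 64 | > = async (t, func) => { 65 | const res = await func(new HTMLRewriter(), async (text) => { 66 | if (text.text === "t") { 67 | await wait(50); 68 | text.after(" new"); 69 | } 70 | }).transform("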

<p>t</p>

"); 71 | t.is(res, "
<p>t new</p>

"); 72 | }; 73 | test("handles text async handler", textAsyncHandlerMacro, (rw, text) => 74 | rw.on("p", { text }) 75 | ); 76 | const textClassHandlerMacro: Macro< 77 | [ 78 | ( 79 | rw: HTMLRewriter, 80 | handler: { text: (text: TextChunk) => void } 81 | ) => HTMLRewriter 82 | ] 83 | > = async (t, func) => { 84 | class Handler { 85 | constructor(private content: string) {} 86 | text(text: TextChunk) { 87 | if (text.text === "t") text.after(this.content); 88 | } 89 | } 90 | const res = await func(new HTMLRewriter(), new Handler(" new")).transform( 91 | "

<p>t</p>

" 92 | ); 93 | t.is(res, "
<p>t new</p>

"); 94 | }; 95 | test("handles text class handler", textClassHandlerMacro, (rw, handler) => 96 | rw.on("p", handler) 97 | ); 98 | 99 | test("handles document text properties", textPropertiesMacro, (rw, text) => 100 | rw.onDocument({ text }) 101 | ); 102 | test( 103 | "handles document text mutations", 104 | mutationsMacro, 105 | (rw, text) => rw.onDocument({ text }), 106 | textMutationsInput, 107 | textMutationsExpected 108 | ); 109 | test("handles document text async handler", textAsyncHandlerMacro, (rw, text) => 110 | rw.onDocument({ text }) 111 | ); 112 | test( 113 | "handles document text class handler", 114 | textClassHandlerMacro, 115 | (rw, handler) => rw.onDocument(handler) 116 | ); 117 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "es2018", 5 | "lib": ["es2018"], 6 | "strict": true, 7 | "moduleResolution": "node", 8 | "esModuleInterop": true, 9 | "types": ["node"], 10 | "sourceMap": true 11 | } 12 | } 13 | --------------------------------------------------------------------------------