├── .github └── workflows │ └── npm-publish.yml ├── .gitignore ├── LICENSE.md ├── README.md ├── package.json ├── pnpm-lock.yaml ├── src ├── index.ts ├── perf_hooks.js ├── polyfill.ts └── support.ts ├── test ├── patch-global.js └── test.js ├── tsconfig.cjs.json └── tsconfig.json /.github/workflows/npm-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run tests using node and then publish a package to GitHub Packages when a release is created 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/publishing-nodejs-packages 3 | 4 | name: Node.js Package 5 | 6 | on: 7 | push: 8 | tags: 9 | - v* 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | - uses: actions/setup-node@v1 17 | with: 18 | node-version: 12 19 | - uses: pnpm/action-setup@v2 20 | with: 21 | version: 6 22 | run_install: true 23 | - run: npm test 24 | 25 | publish-npm: 26 | needs: build 27 | runs-on: ubuntu-latest 28 | steps: 29 | - uses: actions/checkout@v2 30 | - uses: actions/setup-node@v1 31 | with: 32 | node-version: 12 33 | registry-url: https://registry.npmjs.org/ 34 | - uses: pnpm/action-setup@v2 35 | with: 36 | version: 6 37 | run_install: true 38 | - run: npm publish 39 | env: 40 | NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}} 41 | 42 | # publish-gpr: 43 | # needs: build 44 | # runs-on: ubuntu-latest 45 | # steps: 46 | # - uses: actions/checkout@v2 47 | # - uses: actions/setup-node@v1 48 | # with: 49 | # node-version: 12 50 | # registry-url: https://npm.pkg.github.com/ 51 | # - run: npm ci 52 | # - run: npm publish 53 | # env: 54 | # NODE_AUTH_TOKEN: ${{secrets.GITHUB_TOKEN}} 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | cjs 3 | dist 4 | *.tgz 5 | /*.d.ts 6 | /*.js 7 | /*.js.map 8 | 
/*.d.ts.map 9 | import-map.json -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # The MIT License 2 | 3 | Copyright (c) 2021 Florian Klampfer (https://qwtel.com/) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parsed HTML Rewriter 2 | A DOM-based implementation of [Cloudflare Worker's `HTMLRewriter`](https://developers.cloudflare.com/workers/runtime-apis/html-rewriter). 
3 | 4 | *** 5 | 6 | ___UPDATE: While this module works just fine, I've made [a new version](https://github.com/worker-tools/html-rewriter) that is WASM/streaming based for much better performance.___ 7 | 8 | *** 9 | 10 | Unlike the original, this implementation parses the entire DOM (provided by [`linkedom`](https://github.com/WebReflection/linkedom)), 11 | and runs selectors against this representation. As a result, it is slower, more memory intensive, and can't process streaming data. 12 | 13 | Note that this approach was chosen to quickly implement the functionality of `HTMLRewriter`, as there is currently no JS implementation available. 14 | A better implementation would replicate the streaming approach of [`lol-html`](https://github.com/cloudflare/lol-html), or even use a WebAssembly version of it. _Update: [Now available here](https://github.com/worker-tools/html-rewriter)_. 15 | 16 | However, this implementation should run in most JS contexts (including Web Workers, Service Workers and Deno) without modification and handle many, if not most, use cases of `HTMLRewriter`. 17 | It should be good enough for testing and offline Workers development. 18 | 19 | ## Usage 20 | This module can be used in two ways. 21 | 22 | As a standalone module: 23 | 24 | ```ts 25 | import { ParsedHTMLRewriter } from '@worker-tools/parsed-html-rewriter' 26 | 27 | await new ParsedHTMLRewriter() 28 | .transform(new Response('')) 29 | .text(); 30 | ``` 31 | 32 | Or as a polyfill: 33 | 34 | ```ts 35 | import '@worker-tools/parsed-html-rewriter/polyfill' 36 | 37 | await new HTMLRewriter() // Will use the native version when running in a Worker 38 | .transform(new Response('')) 39 | .text(); 40 | ``` 41 | 42 | ### innerHTML 43 | Unlike the current (March 2021) version on CF Workers, this implementation already supports the [proposed `innerHTML` handler](https://github.com/cloudflare/lol-html/issues/40#issuecomment-567126687). 
44 | Note that this feature is unstable and will likely change as the real version materializes. 45 | 46 | ```ts 47 | await new HTMLRewriter() 48 | .on('body', { 49 | innerHTML(html) { 50 | console.log(html) // => '
bar
' 51 | }, 52 | }) 53 | .transform(new Response('
bar
')) 54 | .text(); 55 | ``` 56 | 57 | ## Caveats 58 | - Because this version isn't based on streaming data, the order in which handlers are called can differ. Some measures have been taken to simulate the order, but differences may occur. 59 | - Texts never arrive in chunks. There is always just one chunk, followed by an empty one with `lastInTextNode` set to `true`. 60 | 61 | -------- 62 | 63 |

64 |

This module is part of the Worker Tools collection
⁕ 65 | 66 | [Worker Tools](https://workers.tools) are a collection of TypeScript libraries for writing web servers in [Worker Runtimes](https://workers.js.org) such as Cloudflare Workers, Deno Deploy and Service Workers in the browser. 67 | 68 | If you liked this module, you might also like: 69 | 70 | - 🧭 [__Worker Router__][router] --- Complete routing solution that works across CF Workers, Deno and Service Workers 71 | - 🔋 [__Worker Middleware__][middleware] --- A suite of standalone HTTP server-side middleware with TypeScript support 72 | - 📄 [__Worker HTML__][html] --- HTML templating and streaming response library 73 | - 📦 [__Storage Area__][kv-storage] --- Key-value store abstraction across [Cloudflare KV][cloudflare-kv-storage], [Deno][deno-kv-storage] and browsers. 74 | - 🆗 [__Response Creators__][response-creators] --- Factory functions for responses with pre-filled status and status text 75 | - 🎏 [__Stream Response__][stream-response] --- Use async generators to build streaming responses for SSE, etc... 76 | - 🥏 [__JSON Fetch__][json-fetch] --- Drop-in replacements for Fetch API classes with first class support for JSON. 77 | - 🦑 [__JSON Stream__][json-stream] --- Streaming JSON parser/stringifier with first class support for web streams. 78 | 79 | Worker Tools also includes a number of polyfills that help bridge the gap between Worker Runtimes: 80 | - ✏️ [__HTML Rewriter__][html-rewriter] --- Cloudflare's HTML Rewriter for use in Deno, browsers, etc... 81 | - 📍 [__Location Polyfill__][location-polyfill] --- A `Location` polyfill for Cloudflare Workers. 82 | - 🦕 [__Deno Fetch Event Adapter__][deno-fetch-event-adapter] --- Dispatches global `fetch` events using Deno’s native HTTP server. 
83 | 84 | [router]: https://workers.tools/router 85 | [middleware]: https://workers.tools/middleware 86 | [html]: https://workers.tools/html 87 | [kv-storage]: https://workers.tools/kv-storage 88 | [cloudflare-kv-storage]: https://workers.tools/cloudflare-kv-storage 89 | [deno-kv-storage]: https://workers.tools/deno-kv-storage 90 | [kv-storage-polyfill]: https://workers.tools/kv-storage-polyfill 91 | [response-creators]: https://workers.tools/response-creators 92 | [stream-response]: https://workers.tools/stream-response 93 | [json-fetch]: https://workers.tools/json-fetch 94 | [json-stream]: https://workers.tools/json-stream 95 | [request-cookie-store]: https://workers.tools/request-cookie-store 96 | [extendable-promise]: https://workers.tools/extendable-promise 97 | [html-rewriter]: https://workers.tools/html-rewriter 98 | [location-polyfill]: https://workers.tools/location-polyfill 99 | [deno-fetch-event-adapter]: https://workers.tools/deno-fetch-event-adapter 100 | 101 | For more, visit [workers.tools](https://workers.tools). 
102 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@worker-tools/parsed-html-rewriter", 3 | "version": "0.1.11", 4 | "description": "A DOM-based implementation of Cloudflare Worker's HTMLRewriter.", 5 | "type": "module", 6 | "main": "cjs/index.cjs", 7 | "module": "index.js", 8 | "types": "index.d.ts", 9 | "exports": { 10 | ".": { 11 | "import": "./index.js", 12 | "require": "./cjs/index.cjs" 13 | }, 14 | "./polyfill": { 15 | "import": "./polyfill.js", 16 | "require": "./cjs/polyfill.cjs" 17 | } 18 | }, 19 | "files": [ 20 | "src", 21 | "cjs", 22 | "dist", 23 | "*.js*", 24 | "*.d.ts*" 25 | ], 26 | "publishConfig": { 27 | "access": "public" 28 | }, 29 | "scripts": { 30 | "clean": "shx rm -rf cjs dist *.d.ts *.js *.js.map *.d.ts.map *.tgz", 31 | "test": "npm run build:mjs && node test/test.js", 32 | "build": "npm run build:mjs & npm run build:cjs & npm run build:dist & wait", 33 | "build:mjs": "tsc -p tsconfig.json", 34 | "build:cjs": "tsc -p tsconfig.cjs.json && npm run sed && npm run mv", 35 | "build:dist": "esbuild src/index.ts --bundle --format=esm --target=es2020 --outfile=dist/index.js --tsconfig=tsconfig.json", 36 | "sed": "shx sed -i 's/\\.(.*)\\.js/\\.$1\\.cjs/g' cjs/*.js > /dev/null ", 37 | "mv": "for f in cjs/*.js; do shx mv \"$f\" \"${f%.js}.cjs\"; done", 38 | "prepack": "npm run clean && npm run build" 39 | }, 40 | "author": "Florian Klampfer (https://qwtel.com/)", 41 | "license": "MIT", 42 | "dependencies": { 43 | "@cloudflare/workers-types": "^2.2.2", 44 | "linkedom": "^0.13.2", 45 | "tslib": "^2.3.1", 46 | "whatwg-stream-to-async-iter": "^0.4.1" 47 | }, 48 | "devDependencies": { 49 | "esbuild": "^0.14.20", 50 | "node-fetch-polyfill": "^2.0.6", 51 | "node-web-streams": "^0.2.2", 52 | "shx": "^0.3.4", 53 | "typed-array-utils": "^0.2.2", 54 | "typescript": "^4.5.5", 55 | "web-streams-polyfill": "^3.2.0" 56 | 
}, 57 | "repository": { 58 | "type": "git", 59 | "url": "git+https://github.com/worker-tools/parsed-html-rewriter.git" 60 | }, 61 | "bugs": { 62 | "url": "https://github.com/worker-tools/parsed-html-rewriter/issues" 63 | }, 64 | "homepage": "https://github.com/worker-tools/parsed-html-rewriter#readme", 65 | "keywords": [ 66 | "polyfill", 67 | "html", 68 | "dom", 69 | "cloudflare-workers", 70 | "html-rewriter" 71 | ] 72 | } 73 | -------------------------------------------------------------------------------- /pnpm-lock.yaml: -------------------------------------------------------------------------------- 1 | lockfileVersion: 5.3 2 | 3 | specifiers: 4 | '@cloudflare/workers-types': ^2.2.2 5 | esbuild: ^0.14.20 6 | linkedom: ^0.13.2 7 | node-fetch-polyfill: ^2.0.6 8 | node-web-streams: ^0.2.2 9 | shx: ^0.3.4 10 | tslib: ^2.3.1 11 | typed-array-utils: ^0.2.2 12 | typescript: ^4.5.5 13 | web-streams-polyfill: ^3.2.0 14 | whatwg-stream-to-async-iter: ^0.4.1 15 | 16 | dependencies: 17 | '@cloudflare/workers-types': 2.2.2 18 | linkedom: 0.13.2 19 | tslib: 2.3.1 20 | whatwg-stream-to-async-iter: 0.4.1 21 | 22 | devDependencies: 23 | esbuild: 0.14.20 24 | node-fetch-polyfill: 2.0.6 25 | node-web-streams: 0.2.2 26 | shx: 0.3.4 27 | typed-array-utils: 0.2.2 28 | typescript: 4.5.5 29 | web-streams-polyfill: 3.2.0 30 | 31 | packages: 32 | 33 | /@cloudflare/workers-types/2.2.2: 34 | resolution: {integrity: sha512-kaMn2rueJ0PL1TYVGknTCh0X0x0d9G+FNXAFep7/4uqecEZoQb/63o6rOmMuiqI09zLuHV6xhKRXinokV/MY9A==} 35 | dev: false 36 | 37 | /balanced-match/1.0.2: 38 | resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} 39 | dev: true 40 | 41 | /boolbase/1.0.0: 42 | resolution: {integrity: sha1-aN/1++YMUes3cl6p4+0xDcwed24=} 43 | dev: false 44 | 45 | /brace-expansion/1.1.11: 46 | resolution: {integrity: sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==} 47 | dependencies: 48 | 
balanced-match: 1.0.2 49 | concat-map: 0.0.1 50 | dev: true 51 | 52 | /concat-map/0.0.1: 53 | resolution: {integrity: sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=} 54 | dev: true 55 | 56 | /css-select/4.2.1: 57 | resolution: {integrity: sha512-/aUslKhzkTNCQUB2qTX84lVmfia9NyjP3WpDGtj/WxhwBzWBYUV3DgUpurHTme8UTPcPlAD1DJ+b0nN/t50zDQ==} 58 | dependencies: 59 | boolbase: 1.0.0 60 | css-what: 5.1.0 61 | domhandler: 4.3.0 62 | domutils: 2.8.0 63 | nth-check: 2.0.1 64 | dev: false 65 | 66 | /css-what/5.1.0: 67 | resolution: {integrity: sha512-arSMRWIIFY0hV8pIxZMEfmMI47Wj3R/aWpZDDxWYCPEiOMv6tfOrnpDtgxBYPEQD4V0Y/958+1TdC3iWTFcUPw==} 68 | engines: {node: '>= 6'} 69 | dev: false 70 | 71 | /cssom/0.5.0: 72 | resolution: {integrity: sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==} 73 | dev: false 74 | 75 | /dom-serializer/1.3.2: 76 | resolution: {integrity: sha512-5c54Bk5Dw4qAxNOI1pFEizPSjVsx5+bpJKmL2kPn8JhBUq2q09tTCa3mjijun2NfK78NMouDYNMBkOrPZiS+ig==} 77 | dependencies: 78 | domelementtype: 2.2.0 79 | domhandler: 4.3.0 80 | entities: 2.2.0 81 | dev: false 82 | 83 | /domelementtype/2.2.0: 84 | resolution: {integrity: sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A==} 85 | dev: false 86 | 87 | /domhandler/4.3.0: 88 | resolution: {integrity: sha512-fC0aXNQXqKSFTr2wDNZDhsEYjCiYsDWl3D01kwt25hm1YIPyDGHvvi3rw+PLqHAl/m71MaiF7d5zvBr0p5UB2g==} 89 | engines: {node: '>= 4'} 90 | dependencies: 91 | domelementtype: 2.2.0 92 | dev: false 93 | 94 | /domutils/2.8.0: 95 | resolution: {integrity: sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==} 96 | dependencies: 97 | dom-serializer: 1.3.2 98 | domelementtype: 2.2.0 99 | domhandler: 4.3.0 100 | dev: false 101 | 102 | /encoding/0.1.13: 103 | resolution: {integrity: sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==} 104 | dependencies: 105 | iconv-lite: 0.6.2 106 | dev: true 
107 | 108 | /entities/2.2.0: 109 | resolution: {integrity: sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==} 110 | dev: false 111 | 112 | /entities/3.0.1: 113 | resolution: {integrity: sha512-WiyBqoomrwMdFG1e0kqvASYfnlb0lp8M5o5Fw2OFq1hNZxxcNk8Ik0Xm7LxzBhuidnZB/UtBqVCgUz3kBOP51Q==} 114 | engines: {node: '>=0.12'} 115 | dev: false 116 | 117 | /esbuild-android-arm64/0.14.20: 118 | resolution: {integrity: sha512-MPKVDe3TMjGDRB5WmY9XnBaXEsPiiTpkz6GjXgBhBkMFZm27PhvZT4JE0vZ1fsLb5hnGC/fYsfAnp9rsxTZhIg==} 119 | engines: {node: '>=12'} 120 | cpu: [arm64] 121 | os: [android] 122 | requiresBuild: true 123 | dev: true 124 | optional: true 125 | 126 | /esbuild-darwin-64/0.14.20: 127 | resolution: {integrity: sha512-09PPWejM3rRFsGHvtaTuRlG+KOQlOMwPW4HwwzRlO4TuP+FNV1nTW4x2Nid3dYLzCkcjznJWQ0oylLBQvGTRyQ==} 128 | engines: {node: '>=12'} 129 | cpu: [x64] 130 | os: [darwin] 131 | requiresBuild: true 132 | dev: true 133 | optional: true 134 | 135 | /esbuild-darwin-arm64/0.14.20: 136 | resolution: {integrity: sha512-jYLrSXAwygoFF2lpRJSUAghre+9IThbcPvJQbcZMONBQaaZft9nclNsrN3k4u7zQaC8v+xZDVSHkmw593tQvkg==} 137 | engines: {node: '>=12'} 138 | cpu: [arm64] 139 | os: [darwin] 140 | requiresBuild: true 141 | dev: true 142 | optional: true 143 | 144 | /esbuild-freebsd-64/0.14.20: 145 | resolution: {integrity: sha512-XShznPLW3QsK8/7iCx1euZTowWaWlcrlkq4YTlRqDKXkJRe98FJ6+V2QyoSTwwCoo5koaYwc+h/SYdglF5369A==} 146 | engines: {node: '>=12'} 147 | cpu: [x64] 148 | os: [freebsd] 149 | requiresBuild: true 150 | dev: true 151 | optional: true 152 | 153 | /esbuild-freebsd-arm64/0.14.20: 154 | resolution: {integrity: sha512-flb3tDd6SScKhBqzWAESVCErpaqrGmMSRrssjx1aC+Ai5ZQrEyhfs5OWL4A9qHuixkhfmXffci7rFD+bNeXmZg==} 155 | engines: {node: '>=12'} 156 | cpu: [arm64] 157 | os: [freebsd] 158 | requiresBuild: true 159 | dev: true 160 | optional: true 161 | 162 | /esbuild-linux-32/0.14.20: 163 | resolution: {integrity: 
sha512-Avtxbd0MHFJ2QhNxj/e8VGGm1/VnEJZq9qiHUl3wQZ4S0o2Wf4ReAfhqmgAbOPFTuxuZm070rRDZYiZifWzFGQ==} 164 | engines: {node: '>=12'} 165 | cpu: [ia32] 166 | os: [linux] 167 | requiresBuild: true 168 | dev: true 169 | optional: true 170 | 171 | /esbuild-linux-64/0.14.20: 172 | resolution: {integrity: sha512-ugisoRA/ajCr9JMszsQnT9hKkpbD7Gr1yl1mWdZhWQnGt6JKGIndGiihMURcrR44IK/2OMkixVe66D4gCHKdPA==} 173 | engines: {node: '>=12'} 174 | cpu: [x64] 175 | os: [linux] 176 | requiresBuild: true 177 | dev: true 178 | optional: true 179 | 180 | /esbuild-linux-arm/0.14.20: 181 | resolution: {integrity: sha512-uo++Mo31+P2EA38oQgOeSIWgD7GMCMpZkaLfsCqtKJTIIL9fVzQHQYLDRIiFGpLHvs1faWWHDCEcXEFSP1Ou0g==} 182 | engines: {node: '>=12'} 183 | cpu: [arm] 184 | os: [linux] 185 | requiresBuild: true 186 | dev: true 187 | optional: true 188 | 189 | /esbuild-linux-arm64/0.14.20: 190 | resolution: {integrity: sha512-hsrMbNzhh+ud3zUyhONlR41vpYMjINS7BHEzXHbzo4YiCsG9Ht3arbiSuNGrhR/ybLr+8J/0fYVCipiVeAjy3Q==} 191 | engines: {node: '>=12'} 192 | cpu: [arm64] 193 | os: [linux] 194 | requiresBuild: true 195 | dev: true 196 | optional: true 197 | 198 | /esbuild-linux-mips64le/0.14.20: 199 | resolution: {integrity: sha512-MBUu2Q+pzdTBWclPe7AwmRUMTUL0R99ONa8Hswpb987fXgFUdN4XBNBcEa5zy/l2UrIJK+9FUN1jjedZlxgP2A==} 200 | engines: {node: '>=12'} 201 | cpu: [mips64el] 202 | os: [linux] 203 | requiresBuild: true 204 | dev: true 205 | optional: true 206 | 207 | /esbuild-linux-ppc64le/0.14.20: 208 | resolution: {integrity: sha512-xkYjQtITA6q/b+/5aAf5n2L063pOxLyXUIad+zYT8GpZh0Sa7aSn18BmrFa2fHb0QSGgTEeRfYkTcBGgoPDjBA==} 209 | engines: {node: '>=12'} 210 | cpu: [ppc64] 211 | os: [linux] 212 | requiresBuild: true 213 | dev: true 214 | optional: true 215 | 216 | /esbuild-linux-s390x/0.14.20: 217 | resolution: {integrity: sha512-AAcj3x80TXIedpNVuZgjYNETXr2iciOBQv5pGdNGAy6rv7k6Y6sT6SXQ58l2LH2AHbaeTPQjze+Y6qgX1efzrA==} 218 | engines: {node: '>=12'} 219 | cpu: [s390x] 220 | os: [linux] 221 | requiresBuild: true 222 | dev: true 
223 | optional: true 224 | 225 | /esbuild-netbsd-64/0.14.20: 226 | resolution: {integrity: sha512-30GQKCnsID1WddUi6tr5HFUxJD0t7Uitf6tO9Cf1WqF6C44pf8EflwrhyDFmUyvkddlyfb4OrYI6NNLC/G3ajg==} 227 | engines: {node: '>=12'} 228 | cpu: [x64] 229 | os: [netbsd] 230 | requiresBuild: true 231 | dev: true 232 | optional: true 233 | 234 | /esbuild-openbsd-64/0.14.20: 235 | resolution: {integrity: sha512-zVrf8fY46BK57AkxDdqu2S8TV3p7oLmYIiW707IOHrveI0TwJ2iypAxnwOQuCvowM3UWqVBO2RDBzV7S7t0klg==} 236 | engines: {node: '>=12'} 237 | cpu: [x64] 238 | os: [openbsd] 239 | requiresBuild: true 240 | dev: true 241 | optional: true 242 | 243 | /esbuild-sunos-64/0.14.20: 244 | resolution: {integrity: sha512-MYRsS1O7+aBr2T/0aA4OJrju6eMku4rm81fwGF1KLFwmymIpPGmj7n69n5JW3NKyW5j+FBt0GcyDh9nEnUL1FQ==} 245 | engines: {node: '>=12'} 246 | cpu: [x64] 247 | os: [sunos] 248 | requiresBuild: true 249 | dev: true 250 | optional: true 251 | 252 | /esbuild-windows-32/0.14.20: 253 | resolution: {integrity: sha512-7VqDITqTU65LQ1Uka/4jx4sUIZc1L8NPlvc7HBRdR15TUyPxmHRQaxMGXd8aakI1FEBcImpJ9SQ4JLmPwRlS1w==} 254 | engines: {node: '>=12'} 255 | cpu: [ia32] 256 | os: [win32] 257 | requiresBuild: true 258 | dev: true 259 | optional: true 260 | 261 | /esbuild-windows-64/0.14.20: 262 | resolution: {integrity: sha512-q4GxY4m5+nXSgqCKx6Cc5pavnhd2g5mHn+K8kNdfCMZsWPDlHLMRjYF5NVQ3/5mJ1M7iR3/Ai4ISjxmsCeGOGA==} 263 | engines: {node: '>=12'} 264 | cpu: [x64] 265 | os: [win32] 266 | requiresBuild: true 267 | dev: true 268 | optional: true 269 | 270 | /esbuild-windows-arm64/0.14.20: 271 | resolution: {integrity: sha512-vOxfU7YwuBMjsUNUygMBhC8T60aCzeYptnHu4k7azqqOVo5EAyoueyWSkFR5GpX6bae5cXyB0vcOV/bfwqRwAg==} 272 | engines: {node: '>=12'} 273 | cpu: [arm64] 274 | os: [win32] 275 | requiresBuild: true 276 | dev: true 277 | optional: true 278 | 279 | /esbuild/0.14.20: 280 | resolution: {integrity: sha512-7aRJRnTjHZ6rFEre52tsAYZxatVELSA/QvYGUBf1iOsYKCnSJICE5seugQFFJgV1Gyl0/mngxQPhxBIqgYG2BA==} 281 | engines: {node: '>=12'} 282 | 
hasBin: true 283 | requiresBuild: true 284 | optionalDependencies: 285 | esbuild-android-arm64: 0.14.20 286 | esbuild-darwin-64: 0.14.20 287 | esbuild-darwin-arm64: 0.14.20 288 | esbuild-freebsd-64: 0.14.20 289 | esbuild-freebsd-arm64: 0.14.20 290 | esbuild-linux-32: 0.14.20 291 | esbuild-linux-64: 0.14.20 292 | esbuild-linux-arm: 0.14.20 293 | esbuild-linux-arm64: 0.14.20 294 | esbuild-linux-mips64le: 0.14.20 295 | esbuild-linux-ppc64le: 0.14.20 296 | esbuild-linux-s390x: 0.14.20 297 | esbuild-netbsd-64: 0.14.20 298 | esbuild-openbsd-64: 0.14.20 299 | esbuild-sunos-64: 0.14.20 300 | esbuild-windows-32: 0.14.20 301 | esbuild-windows-64: 0.14.20 302 | esbuild-windows-arm64: 0.14.20 303 | dev: true 304 | 305 | /fs.realpath/1.0.0: 306 | resolution: {integrity: sha1-FQStJSMVjKpA20onh8sBQRmU6k8=} 307 | dev: true 308 | 309 | /function-bind/1.1.1: 310 | resolution: {integrity: sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==} 311 | dev: true 312 | 313 | /glob/7.2.0: 314 | resolution: {integrity: sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q==} 315 | dependencies: 316 | fs.realpath: 1.0.0 317 | inflight: 1.0.6 318 | inherits: 2.0.4 319 | minimatch: 3.0.4 320 | once: 1.4.0 321 | path-is-absolute: 1.0.1 322 | dev: true 323 | 324 | /has/1.0.3: 325 | resolution: {integrity: sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==} 326 | engines: {node: '>= 0.4.0'} 327 | dependencies: 328 | function-bind: 1.1.1 329 | dev: true 330 | 331 | /html-escaper/3.0.3: 332 | resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==} 333 | dev: false 334 | 335 | /htmlparser2/7.2.0: 336 | resolution: {integrity: sha512-H7MImA4MS6cw7nbyURtLPO1Tms7C5H602LRETv95z1MxO/7CP7rDVROehUYeYBUYEON94NXXDEPmZuq+hX4sog==} 337 | dependencies: 338 | domelementtype: 2.2.0 339 | domhandler: 4.3.0 340 | domutils: 
2.8.0 341 | entities: 3.0.1 342 | dev: false 343 | 344 | /iconv-lite/0.6.2: 345 | resolution: {integrity: sha512-2y91h5OpQlolefMPmUlivelittSWy0rP+oYVpn6A7GwVHNE8AWzoYOBNmlwks3LobaJxgHCYZAnyNo2GgpNRNQ==} 346 | engines: {node: '>=0.10.0'} 347 | dependencies: 348 | safer-buffer: 2.1.2 349 | dev: true 350 | 351 | /inflight/1.0.6: 352 | resolution: {integrity: sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=} 353 | dependencies: 354 | once: 1.4.0 355 | wrappy: 1.0.2 356 | dev: true 357 | 358 | /inherits/2.0.4: 359 | resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} 360 | dev: true 361 | 362 | /interpret/1.4.0: 363 | resolution: {integrity: sha512-agE4QfB2Lkp9uICn7BAqoscw4SZP9kTE2hxiFI3jBPmXJfdqiahTbUuKGsMoN2GtqL9AxhYioAcVvgsb1HvRbA==} 364 | engines: {node: '>= 0.10'} 365 | dev: true 366 | 367 | /is-core-module/2.8.1: 368 | resolution: {integrity: sha512-SdNCUs284hr40hFTFP6l0IfZ/RSrMXF3qgoRHd3/79unUTvrFO/JoXwkGm+5J/Oe3E/b5GsnG330uUNgRpu1PA==} 369 | dependencies: 370 | has: 1.0.3 371 | dev: true 372 | 373 | /is-stream/1.1.0: 374 | resolution: {integrity: sha1-EtSj3U5o4Lec6428hBc66A2RykQ=} 375 | engines: {node: '>=0.10.0'} 376 | dev: true 377 | 378 | /linkedom/0.13.2: 379 | resolution: {integrity: sha512-lQPdDnml1Rl/T8QW3j10jJ37LMRcZqryy5kwHDIw9AYMabeE4P6kMp2mqJXWjjeXxJ4ebJC05Qx9Xxs4jWhNMw==} 380 | dependencies: 381 | css-select: 4.2.1 382 | cssom: 0.5.0 383 | html-escaper: 3.0.3 384 | htmlparser2: 7.2.0 385 | uhyphen: 0.1.0 386 | dev: false 387 | 388 | /minimatch/3.0.4: 389 | resolution: {integrity: sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==} 390 | dependencies: 391 | brace-expansion: 1.1.11 392 | dev: true 393 | 394 | /minimist/1.2.5: 395 | resolution: {integrity: sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==} 396 | dev: true 397 | 398 | /node-fetch-polyfill/2.0.6: 399 | resolution: {integrity: 
sha1-BzzjrWgmvbmVqHKM/E44I/IEQHo=} 400 | dependencies: 401 | encoding: 0.1.13 402 | is-stream: 1.1.0 403 | node-web-streams: 0.2.2 404 | dev: true 405 | 406 | /node-web-streams/0.2.2: 407 | resolution: {integrity: sha1-CH52u+t+jcVmhrJdtOYMX/nbCR8=} 408 | dependencies: 409 | is-stream: 1.1.0 410 | web-streams-polyfill: github.com/gwicke/web-streams-polyfill/42c488428adea1dc0c0245014e4896ad456b1ded 411 | dev: true 412 | 413 | /nth-check/2.0.1: 414 | resolution: {integrity: sha512-it1vE95zF6dTT9lBsYbxvqh0Soy4SPowchj0UBGj/V6cTPnXXtQOPUbhZ6CmGzAD/rW22LQK6E96pcdJXk4A4w==} 415 | dependencies: 416 | boolbase: 1.0.0 417 | dev: false 418 | 419 | /once/1.4.0: 420 | resolution: {integrity: sha1-WDsap3WWHUsROsF9nFC6753Xa9E=} 421 | dependencies: 422 | wrappy: 1.0.2 423 | dev: true 424 | 425 | /path-is-absolute/1.0.1: 426 | resolution: {integrity: sha1-F0uSaHNVNP+8es5r9TpanhtcX18=} 427 | engines: {node: '>=0.10.0'} 428 | dev: true 429 | 430 | /path-parse/1.0.7: 431 | resolution: {integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==} 432 | dev: true 433 | 434 | /rechoir/0.6.2: 435 | resolution: {integrity: sha1-hSBLVNuoLVdC4oyWdW70OvUOM4Q=} 436 | engines: {node: '>= 0.10'} 437 | dependencies: 438 | resolve: 1.22.0 439 | dev: true 440 | 441 | /resolve/1.22.0: 442 | resolution: {integrity: sha512-Hhtrw0nLeSrFQ7phPp4OOcVjLPIeMnRlr5mcnVuMe7M/7eBn98A3hmFRLoFo3DLZkivSYwhRUJTyPyWAk56WLw==} 443 | hasBin: true 444 | dependencies: 445 | is-core-module: 2.8.1 446 | path-parse: 1.0.7 447 | supports-preserve-symlinks-flag: 1.0.0 448 | dev: true 449 | 450 | /safer-buffer/2.1.2: 451 | resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} 452 | dev: true 453 | 454 | /shelljs/0.8.5: 455 | resolution: {integrity: sha512-TiwcRcrkhHvbrZbnRcFYMLl30Dfov3HKqzp5tO5b4pt6G/SezKcYhmDg15zXVBswHmctSAQKznqNW2LO5tTDow==} 456 | engines: {node: '>=4'} 457 | hasBin: true 458 | dependencies: 459 
| glob: 7.2.0 460 | interpret: 1.4.0 461 | rechoir: 0.6.2 462 | dev: true 463 | 464 | /shx/0.3.4: 465 | resolution: {integrity: sha512-N6A9MLVqjxZYcVn8hLmtneQWIJtp8IKzMP4eMnx+nqkvXoqinUPCbUFLp2UcWTEIUONhlk0ewxr/jaVGlc+J+g==} 466 | engines: {node: '>=6'} 467 | hasBin: true 468 | dependencies: 469 | minimist: 1.2.5 470 | shelljs: 0.8.5 471 | dev: true 472 | 473 | /supports-preserve-symlinks-flag/1.0.0: 474 | resolution: {integrity: sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==} 475 | engines: {node: '>= 0.4'} 476 | dev: true 477 | 478 | /tslib/2.3.1: 479 | resolution: {integrity: sha512-77EbyPPpMz+FRFRuAFlWMtmgUWGe9UOG2Z25NqCwiIjRhOf5iKGuzSe5P2w1laq+FkRy4p+PCuVkJSGkzTEKVw==} 480 | dev: false 481 | 482 | /typed-array-utils/0.2.2: 483 | resolution: {integrity: sha512-xq2oNAgDAZKsmzu7KQXDe9cLKiOYZn+6h5VMzcg1BmiiGPlCxMu1mh+tiWzydfIFGdNBs8/WS78ZPkTdHhnxlg==} 484 | dev: true 485 | 486 | /typescript/4.5.5: 487 | resolution: {integrity: sha512-TCTIul70LyWe6IJWT8QSYeA54WQe8EjQFU4wY52Fasj5UKx88LNYKCgBEHcOMOrFF1rKGbD8v/xcNWVUq9SymA==} 488 | engines: {node: '>=4.2.0'} 489 | hasBin: true 490 | dev: true 491 | 492 | /uhyphen/0.1.0: 493 | resolution: {integrity: sha512-o0QVGuFg24FK765Qdd5kk0zU/U4dEsCtN/GSiwNI9i8xsSVtjIAOdTaVhLwZ1nrbWxFVMxNDDl+9fednsOMsBw==} 494 | dev: false 495 | 496 | /web-streams-polyfill/3.2.0: 497 | resolution: {integrity: sha512-EqPmREeOzttaLRm5HS7io98goBgZ7IVz79aDvqjD0kYXLtFZTc0T/U6wHTPKyIjb+MdN7DFIIX6hgdBEpWmfPA==} 498 | engines: {node: '>= 8'} 499 | dev: true 500 | 501 | /whatwg-stream-to-async-iter/0.4.1: 502 | resolution: {integrity: sha512-HYd5mfV1yprWEX2mwoEayVFsuSsXMFkcAEIG/AncHMC3OJKYuH2XN2XVgXY/6Kjyfd4tYXcyZj2Xg3aJNw5Mfg==} 503 | dependencies: 504 | tslib: 2.3.1 505 | dev: false 506 | 507 | /wrappy/1.0.2: 508 | resolution: {integrity: sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=} 509 | dev: true 510 | 511 | github.com/gwicke/web-streams-polyfill/42c488428adea1dc0c0245014e4896ad456b1ded: 512 | resolution: 
{tarball: https://codeload.github.com/gwicke/web-streams-polyfill/tar.gz/42c488428adea1dc0c0245014e4896ad456b1ded} 513 | name: web-streams-polyfill 514 | version: 1.2.2 515 | dev: true 516 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
import { parseHTML } from 'linkedom'
import { asyncIterableToStream } from 'whatwg-stream-to-async-iter';
import {
  ParsedHTMLRewriterElement,
  ParsedHTMLRewriterText,
  ParsedHTMLRewriterComment,
  ParsedHTMLRewriterDocumentType,
  ParsedHTMLRewriterEnd,
  promiseToAsyncIterable,
  append,
  treeWalkerToIter,
  Awaitable,
} from './support.js';

// Numeric values of the DOM `Node.*_NODE` node types.
const ELEMENT_NODE = 1;
const ATTRIBUTE_NODE = 2;
const TEXT_NODE = 3;
const COMMENT_NODE = 8;
const DOCUMENT_NODE = 9;
const DOCUMENT_TYPE_NODE = 10;
const DOCUMENT_FRAGMENT_NODE = 11;
// Numeric values of the DOM `NodeFilter.SHOW_*` bit masks accepted by `createTreeWalker`.
const SHOW_ALL = -1;
const SHOW_ELEMENT = 1;
const SHOW_TEXT = 4;
const SHOW_COMMENT = 128;

/** Type guard: `n` is a text node. `null`/`undefined` yield `false`. */
const isText = (n?: Node | null): n is Text => n?.nodeType === TEXT_NODE;
/** Type guard: `n` is an element node. `null`/`undefined` yield `false`. */
const isElement = (n?: Node | null): n is Element => n?.nodeType === ELEMENT_NODE;
/** Type guard: `n` is a comment node. `null`/`undefined` yield `false`. */
const isComment = (n?: Node | null): n is Comment => n?.nodeType === COMMENT_NODE;

/** Yields every text node below `el`, at any depth, in tree-walker order. */
function* findTextNodes(el: Element, document: any): Iterable<Text> {
  const tw = document.createTreeWalker(el, SHOW_TEXT);
  for (const node of treeWalkerToIter(tw))
    yield node as Text;
}

/** Yields every comment node below `el`, at any depth, in tree-walker order. */
function* findCommentNodes(el: Element, document: any): Iterable<Comment> {
  const tw = document.createTreeWalker(el, SHOW_COMMENT);
  for (const node of treeWalkerToIter(tw))
    yield node as Comment;
}

/**
 * Returns the first node that comes after `el` and all of its descendants:
 * the next sibling of `el`, or of its closest ancestor that has one.
 * Returns `null` when there is no such node (or when `el` is `null`).
 */
function findNext(el: Node | null): Node | null {
  while (el && !el.nextSibling) el = el.parentNode;
  return el && el.nextSibling;
}

/**
 * Cloudflare's `ElementHandler`, extended with an optional `innerHTML` callback
 * that receives the element's serialized inner HTML.
 */
export type ParsedElementHandler = ElementHandler & {
  innerHTML?(html: string): void | Promise<void>;
}

/**
 * A DOM-based implementation of Cloudflare's `HTMLRewriter`.
 */
export class ParsedHTMLRewriter implements HTMLRewriter {
  /** Element handlers, grouped by the selector they were registered for. */
  #onMap = new Map<string, ParsedElementHandler[]>();
  /** Document-level handlers, in registration order. */
  #onDocument = new Array<DocumentHandler>();

  /**
   * Registers `handlers` for all elements matching `selector`.
   * Several handler objects may be registered for the same selector.
   * @returns this rewriter, for chaining.
   */
  public on(selector: string, handlers: ParsedElementHandler): HTMLRewriter {
    append(this.#onMap, selector, handlers);
    return this;
  }

  /**
   * Registers document-level `handlers` (doctype, text, comments, end).
   * @returns this rewriter, for chaining.
   */
  public onDocument(handlers: DocumentHandler): HTMLRewriter {
    this.#onDocument.push(handlers);
    return this;
  }

  /**
   * Runs all registered handlers against the body of `response` and returns a new
   * `Response` whose body is the serialized, rewritten document. The original
   * `response` is passed as the init argument of the new `Response`.
   *
   * The whole body is read and parsed up front; nothing is processed incrementally.
   */
  public transform(response: Response): Response {
    // This dance (promise => async gen => stream) is necessary because
    // a) the `Response` constructor doesn't accept async data, except via (byte) streams, and
    // b) `HTMLRewriter.transform` is not an async function.
    return new Response(asyncIterableToStream(promiseToAsyncIterable((async () => {
      // This is where the "parse" part comes in: We're not actually stream processing,
      // instead we'll just build the DOM in memory and run the selectors.
      const htmlText = await response.text();
      const { document } = parseHTML(htmlText);
      // const document = new DOMParser().parseFromString(htmlText, 'text/html')

      // After that, the hardest part is getting the order right.
      // First, we'll build a map of all elements that are "interesting", based on the registered handlers.
      // We take advantage of existing DOM APIs:
      const elemMap = new Map<Element, ((el: Element) => Awaitable<void>)[]>();
      // Keyed by the node at which the `innerHTML` handler fires (`null` = end of document);
      // each entry remembers the element whose inner HTML is reported.
      const htmlMap = new Map<Node | null, [Element, ((html: string) => Awaitable<void>)][]>();
      const textMap = new Map<Text, ((text: Text) => Awaitable<void>)[]>();
      const commMap = new Map<Comment, ((comment: Comment) => Awaitable<void>)[]>();

      for (const [selector, handlers] of this.#onMap) {
        for (const elem of document.querySelectorAll(selector)) {
          for (const handler of handlers) {
            if (handler.element) {
              append(elemMap, elem, handler.element.bind(handler));
            }

            // The `innerHTML` handler needs to run at the beginning of the next sibling node,
            // after all the inner handlers have completed:
            if (handler.innerHTML) {
              append(htmlMap, findNext(elem), [elem, handler.innerHTML.bind(handler)]);
            }

            // Non-element handlers are odd, in the sense that they run for _any_ children, not just the immediate ones:
            if (handler.text) {
              for (const text of findTextNodes(elem, document)) {
                append(textMap, text, handler.text.bind(handler))
              }
            }

            if (handler.comments) {
              for (const comm of findCommentNodes(elem, document)) {
                append(commMap, comm, handler.comments.bind(handler))
              }
            }
          }
        }
      }

      // Handle document doctype before everything else
      if (document.doctype) {
        const doctype = new ParsedHTMLRewriterDocumentType(document.doctype);
        for (const handler of this.#onDocument) {
          await handler.doctype?.(doctype);
        }
      }

      // We'll then walk the DOM and run the registered handlers each time we encounter an "interesting" node.
      // Because we've stored them in a hash map, and can retrieve them via object identity:
      const walker = document.createTreeWalker(document, SHOW_ELEMENT | SHOW_TEXT | SHOW_COMMENT);

      // We need to walk the entire tree ahead of time,
      // otherwise the order might change based on added/deleted elements:
      // We're also adding `null` at the end to handle the edge case of `innerHTML` of the last element.
      const nodes = [...treeWalkerToIter(walker), null];

      for (const node of nodes) {
        // `innerHTML` handlers scheduled for this position run first, so they
        // observe the element after all handlers inside it have finished.
        for (const [prevElem, handler] of htmlMap.get(node) ?? []) {
          await handler(prevElem.innerHTML);
        }

        if (isElement(node)) {
          const handlers = elemMap.get(node) ?? [];
          for (const handler of handlers) {
            await handler(new ParsedHTMLRewriterElement(node, document) as unknown as Element);
          }
        }
        else if (isText(node)) {
          const handlers = textMap.get(node) ?? [];
          const text = new ParsedHTMLRewriterText(node, document) as unknown as Text;
          for (const handler of handlers) {
            await handler(text);
          }
          for (const handler of this.#onDocument) {
            await handler.text?.(text);
          }
          // When no further text node follows, emit one extra empty chunk
          // (built from `null`, so `lastInTextNode` is true) to the same handlers.
          if (!isText(node.nextSibling)) {
            const textLast = new ParsedHTMLRewriterText(null, document) as unknown as Text;
            for (const handler of handlers) {
              await handler(textLast);
            }
            for (const handler of this.#onDocument) {
              await handler.text?.(textLast);
            }
          }
        }
        else if (isComment(node)) {
          const handlers = commMap.get(node) ?? [];
          const comment = new ParsedHTMLRewriterComment(node, document) as unknown as Comment;
          for (const handler of handlers) {
            await handler(comment);
          }
          for (const handler of this.#onDocument) {
            await handler.comments?.(comment);
          }
        }
      }

      // Handle document end after everything else
      const end = new ParsedHTMLRewriterEnd(document);
      for (const handler of this.#onDocument) {
        await handler.end?.(end);
      }

      return new TextEncoder().encode(document.toString());
    })())), response);
  }
}

--------------------------------------------------------------------------------
/src/perf_hooks.js:
--------------------------------------------------------------------------------
// Re-exports the performance-related globals as named exports.
// tsconfig.json maps the `perf_hooks` import path to this file.
const {
  performance,
  Performance,
  PerformanceEntry,
  PerformanceMark,
  PerformanceMeasure,
  PerformanceNavigation,
  PerformanceNavigationTiming,
  PerformanceObserver,
  PerformanceObserverEntryList,
  PerformanceResourceTiming,
  PerformanceTiming,
} = globalThis;

export {
  performance,
  Performance,
  PerformanceEntry,
  PerformanceMark,
  PerformanceMeasure,
  PerformanceNavigation,
  PerformanceNavigationTiming,
  PerformanceObserver,
  PerformanceObserverEntryList,
  PerformanceResourceTiming,
  PerformanceTiming,
};

--------------------------------------------------------------------------------
/src/polyfill.ts:
--------------------------------------------------------------------------------
import { ParsedHTMLRewriter } from './index.js';

// Install `ParsedHTMLRewriter` as the global `HTMLRewriter`, but only when the
// environment does not already define one. The property is locked down
// (non-configurable, non-enumerable, non-writable).
if (!('HTMLRewriter' in self)) {
  Object.defineProperty(self, 'HTMLRewriter', {
    configurable: false,
    enumerable: false,
    writable: false,
    value: ParsedHTMLRewriter
  })
}
--------------------------------------------------------------------------------
/src/support.ts:
--------------------------------------------------------------------------------
/** A value that may be given directly or wrapped in a promise. */
export type Awaitable<T> = T | Promise<T>;

/**
 * Multi-map insert: pushes `v` onto the array stored under `k`,
 * creating the array when `k` has no entry yet.
 * @returns the map `m`.
 */
export function append<K, V>(m: Map<K, V[]>, k: K, v: V) {
  const vs = m.get(k) ?? [];
  vs.push(v);
  return m.set(k, vs);
}

/** Wraps a promise in an async iterable that yields its resolved value once. */
export async function* promiseToAsyncIterable<T>(promise: Promise<T>): AsyncIterableIterator<T> {
  yield await promise;
}

/** Yields the nodes returned by successive `walker.nextNode()` calls until it returns a falsy value. */
export function* treeWalkerToIter(walker: TreeWalker): IterableIterator<Node> {
  let node = walker.nextNode();
  while (node) {
    yield node;
    node = walker.nextNode();
  }
}

type Content = string;

/** Fragment form string function that works with linkedom. */
function fragmentFromString(document: Document, html: string) {
  const temp = document.createElement('template');
  temp.innerHTML = html;
  return temp.content;
}

/**
 * Replaces `node` with `content`: parsed as HTML when `opts.html` is truthy,
 * otherwise inserted as a plain string. Does nothing when `node` is `null`.
 */
function replace(document: Document, node: Element | Text | Comment | null, content: string, opts?: ContentOptions) {
  node?.replaceWith(...opts?.html
    ? fragmentFromString(document, content).childNodes // depends on DOM.Iterable
    : [content]);
}

/**
 * Base wrapper around a DOM node, providing the mutation methods shared by
 * the element, text and comment wrappers. The wrapped node may be `null`,
 * in which case the mutation methods leave the document unchanged.
 */
export class ParsedHTMLRewriterNode {
  #node: Element | Text | Comment | null;
  #doc: Document;
  constructor(node: Element | Text | Comment | null, document: Document) {
    this.#node = node;
    this.#doc = document;
  }

  /** `true` when the document no longer contains the wrapped node. */
  get removed() { return !this.#doc.contains(this.#node) }

  /** Inserts `content` immediately before the wrapped node (as a sibling). */
  before(content: Content, opts?: ContentOptions): this {
    // Insert an empty placeholder comment at the target position, then swap it for the content.
    // Nothing is inserted when the node has no parent *element*.
    const before = this.#doc.createComment('');
    this.#node?.parentElement?.insertBefore(before, this.#node)
    replace(this.#doc, before, content, opts);
    return this;
  }

  /** Inserts `content` immediately after the wrapped node (as a sibling). */
  after(content: Content, opts?: ContentOptions): this {
    const after = this.#doc.createComment('');
    this.#node?.parentElement?.insertBefore(after, this.#node.nextSibling)
    replace(this.#doc, after, content, opts);
    return this;
  }

  /** Replaces the wrapped node with `content`. */
  replace(content: Content, opts?: ContentOptions): this {
    replace(this.#doc, this.#node, content, opts);
    return this;
  }

  /** Removes the wrapped node from its parent. */
  remove(): this {
    this.#node?.remove()
    return this;
  }
}

/** Wrapper handed to `element` handlers. */
export class ParsedHTMLRewriterElement extends ParsedHTMLRewriterNode {
  #node: Element;
  // The base class's `#doc` is private to it, so keep our own reference.
  #doc: Document;

  constructor(node: Element, document: Document) {
    super(node, document)
    this.#node = node;
    this.#doc = document;
  }

  /** The element's tag name, lower-cased. */
  get tagName() { return this.#node.tagName.toLowerCase() }
  /** The element's attributes as `[name, value]` pairs (a snapshot taken at access time). */
  get attributes(): Iterable<[string, string]> {
    return [...this.#node.attributes].map(attr => [attr.name, attr.value]);
  }
  get namespaceURI() { return this.#node.namespaceURI }

  getAttribute(name: string) {
    return this.#node.getAttribute(name);
  }

  hasAttribute(name: string) {
    return this.#node.hasAttribute(name);
  }

  setAttribute(name: string, value: string): this {
    this.#node.setAttribute(name, value);
    return this;
  }

  removeAttribute(name: string): this {
    this.#node.removeAttribute(name);
    return this;
  }

  /**
   * Inserts `content` inside the element, before its first child
   * (i.e. right after the start tag).
   */
  prepend(content: Content, opts?: ContentOptions): this {
    // Same placeholder technique as `before`/`after`, but anchored inside the element.
    const marker = this.#doc.createComment('');
    this.#node.insertBefore(marker, this.#node.firstChild);
    replace(this.#doc, marker, content, opts);
    return this;
  }

  /**
   * Inserts `content` inside the element, after its last child
   * (i.e. right before the end tag).
   */
  append(content: Content, opts?: ContentOptions): this {
    const marker = this.#doc.createComment('');
    this.#node.insertBefore(marker, null);
    replace(this.#doc, marker, content, opts);
    return this;
  }

  /** Replaces the element's children with `content` (HTML when `opts.html` is truthy, text otherwise). */
  setInnerContent(content: Content, opts?: ContentOptions): this {
    this.#node[opts?.html ? 'innerHTML' : 'textContent'] = content;
    return this;
  }

  /** Removes the element itself but leaves its children in its place. */
  removeAndKeepContent(): this {
    this.#node?.replaceWith(...this.#node.childNodes);
    return this;
  }
}

/**
 * Wrapper handed to `text` handlers. An instance constructed with `null`
 * serves as the end-of-text marker: its `text` is `''` and `lastInTextNode` is `true`.
 */
export class ParsedHTMLRewriterText extends ParsedHTMLRewriterNode {
  #text: Text | null;
  #done: boolean;

  constructor(text: Text | null, document: Document) {
    super(text, document);
    this.#text = text;
    this.#done = text === null;
  }
  get text() { return this.#text?.textContent ?? '' }
  get lastInTextNode() { return this.#done }
}

/** Wrapper handed to `comments` handlers; `text` reads and writes the comment's content. */
export class ParsedHTMLRewriterComment extends ParsedHTMLRewriterNode {
  #comm: Comment;
  constructor(comm: Comment, document: Document) {
    super(comm, document);
    this.#comm = comm;
  }
  get text() { return this.#comm.textContent ?? '' }
  set text(value: string) { this.#comm.textContent = value }
}

/** Read-only view of the document's doctype, handed to `doctype` handlers. */
export class ParsedHTMLRewriterDocumentType {
  #doctype: DocumentType;

  constructor(doctype: DocumentType) {
    this.#doctype = doctype;
  }
  get name() { return this.#doctype.name }
  get publicId() { return this.#doctype.publicId }
  get systemId() { return this.#doctype.systemId }
}

/** Object handed to document `end` handlers. */
export class ParsedHTMLRewriterEnd {
  #doc: Document;

  constructor(document: Document) {
    this.#doc = document;
  }

  /** Appends `content` as the last child of the document. */
  append(content: Content, opts?: ContentOptions): this {
    const after = this.#doc.createComment('');
    this.#doc.insertBefore(after, null);
    replace(this.#doc, after, content, opts);
    return this;
  }
}

// function* ancestors(el: Node) {
//   while (el.parentElement) {
//     yield el.parentElement
//     el = el.parentElement
//   }
// }

// function root(el: Node): globalThis.HTMLElement | undefined {
//   const ancs = [...ancestors(el)]
//   return ancs[ancs.length - 1];
// }

// function* zip(xs: Iterable, ys: Iterable): IterableIterator<[X, Y]> {
//   const xit = xs[Symbol.iterator]();
//   const yit = ys[Symbol.iterator]();
//   while (true) {
//     const [xr, yr] = [xit.next(), yit.next()];
//     if (xr.done || yr.done) break;
//     yield [xr.value, yr.value];
//   }
// }

// /* Checks if this element or any of its parents matches a given `selector`.
*/ 194 | // function matchesAncestors(el: Element | null, selector: string): Element | null { 195 | // let curr = el; 196 | // while (curr != null) { 197 | // if (curr.matches(selector)) return curr; 198 | // curr = curr.parentElement; 199 | // } 200 | // return null; 201 | // } -------------------------------------------------------------------------------- /test/patch-global.js: -------------------------------------------------------------------------------- 1 | import webStreams from 'node-web-streams'; 2 | import fetch from 'node-fetch-polyfill'; 3 | import { concatBufferSources } from 'typed-array-utils'; 4 | 5 | Object.assign(global, webStreams); 6 | Object.assign(global, fetch); 7 | 8 | class FixedResponse extends Response { 9 | async text() { 10 | if (this._rawBody instanceof ReadableStream) { 11 | const r = this._rawBody.getReader(); 12 | const bs = []; 13 | let i = await r.read(); 14 | while (!i.done) { 15 | bs.push(i.value); 16 | i = await r.read() 17 | } 18 | return new TextDecoder().decode(concatBufferSources(...bs)) 19 | } else { 20 | return super.text(); 21 | } 22 | } 23 | } 24 | 25 | global.Response = FixedResponse; 26 | -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | import './patch-global.js'; 2 | 3 | import assert from 'assert'; 4 | import { ParsedHTMLRewriter } from '../index.js'; 5 | // import { parseHTML, DOMParser } from 'linkedom' 6 | 7 | ; (async () => { 8 | try { 9 | // Testing the environment first 10 | assert.ok(Response) 11 | assert.ok(new Response()) 12 | assert.ok(ReadableStream) 13 | assert.ok(new ReadableStream({})) 14 | assert.strictEqual( 15 | await new Response('').text(), 16 | '', 17 | ); 18 | 19 | assert.ok(ParsedHTMLRewriter) 20 | assert.ok(new ParsedHTMLRewriter()) 21 | assert.strictEqual( 22 | await new ParsedHTMLRewriter().transform(new Response('')).text(), 23 | '', 24 | ); 25 | 26 | const 
htmlText = 'Hello span content text.'; 27 | let calledBodyElem = false; 28 | let calledBodyText = false; 29 | let calledBodyComm = false; 30 | let calledSpanElem = false; 31 | let calledSpanText = false; 32 | const texts = ['Hello ', '', 'span content', '', ' text.', '']; 33 | await new ParsedHTMLRewriter() 34 | .on('body', { 35 | element(el) { 36 | calledBodyElem = true; 37 | assert.ok(el); 38 | assert.ok(el.hasAttribute); 39 | assert.ok(el.hasAttribute('id')); 40 | assert.ok(el.hasAttribute('class')); 41 | assert.strictEqual(el.getAttribute('id'), 'id'); 42 | assert.strictEqual(el.getAttribute('class'), 'body'); 43 | 44 | // assert.deepStrictEqual( 45 | // new Set(Object.keys(el)), 46 | // new Set(['removed', 'attributes', 'tagName', 'namespaceURI']), 47 | // ); 48 | 49 | // Remove an attribute 50 | assert.strictEqual(el.removeAttribute('to'), el); 51 | assert.strictEqual(el.hasAttribute('to'), false); 52 | assert.deepStrictEqual( 53 | new Map([...el.attributes]), 54 | new Map([['id', 'id'], ['class', 'body'], ['zzz', ''], ['remove', '2']]), 55 | ); 56 | 57 | // Remove another attribute 58 | el.removeAttribute('remove'); 59 | assert.strictEqual(el.hasAttribute('remove'), false); 60 | assert.deepStrictEqual( 61 | new Map([...el.attributes]), 62 | new Map([['id', 'id'], ['class', 'body'], ['zzz', '']]), 63 | ); 64 | 65 | // Change an attribute 66 | assert.strictEqual(el.setAttribute('id', 'foo'), el); 67 | assert.strictEqual(el.getAttribute('id'), 'foo'); 68 | }, 69 | text(span) { 70 | calledBodyText = true; 71 | assert.ok(span); 72 | assert.ok('lastInTextNode' in span); 73 | // assert.deepStrictEqual( 74 | // new Set(Object.keys(span)), 75 | // new Set(['removed', 'text', 'lastInTextNode']), 76 | // ); 77 | assert.strictEqual(span.text, texts.shift()); 78 | }, 79 | comments(comm) { 80 | calledBodyComm = true; 81 | assert.ok(comm) 82 | assert.strictEqual(comm.text, 'more') 83 | // assert.deepStrictEqual( 84 | // new Set(Object.keys(comm)), 85 | // new 
Set(['removed', 'text']), 86 | // ); 87 | } 88 | }) 89 | .on('span[id]', { 90 | element(span) { 91 | calledSpanElem = true 92 | assert.ok(span) 93 | assert.ok(span.hasAttribute('id')); 94 | assert.strictEqual(span.getAttribute('id'), 'span'); 95 | }, 96 | text(span) { 97 | calledSpanText = true 98 | assert.ok(span); 99 | assert.ok('lastInTextNode' in span); 100 | if (span.lastInTextNode) 101 | assert.strictEqual(span.text, ''); 102 | else 103 | assert.strictEqual(span.text, 'span content'); 104 | } 105 | }) 106 | .transform(new Response(htmlText)) 107 | .text() 108 | 109 | assert.ok(calledBodyElem); 110 | assert.ok(calledBodyText); 111 | assert.ok(calledBodyComm); 112 | assert.ok(calledSpanElem); 113 | assert.ok(calledSpanText); 114 | 115 | assert.strictEqual( 116 | await new ParsedHTMLRewriter() 117 | .on('main', { element(el) { el.replace('

Foobar
') } }) 118 | .transform(new Response('
')) 119 | .text(), 120 | '<div>Foobar</div>', 121 | ); 122 | assert.strictEqual( 123 | await new ParsedHTMLRewriter() 124 | .on('main', { element(el) { el.replace('
Foobar
', { html: true }) } }) 125 | .transform(new Response('
')) 126 | .text(), 127 | '
Foobar
', 128 | ); 129 | 130 | assert.strictEqual( 131 | await new ParsedHTMLRewriter() 132 | .on('main', { element(el) { el.remove() } }) 133 | .transform(new Response('
H
M
')) 134 | .text(), 135 | '
H
', 136 | ); 137 | 138 | assert.strictEqual( 139 | await new ParsedHTMLRewriter() 140 | .on('main', { element(el) { el.removeAndKeepContent() } }) 141 | .transform(new Response('
H
M
')) 142 | .text(), 143 | '
H
M', 144 | ); 145 | 146 | assert.strictEqual( 147 | await new ParsedHTMLRewriter() 148 | .on('main', { element(el) { el.setInnerContent('
D
') } }) 149 | .transform(new Response('
H
M
')) 150 | .text(), 151 | '
H
<div>D</div>
', 152 | ); 153 | 154 | // Test innerHTML handler 155 | let innerHTMLCalled = false; 156 | assert.strictEqual( 157 | await new ParsedHTMLRewriter() 158 | .on('main', { 159 | element(el) { el.setInnerContent('
D
', { html: true }) }, 160 | innerHTML(html) { 161 | innerHTMLCalled = true; 162 | assert.strictEqual(html, '
D
'); 163 | }, 164 | }) 165 | .transform(new Response('
H
M
')) 166 | .text(), 167 | '
H
D
', 168 | ); 169 | assert.ok(innerHTMLCalled); 170 | 171 | innerHTMLCalled = false; 172 | await new ParsedHTMLRewriter() 173 | .on('div[id]', { 174 | innerHTML(html) { 175 | innerHTMLCalled = true; 176 | assert.strictEqual(html, 'D') 177 | }, 178 | }) 179 | .transform(new Response('
H
M
')) 180 | .text() 181 | assert.ok(innerHTMLCalled); 182 | 183 | // innerHTML being called after inner handlers run: 184 | innerHTMLCalled = false; 185 | await new ParsedHTMLRewriter() 186 | .on('main', { 187 | innerHTML(html) { 188 | innerHTMLCalled = true; 189 | assert.strictEqual(html, '3') 190 | }, 191 | }) 192 | .on('main div[id="1"]', { element(el) { el.removeAndKeepContent() }}) 193 | .on('main div[id="2"]', { element(el) { el.removeAndKeepContent() }}) 194 | .transform(new Response('
3
')) 195 | .text() 196 | assert.ok(innerHTMLCalled); 197 | 198 | // Test onDocument handler 199 | const documentHtmlText = 'TB'; 200 | let calledDocumentDoctype = false 201 | let calledDocumentComm = false; 202 | let calledDocumentText = false; 203 | let calledDocumentEnd = false; 204 | const documentTexts = ['T', '', 'B', '']; 205 | const newDocument = await new ParsedHTMLRewriter() 206 | .onDocument({ 207 | doctype(doctype) { 208 | calledDocumentDoctype = true; 209 | assert.ok(doctype); 210 | assert.strictEqual(doctype.name, 'html'); 211 | assert.strictEqual(doctype.publicId, '-//W3C//DTD HTML 4.01//EN'); 212 | assert.strictEqual(doctype.systemId, 'http://www.w3.org/TR/html4/strict.dtd'); 213 | }, 214 | comments(comm) { 215 | calledDocumentComm = true; 216 | assert.ok(comm); 217 | assert.strictEqual(comm.text, "outside"); 218 | comm.before("", { html: true }); 219 | }, 220 | text(span) { 221 | calledDocumentText = true; 222 | assert.ok(span); 223 | assert.ok('lastInTextNode' in span); 224 | assert.strictEqual(span.text, documentTexts.shift()); 225 | }, 226 | end(end) { 227 | calledDocumentEnd = true; 228 | assert.ok(end); 229 | end.append("", { html: true }); 230 | } 231 | }) 232 | .transform(new Response(documentHtmlText)) 233 | .text(); 234 | 235 | assert.ok(calledDocumentDoctype); 236 | assert.ok(calledDocumentComm); 237 | assert.ok(calledDocumentText); 238 | assert.ok(calledDocumentEnd); 239 | assert.strictEqual(newDocument, 'TB'); 240 | 241 | } catch (err) { 242 | console.error(err) 243 | } 244 | })() -------------------------------------------------------------------------------- /tsconfig.cjs.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "lib": [ 4 | "ES2020", 5 | "ESNext.AsyncIterable", 6 | "DOM", 7 | "DOM.Iterable" 8 | ], 9 | "target": "ES2015", 10 | "moduleResolution": "Node", 11 | "outDir": "cjs", 12 | "types": [ 13 | "@cloudflare/workers-types", 14 | ], 15 | "importHelpers": 
true, 16 | "experimentalDecorators": true, 17 | "strict": true, 18 | "skipLibCheck": true, 19 | "skipDefaultLibCheck": true, 20 | "module": "CommonJS", 21 | }, 22 | "include": [ 23 | "typings/*.d.ts", 24 | "src/**/.ts", 25 | "src/*.ts" 26 | ], 27 | "exclude": [], 28 | } -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "lib": [ 4 | "ES2020", 5 | "ESNext.AsyncIterable", 6 | "DOM", 7 | "DOM.Iterable" 8 | ], 9 | "target": "ES2019", 10 | "moduleResolution": "Node", 11 | "outDir": ".", 12 | "types": [ 13 | "@cloudflare/workers-types", 14 | ], 15 | "paths": { 16 | "perf_hooks": ["./src/perf_hooks.js"], 17 | }, 18 | "importHelpers": true, 19 | "experimentalDecorators": true, 20 | "strict": true, 21 | "skipLibCheck": true, 22 | "skipDefaultLibCheck": true, 23 | "declaration": true, 24 | "declarationMap": true, 25 | "sourceMap": true, 26 | "inlineSources": true, 27 | }, 28 | "include": [ 29 | "typings/*.d.ts", 30 | "src/**/.ts", 31 | "src/*.ts" 32 | ], 33 | "exclude": [] 34 | } --------------------------------------------------------------------------------