├── .eslintrc.json
├── .gitattributes
├── .github
    ├── FUNDING.yml
    ├── dependabot.yml
    └── workflows
    │   ├── codeql-analysis.yml
    │   ├── dependabot-automerge.yml
    │   └── nodejs-test.yml
├── .gitignore
├── LICENSE
├── README.md
├── SECURITY.md
├── WritableStream.js
├── _config.yml
├── package-lock.json
├── package.json
├── src
    ├── FeedHandler.spec.ts
    ├── Parser.events.spec.ts
    ├── Parser.spec.ts
    ├── Parser.ts
    ├── Tokenizer.spec.ts
    ├── Tokenizer.ts
    ├── WritableStream.spec.ts
    ├── WritableStream.ts
    ├── __fixtures__
    │   ├── Documents
    │   │   ├── Atom_Example.xml
    │   │   ├── Attributes.html
    │   │   ├── Basic.html
    │   │   ├── RDF_Example.xml
    │   │   ├── RSS_Example.xml
    │   │   └── Svg.html
    │   └── testHelper.ts
    ├── __snapshots__
    │   ├── FeedHandler.spec.ts.snap
    │   ├── Parser.events.spec.ts.snap
    │   ├── Tokenizer.spec.ts.snap
    │   ├── WritableStream.spec.ts.snap
    │   └── index.spec.ts.snap
    ├── index.spec.ts
    └── index.ts
└── tsconfig.json


/.eslintrc.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "extends": [
  3 |         "eslint:recommended",
  4 |         "prettier",
  5 |         "plugin:n/recommended",
  6 |         "plugin:unicorn/recommended"
  7 |     ],
  8 |     "env": {
  9 |         "node": true,
 10 |         "es6": true
 11 |     },
 12 |     "rules": {
 13 |         "eqeqeq": [2, "smart"],
 14 |         "no-caller": 2,
 15 |         "dot-notation": 2,
 16 |         "no-var": 2,
 17 |         "prefer-const": 2,
 18 |         "prefer-arrow-callback": [2, { "allowNamedFunctions": true }],
 19 |         "arrow-body-style": [2, "as-needed"],
 20 |         "object-shorthand": 2,
 21 |         "prefer-template": 2,
 22 |         "one-var": [2, "never"],
 23 |         "prefer-destructuring": [2, { "object": true }],
 24 |         "capitalized-comments": 2,
 25 |         "multiline-comment-style": [2, "starred-block"],
 26 |         "spaced-comment": 2,
 27 |         "yoda": [2, "never"],
 28 |         "curly": [2, "multi-line"],
 29 |         "no-else-return": 2,
 30 | 
 31 |         "n/no-unpublished-import": 0,
 32 | 
 33 |         "unicorn/filename-case": [
 34 |             2,
 35 |             {
 36 |                 "cases": {
 37 |                     "camelCase": true,
 38 |                     "pascalCase": true
 39 |                 }
 40 |             }
 41 |         ],
 42 |         "unicorn/no-null": 0,
 43 |         "unicorn/prefer-code-point": 0,
 44 |         "unicorn/prefer-string-slice": 0,
 45 |         "unicorn/prefer-add-event-listener": 0,
 46 |         "unicorn/prefer-at": 0,
 47 |         "unicorn/prefer-string-replace-all": 0
 48 |     },
 49 |     "overrides": [
 50 |         {
 51 |             "files": "*.ts",
 52 |             "extends": [
 53 |                 "plugin:@typescript-eslint/eslint-recommended",
 54 |                 "plugin:@typescript-eslint/recommended",
 55 |                 "prettier"
 56 |             ],
 57 |             "parserOptions": {
 58 |                 "sourceType": "module",
 59 |                 "project": "./tsconfig.json"
 60 |             },
 61 |             "rules": {
 62 |                 "curly": [2, "multi-line"],
 63 | 
 64 |                 "@typescript-eslint/prefer-for-of": 0,
 65 |                 "@typescript-eslint/member-ordering": 0,
 66 |                 "@typescript-eslint/explicit-function-return-type": 0,
 67 |                 "@typescript-eslint/no-unused-vars": 0,
 68 |                 "@typescript-eslint/no-use-before-define": [
 69 |                     2,
 70 |                     { "functions": false }
 71 |                 ],
 72 |                 "@typescript-eslint/consistent-type-definitions": [
 73 |                     2,
 74 |                     "interface"
 75 |                 ],
 76 |                 "@typescript-eslint/prefer-function-type": 2,
 77 |                 "@typescript-eslint/no-unnecessary-type-arguments": 2,
 78 |                 "@typescript-eslint/prefer-string-starts-ends-with": 2,
 79 |                 "@typescript-eslint/prefer-readonly": 2,
 80 |                 "@typescript-eslint/prefer-includes": 2,
 81 |                 "@typescript-eslint/no-unnecessary-condition": 2,
 82 |                 "@typescript-eslint/switch-exhaustiveness-check": 2,
 83 |                 "@typescript-eslint/prefer-nullish-coalescing": 2,
 84 |                 "@typescript-eslint/consistent-type-imports": [
 85 |                     2,
 86 |                     { "fixStyle": "inline-type-imports" }
 87 |                 ],
 88 |                 "@typescript-eslint/consistent-type-exports": 2,
 89 | 
 90 |                 "n/no-missing-import": 0,
 91 |                 "n/no-unsupported-features/es-syntax": 0
 92 |             }
 93 |         },
 94 |         {
 95 |             "files": "*.spec.ts",
 96 |             "rules": {
 97 |                 "n/no-unsupported-features/node-builtins": 0
 98 |             }
 99 |         }
100 |     ]
101 | }
102 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text eol=lf


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [fb55]
2 | tidelift: npm/htmlparser2
3 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |     - package-ecosystem: npm
 4 |       directory: "/"
 5 |       schedule:
 6 |           interval: daily
 7 |       open-pull-requests-limit: 10
 8 |       versioning-strategy: increase
 9 |     - package-ecosystem: "github-actions"
10 |       directory: "/"
11 |       schedule:
12 |           interval: daily
13 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
 1 | name: "CodeQL"
 2 | 
 3 | on:
 4 |     push:
 5 |         branches: [master]
 6 |     pull_request:
 7 |         # The branches below must be a subset of the branches above
 8 |         branches: [master]
 9 |     schedule:
10 |         - cron: "0 0 * * 0"
11 | 
12 | jobs:
13 |     analyze:
14 |         name: Analyze
15 |         runs-on: ubuntu-latest
16 |         permissions:
17 |             actions: read
18 |             contents: read
19 |             security-events: write
20 | 
21 |         steps:
22 |             - name: Checkout repository
23 |               uses: actions/checkout@v4
24 | 
25 |             - name: Initialize CodeQL
26 |               uses: github/codeql-action/init@v3
27 |               with:
28 |                   languages: "javascript"
29 | 
30 |             - name: Perform CodeQL Analysis
31 |               uses: github/codeql-action/analyze@v3
32 | 


--------------------------------------------------------------------------------
/.github/workflows/dependabot-automerge.yml:
--------------------------------------------------------------------------------
 1 | # Based on https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/automating-dependabot-with-github-actions#enable-auto-merge-on-a-pull-request
 2 | name: Dependabot auto-merge
 3 | on: pull_request_target
 4 | 
 5 | permissions:
 6 |     pull-requests: write
 7 |     contents: write
 8 | 
 9 | jobs:
10 |     dependabot:
11 |         runs-on: ubuntu-latest
12 |         if: ${{ github.actor == 'dependabot[bot]' }}
13 |         steps:
14 |             - name: Dependabot metadata
15 |               id: metadata
16 |               uses: dependabot/fetch-metadata@v2.4.0
17 |               with:
18 |                   github-token: "${{ secrets.GITHUB_TOKEN }}"
19 |             - name: Enable auto-merge for Dependabot PRs
20 |               # Automatically merge semver-patch and semver-minor PRs
21 |               if: "${{ steps.metadata.outputs.update-type ==
22 |                   'version-update:semver-minor' ||
23 |                   steps.metadata.outputs.update-type ==
24 |                   'version-update:semver-patch' }}"
25 |               run: gh pr merge --auto --squash "$PR_URL"
26 |               env:
27 |                   PR_URL: ${{github.event.pull_request.html_url}}
28 |                   GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
29 | 


--------------------------------------------------------------------------------
/.github/workflows/nodejs-test.yml:
--------------------------------------------------------------------------------
 1 | name: Node.js CI
 2 | 
 3 | on:
 4 |     push:
 5 |         branches-ignore:
 6 |             - "dependabot/**"
 7 |     pull_request:
 8 | 
 9 | env:
10 |     CI: true
11 |     FORCE_COLOR: 2
12 |     NODE_COV: lts/* # The Node.js version to run coveralls on
13 | 
14 | permissions:
15 |     contents: read #  to fetch code (actions/checkout)
16 | 
17 | jobs:
18 |     lint:
19 |         runs-on: ubuntu-latest
20 |         steps:
21 |             - uses: actions/checkout@v4
22 |             - uses: actions/setup-node@v4
23 |               with:
24 |                   node-version: lts/*
25 |                   cache: npm
26 |             - run: npm ci
27 |             - run: npm run lint
28 | 
29 |     test:
30 |         permissions:
31 |             contents: read #  to fetch code (actions/checkout)
32 |             checks: write #  to create new checks (coverallsapp/github-action)
33 | 
34 |         name: Node ${{ matrix.node }}
35 |         runs-on: ubuntu-latest
36 | 
37 |         strategy:
38 |             fail-fast: false
39 |             matrix:
40 |                 node:
41 |                     - 18
42 |                     - 20
43 |                     - 22
44 |                     - lts/*
45 | 
46 |         steps:
47 |             - uses: actions/checkout@v4
48 |             - name: Use Node.js ${{ matrix.node }}
49 |               uses: actions/setup-node@v4
50 |               with:
51 |                   node-version: ${{ matrix.node }}
52 |                   cache: npm
53 |             - run: npm ci
54 |             - run: npm run build --if-present
55 | 
56 |             - name: Run tests
57 |               run: npm run test:vi
58 |               if: matrix.node != env.NODE_COV
59 | 
60 |             - name: Run tests with coverage
61 |               run: npm run test:vi -- --coverage
62 |               if: matrix.node == env.NODE_COV
63 | 
64 |             - name: Run Coveralls
65 |               uses: coverallsapp/github-action@v2.3.6
66 |               if: matrix.node == env.NODE_COV
67 |               continue-on-error: true
68 |               with:
69 |                   github-token: "${{ secrets.GITHUB_TOKEN }}"
70 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | coverage/
3 | dist/
4 | .tshy/
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2010, 2011, Chris Winberry <chris@winberry.net>. All rights reserved.
 2 | Permission is hereby granted, free of charge, to any person obtaining a copy
 3 | of this software and associated documentation files (the "Software"), to
 4 | deal in the Software without restriction, including without limitation the
 5 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 6 | sell copies of the Software, and to permit persons to whom the Software is
 7 | furnished to do so, subject to the following conditions:
 8 |  
 9 | The above copyright notice and this permission notice shall be included in
10 | all copies or substantial portions of the Software.
11 |  
12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
17 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
18 | IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # htmlparser2
  2 | 
  3 | [![NPM version](https://img.shields.io/npm/v/htmlparser2.svg)](https://npmjs.org/package/htmlparser2)
  4 | [![Downloads](https://img.shields.io/npm/dm/htmlparser2.svg)](https://npmjs.org/package/htmlparser2)
  5 | [![Node.js CI](https://github.com/fb55/htmlparser2/actions/workflows/nodejs-test.yml/badge.svg)](https://github.com/fb55/htmlparser2/actions/workflows/nodejs-test.yml)
  6 | [![Coverage](https://img.shields.io/coveralls/fb55/htmlparser2.svg)](https://coveralls.io/r/fb55/htmlparser2)
  7 | 
  8 | The fast & forgiving HTML/XML parser.
  9 | 
 10 | _htmlparser2 is [the fastest HTML parser](#performance), and takes some shortcuts to get there. If you need strict HTML spec compliance, have a look at [parse5](https://github.com/inikulin/parse5)._
 11 | 
 12 | ## Installation
 13 | 
 14 |     npm install htmlparser2
 15 | 
 16 | A live demo of `htmlparser2` is available [on AST Explorer](https://astexplorer.net/#/2AmVrGuGVJ).
 17 | 
 18 | ## Ecosystem
 19 | 
 20 | | Name                                                          | Description                                             |
 21 | | ------------------------------------------------------------- | ------------------------------------------------------- |
 22 | | [htmlparser2](https://github.com/fb55/htmlparser2)            | Fast & forgiving HTML/XML parser                        |
 23 | | [domhandler](https://github.com/fb55/domhandler)              | Handler for htmlparser2 that turns documents into a DOM |
 24 | | [domutils](https://github.com/fb55/domutils)                  | Utilities for working with domhandler's DOM             |
 25 | | [css-select](https://github.com/fb55/css-select)              | CSS selector engine, compatible with domhandler's DOM   |
 26 | | [cheerio](https://github.com/cheeriojs/cheerio)               | The jQuery API for domhandler's DOM                     |
 27 | | [dom-serializer](https://github.com/cheeriojs/dom-serializer) | Serializer for domhandler's DOM                         |
 28 | 
 29 | ## Usage
 30 | 
 31 | `htmlparser2` itself provides a callback interface that allows consumption of documents with minimal allocations.
 32 | For a more ergonomic experience, read [Getting a DOM](#getting-a-dom) below.
 33 | 
 34 | ```js
 35 | import * as htmlparser2 from "htmlparser2";
 36 | 
 37 | const parser = new htmlparser2.Parser({
 38 |     onopentag(name, attributes) {
 39 |         /*
 40 |          * This fires when a new tag is opened.
 41 |          *
 42 |          * If you don't need an aggregated `attributes` object,
 43 |          * have a look at the `onopentagname` and `onattribute` events.
 44 |          */
 45 |         if (name === "script" && attributes.type === "text/javascript") {
 46 |             console.log("JS! Hooray!");
 47 |         }
 48 |     },
 49 |     ontext(text) {
 50 |         /*
 51 |          * Fires whenever a section of text was processed.
 52 |          *
 53 |          * Note that this can fire at any point within text and you might
 54 |          * have to stitch together multiple pieces.
 55 |          */
 56 |         console.log("-->", text);
 57 |     },
 58 |     onclosetag(tagname) {
 59 |         /*
 60 |          * Fires when a tag is closed.
 61 |          *
 62 |          * You can rely on this event only firing when you have received an
 63 |          * equivalent opening tag before. Closing tags without corresponding
 64 |          * opening tags will be ignored.
 65 |          */
 66 |         if (tagname === "script") {
 67 |             console.log("That's it?!");
 68 |         }
 69 |     },
 70 | });
 71 | parser.write(
 72 |     "Xyz <script type='text/javascript'>const foo = '<<bar>>';</script>",
 73 | );
 74 | parser.end();
 75 | ```
 76 | 
 77 | Output (with multiple text events combined):
 78 | 
 79 | ```
 80 | --> Xyz
 81 | JS! Hooray!
 82 | --> const foo = '<<bar>>';
 83 | That's it?!
 84 | ```
 85 | 
 86 | This example only shows three of the possible events.
 87 | Read more about the parser, its events and options in the [wiki](https://github.com/fb55/htmlparser2/wiki/Parser-options).
 88 | 
 89 | ### Usage with streams
 90 | 
 91 | While the `Parser` interface closely resembles Node.js streams, it's not a 100% match.
 92 | Use the `WritableStream` interface to process a streaming input:
 93 | 
 94 | ```js
 95 | import { WritableStream } from "htmlparser2/WritableStream";
 96 | 
 97 | const parserStream = new WritableStream({
 98 |     ontext(text) {
 99 |         console.log("Streaming:", text);
100 |     },
101 | });
102 | 
103 | const htmlStream = fs.createReadStream("./my-file.html");
104 | htmlStream.pipe(parserStream).on("finish", () => console.log("done"));
105 | ```
106 | 
107 | ## Getting a DOM
108 | 
109 | The `DomHandler` produces a DOM (document object model) that can be manipulated using the [`DomUtils`](https://github.com/fb55/DomUtils) helper.
110 | 
111 | ```js
112 | import * as htmlparser2 from "htmlparser2";
113 | 
114 | const dom = htmlparser2.parseDocument(htmlString);
115 | ```
116 | 
117 | The `DomHandler`, while still bundled with this module, was moved to its [own module](https://github.com/fb55/domhandler).
118 | Have a look at that for further information.
119 | 
120 | ## Parsing Feeds
121 | 
122 | `htmlparser2` makes it easy to parse RSS, RDF and Atom feeds, by providing a `parseFeed` method:
123 | 
124 | ```javascript
125 | const feed = htmlparser2.parseFeed(content, options);
126 | ```
127 | 
128 | ## Performance
129 | 
130 | After having some artificial benchmarks for some time, **@AndreasMadsen** published his [`htmlparser-benchmark`](https://github.com/AndreasMadsen/htmlparser-benchmark), which benchmarks HTML parsers based on real-world websites.
131 | 
132 | At the time of writing, the latest versions of all supported parsers show the following performance characteristics on GitHub Actions (sourced from [here](https://github.com/AndreasMadsen/htmlparser-benchmark/blob/e78cd8fc6c2adac08deedd4f274c33537451186b/stats.txt)):
133 | 
134 | ```
135 | htmlparser2        : 2.17215 ms/file ± 3.81587
136 | node-html-parser   : 2.35983 ms/file ± 1.54487
137 | html5parser        : 2.43468 ms/file ± 2.81501
138 | neutron-html5parser: 2.61356 ms/file ± 1.70324
139 | htmlparser2-dom    : 3.09034 ms/file ± 4.77033
140 | html-dom-parser    : 3.56804 ms/file ± 5.15621
141 | libxmljs           : 4.07490 ms/file ± 2.99869
142 | htmljs-parser      : 6.15812 ms/file ± 7.52497
143 | parse5             : 9.70406 ms/file ± 6.74872
144 | htmlparser         : 15.0596 ms/file ± 89.0826
145 | html-parser        : 28.6282 ms/file ± 22.6652
146 | saxes              : 45.7921 ms/file ± 128.691
147 | html5              : 120.844 ms/file ± 153.944
148 | ```
149 | 
150 | ## How does this module differ from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
151 | 
152 | In 2011, this module started as a fork of the `htmlparser` module.
153 | `htmlparser2` was rewritten multiple times and, while it maintains an API that's mostly compatible with `htmlparser`, the projects don't share any code anymore.
154 | 
155 | The parser now provides a callback interface inspired by [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)).
156 | As a result, old handlers won't work anymore.
157 | 
158 | The `DefaultHandler` was renamed to `DomHandler` to clarify its purpose. The old name is still available when requiring `htmlparser2` and your code should work as expected.
159 | 
160 | The `RssHandler` was replaced with a `getFeed` function that takes a `DomHandler` DOM and returns a feed object. There is a `parseFeed` helper function that can be used to parse a feed from a string.
161 | 
162 | ## Security contact information
163 | 
164 | To report a security vulnerability, please use the [Tidelift security contact](https://tidelift.com/security).
165 | Tidelift will coordinate the fix and disclosure.
166 | 
167 | ## `htmlparser2` for enterprise
168 | 
169 | Available as part of the Tidelift Subscription.
170 | 
171 | The maintainers of `htmlparser2` and thousands of other packages are working with Tidelift to deliver commercial support and maintenance for the open source dependencies you use to build your applications. Save time, reduce risk, and improve code health, while paying the maintainers of the exact dependencies you use. [Learn more.](https://tidelift.com/subscription/pkg/npm-htmlparser2?utm_source=npm-htmlparser2&utm_medium=referral&utm_campaign=enterprise&utm_term=repo)
172 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | # Security Policy
 2 | 
 3 | ## Supported Versions
 4 | 
 5 | Only the current version is supported. Please make sure to update to the latest release.
 6 | 
 7 | ## Reporting a Vulnerability
 8 | 
 9 | To report a security vulnerability, please use the [Tidelift security contact](https://tidelift.com/security).
10 | Tidelift will coordinate the fix and disclosure.
11 | 


--------------------------------------------------------------------------------
/WritableStream.js:
--------------------------------------------------------------------------------
1 | // Re-export the CommonJS build so `htmlparser2/WritableStream` resolves in Node < 12
2 | // eslint-disable-next-line no-undef, unicorn/prefer-module
3 | module.exports = require("./dist/commonjs/WritableStream.js");
4 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
2 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "name": "htmlparser2",
  3 |     "version": "10.0.0",
  4 |     "description": "Fast & forgiving HTML/XML parser",
  5 |     "keywords": [
  6 |         "html",
  7 |         "parser",
  8 |         "streams",
  9 |         "xml",
 10 |         "dom",
 11 |         "rss",
 12 |         "feed",
 13 |         "atom"
 14 |     ],
 15 |     "repository": {
 16 |         "type": "git",
 17 |         "url": "git://github.com/fb55/htmlparser2.git"
 18 |     },
 19 |     "funding": [
 20 |         "https://github.com/fb55/htmlparser2?sponsor=1",
 21 |         {
 22 |             "type": "github",
 23 |             "url": "https://github.com/sponsors/fb55"
 24 |         }
 25 |     ],
 26 |     "license": "MIT",
 27 |     "author": "Felix Boehm <me@feedic.com>",
 28 |     "sideEffects": false,
 29 |     "type": "module",
 30 |     "exports": {
 31 |         ".": {
 32 |             "import": {
 33 |                 "types": "./dist/esm/index.d.ts",
 34 |                 "default": "./dist/esm/index.js"
 35 |             },
 36 |             "require": {
 37 |                 "types": "./dist/commonjs/index.d.ts",
 38 |                 "default": "./dist/commonjs/index.js"
 39 |             }
 40 |         },
 41 |         "./WritableStream": {
 42 |             "import": {
 43 |                 "types": "./dist/esm/WritableStream.d.ts",
 44 |                 "default": "./dist/esm/WritableStream.js"
 45 |             },
 46 |             "require": {
 47 |                 "types": "./dist/commonjs/WritableStream.d.ts",
 48 |                 "default": "./dist/commonjs/WritableStream.js"
 49 |             }
 50 |         }
 51 |     },
 52 |     "main": "./dist/commonjs/index.js",
 53 |     "module": "./dist/esm/index.js",
 54 |     "types": "./dist/commonjs/index.d.ts",
 55 |     "files": [
 56 |         "WritableStream.js",
 57 |         "dist",
 58 |         "src"
 59 |     ],
 60 |     "scripts": {
 61 |         "build": "tshy",
 62 |         "format": "npm run format:es && npm run format:prettier",
 63 |         "format:es": "npm run lint:es -- --fix",
 64 |         "format:prettier": "npm run format:prettier:raw -- --write",
 65 |         "format:prettier:raw": "prettier '**/*.{ts,md,json,yml}'",
 66 |         "lint": "npm run lint:es && npm run lint:ts && npm run lint:prettier",
 67 |         "lint:es": "eslint src",
 68 |         "lint:prettier": "npm run format:prettier:raw -- --check",
 69 |         "lint:ts": "tsc --noEmit",
 70 |         "prepare": "npm run build",
 71 |         "test": "npm run test:vi && npm run lint",
 72 |         "test:vi": "vitest run"
 73 |     },
 74 |     "prettier": {
 75 |         "tabWidth": 4
 76 |     },
 77 |     "dependencies": {
 78 |         "domelementtype": "^2.3.0",
 79 |         "domhandler": "^5.0.3",
 80 |         "domutils": "^3.2.2",
 81 |         "entities": "^6.0.0"
 82 |     },
 83 |     "devDependencies": {
 84 |         "@types/node": "^22.15.27",
 85 |         "@typescript-eslint/eslint-plugin": "^8.33.0",
 86 |         "@typescript-eslint/parser": "^8.32.1",
 87 |         "@vitest/coverage-v8": "^2.1.8",
 88 |         "eslint": "^8.57.1",
 89 |         "eslint-config-prettier": "^10.1.5",
 90 |         "eslint-plugin-n": "^17.18.0",
 91 |         "eslint-plugin-unicorn": "^56.0.1",
 92 |         "prettier": "^3.5.3",
 93 |         "tshy": "^3.0.2",
 94 |         "typescript": "^5.8.3",
 95 |         "vitest": "^2.0.2"
 96 |     },
 97 |     "tshy": {
 98 |         "exclude": [
 99 |             "**/*.spec.ts",
100 |             "**/__fixtures__/*",
101 |             "**/__tests__/*",
102 |             "**/__snapshots__/*"
103 |         ],
104 |         "exports": {
105 |             ".": "./src/index.ts",
106 |             "./WritableStream": "./src/WritableStream.ts"
107 |         }
108 |     }
109 | }
110 | 


--------------------------------------------------------------------------------
/src/FeedHandler.spec.ts:
--------------------------------------------------------------------------------
 1 | import fs from "node:fs/promises";
 2 | import { describe, it, expect } from "vitest";
 3 | import { parseFeed } from "./index.js";
 4 | 
  5 | const documents = new URL("__fixtures__/Documents/", import.meta.url);
  6 | // Fixture feeds are resolved against `documents`; each parsed result is compared to its snapshot.
  7 | describe("parseFeed", () => {
  8 |     it("(rssFeed)", async () =>
  9 |         expect(
 10 |             parseFeed(
 11 |                 await fs.readFile(
 12 |                     new URL("RSS_Example.xml", documents),
 13 |                     "utf8",
 14 |                 ),
 15 |             ),
 16 |         ).toMatchSnapshot());
 17 | 
 18 |     it("(atomFeed)", async () =>
 19 |         expect(
 20 |             parseFeed(
 21 |                 await fs.readFile(
 22 |                     new URL("Atom_Example.xml", documents),
 23 |                     "utf8",
 24 |                 ),
 25 |             ),
 26 |         ).toMatchSnapshot());
 27 | 
 28 |     it("(rdfFeed)", async () =>
 29 |         expect(
 30 |             parseFeed(
 31 |                 await fs.readFile(
 32 |                     new URL("RDF_Example.xml", documents),
 33 |                     "utf8",
 34 |                 ),
 35 |             ),
 36 |         ).toMatchSnapshot());
 37 | });
38 | 


--------------------------------------------------------------------------------
/src/Parser.events.spec.ts:
--------------------------------------------------------------------------------
  1 | import { describe, it, expect, vi } from "vitest";
  2 | import { Parser, type ParserOptions } from "./Parser.js";
  3 | import * as helper from "./__fixtures__/testHelper.js";
  4 | 
  5 | /**
  6 |  * Write to the parser twice, once char by char, once as a single blob. The
  7 |  * first collected result is snapshotted, the second has to equal the first.
  8 |  *
  9 |  * @internal
 10 |  * @param input Data to write.
 11 |  * @param options Parser options.
 12 |  * @returns Promise that resolves once the second result equals the first; rejects if the collector reports an error.
 13 |  */
 14 | function runTest(input: string, options?: ParserOptions) {
 15 |     let firstResult: unknown[] | undefined;
 16 | 
 17 |     return new Promise<void>((resolve, reject) => {
 18 |         const handler = helper.getEventCollector((error, actual) => {
 19 |             if (error) {
 20 |                 return reject(error);
 21 |             }
 22 | 
 23 |             if (firstResult) {
 24 |                 expect(actual).toEqual(firstResult);
 25 |                 resolve();
 26 |             } else {
 27 |                 firstResult = actual;
 28 |                 expect(actual).toMatchSnapshot();
 29 |             }
 30 |         });
 31 | 
 32 |         const parser = new Parser(handler, options);
 33 |         // First, feed the input one character at a time
 34 |         for (let index = 0; index < input.length; index++) {
 35 |             parser.write(input.charAt(index));
 36 |         }
 37 |         parser.end();
 38 |         // Then, parse the whole input in a single call
 39 |         parser.parseComplete(input);
 40 |     });
 41 | }
 42 | 
 43 | describe("Events", () => {
 44 |     it("simple", () => runTest("<h1 class=test>adsf</h1>"));
 45 | 
 46 |     it("Template script tags", () =>
 47 |         runTest(
 48 |             '<p><script type="text/template"><h1>Heading1</h1></script></p>',
 49 |         ));
 50 | 
 51 |     it("Lowercase tags", () =>
 52 |         runTest("<H1 class=test>adsf</H1>", { lowerCaseTags: true }));
 53 | 
 54 |     it("CDATA", () =>
 55 |         runTest("<tag><![CDATA[ asdf ><asdf></adsf><> fo]]></tag><![CD>", {
 56 |             xmlMode: true,
 57 |         }));
 58 | 
 59 |     it("CDATA (inside special)", () =>
 60 |         runTest(
 61 |             "<script>/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/</script>",
 62 |         ));
 63 | 
 64 |     it("leading lt", () => runTest(">a>"));
 65 | 
 66 |     it("end slash: void element ending with />", () =>
 67 |         runTest("<hr / ><p>Hold the line."));
 68 | 
 69 |     it("end slash: void element ending with >", () =>
 70 |         runTest("<hr   ><p>Hold the line."));
 71 | 
 72 |     it("end slash: void element ending with >, xmlMode=true", () =>
 73 |         runTest("<hr   ><p>Hold the line.", { xmlMode: true }));
 74 | 
 75 |     it("end slash: non-void element ending with />", () =>
 76 |         runTest("<xx / ><p>Hold the line."));
 77 | 
 78 |     it("end slash: non-void element ending with />, xmlMode=true", () =>
 79 |         runTest("<xx / ><p>Hold the line.", { xmlMode: true }));
 80 | 
 81 |     it("end slash: non-void element ending with />, recognizeSelfClosing=true", () =>
 82 |         runTest("<xx / ><p>Hold the line.", { recognizeSelfClosing: true }));
 83 | 
 84 |     it("end slash: as part of attrib value of void element", () =>
 85 |         runTest("<img src=gif.com/123/><p>Hold the line."));
 86 | 
 87 |     it("end slash: as part of attrib value of non-void element", () =>
 88 |         runTest("<a href=http://test.com/>Foo</a><p>Hold the line."));
 89 | 
 90 |     it("Implicit close tags", () =>
 91 |         runTest(
 92 |             "<ol><li class=test><div><table style=width:100%><tr><th>TH<td colspan=2><h3>Heading</h3><tr><td><div>Div</div><td><div>Div2</div></table></div><li><div><h3>Heading 2</h3></div></li></ol><p>Para<h4>Heading 4</h4><p><ul><li>Hi<li>bye</ul>",
 93 |         ));
 94 | 
 95 |     it("attributes (no white space, no value, no quotes)", () =>
 96 |         runTest(
 97 |             '<button class="test0"title="test1" disabled value=test2>adsf</button>',
 98 |         ));
 99 | 
100 |     it("crazy attribute", () => runTest("<p < = '' FAIL>stuff</p><a"));
101 | 
102 |     it("Scripts creating other scripts", () =>
103 |         runTest("<p><script>var str = '<script></'+'script>';</script></p>"));
104 | 
105 |     it("Long comment ending", () =>
106 |         runTest("<meta id='before'><!-- text ---><meta id='after'>"));
107 | 
108 |     it("Long CDATA ending", () =>
109 |         runTest("<before /><tag><![CDATA[ text ]]]></tag><after />", {
110 |             xmlMode: true,
111 |         }));
112 | 
113 |     it("Implicit open p and br tags", () =>
114 |         runTest("<div>Hallo</p>World</br></ignore></div></p></br>"));
115 | 
116 |     it("lt followed by whitespace", () => runTest("a < b"));
117 | 
118 |     it("double attribute", () => runTest("<h1 class=test class=boo></h1>"));
119 | 
120 |     it("numeric entities", () =>
121 |         runTest("&#x61;&#x62&#99;&#100&#x66g&#x;&#x68"));
122 | 
123 |     it("legacy entities", () => runTest("&AMPel&iacutee&ampeer;s&lter&sum"));
124 | 
125 |     it("named entities", () =>
126 |         runTest("&amp;el&lt;er&CounterClockwiseContourIntegral;foo&bar"));
127 | 
128 |     it("xml entities", () =>
129 |         runTest("&amp;&gt;&amp&lt;&uuml;&#x61;&#x62&#99;&#100&#101", {
130 |             xmlMode: true,
131 |         }));
132 | 
133 |     it("entity in attribute", () =>
134 |         runTest(
135 |             "<a href='http://example.com/p&#x61;#x61ge?param=value&param2&param3=&lt;val&; & &'>",
136 |         ));
137 | 
138 |     it("double brackets", () =>
139 |         runTest("<<princess-purpose>>testing</princess-purpose>"));
140 | 
141 |     it("legacy entities fail", () => runTest("M&M"));
142 | 
143 |     it("Special special tags", () =>
144 |         runTest(
145 |             "<tItLe><b>foo</b><title></TiTlE><sitle><b></b></sitle><ttyle><b></b></ttyle><sCriPT></scripter</soo</sCript><STyLE></styler</STylE><sCiPt><stylee><scriptee><soo>",
146 |         ));
147 | 
148 |     it("Empty tag name", () => runTest("< ></ >"));
149 | 
150 |     it("Not quite closed", () => runTest("<foo /bar></foo bar>"));
151 | 
152 |     it("Entities in attributes", () =>
153 |         runTest("<foo bar=&amp; baz=\"&amp;\" boo='&amp;' noo=>"));
154 | 
155 |     it("CDATA in HTML", () => runTest("<![CDATA[ foo ]]>"));
156 | 
157 |     it("Comment edge-cases", () => runTest("<!-foo><!-- --- --><!--foo"));
158 | 
159 |     it("CDATA edge-cases", () =>
160 |         runTest("<![CDATA><![CDATA[[]]sdaf]]><![CDATA[foo", {
161 |             recognizeCDATA: true,
162 |         }));
163 | 
164 |     it("Comment false ending", () => runTest("<!-- a-b-> -->"));
165 | 
166 |     it("Scripts ending with <", () => runTest("<script><</script>"));
167 | 
168 |     it("CDATA more edge-cases", () =>
169 |         runTest("<![CDATA[foo]bar]>baz]]>", { recognizeCDATA: true }));
170 | 
171 |     it("tag names are not ASCII alpha", () => runTest("<12>text</12>"));
172 | 
173 |     it("open-implies-close case of (non-br) void close tag in non-XML mode", () =>
174 |         runTest("<select><input></select>", { lowerCaseAttributeNames: true }));
175 | 
176 |     it("entity in attribute (#276)", () =>
177 |         runTest(
178 |             '<img src="?&image_uri=1&&image;=2&image=3"/>?&image_uri=1&&image;=2&image=3',
179 |         ));
180 | 
181 |     it("entity in title (#592)", () => runTest("<title>the &quot;title&quot"));
182 | 
183 |     it("entity in title - decodeEntities=false (#592)", () =>
184 |         runTest("<title>the &quot;title&quot;", { decodeEntities: false }));
185 | 
186 |     it("</title> in <script> (#745)", () =>
187 |         runTest("<script>'</title>'</script>"));
188 | 
189 |     it("XML tags", () => runTest("<:foo><_bar>", { xmlMode: true }));
190 | 
191 |     it("Trailing legacy entity", () => runTest("&timesbar;&timesbar"));
192 | 
193 |     it("Trailing numeric entity", () => runTest("&#53&#53"));
194 | 
195 |     it("Multi-byte entity", () => runTest("&NotGreaterFullEqual;"));
196 | 
197 |     it("Start & end indices from domhandler", () =>
198 |         runTest(
199 |             "<!DOCTYPE html> <html> <title>The Title</title> <body class='foo'>Hello world <p></p></body> <!-- the comment --> </html> ",
200 |         ));
201 | 
202 |     it("Self-closing indices (#941)", () =>
203 |         runTest("<xml><a/><b/></xml>", { xmlMode: true }));
204 | 
205 |     it("Entity after <", () => runTest("<&amp;"));
206 | 
207 |     it("Attribute in XML (see #1350)", () =>
208 |         runTest(
209 |             '<Page\n    title="Hello world"\n    actionBarVisible="false"/>',
210 |             { xmlMode: true },
211 |         ));
212 | });
213 | 
214 | describe("Helper", () => {
215 |     it("should handle errors", () => {
216 |         const onEvents = vi.fn();
217 |         const p = new Parser(helper.getEventCollector(onEvents));
218 |         // Ending first turns the following write into an error.
219 |         p.end();
220 |         p.write("foo");
221 |         // One call for the empty document, one for the late write.
222 |         expect(onEvents).toHaveBeenCalledTimes(2);
223 |         expect(onEvents).toHaveBeenNthCalledWith(1, null, []);
224 |         expect(onEvents).toHaveBeenLastCalledWith(
225 |             new Error(".write() after done!"),
226 |         );
227 |     });
228 | });
229 | 


--------------------------------------------------------------------------------
/src/Parser.spec.ts:
--------------------------------------------------------------------------------
  1 | import { describe, it, expect, vi } from "vitest";
  2 | import { Parser, Tokenizer } from "./index.js";
  3 | import type { Handler } from "./Parser.js";
  4 | 
  5 | describe("API", () => {
  6 |     it("should work without callbacks", () => {
  7 |         const cbs: Partial<Handler> = { onerror: vi.fn() };
  8 |         const p = new Parser(cbs, {
  9 |             xmlMode: true,
 10 |             lowerCaseAttributeNames: true,
 11 |         });
 12 | 
 13 |         p.end("<a foo><bar></a><!-- --><![CDATA[]]]><?foo?><!bar><boo/>boohay");
 14 |         p.write("foo");
 15 | 
 16 |         // Both `write` and `end` report an error once the parser has ended
 17 |         p.end();
 18 |         p.write("foo");
 19 |         expect(cbs.onerror).toHaveBeenLastCalledWith(
 20 |             new Error(".write() after done!"),
 21 |         );
 22 |         p.end();
 23 |         expect(cbs.onerror).toHaveBeenLastCalledWith(
 24 |             new Error(".end() after done!"),
 25 |         );
 26 | 
 27 |         // Should ignore the error if there is no callback
 28 |         delete cbs.onerror;
 29 |         p.write("foo");
 30 | 
 31 |         p.reset();
 32 | 
 33 |         // Callbacks may be added and removed while a tag is being parsed
 34 |         cbs.onopentag = vi.fn();
 35 |         p.write("<a foo");
 36 |         delete cbs.onopentag;
 37 |         p.write(">");
 38 | 
 39 |         // While paused, written data is held back and only emitted on resume
 40 |         const onText = vi.fn();
 41 |         cbs.ontext = onText;
 42 |         p.pause();
 43 |         p.write("foo");
 44 |         expect(onText).not.toHaveBeenCalled();
 45 |         p.resume();
 46 |         expect(onText).toHaveBeenLastCalledWith("foo");
 47 |         p.pause();
 48 |         expect(onText).toHaveBeenCalledTimes(1);
 49 |         p.resume();
 50 |         expect(onText).toHaveBeenCalledTimes(1);
 51 |         p.pause();
 52 |         p.end("bar");
 53 |         expect(onText).toHaveBeenCalledTimes(1);
 54 |         p.resume();
 55 |         expect(onText).toHaveBeenCalledTimes(2);
 56 |         expect(onText).toHaveBeenLastCalledWith("bar");
 57 |     });
 58 | 
 59 |     it("should back out of numeric entities (#125)", () => {
 60 |         const onend = vi.fn();
 61 |         let text = "";
 62 |         const p = new Parser({
 63 |             ontext(data) {
 64 |                 text += data;
 65 |             },
 66 |             onend,
 67 |         });
 68 | 
 69 |         p.end("id=770&#anchor");
 70 | 
 71 |         expect(onend).toHaveBeenCalledTimes(1);
 72 |         expect(text).toBe("id=770&#anchor");
 73 | 
 74 |         p.reset();
 75 |         text = "";
 76 | 
 77 |         p.end("0&#xn");
 78 | 
 79 |         expect(onend).toHaveBeenCalledTimes(2);
 80 |         expect(text).toBe("0&#xn");
 81 |     });
 82 | 
 83 |     it("should not have the start index be greater than the end index", () => {
 84 |         const onopentag = vi.fn();
 85 |         const onclosetag = vi.fn();
 86 | 
 87 |         const p = new Parser({
 88 |             onopentag(tag) {
 89 |                 expect(p.startIndex).toBeLessThanOrEqual(p.endIndex);
 90 |                 onopentag(tag, p.startIndex, p.endIndex);
 91 |             },
 92 |             onclosetag(tag) {
 93 |                 expect(p.startIndex).toBeLessThanOrEqual(p.endIndex);
 94 |                 onclosetag(tag, p.endIndex);
 95 |             },
 96 |         });
 97 | 
 98 |         p.write("<p>");
 99 | 
100 |         expect(onopentag).toHaveBeenLastCalledWith("p", 0, 2);
101 |         expect(onclosetag).not.toHaveBeenCalled();
102 | 
103 |         p.write("Foo");
104 | 
105 |         p.write("<hr>");
106 |         // Opening `hr` implicitly closes `p`; `hr` itself closes right away
107 |         expect(onopentag).toHaveBeenLastCalledWith("hr", 6, 9);
108 |         expect(onclosetag).toHaveBeenCalledTimes(2);
109 |         expect(onclosetag).toHaveBeenNthCalledWith(1, "p", 9);
110 |         expect(onclosetag).toHaveBeenNthCalledWith(2, "hr", 9);
111 |     });
112 | 
113 |     it("should update the position when a single tag is spread across multiple chunks", () => {
114 |         let called = false;
115 |         const p = new Parser({
116 |             onopentag() {
117 |                 called = true;
118 |                 expect(p.startIndex).toBe(0);
119 |                 expect(p.endIndex).toBe(12);
120 |             },
121 |         });
122 | 
123 |         p.write("<div ");
124 |         p.write("foo=bar>");
125 | 
126 |         expect(called).toBe(true);
127 |     });
128 | 
129 |     it("should have the correct position for implied opening tags", () => {
130 |         let called = false;
131 |         const p = new Parser({
132 |             onopentag() {
133 |                 called = true;
134 |                 expect(p.startIndex).toBe(0);
135 |                 expect(p.endIndex).toBe(3);
136 |             },
137 |         });
138 | 
139 |         p.write("</p>");
140 |         expect(called).toBe(true);
141 |     });
142 | 
143 |     it("should parse <__proto__> (#387)", () => {
144 |         const p = new Parser(null);
145 | 
146 |         // Should not throw
147 |         p.parseChunk("<__proto__>");
148 |     });
149 | 
150 |     it("should support custom tokenizer", () => {
151 |         class CustomTokenizer extends Tokenizer {}
152 | 
153 |         const p = new Parser(
154 |             {
155 |                 onparserinit(parser: Parser) {
156 |                     // @ts-expect-error Accessing private tokenizer here
157 |                     expect(parser.tokenizer).toBeInstanceOf(CustomTokenizer);
158 |                 },
159 |             },
160 |             { Tokenizer: CustomTokenizer },
161 |         );
162 |         p.done();
163 |     });
164 | });
165 | 


--------------------------------------------------------------------------------
/src/Parser.ts:
--------------------------------------------------------------------------------
  1 | import Tokenizer, { type Callbacks, QuoteType } from "./Tokenizer.js";
  2 | import { fromCodePoint } from "entities/decode";
  3 | // Tag sets shared by several entries of `openImpliesClose`.
  4 | const formTags = new Set([
  5 |     "input",
  6 |     "option",
  7 |     "optgroup",
  8 |     "select",
  9 |     "button",
 10 |     "datalist",
 11 |     "textarea",
 12 | ]);
 13 | const pTag = new Set(["p"]);
 14 | const tableSectionTags = new Set(["thead", "tbody"]);
 15 | const ddtTags = new Set(["dd", "dt"]);
 16 | const rtpTags = new Set(["rt", "rp"]);
 17 | // In HTML mode, opening a key tag closes listed tags on top of the stack.
 18 | const openImpliesClose = new Map<string, Set<string>>([
 19 |     ["tr", new Set(["tr", "th", "td"])],
 20 |     ["th", new Set(["th"])],
 21 |     ["td", new Set(["thead", "th", "td"])],
 22 |     ["body", new Set(["head", "link", "script"])],
 23 |     ["li", new Set(["li"])],
 24 |     ["p", pTag],
 25 |     ["h1", pTag],
 26 |     ["h2", pTag],
 27 |     ["h3", pTag],
 28 |     ["h4", pTag],
 29 |     ["h5", pTag],
 30 |     ["h6", pTag],
 31 |     ["select", formTags],
 32 |     ["input", formTags],
 33 |     ["output", formTags],
 34 |     ["button", formTags],
 35 |     ["datalist", formTags],
 36 |     ["textarea", formTags],
 37 |     ["option", new Set(["option"])],
 38 |     ["optgroup", new Set(["optgroup", "option"])],
 39 |     ["dd", ddtTags],
 40 |     ["dt", ddtTags],
 41 |     ["address", pTag],
 42 |     ["article", pTag],
 43 |     ["aside", pTag],
 44 |     ["blockquote", pTag],
 45 |     ["details", pTag],
 46 |     ["div", pTag],
 47 |     ["dl", pTag],
 48 |     ["fieldset", pTag],
 49 |     ["figcaption", pTag],
 50 |     ["figure", pTag],
 51 |     ["footer", pTag],
 52 |     ["form", pTag],
 53 |     ["header", pTag],
 54 |     ["hr", pTag],
 55 |     ["main", pTag],
 56 |     ["nav", pTag],
 57 |     ["ol", pTag],
 58 |     ["pre", pTag],
 59 |     ["section", pTag],
 60 |     ["table", pTag],
 61 |     ["ul", pTag],
 62 |     ["rt", rtpTags],
 63 |     ["rp", rtpTags],
 64 |     ["tbody", tableSectionTags],
 65 |     ["tfoot", tableSectionTags],
 66 | ]);
 67 | // HTML elements that are never pushed onto the stack (see `isVoidElement`).
 68 | const voidElements = new Set([
 69 |     "area",
 70 |     "base",
 71 |     "basefont",
 72 |     "br",
 73 |     "col",
 74 |     "command",
 75 |     "embed",
 76 |     "frame",
 77 |     "hr",
 78 |     "img",
 79 |     "input",
 80 |     "isindex",
 81 |     "keygen",
 82 |     "link",
 83 |     "meta",
 84 |     "param",
 85 |     "source",
 86 |     "track",
 87 |     "wbr",
 88 | ]);
 89 | // In HTML mode, opening one of these pushes `true` onto `foreignContext`.
 90 | const foreignContextElements = new Set(["math", "svg"]);
 91 | // In HTML mode, opening one of these pushes `false` onto `foreignContext`.
 92 | const htmlIntegrationElements = new Set([
 93 |     "mi",
 94 |     "mo",
 95 |     "mn",
 96 |     "ms",
 97 |     "mtext",
 98 |     "annotation-xml",
 99 |     "foreignobject",
100 |     "desc",
101 |     "title",
102 | ]);
103 | 
104 | export interface ParserOptions {
105 |     /**
106 |      * Treats the input as XML. In XML mode, special tags (`<script>`, `<style>`, and `<title>`)
107 |      * get no special treatment and "empty" tags (e.g. `<br>`) can have children. If `false`,
108 |      * the content of special tags will be text only. For feeds and other XML content
109 |      * (documents that don't consist of HTML), set this to `true`.
110 |      *
111 |      * @default false
112 |      */
113 |     xmlMode?: boolean;
114 | 
115 |     /**
116 |      * Decode entities within the document.
117 |      *
118 |      * @default true
119 |      */
120 |     decodeEntities?: boolean;
121 | 
122 |     /**
123 |      * If set to `true`, all tag names will be lowercased.
124 |      *
125 |      * @default !xmlMode
126 |      */
127 |     lowerCaseTags?: boolean;
128 | 
129 |     /**
130 |      * If set to `true`, all attribute names will be lowercased. This has noticeable impact on speed.
131 |      *
132 |      * @default !xmlMode
133 |      */
134 |     lowerCaseAttributeNames?: boolean;
135 | 
136 |     /**
137 |      * If set to `true`, CDATA sections will be recognized as text even if the `xmlMode` option is not enabled.
138 |      * NOTE: If `xmlMode` is set to `true` then CDATA sections will always be recognized as text.
139 |      *
140 |      * @default xmlMode
141 |      */
142 |     recognizeCDATA?: boolean;
143 | 
144 |     /**
145 |      * If set to `true`, self-closing tags will trigger the `onclosetag` event even if `xmlMode` is not set to `true`.
146 |      * NOTE: If `xmlMode` is set to `true` then self-closing tags will always be recognized.
147 |      *
148 |      * @default xmlMode
149 |      */
150 |     recognizeSelfClosing?: boolean;
151 | 
152 |     /**
153 |      * Tokenizer class to instantiate in place of the default tokenizer.
154 |      */
155 |     Tokenizer?: typeof Tokenizer;
156 | }
157 | 
158 | export interface Handler {
159 |     onparserinit(parser: Parser): void;
160 | 
161 |     /**
162 |      * Resets the handler back to its starting state.
163 |      */
164 |     onreset(): void;
165 | 
166 |     /**
167 |      * Signals the handler that parsing is done.
168 |      */
169 |     onend(): void;
170 |     onerror(error: Error): void;
171 |     onclosetag(name: string, isImplied: boolean): void;
172 |     onopentagname(name: string): void;
173 |     /**
174 |      * Called once for every attribute, after its value is complete.
175 |      * @param name Name of the attribute.
176 |      * @param value Value of the attribute.
177 |      * @param quote Quotes used around the attribute. `null` if the attribute has no quotes around the value, `undefined` if the attribute has no value.
178 |      */
179 |     onattribute(
180 |         name: string,
181 |         value: string,
182 |         quote?: string | undefined | null,
183 |     ): void;
184 |     onopentag(
185 |         name: string,
186 |         attribs: { [s: string]: string },
187 |         isImplied: boolean,
188 |     ): void;
189 |     ontext(data: string): void;
190 |     oncomment(data: string): void;
191 |     oncdatastart(): void;
192 |     oncdataend(): void;
193 |     oncommentend(): void;
194 |     onprocessinginstruction(name: string, data: string): void;
195 | }
196 | // Matches the first whitespace character or slash, which ends a name.
197 | const reNameEnd = /\s|\//;
198 | 
199 | export class Parser implements Callbacks {
200 |     /** The start index of the last event. */
201 |     public startIndex = 0;
202 |     /** The end index of the last event. */
203 |     public endIndex = 0;
204 |     /**
205 |      * Store the start index of the current open tag,
206 |      * so we can update the start index for attributes.
207 |      */
208 |     private openTagStart = 0;
209 | 
210 |     private tagname = "";
211 |     private attribname = "";
212 |     private attribvalue = "";
213 |     private attribs: null | { [key: string]: string } = null;
214 |     private readonly stack: string[] = [];
215 |     /** Stack of foreign-context flags; its top entry determines whether self-closing tags are recognized. */
216 |     private readonly foreignContext: boolean[];
217 |     private readonly cbs: Partial<Handler>;
218 |     private readonly lowerCaseTagNames: boolean;
219 |     private readonly lowerCaseAttributeNames: boolean;
220 |     private readonly recognizeSelfClosing: boolean;
221 |     /** We are parsing HTML. Inverse of the `xmlMode` option. */
222 |     private readonly htmlMode: boolean;
223 |     private readonly tokenizer: Tokenizer;
224 | 
225 |     private readonly buffers: string[] = [];
226 |     private bufferOffset = 0;
227 |     /** The index of the next buffer to pass to the tokenizer. Used when resuming after a `pause()`. */
228 |     private writeIndex = 0;
229 |     /** Indicates whether the parser has finished running / `.end` has been called. */
230 |     private ended = false;
231 | 
232 |     constructor(
233 |         cbs?: Partial<Handler> | null,
234 |         private readonly options: ParserOptions = {},
235 |     ) {
236 |         const TokenizerClass = options.Tokenizer ?? Tokenizer;
237 |         const isHtml = !options.xmlMode;
238 |         this.cbs = cbs ?? {};
239 |         this.htmlMode = isHtml;
240 |         // Name lowercasing is on by default for HTML and off for XML.
241 |         this.lowerCaseTagNames = options.lowerCaseTags ?? isHtml;
242 |         this.lowerCaseAttributeNames =
243 |             options.lowerCaseAttributeNames ?? isHtml;
244 |         this.recognizeSelfClosing = options.recognizeSelfClosing ?? !isHtml;
245 |         this.tokenizer = new TokenizerClass(options, this);
246 |         // The bottom entry of the foreign-context stack is `true` for XML.
247 |         this.foreignContext = [!isHtml];
248 |         this.cbs.onparserinit?.(this);
249 |     }
250 | 
251 |     // Tokenizer event handlers
252 | 
253 |     /** Emits the input between `start` and `endIndex` as text. @internal */
254 |     ontext(start: number, endIndex: number): void {
255 |         const text = this.getSlice(start, endIndex);
256 |         this.endIndex = endIndex - 1;
257 |         if (this.cbs.ontext) this.cbs.ontext(text);
258 |         this.startIndex = endIndex;
259 |     }
260 | 
261 |     /** Emits a decoded entity code point as a text event. @internal */
262 |     ontextentity(cp: number, endIndex: number): void {
263 |         this.endIndex = endIndex - 1;
264 |         if (this.cbs.ontext) this.cbs.ontext(fromCodePoint(cp));
265 |         this.startIndex = endIndex;
266 |     }
267 | 
268 |     /**
269 |      * Tells whether `name` is a void element; always `false` in XML mode.
270 |      * Subclasses can override this to recognize additional void elements.
271 |      */
272 |     protected isVoidElement(name: string): boolean {
273 |         return this.htmlMode ? voidElements.has(name) : false;
274 |     }
275 | 
276 |     /**
277 |      * Reads the name of an opening tag, lowercases it when configured to,
278 |      * and starts a new open tag for it.
279 |      *
280 |      * @internal
281 |      */
282 |     onopentagname(start: number, endIndex: number): void {
283 |         this.endIndex = endIndex;
284 |         const raw = this.getSlice(start, endIndex);
285 | 
286 |         this.emitOpenTag(this.lowerCaseTagNames ? raw.toLowerCase() : raw);
287 |     }
288 | 
289 |     /** Starts a new open tag, closing any tags this one implicitly ends. */
290 |     private emitOpenTag(name: string) {
291 |         this.openTagStart = this.startIndex;
292 |         this.tagname = name;
293 | 
294 |         const closes = this.htmlMode ? openImpliesClose.get(name) : undefined;
295 | 
296 |         // Implicitly close matching tags from the top of the stack.
297 |         while (closes && this.stack.length > 0 && closes.has(this.stack[0])) {
298 |             const closed = this.stack.shift()!;
299 |             this.cbs.onclosetag?.(closed, true);
300 |         }
301 | 
302 |         if (!this.isVoidElement(name)) {
303 |             this.stack.unshift(name);
304 | 
305 |             // Entering `svg`/`math` or an integration point changes context.
306 |             if (this.htmlMode && foreignContextElements.has(name)) {
307 |                 this.foreignContext.unshift(true);
308 |             } else if (this.htmlMode && htmlIntegrationElements.has(name)) {
309 |                 this.foreignContext.unshift(false);
310 |             }
311 |         }
312 |         this.cbs.onopentagname?.(name);
313 |         if (this.cbs.onopentag) this.attribs = {};
314 |     }
315 | 
316 |     private endOpenTag(isImplied: boolean) {
317 |         // Events for the open tag are reported from where the tag started.
318 |         this.startIndex = this.openTagStart;
319 |         if (this.attribs !== null) {
320 |             this.cbs.onopentag?.(this.tagname, this.attribs, isImplied);
321 |             this.attribs = null;
322 |         }
323 |         // Void elements get their close event immediately.
324 |         if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
325 |             this.cbs.onclosetag(this.tagname, true);
326 |         }
327 |         this.tagname = "";
328 |     }
329 | 
330 |     /** Completes the current open tag, which ends at `endIndex`. @internal */
331 |     onopentagend(endIndex: number): void {
332 |         this.endIndex = endIndex;
333 |         this.endOpenTag(false);
334 | 
335 |         // Set `startIndex` for next node
336 |         this.startIndex = endIndex + 1;
337 |     }
338 | 
339 |     /** Handles a closing tag and emits the resulting close events. @internal */
340 |     onclosetag(start: number, endIndex: number): void {
341 |         this.endIndex = endIndex;
342 | 
343 |         let name = this.getSlice(start, endIndex);
344 | 
345 |         if (this.lowerCaseTagNames) {
346 |             name = name.toLowerCase();
347 |         }
348 |         // Closing one of these elements pops an entry from `foreignContext`.
349 |         if (
350 |             this.htmlMode &&
351 |             (foreignContextElements.has(name) ||
352 |                 htmlIntegrationElements.has(name))
353 |         ) {
354 |             this.foreignContext.shift();
355 |         }
356 |         // NOTE(review): the shift above also runs for unmatched close tags.
357 |         if (!this.isVoidElement(name)) {
358 |             const pos = this.stack.indexOf(name);
359 |             if (pos !== -1) {
360 |                 for (let index = 0; index <= pos; index++) {
361 |                     const element = this.stack.shift()!;
362 |                     // We know the stack has sufficient elements.
363 |                     this.cbs.onclosetag?.(element, index !== pos);
364 |                 }
365 |             } else if (this.htmlMode && name === "p") {
366 |                 // Implicit open before close
367 |                 this.emitOpenTag("p");
368 |                 this.closeCurrentTag(true);
369 |             }
370 |         } else if (this.htmlMode && name === "br") {
371 |             // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
372 |             this.cbs.onopentagname?.("br");
373 |             this.cbs.onopentag?.("br", {}, true);
374 |             this.cbs.onclosetag?.("br", false);
375 |         }
376 | 
377 |         // Set `startIndex` for next node
378 |         this.startIndex = endIndex + 1;
379 |     }
380 | 
381 |     /** Handles `/>`, closing the tag if self-closing applies. @internal */
382 |     onselfclosingtag(endIndex: number): void {
383 |         this.endIndex = endIndex;
384 |         if (!this.recognizeSelfClosing && !this.foreignContext[0]) {
385 |             // Ignore the fact that the tag is self-closing.
386 |             this.onopentagend(endIndex);
387 |             return;
388 |         }
389 |         this.closeCurrentTag(false);
390 | 
391 |         // Set `startIndex` for next node
392 |         this.startIndex = endIndex + 1;
393 |     }
394 | 
395 |     private closeCurrentTag(isOpenImplied: boolean) {
396 |         // `endOpenTag` clears `tagname`, so remember it first.
397 |         const closing = this.tagname;
398 |         this.endOpenTag(isOpenImplied);
399 | 
400 |         // Only a tag that is on top of the stack can be closed here.
401 |         if (this.stack[0] !== closing) return;
402 |         // An explicit open tag means the close tag is the implied one.
403 |         this.cbs.onclosetag?.(closing, !isOpenImplied);
404 |         this.stack.shift();
405 |     }
406 | 
407 |     /** Stores the attribute name, lowercased if configured. @internal */
408 |     onattribname(start: number, endIndex: number): void {
409 |         this.startIndex = start;
410 |         let name = this.getSlice(start, endIndex);
411 |         if (this.lowerCaseAttributeNames) {
412 |             name = name.toLowerCase();
413 |         }
414 |         this.attribname = name;
415 |     }
416 | 
417 |     /** Appends raw input to the current attribute value. @internal */
418 |     onattribdata(start: number, endIndex: number): void {
419 |         this.attribvalue += this.getSlice(start, endIndex);
420 |     }
421 | 
422 |     /** Appends a decoded entity to the current attribute value. @internal */
423 |     onattribentity(cp: number): void {
424 |         this.attribvalue += fromCodePoint(cp);
425 |     }
426 | 
427 |     /** Emits the finished attribute and stores it once. @internal */
428 |     onattribend(quote: QuoteType, endIndex: number): void {
429 |         this.endIndex = endIndex;
430 | 
431 |         if (this.cbs.onattribute) {
432 |             // `undefined` means no value, `null` means an unquoted value.
433 |             let quoteChar: string | undefined | null = null;
434 |             if (quote === QuoteType.Double) quoteChar = '"';
435 |             else if (quote === QuoteType.Single) quoteChar = "'";
436 |             else if (quote === QuoteType.NoValue) quoteChar = undefined;
437 | 
438 |             this.cbs.onattribute(this.attribname, this.attribvalue, quoteChar);
439 |         }
440 | 
441 |         // Only the first occurrence of a repeated attribute is stored.
442 |         if (
443 |             this.attribs &&
444 |             !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)
445 |         ) {
446 |             this.attribs[this.attribname] = this.attribvalue;
447 |         }
448 |         // Start the next attribute value from scratch.
449 |         this.attribvalue = "";
450 |     }
451 | 
452 |     /**
453 |      * Extracts the instruction name: the part of `value` before the first
454 |      * whitespace or slash, lowercased when tag names are lowercased.
455 |      */
456 |     private getInstructionName(value: string) {
457 |         const index = value.search(reNameEnd);
458 |         // `slice` is the non-deprecated equivalent of `substr(0, index)` here.
459 |         const name = index < 0 ? value : value.slice(0, index);
460 |         return this.lowerCaseTagNames ? name.toLowerCase() : name;
461 |     }
462 | 
463 |     /** Reports a declaration as a `!`-prefixed instruction. @internal */
464 |     ondeclaration(start: number, endIndex: number): void {
465 |         this.endIndex = endIndex;
466 |         const declaration = this.getSlice(start, endIndex);
467 | 
468 |         if (this.cbs.onprocessinginstruction) {
469 |             const name = `!${this.getInstructionName(declaration)}`;
470 |             this.cbs.onprocessinginstruction(name, `!${declaration}`);
471 |         }
472 | 
473 |         // Set `startIndex` for next node
474 |         this.startIndex = endIndex + 1;
475 |     }
476 | 
477 |     /** Reports a processing instruction, prefixed with `?`. @internal */
478 |     onprocessinginstruction(start: number, endIndex: number): void {
479 |         this.endIndex = endIndex;
480 |         const instruction = this.getSlice(start, endIndex);
481 | 
482 |         if (this.cbs.onprocessinginstruction) {
483 |             const name = `?${this.getInstructionName(instruction)}`;
484 |             this.cbs.onprocessinginstruction(name, `?${instruction}`);
485 |         }
486 | 
487 |         // Set `startIndex` for next node
488 |         this.startIndex = endIndex + 1;
489 |     }
490 | 
491 |     /** Emits a comment, leaving out its last `offset` characters. @internal */
492 |     oncomment(start: number, endIndex: number, offset: number): void {
493 |         this.endIndex = endIndex;
494 |         if (this.cbs.oncomment) {
495 |             this.cbs.oncomment(this.getSlice(start, endIndex - offset));
496 |         }
497 |         this.cbs.oncommentend?.();
498 |         // Set `startIndex` for next node
499 |         this.startIndex = endIndex + 1;
500 |     }
501 | 
502 |     /** Emits a CDATA section as text or, if unrecognized, a comment. @internal */
503 |     oncdata(start: number, endIndex: number, offset: number): void {
504 |         this.endIndex = endIndex;
505 |         const text = this.getSlice(start, endIndex - offset);
506 |         const asComment = this.htmlMode && !this.options.recognizeCDATA;
507 |         if (asComment) {
508 |             // Unrecognized CDATA is reported as a comment instead.
509 |             this.cbs.oncomment?.(`[CDATA[${text}]]`);
510 |             this.cbs.oncommentend?.();
511 |         } else {
512 |             this.cbs.oncdatastart?.();
513 |             this.cbs.ontext?.(text);
514 |             this.cbs.oncdataend?.();
515 |         }
516 |         // Set `startIndex` for next node
517 |         this.startIndex = endIndex + 1;
518 |     }
519 | 
520 |     /** Reports every tag left on the stack as closed, then the end. @internal */
521 |     onend(): void {
522 |         if (this.cbs.onclosetag) {
523 |             // Set the end index for all remaining tags
524 |             this.endIndex = this.startIndex;
525 |             for (const name of this.stack) {
526 |                 this.cbs.onclosetag(name, true);
527 |             }
528 |         }
529 |         this.cbs.onend?.();
530 |     }
531 | 
532 |     /** Resets the parser to a blank state, ready to parse a new document. */
533 |     public reset(): void {
534 |         this.cbs.onreset?.();
535 |         this.tokenizer.reset();
536 |         this.tagname = "";
537 |         this.attribname = "";
538 |         // Drop a half-parsed attribute value so it cannot leak into new input.
539 |         this.attribvalue = "";
540 |         this.attribs = null;
541 |         this.stack.length = 0;
542 |         this.startIndex = 0;
543 |         this.endIndex = 0;
544 |         this.cbs.onparserinit?.(this);
545 |         this.buffers.length = 0;
546 |         this.foreignContext.length = 0;
547 |         this.foreignContext.unshift(!this.htmlMode);
548 |         this.bufferOffset = 0;
549 |         this.writeIndex = 0;
550 |         this.ended = false;
551 |     }
552 | 
553 |     /**
554 |      * Resets the parser, then parses `data` as one complete document:
555 |      * the data is written and the parser is ended in a single call.
556 |      *
557 |      * @param data Document to parse.
558 |      */
559 |     public parseComplete(data: string): void {
560 |         this.reset();
561 |         this.end(data);
562 |     }
563 |     /** Returns the input between the absolute offsets `start` and `end`. */
564 |     private getSlice(start: number, end: number) {
565 |         while (start - this.bufferOffset >= this.buffers[0].length) {
566 |             this.shiftBuffer();
567 |         }
568 |         // Take the part of the range that lies in the first remaining buffer.
569 |         let slice = this.buffers[0].slice(
570 |             start - this.bufferOffset,
571 |             end - this.bufferOffset,
572 |         );
573 |         // Append from following buffers while the range extends past this one.
574 |         while (end - this.bufferOffset > this.buffers[0].length) {
575 |             this.shiftBuffer();
576 |             slice += this.buffers[0].slice(0, end - this.bufferOffset);
577 |         }
578 |         // NOTE(review): assumes the requested range lies within buffered input.
579 |         return slice;
580 |     }
581 |     /** Drops the first buffer, moving `bufferOffset` past its content. */
582 |     private shiftBuffer(): void {
583 |         this.bufferOffset += this.buffers[0].length;
584 |         this.writeIndex--;
585 |         this.buffers.shift();
586 |     }
587 | 
588 |     /**
589 |      * Buffers a chunk and, unless the tokenizer is paused, parses it right
590 |      * away. Writing after the parser has ended is reported via `onerror`.
591 |      * @param chunk Chunk to parse.
592 |      */
593 |     public write(chunk: string): void {
594 |         if (this.ended) {
595 |             this.cbs.onerror?.(new Error(".write() after done!"));
596 |             return;
597 |         }
598 | 
599 |         this.buffers.push(chunk);
600 |         // While paused, the chunk stays buffered until `resume()`.
601 |         if (!this.tokenizer.running) return;
602 |         this.tokenizer.write(chunk);
603 |         this.writeIndex++;
604 |     }
605 | 
606 |     /**
607 |      * Parses the end of the buffer and clears the stack, calls onend.
608 |      *
609 |      * @param chunk Optional final chunk to parse.
610 |      */
611 |     public end(chunk?: string): void {
612 |         if (this.ended) {
613 |             this.cbs.onerror?.(new Error(".end() after done!"));
614 |             return;
615 |         }
616 | 
617 |         if (chunk) this.write(chunk);
618 |         this.ended = true;
619 |         this.tokenizer.end();
620 |     }
621 | 
    /**
     * Pauses parsing. The parser won't emit events until `resume` is called.
     *
     * This only pauses the tokenizer; chunks written in the meantime are kept
     * in `buffers`.
     */
    public pause(): void {
        this.tokenizer.pause();
    }
628 | 
629 |     /**
630 |      * Resumes parsing after `pause` was called.
631 |      */
632 |     public resume(): void {
633 |         this.tokenizer.resume();
634 | 
635 |         while (
636 |             this.tokenizer.running &&
637 |             this.writeIndex < this.buffers.length
638 |         ) {
639 |             this.tokenizer.write(this.buffers[this.writeIndex++]);
640 |         }
641 | 
642 |         if (this.ended) this.tokenizer.end();
643 |     }
644 | 
    /**
     * Alias of `write`, for backwards compatibility.
     *
     * Forwards the chunk to `write` unchanged.
     *
     * @param chunk Chunk to parse.
     * @deprecated
     */
    public parseChunk(chunk: string): void {
        this.write(chunk);
    }
    /**
     * Alias of `end`, for backwards compatibility.
     *
     * Forwards the optional chunk to `end` unchanged.
     *
     * @param chunk Optional final chunk to parse.
     * @deprecated
     */
    public done(chunk?: string): void {
        this.end(chunk);
    }
663 | }
664 | 


--------------------------------------------------------------------------------
/src/Tokenizer.spec.ts:
--------------------------------------------------------------------------------
  1 | import { describe, it, expect } from "vitest";
  2 | import { Tokenizer } from "./index.js";
  3 | import type { Callbacks } from "./Tokenizer.js";
  4 | 
  5 | function tokenize(data: string, options = {}) {
  6 |     const log: unknown[][] = [];
  7 |     const tokenizer = new Tokenizer(
  8 |         options,
  9 |         new Proxy(
 10 |             {},
 11 |             {
 12 |                 get(_, property) {
 13 |                     return (...values: unknown[]) =>
 14 |                         log.push([property, ...values]);
 15 |                 },
 16 |             },
 17 |         ) as Callbacks,
 18 |     );
 19 | 
 20 |     tokenizer.write(data);
 21 |     tokenizer.end();
 22 | 
 23 |     return log;
 24 | }
 25 | 
/*
 * Snapshot tests for the tokenizer. Each case feeds a small document to
 * `tokenize` (or drives a `Tokenizer` directly) and compares the recorded
 * callback log against the stored snapshot.
 */
describe("Tokenizer", () => {
    // A special tag written as self-closing must not swallow the markup that follows.
    describe("should support self-closing special tags", () => {
        it("for self-closing script tag", () => {
            expect(tokenize("<script /><div></div>")).toMatchSnapshot();
        });
        it("for self-closing style tag", () => {
            expect(tokenize("<style /><div></div>")).toMatchSnapshot();
        });
        it("for self-closing title tag", () => {
            expect(tokenize("<title /><div></div>")).toMatchSnapshot();
        });
        it("for self-closing textarea tag", () => {
            expect(tokenize("<textarea /><div></div>")).toMatchSnapshot();
        });
        it("for self-closing xmp tag", () => {
            expect(tokenize("<xmp /><div></div>")).toMatchSnapshot();
        });
    });

    // Empty special tags followed by ordinary markup.
    describe("should support standard special tags", () => {
        it("for normal script tag", () => {
            expect(tokenize("<script></script><div></div>")).toMatchSnapshot();
        });
        it("for normal style tag", () => {
            expect(tokenize("<style></style><div></div>")).toMatchSnapshot();
        });
        /*
         * NOTE(review): "sitle" looks like a typo for "title". The test name
         * presumably keys the stored snapshot, so renaming it would also
         * require updating the snapshot file.
         */
        it("for normal sitle tag", () => {
            expect(tokenize("<title></title><div></div>")).toMatchSnapshot();
        });
        it("for normal textarea tag", () => {
            expect(
                tokenize("<textarea></textarea><div></div>"),
            ).toMatchSnapshot();
        });
        it("for normal xmp tag", () => {
            expect(tokenize("<xmp></xmp><div></div>")).toMatchSnapshot();
        });
    });

    // Markup nested inside a special tag.
    describe("should treat html inside special tags as text", () => {
        it("for div inside script tag", () => {
            expect(tokenize("<script><div></div></script>")).toMatchSnapshot();
        });
        it("for div inside style tag", () => {
            expect(tokenize("<style><div></div></style>")).toMatchSnapshot();
        });
        it("for div inside title tag", () => {
            expect(tokenize("<title><div></div></title>")).toMatchSnapshot();
        });
        it("for div inside textarea tag", () => {
            expect(
                tokenize("<textarea><div></div></textarea>"),
            ).toMatchSnapshot();
        });
        it("for div inside xmp tag", () => {
            expect(tokenize("<xmp><div></div></xmp>")).toMatchSnapshot();
        });
    });

    // One case per attribute form: no value, unquoted, single-quoted, double-quoted.
    describe("should correctly mark attributes", () => {
        it("for no value attribute", () => {
            expect(tokenize("<div aaaaaaa >")).toMatchSnapshot();
        });
        it("for no quotes attribute", () => {
            expect(tokenize("<div aaa=aaa >")).toMatchSnapshot();
        });
        it("for single quotes attribute", () => {
            expect(tokenize("<div aaa='a' >")).toMatchSnapshot();
        });
        it("for double quotes attribute", () => {
            expect(tokenize('<div aaa="a" >')).toMatchSnapshot();
        });
    });

    // An entity directly after a special tag.
    describe("should not break after special tag followed by an entity", () => {
        it("for normal special tag", () => {
            expect(tokenize("<style>a{}</style>&apos;<br/>")).toMatchSnapshot();
        });
        it("for self-closing special tag", () => {
            expect(tokenize("<style />&apos;<br/>")).toMatchSnapshot();
        });
    });

    describe("should handle entities", () => {
        // Mixes named and numeric entities, with and without the trailing `;`.
        it("for XML entities", () =>
            expect(
                tokenize("&amp;&gt;&amp&lt;&uuml;&#x61;&#x62&#99;&#100&#101", {
                    xmlMode: true,
                }),
            ).toMatchSnapshot());

        // The same entity-like text appears inside an attribute value and as plain text.
        it("for entities in attributes (#276)", () =>
            expect(
                tokenize(
                    '<img src="?&image_uri=1&&image;=2&image=3"/>?&image_uri=1&&image;=2&image=3',
                ),
            ).toMatchSnapshot());

        it("for trailing legacy entity", () =>
            expect(tokenize("&timesbar;&timesbar")).toMatchSnapshot());

        it("for multi-byte entities", () =>
            expect(tokenize("&NotGreaterFullEqual;")).toMatchSnapshot());
    });

    it("should not lose data when pausing", () => {
        const log: unknown[][] = [];
        const tokenizer = new Tokenizer(
            {},
            new Proxy(
                {},
                {
                    get(_, property) {
                        return (...values: unknown[]) => {
                            // Pause from inside the callback every time text is reported.
                            if (property === "ontext") {
                                tokenizer.pause();
                            }
                            log.push([property, ...values]);
                        };
                    },
                },
            ) as Callbacks,
        );

        // The entity is split across two chunks.
        tokenizer.write("&am");
        tokenizer.write("p; it up!");
        tokenizer.resume();
        tokenizer.resume();

        // Tokenizer shouldn't be paused
        expect(tokenizer).toHaveProperty("running", true);

        tokenizer.end();

        expect(log).toMatchSnapshot();
    });
});
163 | 


--------------------------------------------------------------------------------
/src/Tokenizer.ts:
--------------------------------------------------------------------------------
  1 | import {
  2 |     EntityDecoder,
  3 |     DecodingMode,
  4 |     htmlDecodeTree,
  5 |     xmlDecodeTree,
  6 | } from "entities/decode";
  7 | 
  8 | const enum CharCodes {
  9 |     Tab = 0x9, // "\t"
 10 |     NewLine = 0xa, // "\n"
 11 |     FormFeed = 0xc, // "\f"
 12 |     CarriageReturn = 0xd, // "\r"
 13 |     Space = 0x20, // " "
 14 |     ExclamationMark = 0x21, // "!"
 15 |     Number = 0x23, // "#"
 16 |     Amp = 0x26, // "&"
 17 |     SingleQuote = 0x27, // "'"
 18 |     DoubleQuote = 0x22, // '"'
 19 |     Dash = 0x2d, // "-"
 20 |     Slash = 0x2f, // "/"
 21 |     Zero = 0x30, // "0"
 22 |     Nine = 0x39, // "9"
 23 |     Semi = 0x3b, // ";"
 24 |     Lt = 0x3c, // "<"
 25 |     Eq = 0x3d, // "="
 26 |     Gt = 0x3e, // ">"
 27 |     Questionmark = 0x3f, // "?"
 28 |     UpperA = 0x41, // "A"
 29 |     LowerA = 0x61, // "a"
 30 |     UpperF = 0x46, // "F"
 31 |     LowerF = 0x66, // "f"
 32 |     UpperZ = 0x5a, // "Z"
 33 |     LowerZ = 0x7a, // "z"
 34 |     LowerX = 0x78, // "x"
 35 |     OpeningSquareBracket = 0x5b, // "["
 36 | }
 37 | 
/**
 * All the states the tokenizer can be in.
 *
 * Only `Text` has an explicit value; every other member is numbered
 * implicitly from its position, so the order of members matters.
 */
const enum State {
    Text = 1,
    BeforeTagName, // After <
    InTagName,
    InSelfClosingTag,
    BeforeClosingTagName,
    InClosingTagName,
    AfterClosingTagName,

    // Attributes
    BeforeAttributeName,
    InAttributeName,
    AfterAttributeName,
    BeforeAttributeValue,
    InAttributeValueDq, // "
    InAttributeValueSq, // '
    InAttributeValueNq,

    // Declarations
    BeforeDeclaration, // !
    InDeclaration,

    // Processing instructions
    InProcessingInstruction, // ?

    // Comments & CDATA
    BeforeComment,
    CDATASequence,
    InSpecialComment,
    InCommentLike,

    // Special tags
    BeforeSpecialS, // Decide if we deal with `<script` or `<style`
    BeforeSpecialT, // Entered after `<t` or `<x`: decide if we deal with `<title`, `<textarea` or `<xmp`
    SpecialStartSequence,
    InSpecialTag,

    InEntity,
}
 78 | 
 79 | function isWhitespace(c: number): boolean {
 80 |     return (
 81 |         c === CharCodes.Space ||
 82 |         c === CharCodes.NewLine ||
 83 |         c === CharCodes.Tab ||
 84 |         c === CharCodes.FormFeed ||
 85 |         c === CharCodes.CarriageReturn
 86 |     );
 87 | }
 88 | 
 89 | function isEndOfTagSection(c: number): boolean {
 90 |     return c === CharCodes.Slash || c === CharCodes.Gt || isWhitespace(c);
 91 | }
 92 | 
 93 | function isASCIIAlpha(c: number): boolean {
 94 |     return (
 95 |         (c >= CharCodes.LowerA && c <= CharCodes.LowerZ) ||
 96 |         (c >= CharCodes.UpperA && c <= CharCodes.UpperZ)
 97 |     );
 98 | }
 99 | 
/** How an attribute's value was written; passed to `onattribend`. */
export enum QuoteType {
    /** The attribute has no value, eg. `<div hidden>`. */
    NoValue = 0,
    /** The value was written without quotes, eg. `<div a=b>`. */
    Unquoted = 1,
    /** The value was wrapped in single quotes. */
    Single = 2,
    /** The value was wrapped in double quotes. */
    Double = 3,
}
106 | 
/**
 * Callbacks invoked by the `Tokenizer`.
 *
 * Position arguments are offsets into the whole input (all chunks written so
 * far), not into the current chunk.
 */
export interface Callbacks {
    /** A section of an attribute value, from `start` up to `endIndex`. */
    onattribdata(start: number, endIndex: number): void;
    /** NOTE(review): presumably a decoded entity inside an attribute value; the call site is outside this part of the file. */
    onattribentity(codepoint: number): void;
    /** The current attribute is complete; `quote` tells how its value was written. */
    onattribend(quote: QuoteType, endIndex: number): void;
    /** An attribute name, from `start` up to `endIndex`. */
    onattribname(start: number, endIndex: number): void;
    /**
     * A CDATA section was closed. The tokenizer passes `endOffset` as 2 here;
     * presumably the number of terminator characters before `endIndex`.
     */
    oncdata(start: number, endIndex: number, endOffset: number): void;
    /** The name of a closing tag, from `start` up to `endIndex`. */
    onclosetag(start: number, endIndex: number): void;
    /**
     * A comment was closed. The tokenizer passes `endOffset` as 2 for
     * comments closed by `-->` and 0 for comments closed by a bare `>`.
     */
    oncomment(start: number, endIndex: number, endOffset: number): void;
    /** A declaration (`<!...>`) was closed by `>`. */
    ondeclaration(start: number, endIndex: number): void;
    /** NOTE(review): presumably called once the input has been fully processed; the call site is outside this part of the file. */
    onend(): void;
    /** The `>` that ends an opening tag was reached at `endIndex`. */
    onopentagend(endIndex: number): void;
    /** The name of an opening tag, from `start` up to `endIndex`. */
    onopentagname(start: number, endIndex: number): void;
    /** A processing instruction (`<?...>`) was closed by `>`. */
    onprocessinginstruction(start: number, endIndex: number): void;
    /** The `>` of a self-closing tag (`/>`) was reached at `endIndex`. */
    onselfclosingtag(endIndex: number): void;
    /** A run of text, from `start` up to `endIndex`. */
    ontext(start: number, endIndex: number): void;
    /** NOTE(review): presumably a decoded entity inside text; the call site is outside this part of the file. */
    ontextentity(codepoint: number, endIndex: number): void;
}
124 | 
/**
 * Converts a string into the byte array of its character codes.
 *
 * Only meant for the ASCII literals used by `Sequences`.
 */
function toCharCodes(text: string) {
    const codes = new Uint8Array(text.length);

    for (let position = 0; position < text.length; position++) {
        codes[position] = text.charCodeAt(position);
    }

    return codes;
}

/**
 * Sequences used to match longer strings.
 *
 * We don't have `Script`, `Style`, or `Title` here. Instead, we re-use the *End
 * sequences with an increased offset.
 */
const Sequences = {
    Cdata: toCharCodes("CDATA["),
    CdataEnd: toCharCodes("]]>"),
    CommentEnd: toCharCodes("-->"),
    ScriptEnd: toCharCodes("</script"),
    StyleEnd: toCharCodes("</style"),
    TitleEnd: toCharCodes("</title"),
    TextareaEnd: toCharCodes("</textarea"),
    XmpEnd: toCharCodes("</xmp"),
};
143 | 
144 | export default class Tokenizer {
145 |     /** The current state the tokenizer is in. */
146 |     private state = State.Text;
147 |     /** The read buffer. */
148 |     private buffer = "";
149 |     /** The beginning of the section that is currently being read. */
150 |     private sectionStart = 0;
151 |     /** The index within the buffer that we are currently looking at. */
152 |     private index = 0;
153 |     /** The start of the last entity. */
154 |     private entityStart = 0;
155 |     /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
156 |     private baseState = State.Text;
    /** Whether we are in a special tag (eg. `script`, `style` or `title`), whose content is only scanned for the matching end tag. */
158 |     private isSpecial = false;
    /** Whether the tokenizer is running; `false` while it is paused. */
160 |     public running = true;
161 |     /** The offset of the current buffer. */
162 |     private offset = 0;
163 | 
164 |     private readonly xmlMode: boolean;
165 |     private readonly decodeEntities: boolean;
166 |     private readonly entityDecoder: EntityDecoder;
167 | 
168 |     constructor(
169 |         {
170 |             xmlMode = false,
171 |             decodeEntities = true,
172 |         }: { xmlMode?: boolean; decodeEntities?: boolean },
173 |         private readonly cbs: Callbacks,
174 |     ) {
175 |         this.xmlMode = xmlMode;
176 |         this.decodeEntities = decodeEntities;
177 |         this.entityDecoder = new EntityDecoder(
178 |             xmlMode ? xmlDecodeTree : htmlDecodeTree,
179 |             (cp, consumed) => this.emitCodePoint(cp, consumed),
180 |         );
181 |     }
182 | 
183 |     public reset(): void {
184 |         this.state = State.Text;
185 |         this.buffer = "";
186 |         this.sectionStart = 0;
187 |         this.index = 0;
188 |         this.baseState = State.Text;
189 |         this.currentSequence = undefined!;
190 |         this.running = true;
191 |         this.offset = 0;
192 |     }
193 | 
    /**
     * Replaces the read buffer with `chunk` and parses it.
     *
     * The length of the previous buffer is added to `offset` first, so that
     * `index` keeps counting from the start of the input rather than from the
     * start of the current chunk.
     *
     * @param chunk Next piece of input.
     */
    public write(chunk: string): void {
        this.offset += this.buffer.length;
        this.buffer = chunk;
        this.parse();
    }
199 | 
    /**
     * Signals that no more input will be written. Calls `finish()` only while
     * the tokenizer is running; while paused this does nothing.
     */
    public end(): void {
        if (this.running) this.finish();
    }
203 | 
    /** Pauses the tokenizer by clearing `running`; `resume` sets it again. */
    public pause(): void {
        this.running = false;
    }
207 | 
208 |     public resume(): void {
209 |         this.running = true;
210 |         if (this.index < this.buffer.length + this.offset) {
211 |             this.parse();
212 |         }
213 |     }
214 | 
215 |     private stateText(c: number): void {
216 |         if (
217 |             c === CharCodes.Lt ||
218 |             (!this.decodeEntities && this.fastForwardTo(CharCodes.Lt))
219 |         ) {
220 |             if (this.index > this.sectionStart) {
221 |                 this.cbs.ontext(this.sectionStart, this.index);
222 |             }
223 |             this.state = State.BeforeTagName;
224 |             this.sectionStart = this.index;
225 |         } else if (this.decodeEntities && c === CharCodes.Amp) {
226 |             this.startEntity();
227 |         }
228 |     }
229 | 
230 |     private currentSequence: Uint8Array = undefined!;
231 |     private sequenceIndex = 0;
232 |     private stateSpecialStartSequence(c: number): void {
233 |         const isEnd = this.sequenceIndex === this.currentSequence.length;
234 |         const isMatch = isEnd
235 |             ? // If we are at the end of the sequence, make sure the tag name has ended
236 |               isEndOfTagSection(c)
237 |             : // Otherwise, do a case-insensitive comparison
238 |               (c | 0x20) === this.currentSequence[this.sequenceIndex];
239 | 
240 |         if (!isMatch) {
241 |             this.isSpecial = false;
242 |         } else if (!isEnd) {
243 |             this.sequenceIndex++;
244 |             return;
245 |         }
246 | 
247 |         this.sequenceIndex = 0;
248 |         this.state = State.InTagName;
249 |         this.stateInTagName(c);
250 |     }
251 | 
    /**
     * Look for an end tag. For <title> tags, also decode entities.
     *
     * `currentSequence` holds the end tag to look for (eg. `</script`) and
     * `sequenceIndex` the number of its characters matched so far. Everything
     * before the end tag is reported as a single text section.
     */
    private stateInSpecialTag(c: number): void {
        // The whole end sequence has been matched; check what follows it.
        if (this.sequenceIndex === this.currentSequence.length) {
            if (c === CharCodes.Gt || isWhitespace(c)) {
                const endOfText = this.index - this.currentSequence.length;

                if (this.sectionStart < endOfText) {
                    // Spoof the index so that reported locations match up.
                    const actualIndex = this.index;
                    this.index = endOfText;
                    this.cbs.ontext(this.sectionStart, endOfText);
                    this.index = actualIndex;
                }

                this.isSpecial = false;
                this.sectionStart = endOfText + 2; // Skip over the `</`
                this.stateInClosingTagName(c);
                return; // We are done; skip the rest of the function.
            }

            // The name continues (eg. `</scripts`), so this was not the end tag.
            this.sequenceIndex = 0;
        }

        // Letters are compared case-insensitively.
        if ((c | 0x20) === this.currentSequence[this.sequenceIndex]) {
            this.sequenceIndex += 1;
        } else if (this.sequenceIndex === 0) {
            if (this.currentSequence === Sequences.TitleEnd) {
                // We have to parse entities in <title> tags.
                if (this.decodeEntities && c === CharCodes.Amp) {
                    this.startEntity();
                }
            } else if (this.fastForwardTo(CharCodes.Lt)) {
                // Outside of <title> tags, we can fast-forward.
                this.sequenceIndex = 1;
            }
        } else {
            // If we see a `<`, set the sequence index to 1; useful for eg. `<</script>`.
            this.sequenceIndex = Number(c === CharCodes.Lt);
        }
    }
292 | 
293 |     private stateCDATASequence(c: number): void {
294 |         if (c === Sequences.Cdata[this.sequenceIndex]) {
295 |             if (++this.sequenceIndex === Sequences.Cdata.length) {
296 |                 this.state = State.InCommentLike;
297 |                 this.currentSequence = Sequences.CdataEnd;
298 |                 this.sequenceIndex = 0;
299 |                 this.sectionStart = this.index + 1;
300 |             }
301 |         } else {
302 |             this.sequenceIndex = 0;
303 |             this.state = State.InDeclaration;
304 |             this.stateInDeclaration(c); // Reconsume the character
305 |         }
306 |     }
307 | 
308 |     /**
309 |      * When we wait for one specific character, we can speed things up
310 |      * by skipping through the buffer until we find it.
311 |      *
312 |      * @returns Whether the character was found.
313 |      */
314 |     private fastForwardTo(c: number): boolean {
315 |         while (++this.index < this.buffer.length + this.offset) {
316 |             if (this.buffer.charCodeAt(this.index - this.offset) === c) {
317 |                 return true;
318 |             }
319 |         }
320 | 
321 |         /*
322 |          * We increment the index at the end of the `parse` loop,
323 |          * so set it to `buffer.length - 1` here.
324 |          *
325 |          * TODO: Refactor `parse` to increment index before calling states.
326 |          */
327 |         this.index = this.buffer.length + this.offset - 1;
328 | 
329 |         return false;
330 |     }
331 | 
332 |     /**
333 |      * Comments and CDATA end with `-->` and `]]>`.
334 |      *
335 |      * Their common qualities are:
336 |      * - Their end sequences have a distinct character they start with.
337 |      * - That character is then repeated, so we have to check multiple repeats.
338 |      * - All characters but the start character of the sequence can be skipped.
339 |      */
340 |     private stateInCommentLike(c: number): void {
341 |         if (c === this.currentSequence[this.sequenceIndex]) {
342 |             if (++this.sequenceIndex === this.currentSequence.length) {
343 |                 if (this.currentSequence === Sequences.CdataEnd) {
344 |                     this.cbs.oncdata(this.sectionStart, this.index, 2);
345 |                 } else {
346 |                     this.cbs.oncomment(this.sectionStart, this.index, 2);
347 |                 }
348 | 
349 |                 this.sequenceIndex = 0;
350 |                 this.sectionStart = this.index + 1;
351 |                 this.state = State.Text;
352 |             }
353 |         } else if (this.sequenceIndex === 0) {
354 |             // Fast-forward to the first character of the sequence
355 |             if (this.fastForwardTo(this.currentSequence[0])) {
356 |                 this.sequenceIndex = 1;
357 |             }
358 |         } else if (c !== this.currentSequence[this.sequenceIndex - 1]) {
359 |             // Allow long sequences, eg. --->, ]]]>
360 |             this.sequenceIndex = 0;
361 |         }
362 |     }
363 | 
364 |     /**
365 |      * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
366 |      *
367 |      * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
368 |      * We allow anything that wouldn't end the tag.
369 |      */
370 |     private isTagStartChar(c: number) {
371 |         return this.xmlMode ? !isEndOfTagSection(c) : isASCIIAlpha(c);
372 |     }
373 | 
374 |     private startSpecial(sequence: Uint8Array, offset: number) {
375 |         this.isSpecial = true;
376 |         this.currentSequence = sequence;
377 |         this.sequenceIndex = offset;
378 |         this.state = State.SpecialStartSequence;
379 |     }
380 | 
381 |     private stateBeforeTagName(c: number): void {
382 |         if (c === CharCodes.ExclamationMark) {
383 |             this.state = State.BeforeDeclaration;
384 |             this.sectionStart = this.index + 1;
385 |         } else if (c === CharCodes.Questionmark) {
386 |             this.state = State.InProcessingInstruction;
387 |             this.sectionStart = this.index + 1;
388 |         } else if (this.isTagStartChar(c)) {
389 |             const lower = c | 0x20;
390 |             this.sectionStart = this.index;
391 |             if (this.xmlMode) {
392 |                 this.state = State.InTagName;
393 |             } else if (lower === Sequences.ScriptEnd[2]) {
394 |                 this.state = State.BeforeSpecialS;
395 |             } else if (
396 |                 lower === Sequences.TitleEnd[2] ||
397 |                 lower === Sequences.XmpEnd[2]
398 |             ) {
399 |                 this.state = State.BeforeSpecialT;
400 |             } else {
401 |                 this.state = State.InTagName;
402 |             }
403 |         } else if (c === CharCodes.Slash) {
404 |             this.state = State.BeforeClosingTagName;
405 |         } else {
406 |             this.state = State.Text;
407 |             this.stateText(c);
408 |         }
409 |     }
410 |     private stateInTagName(c: number): void {
411 |         if (isEndOfTagSection(c)) {
412 |             this.cbs.onopentagname(this.sectionStart, this.index);
413 |             this.sectionStart = -1;
414 |             this.state = State.BeforeAttributeName;
415 |             this.stateBeforeAttributeName(c);
416 |         }
417 |     }
418 |     private stateBeforeClosingTagName(c: number): void {
419 |         if (isWhitespace(c)) {
420 |             // Ignore
421 |         } else if (c === CharCodes.Gt) {
422 |             this.state = State.Text;
423 |         } else {
424 |             this.state = this.isTagStartChar(c)
425 |                 ? State.InClosingTagName
426 |                 : State.InSpecialComment;
427 |             this.sectionStart = this.index;
428 |         }
429 |     }
430 |     private stateInClosingTagName(c: number): void {
431 |         if (c === CharCodes.Gt || isWhitespace(c)) {
432 |             this.cbs.onclosetag(this.sectionStart, this.index);
433 |             this.sectionStart = -1;
434 |             this.state = State.AfterClosingTagName;
435 |             this.stateAfterClosingTagName(c);
436 |         }
437 |     }
438 |     private stateAfterClosingTagName(c: number): void {
439 |         // Skip everything until ">"
440 |         if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
441 |             this.state = State.Text;
442 |             this.sectionStart = this.index + 1;
443 |         }
444 |     }
445 |     private stateBeforeAttributeName(c: number): void {
446 |         if (c === CharCodes.Gt) {
447 |             this.cbs.onopentagend(this.index);
448 |             if (this.isSpecial) {
449 |                 this.state = State.InSpecialTag;
450 |                 this.sequenceIndex = 0;
451 |             } else {
452 |                 this.state = State.Text;
453 |             }
454 |             this.sectionStart = this.index + 1;
455 |         } else if (c === CharCodes.Slash) {
456 |             this.state = State.InSelfClosingTag;
457 |         } else if (!isWhitespace(c)) {
458 |             this.state = State.InAttributeName;
459 |             this.sectionStart = this.index;
460 |         }
461 |     }
462 |     private stateInSelfClosingTag(c: number): void {
463 |         if (c === CharCodes.Gt) {
464 |             this.cbs.onselfclosingtag(this.index);
465 |             this.state = State.Text;
466 |             this.sectionStart = this.index + 1;
467 |             this.isSpecial = false; // Reset special state, in case of self-closing special tags
468 |         } else if (!isWhitespace(c)) {
469 |             this.state = State.BeforeAttributeName;
470 |             this.stateBeforeAttributeName(c);
471 |         }
472 |     }
473 |     private stateInAttributeName(c: number): void {
474 |         if (c === CharCodes.Eq || isEndOfTagSection(c)) {
475 |             this.cbs.onattribname(this.sectionStart, this.index);
476 |             this.sectionStart = this.index;
477 |             this.state = State.AfterAttributeName;
478 |             this.stateAfterAttributeName(c);
479 |         }
480 |     }
481 |     private stateAfterAttributeName(c: number): void {
482 |         if (c === CharCodes.Eq) {
483 |             this.state = State.BeforeAttributeValue;
484 |         } else if (c === CharCodes.Slash || c === CharCodes.Gt) {
485 |             this.cbs.onattribend(QuoteType.NoValue, this.sectionStart);
486 |             this.sectionStart = -1;
487 |             this.state = State.BeforeAttributeName;
488 |             this.stateBeforeAttributeName(c);
489 |         } else if (!isWhitespace(c)) {
490 |             this.cbs.onattribend(QuoteType.NoValue, this.sectionStart);
491 |             this.state = State.InAttributeName;
492 |             this.sectionStart = this.index;
493 |         }
494 |     }
495 |     private stateBeforeAttributeValue(c: number): void {
496 |         if (c === CharCodes.DoubleQuote) {
497 |             this.state = State.InAttributeValueDq;
498 |             this.sectionStart = this.index + 1;
499 |         } else if (c === CharCodes.SingleQuote) {
500 |             this.state = State.InAttributeValueSq;
501 |             this.sectionStart = this.index + 1;
502 |         } else if (!isWhitespace(c)) {
503 |             this.sectionStart = this.index;
504 |             this.state = State.InAttributeValueNq;
505 |             this.stateInAttributeValueNoQuotes(c); // Reconsume token
506 |         }
507 |     }
508 |     private handleInAttributeValue(c: number, quote: number) {
509 |         if (
510 |             c === quote ||
511 |             (!this.decodeEntities && this.fastForwardTo(quote))
512 |         ) {
513 |             this.cbs.onattribdata(this.sectionStart, this.index);
514 |             this.sectionStart = -1;
515 |             this.cbs.onattribend(
516 |                 quote === CharCodes.DoubleQuote
517 |                     ? QuoteType.Double
518 |                     : QuoteType.Single,
519 |                 this.index + 1,
520 |             );
521 |             this.state = State.BeforeAttributeName;
522 |         } else if (this.decodeEntities && c === CharCodes.Amp) {
523 |             this.startEntity();
524 |         }
525 |     }
    /** Handles a character inside a double-quoted attribute value. */
    private stateInAttributeValueDoubleQuotes(c: number): void {
        this.handleInAttributeValue(c, CharCodes.DoubleQuote);
    }
    /** Handles a character inside a single-quoted attribute value. */
    private stateInAttributeValueSingleQuotes(c: number): void {
        this.handleInAttributeValue(c, CharCodes.SingleQuote);
    }
532 |     private stateInAttributeValueNoQuotes(c: number): void {
533 |         if (isWhitespace(c) || c === CharCodes.Gt) {
534 |             this.cbs.onattribdata(this.sectionStart, this.index);
535 |             this.sectionStart = -1;
536 |             this.cbs.onattribend(QuoteType.Unquoted, this.index);
537 |             this.state = State.BeforeAttributeName;
538 |             this.stateBeforeAttributeName(c);
539 |         } else if (this.decodeEntities && c === CharCodes.Amp) {
540 |             this.startEntity();
541 |         }
542 |     }
543 |     private stateBeforeDeclaration(c: number): void {
544 |         if (c === CharCodes.OpeningSquareBracket) {
545 |             this.state = State.CDATASequence;
546 |             this.sequenceIndex = 0;
547 |         } else {
548 |             this.state =
549 |                 c === CharCodes.Dash
550 |                     ? State.BeforeComment
551 |                     : State.InDeclaration;
552 |         }
553 |     }
554 |     private stateInDeclaration(c: number): void {
555 |         if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
556 |             this.cbs.ondeclaration(this.sectionStart, this.index);
557 |             this.state = State.Text;
558 |             this.sectionStart = this.index + 1;
559 |         }
560 |     }
561 |     private stateInProcessingInstruction(c: number): void {
562 |         if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
563 |             this.cbs.onprocessinginstruction(this.sectionStart, this.index);
564 |             this.state = State.Text;
565 |             this.sectionStart = this.index + 1;
566 |         }
567 |     }
568 |     private stateBeforeComment(c: number): void {
569 |         if (c === CharCodes.Dash) {
570 |             this.state = State.InCommentLike;
571 |             this.currentSequence = Sequences.CommentEnd;
572 |             // Allow short comments (eg. <!-->)
573 |             this.sequenceIndex = 2;
574 |             this.sectionStart = this.index + 1;
575 |         } else {
576 |             this.state = State.InDeclaration;
577 |         }
578 |     }
579 |     private stateInSpecialComment(c: number): void {
580 |         if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
581 |             this.cbs.oncomment(this.sectionStart, this.index, 0);
582 |             this.state = State.Text;
583 |             this.sectionStart = this.index + 1;
584 |         }
585 |     }
586 |     private stateBeforeSpecialS(c: number): void {
587 |         const lower = c | 0x20;
588 |         if (lower === Sequences.ScriptEnd[3]) {
589 |             this.startSpecial(Sequences.ScriptEnd, 4);
590 |         } else if (lower === Sequences.StyleEnd[3]) {
591 |             this.startSpecial(Sequences.StyleEnd, 4);
592 |         } else {
593 |             this.state = State.InTagName;
594 |             this.stateInTagName(c); // Consume the token again
595 |         }
596 |     }
597 | 
598 |     private stateBeforeSpecialT(c: number): void {
599 |         const lower = c | 0x20;
600 |         switch (lower) {
601 |             case Sequences.TitleEnd[3]: {
602 |                 this.startSpecial(Sequences.TitleEnd, 4);
603 | 
604 |                 break;
605 |             }
606 |             case Sequences.TextareaEnd[3]: {
607 |                 this.startSpecial(Sequences.TextareaEnd, 4);
608 | 
609 |                 break;
610 |             }
611 |             case Sequences.XmpEnd[3]: {
612 |                 this.startSpecial(Sequences.XmpEnd, 4);
613 | 
614 |                 break;
615 |             }
616 |             default: {
617 |                 this.state = State.InTagName;
618 |                 this.stateInTagName(c); // Consume the token again
619 |             }
620 |         }
621 |     }
622 | 
623 |     private startEntity() {
624 |         this.baseState = this.state;
625 |         this.state = State.InEntity;
626 |         this.entityStart = this.index;
627 |         this.entityDecoder.startEntity(
628 |             this.xmlMode
629 |                 ? DecodingMode.Strict
630 |                 : this.baseState === State.Text ||
631 |                     this.baseState === State.InSpecialTag
632 |                   ? DecodingMode.Legacy
633 |                   : DecodingMode.Attribute,
634 |         );
635 |     }
636 | 
637 |     private stateInEntity(): void {
638 |         const length = this.entityDecoder.write(
639 |             this.buffer,
640 |             this.index - this.offset,
641 |         );
642 | 
643 |         // If `length` is positive, we are done with the entity.
644 |         if (length >= 0) {
645 |             this.state = this.baseState;
646 | 
647 |             if (length === 0) {
648 |                 this.index = this.entityStart;
649 |             }
650 |         } else {
651 |             // Mark buffer as consumed.
652 |             this.index = this.offset + this.buffer.length - 1;
653 |         }
654 |     }
655 | 
656 |     /**
657 |      * Remove data that has already been consumed from the buffer.
658 |      */
659 |     private cleanup() {
660 |         // If we are inside of text or attributes, emit what we already have.
661 |         if (this.running && this.sectionStart !== this.index) {
662 |             if (
663 |                 this.state === State.Text ||
664 |                 (this.state === State.InSpecialTag && this.sequenceIndex === 0)
665 |             ) {
666 |                 this.cbs.ontext(this.sectionStart, this.index);
667 |                 this.sectionStart = this.index;
668 |             } else if (
669 |                 this.state === State.InAttributeValueDq ||
670 |                 this.state === State.InAttributeValueSq ||
671 |                 this.state === State.InAttributeValueNq
672 |             ) {
673 |                 this.cbs.onattribdata(this.sectionStart, this.index);
674 |                 this.sectionStart = this.index;
675 |             }
676 |         }
677 |     }
678 | 
679 |     private shouldContinue() {
680 |         return this.index < this.buffer.length + this.offset && this.running;
681 |     }
682 | 
683 |     /**
684 |      * Iterates through the buffer, calling the function corresponding to the current state.
685 |      *
686 |      * States that are more likely to be hit are higher up, as a performance improvement.
687 |      */
688 |     private parse() {
689 |         while (this.shouldContinue()) {
690 |             const c = this.buffer.charCodeAt(this.index - this.offset);
691 |             switch (this.state) {
692 |                 case State.Text: {
693 |                     this.stateText(c);
694 |                     break;
695 |                 }
696 |                 case State.SpecialStartSequence: {
697 |                     this.stateSpecialStartSequence(c);
698 |                     break;
699 |                 }
700 |                 case State.InSpecialTag: {
701 |                     this.stateInSpecialTag(c);
702 |                     break;
703 |                 }
704 |                 case State.CDATASequence: {
705 |                     this.stateCDATASequence(c);
706 |                     break;
707 |                 }
708 |                 case State.InAttributeValueDq: {
709 |                     this.stateInAttributeValueDoubleQuotes(c);
710 |                     break;
711 |                 }
712 |                 case State.InAttributeName: {
713 |                     this.stateInAttributeName(c);
714 |                     break;
715 |                 }
716 |                 case State.InCommentLike: {
717 |                     this.stateInCommentLike(c);
718 |                     break;
719 |                 }
720 |                 case State.InSpecialComment: {
721 |                     this.stateInSpecialComment(c);
722 |                     break;
723 |                 }
724 |                 case State.BeforeAttributeName: {
725 |                     this.stateBeforeAttributeName(c);
726 |                     break;
727 |                 }
728 |                 case State.InTagName: {
729 |                     this.stateInTagName(c);
730 |                     break;
731 |                 }
732 |                 case State.InClosingTagName: {
733 |                     this.stateInClosingTagName(c);
734 |                     break;
735 |                 }
736 |                 case State.BeforeTagName: {
737 |                     this.stateBeforeTagName(c);
738 |                     break;
739 |                 }
740 |                 case State.AfterAttributeName: {
741 |                     this.stateAfterAttributeName(c);
742 |                     break;
743 |                 }
744 |                 case State.InAttributeValueSq: {
745 |                     this.stateInAttributeValueSingleQuotes(c);
746 |                     break;
747 |                 }
748 |                 case State.BeforeAttributeValue: {
749 |                     this.stateBeforeAttributeValue(c);
750 |                     break;
751 |                 }
752 |                 case State.BeforeClosingTagName: {
753 |                     this.stateBeforeClosingTagName(c);
754 |                     break;
755 |                 }
756 |                 case State.AfterClosingTagName: {
757 |                     this.stateAfterClosingTagName(c);
758 |                     break;
759 |                 }
760 |                 case State.BeforeSpecialS: {
761 |                     this.stateBeforeSpecialS(c);
762 |                     break;
763 |                 }
764 |                 case State.BeforeSpecialT: {
765 |                     this.stateBeforeSpecialT(c);
766 |                     break;
767 |                 }
768 |                 case State.InAttributeValueNq: {
769 |                     this.stateInAttributeValueNoQuotes(c);
770 |                     break;
771 |                 }
772 |                 case State.InSelfClosingTag: {
773 |                     this.stateInSelfClosingTag(c);
774 |                     break;
775 |                 }
776 |                 case State.InDeclaration: {
777 |                     this.stateInDeclaration(c);
778 |                     break;
779 |                 }
780 |                 case State.BeforeDeclaration: {
781 |                     this.stateBeforeDeclaration(c);
782 |                     break;
783 |                 }
784 |                 case State.BeforeComment: {
785 |                     this.stateBeforeComment(c);
786 |                     break;
787 |                 }
788 |                 case State.InProcessingInstruction: {
789 |                     this.stateInProcessingInstruction(c);
790 |                     break;
791 |                 }
792 |                 case State.InEntity: {
793 |                     this.stateInEntity();
794 |                     break;
795 |                 }
796 |             }
797 |             this.index++;
798 |         }
799 |         this.cleanup();
800 |     }
801 | 
802 |     private finish() {
803 |         if (this.state === State.InEntity) {
804 |             this.entityDecoder.end();
805 |             this.state = this.baseState;
806 |         }
807 | 
808 |         this.handleTrailingData();
809 | 
810 |         this.cbs.onend();
811 |     }
812 | 
813 |     /** Handle any trailing data. */
814 |     private handleTrailingData() {
815 |         const endIndex = this.buffer.length + this.offset;
816 | 
817 |         // If there is no remaining data, we are done.
818 |         if (this.sectionStart >= endIndex) {
819 |             return;
820 |         }
821 | 
822 |         if (this.state === State.InCommentLike) {
823 |             if (this.currentSequence === Sequences.CdataEnd) {
824 |                 this.cbs.oncdata(this.sectionStart, endIndex, 0);
825 |             } else {
826 |                 this.cbs.oncomment(this.sectionStart, endIndex, 0);
827 |             }
828 |         } else if (
829 |             this.state === State.InTagName ||
830 |             this.state === State.BeforeAttributeName ||
831 |             this.state === State.BeforeAttributeValue ||
832 |             this.state === State.AfterAttributeName ||
833 |             this.state === State.InAttributeName ||
834 |             this.state === State.InAttributeValueSq ||
835 |             this.state === State.InAttributeValueDq ||
836 |             this.state === State.InAttributeValueNq ||
837 |             this.state === State.InClosingTagName
838 |         ) {
839 |             /*
840 |              * If we are currently in an opening or closing tag, us not calling the
841 |              * respective callback signals that the tag should be ignored.
842 |              */
843 |         } else {
844 |             this.cbs.ontext(this.sectionStart, endIndex);
845 |         }
846 |     }
847 | 
848 |     private emitCodePoint(cp: number, consumed: number): void {
849 |         if (
850 |             this.baseState !== State.Text &&
851 |             this.baseState !== State.InSpecialTag
852 |         ) {
853 |             if (this.sectionStart < this.entityStart) {
854 |                 this.cbs.onattribdata(this.sectionStart, this.entityStart);
855 |             }
856 |             this.sectionStart = this.entityStart + consumed;
857 |             this.index = this.sectionStart - 1;
858 | 
859 |             this.cbs.onattribentity(cp);
860 |         } else {
861 |             if (this.sectionStart < this.entityStart) {
862 |                 this.cbs.ontext(this.sectionStart, this.entityStart);
863 |             }
864 |             this.sectionStart = this.entityStart + consumed;
865 |             this.index = this.sectionStart - 1;
866 | 
867 |             this.cbs.ontextentity(cp, this.sectionStart);
868 |         }
869 |     }
870 | }
871 | 


--------------------------------------------------------------------------------
/src/WritableStream.spec.ts:
--------------------------------------------------------------------------------
 1 | import { createReadStream } from "node:fs";
 2 | import * as fs from "node:fs/promises";
 3 | import * as stream from "node:stream";
 4 | import { describe, it, expect, vi } from "vitest";
 5 | import type { Handler, ParserOptions } from "./Parser.js";
 6 | import { WritableStream } from "./WritableStream.js";
 7 | import * as helper from "./__fixtures__/testHelper.js";
 8 | 
 9 | describe("WritableStream", () => {
10 |     it("should decode fragmented unicode characters", () => {
11 |         const ontext = vi.fn();
12 |         const stream = new WritableStream({ ontext });
13 | 
14 |         stream.write(Buffer.from([0xe2, 0x82]));
15 |         stream.write(Buffer.from([0xac]));
16 |         stream.write("");
17 |         stream.end();
18 | 
19 |         expect(ontext).toHaveBeenCalledWith("€");
20 |     });
21 | 
22 |     it("Basic html", () => testStream("Basic.html"));
23 |     it("Attributes", () => testStream("Attributes.html"));
24 |     it("SVG", () => testStream("Svg.html"));
25 |     it("RSS feed", () => testStream("RSS_Example.xml", { xmlMode: true }));
26 |     it("Atom feed", () => testStream("Atom_Example.xml", { xmlMode: true }));
27 |     it("RDF feed", () => testStream("RDF_Example.xml", { xmlMode: true }));
28 | });
29 | 
/**
 * Creates an event-collecting handler together with a promise for its result.
 *
 * The promise resolves with the collected events once the collector reports
 * completion, and rejects if the collector reports an error.
 *
 * @returns The handler to pass to the parser, and the promise of the events.
 */
function getPromiseEventCollector(): [
    handler: Partial<Handler>,
    promise: Promise<unknown>,
] {
    let collector: Partial<Handler> | undefined;

    const collected = new Promise<unknown>((resolve, reject) => {
        collector = helper.getEventCollector((error, events) =>
            error ? reject(error) : resolve(events),
        );
    });

    // The promise executor runs synchronously, so `collector` is set by now.
    return [collector!, collected];
}
47 | 
// TODO[engine:node@>=16]: Use promise version of `stream.finished` instead.
/**
 * Promise wrapper around the callback-based `stream.finished`.
 *
 * @param input Stream to wait for.
 * @returns A promise that resolves once the stream has finished, or rejects
 *     with the error reported by `stream.finished`.
 */
function finished(input: Parameters<typeof stream.finished>[0]): Promise<void> {
    return new Promise((resolve, reject) => {
        stream.finished(input, (error) => {
            if (error) {
                reject(error);
            } else {
                resolve();
            }
        });
    });
}
54 | 
/**
 * Parses a fixture document twice and checks that both runs agree.
 *
 * The first run pipes a file read stream into a `WritableStream` and compares
 * the collected events with the stored snapshot. The second run hands the
 * whole file to a fresh `WritableStream` in a single `end` call and expects
 * exactly the same events.
 *
 * @param file Name of the fixture inside `__fixtures__/Documents`.
 * @param options Parser options used for both runs.
 */
async function testStream(
    file: string,
    options?: ParserOptions,
): Promise<void> {
    const filePath = new URL(`__fixtures__/Documents/${file}`, import.meta.url);

    // First run: the document is piped in from a file stream.
    const [pipedHandler, pipedEvents] = getPromiseEventCollector();
    const pipedStream = new WritableStream(pipedHandler, options);
    await finished(createReadStream(filePath).pipe(pipedStream));

    const events = await pipedEvents;
    expect(events).toMatchSnapshot();

    // Second run: the whole document is written in one go.
    const [wholeHandler, wholeEvents] = getPromiseEventCollector();
    const wholeStream = new WritableStream(wholeHandler, options);
    wholeStream.end(await fs.readFile(filePath));
    await finished(wholeStream);

    expect(await wholeEvents).toStrictEqual(events);
}
83 | 


--------------------------------------------------------------------------------
/src/WritableStream.ts:
--------------------------------------------------------------------------------
 1 | import { Parser, type Handler, type ParserOptions } from "./Parser.js";
 2 | /*
 3 |  * NOTE: If either of these two imports produces a type error,
 4 |  * please update your @types/node dependency!
 5 |  */
 6 | import { Writable } from "node:stream";
 7 | import { StringDecoder } from "node:string_decoder";
 8 | 
 9 | // Following the example in https://nodejs.org/api/stream.html#stream_decoding_buffers_in_a_writable_stream
10 | function isBuffer(_chunk: string | Buffer, encoding: string): _chunk is Buffer {
11 |     return encoding === "buffer";
12 | }
13 | 
/**
 * Exposes the `Parser` interface as a NodeJS writable stream.
 *
 * Chunks written to the stream are forwarded to an internal parser; `Buffer`
 * chunks are decoded to strings first.
 *
 * @see Parser
 */
export class WritableStream extends Writable {
    /** Parser that receives everything written to the stream. */
    private readonly _parser: Parser;
    /** Decoder used to turn `Buffer` chunks into strings. */
    private readonly _decoder = new StringDecoder();

    /**
     * @param cbs Handler callbacks, passed on to the parser.
     * @param options Parser options, passed on to the parser.
     */
    constructor(cbs: Partial<Handler>, options?: ParserOptions) {
        // Strings are passed through as-is instead of being converted to Buffers.
        super({ decodeStrings: false });
        this._parser = new Parser(cbs, options);
    }

    /**
     * Forwards a chunk to the parser, decoding it first if it is a `Buffer`.
     *
     * @param chunk Data written to the stream.
     * @param encoding Encoding reported for the chunk.
     * @param callback Called once the chunk has been handed to the parser.
     */
    override _write(
        chunk: string | Buffer,
        encoding: string,
        callback: () => void,
    ): void {
        let text: string;
        if (isBuffer(chunk, encoding)) {
            text = this._decoder.write(chunk);
        } else {
            text = chunk;
        }

        this._parser.write(text);
        callback();
    }

    /**
     * Ends the parser with whatever the decoder still returns from `end()`.
     *
     * @param callback Called once the parser has been ended.
     */
    override _final(callback: () => void): void {
        const remainder = this._decoder.end();
        this._parser.end(remainder);
        callback();
    }
}
44 | 


--------------------------------------------------------------------------------
/src/__fixtures__/Documents/Atom_Example.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- http://en.wikipedia.org/wiki/Atom_%28standard%29 -->
 3 | <feed xmlns="http://www.w3.org/2005/Atom">
 4 | 	<title>Example Feed</title>
 5 | 	<subtitle>A subtitle.</subtitle>
 6 | 	<link href="http://example.org/feed/" rel="self" />
 7 | 	<link href="http://example.org/" />
 8 | 	<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
 9 | 	<updated>2003-12-13T18:30:02Z</updated>
10 | 	<author>
11 | 		<name>John Doe</name>
12 | 		<email>johndoe@example.com</email>
13 | 	</author>
14 | 
15 | 	<entry>
16 | 		<title>Atom-Powered Robots Run Amok</title>
17 | 		<link href="http://example.org/2003/12/13/atom03" />
18 | 		<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
19 | 		<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
20 | 		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
21 | 		<updated>2003-12-13T18:30:02Z</updated>
22 | 		<content type="html"><p>Some content.</p></content>
23 | 	</entry>
24 | 
25 | 	<entry/>
26 | 
27 | </feed>
28 | 


--------------------------------------------------------------------------------
/src/__fixtures__/Documents/Attributes.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 | <head>
 4 | 	<title>Attributes test</title>
 5 | </head>
 6 | <body>
 7 | 	<!-- Normal attributes -->
 8 | 	<button id="test0" class="value0" title="value1">class="value0" title="value1"</button>
 9 | 
10 | 	<!-- Attributes with no quotes or value -->
11 | 	<button id="test1" class=value2 disabled>class=value2 disabled</button>
12 | 
13 | 	<!-- Attributes with no space between them. No valid, but accepted by the browser -->
14 | 	<button id="test2" class="value4"title="value5">class="value4"title="value5"</button>
15 | </body>
16 | </html>


--------------------------------------------------------------------------------
/src/__fixtures__/Documents/Basic.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>


--------------------------------------------------------------------------------
/src/__fixtures__/Documents/RDF_Example.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">
 3 | 	<channel rdf:about="https://github.com/fb55/htmlparser2/">
 4 | 		<title>A title to parse and remember</title>
 5 | 		<link>https://github.com/fb55/htmlparser2/</link>
 6 | 		<description/>
 7 | 		<dc:language>en-us</dc:language>
 8 | 		<dc:rights>Copyright 2015 the authors</dc:rights>
 9 | 		<dc:publisher>webmaster@thisisafakedoma.in</dc:publisher>
10 | 		<dc:creator>webmaster@thisisafakedoma.in</dc:creator>
11 | 		<dc:source>https://github.com/fb55/htmlparser2/</dc:source>
12 | 		<dc:title>A title to parse and remember</dc:title>
13 | 		<dc:type>Collection</dc:type>
14 | 		<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>
15 | 		<syn:updateFrequency>4</syn:updateFrequency>
16 | 		<syn:updatePeriod>hourly</syn:updatePeriod>
17 | 		<items>
18 | 			<rdf:Seq>
19 | 				<rdf:li rdf:resource="http://somefakesite/path/to/something.html"/>
20 | 			</rdf:Seq>
21 | 		</items>
22 | 	</channel>
23 | 	<item rdf:about="http://somefakesite/path/to/something.html">
24 | 		<title><![CDATA[ Fast HTML Parsing ]]></title>
25 | 		<link>
26 | http://somefakesite/path/to/something.html
27 | </link>
28 | 		<description><![CDATA[
29 | Great test content<br>A link: <a href="http://github.com">Github</a>
30 | ]]></description>
31 | 		<dc:date>2011-11-04T09:35:17-07:00</dc:date>
32 | 		<dc:language>en-us</dc:language>
33 | 		<dc:rights>Copyright 2015 the authors</dc:rights>
34 | 		<dc:source>
35 | http://somefakesite/path/to/something.html
36 | </dc:source>
37 | 		<dc:title><![CDATA[ Fast HTML Parsing ]]></dc:title>
38 | 		<dc:type>text</dc:type>
39 | 		<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>
40 | 	</item>
41 | 	<item rdf:about="http://somefakesite/path/to/something-else.html">
42 | 		<title><![CDATA[
43 | This space intentionally left blank
44 | ]]></title>
45 | 		<link>
46 | http://somefakesite/path/to/something-else.html
47 | </link>
48 | 		<description><![CDATA[
49 | The early bird gets the worm
50 | ]]></description>
51 | 		<dc:date>2011-11-04T09:34:54-07:00</dc:date>
52 | 		<dc:language>en-us</dc:language>
53 | 		<dc:rights>Copyright 2015 the authors</dc:rights>
54 | 		<dc:source>
55 | http://somefakesite/path/to/something-else.html
56 | </dc:source>
57 | 		<dc:title><![CDATA[
58 | This space intentionally left blank
59 | ]]></dc:title>
60 | 		<dc:type>text</dc:type>
61 | 		<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>
62 | 	</item>
63 | </rdf:RDF>


--------------------------------------------------------------------------------
/src/__fixtures__/Documents/RSS_Example.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <!-- http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
 3 | <rss version="2.0">
 4 |    <channel>
 5 |       <title>Liftoff News</title>
 6 |       <link>http://liftoff.msfc.nasa.gov/</link>
 7 |       <description>Liftoff to Space Exploration.</description>
 8 |       <language>en-us</language>
 9 |       <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
10 | 
11 |       <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
12 |       <docs>http://blogs.law.harvard.edu/tech/rss</docs>
13 |       <generator>Weblog Editor 2.0</generator>
14 |       <managingEditor>editor@example.com</managingEditor>
15 |       <webMaster>webmaster@example.com</webMaster>
16 |       <item>
17 | 
18 |          <title>Star City</title>
19 |          <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
20 |          <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
21 |          <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
22 |          <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
23 | 
24 |       </item>
25 |       <item>
26 |          <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
27 |          <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
28 |          <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
29 | 
30 |       </item>
31 |       <item>
32 |          <title>The Engine That Does More</title>
33 |          <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
34 |          <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that.</description>
35 |          <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
36 |          <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
37 | 
38 |       </item>
39 |       <item>
40 |          <title>Astronauts' Dirty Laundry</title>
41 |          <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
42 |          <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options.</description>
43 |          <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
44 |          <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
45 | 
46 |          <media:content height="200" medium="image" url="https://picsum.photos/200" width="200"/>
47 |       </item>
48 |    </channel>
49 | </rss>


--------------------------------------------------------------------------------
/src/__fixtures__/Documents/Svg.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 | <head>
 4 | 	<title>SVG test</title>
 5 | </head>
 6 | <body>
 7 | 	<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 8 | 		<title>Test</title>
 9 | 		<animate />
10 | 		<polygon />
11 | 		<g>
12 | 			<path>
13 | 				<title>x</title>
14 | 				<animate />
15 | 			</path>
16 | 		</g>
17 | 	</svg>
18 | </body>
19 | </html>
20 | 


--------------------------------------------------------------------------------
/src/__fixtures__/testHelper.ts:
--------------------------------------------------------------------------------
 1 | import type { Parser, Handler } from "../Parser.js";
 2 | 
 3 | interface Event {
 4 |     $event: string;
 5 |     data: unknown[];
 6 |     startIndex: number;
 7 |     endIndex: number;
 8 | }
 9 | 
/**
 * Builds a handler that records every call made to it and passes the
 * simplified list of events to the supplied callback on completion.
 *
 * Special cases:
 * - `onerror` calls the callback with the error right away.
 * - `onend` calls the callback with the events collected so far.
 * - `onreset` discards the events collected so far.
 * - `onparserinit` stores the parser and is not recorded.
 * - Consecutive `ontext` calls are merged into a single `text` event.
 *
 * @internal
 * @param callback Function to call with all events.
 * @returns A proxy that answers every property access with a recording function.
 */
export function getEventCollector(
    callback: (error: Error | null, events?: Event[]) => void,
): Partial<Handler> {
    const events: Event[] = [];
    let parser: Parser;

    function handle(event: string, data: unknown[]): void {
        if (event === "onerror") {
            callback(data[0] as Error);
            return;
        }

        if (event === "onend") {
            callback(null, events);
            return;
        }

        if (event === "onreset") {
            events.length = 0;
            return;
        }

        if (event === "onparserinit") {
            // Remember the parser, but don't collect the event.
            parser = data[0] as Parser;
            return;
        }

        // eslint-disable-next-line unicorn/prefer-at
        const last = events[events.length - 1];

        // Combine text nodes
        // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
        if (event === "ontext" && last && last.$event === "text") {
            (last.data[0] as string) += data[0];
            last.endIndex = parser.endIndex;
            return;
        }

        // Remove `undefined`s from attribute responses, as they cannot be represented in JSON.
        if (event === "onattribute" && data[2] === undefined) {
            data.pop();
        }

        const { startIndex, endIndex } = parser;

        if (!(startIndex <= endIndex)) {
            throw new Error(
                `Invalid start/end index ${startIndex} > ${endIndex}`,
            );
        }

        // Drop the leading `on` from the method name.
        events.push({ $event: event.slice(2), startIndex, endIndex, data });
    }

    return new Proxy(
        {},
        {
            get(_target, event: string) {
                return (...data: unknown[]) => handle(event, data);
            },
        },
    );
}
90 | 


--------------------------------------------------------------------------------
/src/__snapshots__/FeedHandler.spec.ts.snap:
--------------------------------------------------------------------------------
  1 | // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
  2 | 
  3 | exports[`parseFeed > (atomFeed) 1`] = `
  4 | {
  5 |   "author": "johndoe@example.com",
  6 |   "description": "A subtitle.",
  7 |   "id": "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6",
  8 |   "items": [
  9 |     {
 10 |       "description": "Some content.",
 11 |       "id": "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
 12 |       "link": "http://example.org/2003/12/13/atom03",
 13 |       "media": [],
 14 |       "pubDate": 2003-12-13T18:30:02.000Z,
 15 |       "title": "Atom-Powered Robots Run Amok",
 16 |     },
 17 |     {
 18 |       "media": [],
 19 |     },
 20 |   ],
 21 |   "link": "http://example.org/feed/",
 22 |   "title": "Example Feed",
 23 |   "type": "atom",
 24 |   "updated": 2003-12-13T18:30:02.000Z,
 25 | }
 26 | `;
 27 | 
 28 | exports[`parseFeed > (rdfFeed) 1`] = `
 29 | {
 30 |   "id": "",
 31 |   "items": [
 32 |     {
 33 |       "description": "Great test content<br>A link: <a href="http://github.com">Github</a>",
 34 |       "link": "http://somefakesite/path/to/something.html",
 35 |       "media": [],
 36 |       "pubDate": 2011-11-04T16:35:17.000Z,
 37 |       "title": "Fast HTML Parsing",
 38 |     },
 39 |     {
 40 |       "description": "The early bird gets the worm",
 41 |       "link": "http://somefakesite/path/to/something-else.html",
 42 |       "media": [],
 43 |       "pubDate": 2011-11-04T16:34:54.000Z,
 44 |       "title": "This space intentionally left blank",
 45 |     },
 46 |   ],
 47 |   "link": "https://github.com/fb55/htmlparser2/",
 48 |   "title": "A title to parse and remember",
 49 |   "type": "rdf",
 50 | }
 51 | `;
 52 | 
 53 | exports[`parseFeed > (rssFeed) 1`] = `
 54 | {
 55 |   "author": "editor@example.com",
 56 |   "description": "Liftoff to Space Exploration.",
 57 |   "id": "",
 58 |   "items": [
 59 |     {
 60 |       "description": "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.",
 61 |       "id": "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573",
 62 |       "link": "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp",
 63 |       "media": [],
 64 |       "pubDate": 2003-06-03T09:39:21.000Z,
 65 |       "title": "Star City",
 66 |     },
 67 |     {
 68 |       "description": "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.",
 69 |       "id": "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572",
 70 |       "media": [],
 71 |       "pubDate": 2003-05-30T11:06:42.000Z,
 72 |     },
 73 |     {
 74 |       "description": "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that.",
 75 |       "id": "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571",
 76 |       "link": "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp",
 77 |       "media": [],
 78 |       "pubDate": 2003-05-27T08:37:32.000Z,
 79 |       "title": "The Engine That Does More",
 80 |     },
 81 |     {
 82 |       "description": "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options.",
 83 |       "id": "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570",
 84 |       "link": "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp",
 85 |       "media": [
 86 |         {
 87 |           "height": 200,
 88 |           "isDefault": false,
 89 |           "medium": "image",
 90 |           "url": "https://picsum.photos/200",
 91 |           "width": 200,
 92 |         },
 93 |       ],
 94 |       "pubDate": 2003-05-20T08:56:02.000Z,
 95 |       "title": "Astronauts' Dirty Laundry",
 96 |     },
 97 |   ],
 98 |   "link": "http://liftoff.msfc.nasa.gov/",
 99 |   "title": "Liftoff News",
100 |   "type": "rss",
101 |   "updated": 2003-06-10T09:41:01.000Z,
102 | }
103 | `;
104 | 


--------------------------------------------------------------------------------
/src/__snapshots__/Parser.events.spec.ts.snap:
--------------------------------------------------------------------------------
   1 | // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
   2 | 
   3 | exports[`Events > </title> in <script> (#745) 1`] = `
   4 | [
   5 |   {
   6 |     "$event": "opentagname",
   7 |     "data": [
   8 |       "script",
   9 |     ],
  10 |     "endIndex": 7,
  11 |     "startIndex": 0,
  12 |   },
  13 |   {
  14 |     "$event": "opentag",
  15 |     "data": [
  16 |       "script",
  17 |       {},
  18 |       false,
  19 |     ],
  20 |     "endIndex": 7,
  21 |     "startIndex": 0,
  22 |   },
  23 |   {
  24 |     "$event": "text",
  25 |     "data": [
  26 |       "'</title>'",
  27 |     ],
  28 |     "endIndex": 17,
  29 |     "startIndex": 8,
  30 |   },
  31 |   {
  32 |     "$event": "closetag",
  33 |     "data": [
  34 |       "script",
  35 |       false,
  36 |     ],
  37 |     "endIndex": 26,
  38 |     "startIndex": 18,
  39 |   },
  40 | ]
  41 | `;
  42 | 
  43 | exports[`Events > Attribute in XML (see #1350) 1`] = `
  44 | [
  45 |   {
  46 |     "$event": "opentagname",
  47 |     "data": [
  48 |       "Page",
  49 |     ],
  50 |     "endIndex": 5,
  51 |     "startIndex": 0,
  52 |   },
  53 |   {
  54 |     "$event": "attribute",
  55 |     "data": [
  56 |       "title",
  57 |       "Hello world",
  58 |       """,
  59 |     ],
  60 |     "endIndex": 29,
  61 |     "startIndex": 10,
  62 |   },
  63 |   {
  64 |     "$event": "attribute",
  65 |     "data": [
  66 |       "actionBarVisible",
  67 |       "false",
  68 |       """,
  69 |     ],
  70 |     "endIndex": 58,
  71 |     "startIndex": 34,
  72 |   },
  73 |   {
  74 |     "$event": "opentag",
  75 |     "data": [
  76 |       "Page",
  77 |       {
  78 |         "actionBarVisible": "false",
  79 |         "title": "Hello world",
  80 |       },
  81 |       false,
  82 |     ],
  83 |     "endIndex": 59,
  84 |     "startIndex": 0,
  85 |   },
  86 |   {
  87 |     "$event": "closetag",
  88 |     "data": [
  89 |       "Page",
  90 |       true,
  91 |     ],
  92 |     "endIndex": 59,
  93 |     "startIndex": 0,
  94 |   },
  95 | ]
  96 | `;
  97 | 
  98 | exports[`Events > CDATA (inside special) 1`] = `
  99 | [
 100 |   {
 101 |     "$event": "opentagname",
 102 |     "data": [
 103 |       "script",
 104 |     ],
 105 |     "endIndex": 7,
 106 |     "startIndex": 0,
 107 |   },
 108 |   {
 109 |     "$event": "opentag",
 110 |     "data": [
 111 |       "script",
 112 |       {},
 113 |       false,
 114 |     ],
 115 |     "endIndex": 7,
 116 |     "startIndex": 0,
 117 |   },
 118 |   {
 119 |     "$event": "text",
 120 |     "data": [
 121 |       "/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/",
 122 |     ],
 123 |     "endIndex": 52,
 124 |     "startIndex": 8,
 125 |   },
 126 |   {
 127 |     "$event": "closetag",
 128 |     "data": [
 129 |       "script",
 130 |       false,
 131 |     ],
 132 |     "endIndex": 61,
 133 |     "startIndex": 53,
 134 |   },
 135 | ]
 136 | `;
 137 | 
 138 | exports[`Events > CDATA 1`] = `
 139 | [
 140 |   {
 141 |     "$event": "opentagname",
 142 |     "data": [
 143 |       "tag",
 144 |     ],
 145 |     "endIndex": 4,
 146 |     "startIndex": 0,
 147 |   },
 148 |   {
 149 |     "$event": "opentag",
 150 |     "data": [
 151 |       "tag",
 152 |       {},
 153 |       false,
 154 |     ],
 155 |     "endIndex": 4,
 156 |     "startIndex": 0,
 157 |   },
 158 |   {
 159 |     "$event": "cdatastart",
 160 |     "data": [],
 161 |     "endIndex": 41,
 162 |     "startIndex": 5,
 163 |   },
 164 |   {
 165 |     "$event": "text",
 166 |     "data": [
 167 |       " asdf ><asdf></adsf><> fo",
 168 |     ],
 169 |     "endIndex": 41,
 170 |     "startIndex": 5,
 171 |   },
 172 |   {
 173 |     "$event": "cdataend",
 174 |     "data": [],
 175 |     "endIndex": 41,
 176 |     "startIndex": 5,
 177 |   },
 178 |   {
 179 |     "$event": "closetag",
 180 |     "data": [
 181 |       "tag",
 182 |       false,
 183 |     ],
 184 |     "endIndex": 47,
 185 |     "startIndex": 42,
 186 |   },
 187 |   {
 188 |     "$event": "processinginstruction",
 189 |     "data": [
 190 |       "![CD",
 191 |       "![CD",
 192 |     ],
 193 |     "endIndex": 53,
 194 |     "startIndex": 48,
 195 |   },
 196 | ]
 197 | `;
 198 | 
 199 | exports[`Events > CDATA edge-cases 1`] = `
 200 | [
 201 |   {
 202 |     "$event": "processinginstruction",
 203 |     "data": [
 204 |       "![cdata",
 205 |       "![CDATA",
 206 |     ],
 207 |     "endIndex": 8,
 208 |     "startIndex": 0,
 209 |   },
 210 |   {
 211 |     "$event": "cdatastart",
 212 |     "data": [],
 213 |     "endIndex": 27,
 214 |     "startIndex": 9,
 215 |   },
 216 |   {
 217 |     "$event": "text",
 218 |     "data": [
 219 |       "[]]sdaf",
 220 |     ],
 221 |     "endIndex": 27,
 222 |     "startIndex": 9,
 223 |   },
 224 |   {
 225 |     "$event": "cdataend",
 226 |     "data": [],
 227 |     "endIndex": 27,
 228 |     "startIndex": 9,
 229 |   },
 230 |   {
 231 |     "$event": "cdatastart",
 232 |     "data": [],
 233 |     "endIndex": 40,
 234 |     "startIndex": 28,
 235 |   },
 236 |   {
 237 |     "$event": "text",
 238 |     "data": [
 239 |       "foo",
 240 |     ],
 241 |     "endIndex": 40,
 242 |     "startIndex": 28,
 243 |   },
 244 |   {
 245 |     "$event": "cdataend",
 246 |     "data": [],
 247 |     "endIndex": 40,
 248 |     "startIndex": 28,
 249 |   },
 250 | ]
 251 | `;
 252 | 
 253 | exports[`Events > CDATA in HTML 1`] = `
 254 | [
 255 |   {
 256 |     "$event": "comment",
 257 |     "data": [
 258 |       "[CDATA[ foo ]]",
 259 |     ],
 260 |     "endIndex": 16,
 261 |     "startIndex": 0,
 262 |   },
 263 |   {
 264 |     "$event": "commentend",
 265 |     "data": [],
 266 |     "endIndex": 16,
 267 |     "startIndex": 0,
 268 |   },
 269 | ]
 270 | `;
 271 | 
 272 | exports[`Events > CDATA more edge-cases 1`] = `
 273 | [
 274 |   {
 275 |     "$event": "cdatastart",
 276 |     "data": [],
 277 |     "endIndex": 23,
 278 |     "startIndex": 0,
 279 |   },
 280 |   {
 281 |     "$event": "text",
 282 |     "data": [
 283 |       "foo]bar]>baz",
 284 |     ],
 285 |     "endIndex": 23,
 286 |     "startIndex": 0,
 287 |   },
 288 |   {
 289 |     "$event": "cdataend",
 290 |     "data": [],
 291 |     "endIndex": 23,
 292 |     "startIndex": 0,
 293 |   },
 294 | ]
 295 | `;
 296 | 
 297 | exports[`Events > Comment edge-cases 1`] = `
 298 | [
 299 |   {
 300 |     "$event": "processinginstruction",
 301 |     "data": [
 302 |       "!-foo",
 303 |       "!-foo",
 304 |     ],
 305 |     "endIndex": 6,
 306 |     "startIndex": 0,
 307 |   },
 308 |   {
 309 |     "$event": "comment",
 310 |     "data": [
 311 |       " --- ",
 312 |     ],
 313 |     "endIndex": 18,
 314 |     "startIndex": 7,
 315 |   },
 316 |   {
 317 |     "$event": "commentend",
 318 |     "data": [],
 319 |     "endIndex": 18,
 320 |     "startIndex": 7,
 321 |   },
 322 |   {
 323 |     "$event": "comment",
 324 |     "data": [
 325 |       "foo",
 326 |     ],
 327 |     "endIndex": 26,
 328 |     "startIndex": 19,
 329 |   },
 330 |   {
 331 |     "$event": "commentend",
 332 |     "data": [],
 333 |     "endIndex": 26,
 334 |     "startIndex": 19,
 335 |   },
 336 | ]
 337 | `;
 338 | 
 339 | exports[`Events > Comment false ending 1`] = `
 340 | [
 341 |   {
 342 |     "$event": "comment",
 343 |     "data": [
 344 |       " a-b-> ",
 345 |     ],
 346 |     "endIndex": 13,
 347 |     "startIndex": 0,
 348 |   },
 349 |   {
 350 |     "$event": "commentend",
 351 |     "data": [],
 352 |     "endIndex": 13,
 353 |     "startIndex": 0,
 354 |   },
 355 | ]
 356 | `;
 357 | 
 358 | exports[`Events > Empty tag name 1`] = `
 359 | [
 360 |   {
 361 |     "$event": "text",
 362 |     "data": [
 363 |       "< ></ >",
 364 |     ],
 365 |     "endIndex": 6,
 366 |     "startIndex": 0,
 367 |   },
 368 | ]
 369 | `;
 370 | 
 371 | exports[`Events > Entities in attributes 1`] = `
 372 | [
 373 |   {
 374 |     "$event": "opentagname",
 375 |     "data": [
 376 |       "foo",
 377 |     ],
 378 |     "endIndex": 4,
 379 |     "startIndex": 0,
 380 |   },
 381 |   {
 382 |     "$event": "attribute",
 383 |     "data": [
 384 |       "bar",
 385 |       "&",
 386 |       null,
 387 |     ],
 388 |     "endIndex": 14,
 389 |     "startIndex": 5,
 390 |   },
 391 |   {
 392 |     "$event": "attribute",
 393 |     "data": [
 394 |       "baz",
 395 |       "&",
 396 |       """,
 397 |     ],
 398 |     "endIndex": 26,
 399 |     "startIndex": 15,
 400 |   },
 401 |   {
 402 |     "$event": "attribute",
 403 |     "data": [
 404 |       "boo",
 405 |       "&",
 406 |       "'",
 407 |     ],
 408 |     "endIndex": 38,
 409 |     "startIndex": 27,
 410 |   },
 411 |   {
 412 |     "$event": "attribute",
 413 |     "data": [
 414 |       "noo",
 415 |       "",
 416 |       null,
 417 |     ],
 418 |     "endIndex": 43,
 419 |     "startIndex": 39,
 420 |   },
 421 |   {
 422 |     "$event": "opentag",
 423 |     "data": [
 424 |       "foo",
 425 |       {
 426 |         "bar": "&",
 427 |         "baz": "&",
 428 |         "boo": "&",
 429 |         "noo": "",
 430 |       },
 431 |       false,
 432 |     ],
 433 |     "endIndex": 43,
 434 |     "startIndex": 0,
 435 |   },
 436 |   {
 437 |     "$event": "closetag",
 438 |     "data": [
 439 |       "foo",
 440 |       true,
 441 |     ],
 442 |     "endIndex": 44,
 443 |     "startIndex": 44,
 444 |   },
 445 | ]
 446 | `;
 447 | 
 448 | exports[`Events > Entity after < 1`] = `
 449 | [
 450 |   {
 451 |     "$event": "text",
 452 |     "data": [
 453 |       "<&",
 454 |     ],
 455 |     "endIndex": 5,
 456 |     "startIndex": 0,
 457 |   },
 458 | ]
 459 | `;
 460 | 
 461 | exports[`Events > Implicit close tags 1`] = `
 462 | [
 463 |   {
 464 |     "$event": "opentagname",
 465 |     "data": [
 466 |       "ol",
 467 |     ],
 468 |     "endIndex": 3,
 469 |     "startIndex": 0,
 470 |   },
 471 |   {
 472 |     "$event": "opentag",
 473 |     "data": [
 474 |       "ol",
 475 |       {},
 476 |       false,
 477 |     ],
 478 |     "endIndex": 3,
 479 |     "startIndex": 0,
 480 |   },
 481 |   {
 482 |     "$event": "opentagname",
 483 |     "data": [
 484 |       "li",
 485 |     ],
 486 |     "endIndex": 7,
 487 |     "startIndex": 4,
 488 |   },
 489 |   {
 490 |     "$event": "attribute",
 491 |     "data": [
 492 |       "class",
 493 |       "test",
 494 |       null,
 495 |     ],
 496 |     "endIndex": 18,
 497 |     "startIndex": 8,
 498 |   },
 499 |   {
 500 |     "$event": "opentag",
 501 |     "data": [
 502 |       "li",
 503 |       {
 504 |         "class": "test",
 505 |       },
 506 |       false,
 507 |     ],
 508 |     "endIndex": 18,
 509 |     "startIndex": 4,
 510 |   },
 511 |   {
 512 |     "$event": "opentagname",
 513 |     "data": [
 514 |       "div",
 515 |     ],
 516 |     "endIndex": 23,
 517 |     "startIndex": 19,
 518 |   },
 519 |   {
 520 |     "$event": "opentag",
 521 |     "data": [
 522 |       "div",
 523 |       {},
 524 |       false,
 525 |     ],
 526 |     "endIndex": 23,
 527 |     "startIndex": 19,
 528 |   },
 529 |   {
 530 |     "$event": "opentagname",
 531 |     "data": [
 532 |       "table",
 533 |     ],
 534 |     "endIndex": 30,
 535 |     "startIndex": 24,
 536 |   },
 537 |   {
 538 |     "$event": "attribute",
 539 |     "data": [
 540 |       "style",
 541 |       "width:100%",
 542 |       null,
 543 |     ],
 544 |     "endIndex": 47,
 545 |     "startIndex": 31,
 546 |   },
 547 |   {
 548 |     "$event": "opentag",
 549 |     "data": [
 550 |       "table",
 551 |       {
 552 |         "style": "width:100%",
 553 |       },
 554 |       false,
 555 |     ],
 556 |     "endIndex": 47,
 557 |     "startIndex": 24,
 558 |   },
 559 |   {
 560 |     "$event": "opentagname",
 561 |     "data": [
 562 |       "tr",
 563 |     ],
 564 |     "endIndex": 51,
 565 |     "startIndex": 48,
 566 |   },
 567 |   {
 568 |     "$event": "opentag",
 569 |     "data": [
 570 |       "tr",
 571 |       {},
 572 |       false,
 573 |     ],
 574 |     "endIndex": 51,
 575 |     "startIndex": 48,
 576 |   },
 577 |   {
 578 |     "$event": "opentagname",
 579 |     "data": [
 580 |       "th",
 581 |     ],
 582 |     "endIndex": 55,
 583 |     "startIndex": 52,
 584 |   },
 585 |   {
 586 |     "$event": "opentag",
 587 |     "data": [
 588 |       "th",
 589 |       {},
 590 |       false,
 591 |     ],
 592 |     "endIndex": 55,
 593 |     "startIndex": 52,
 594 |   },
 595 |   {
 596 |     "$event": "text",
 597 |     "data": [
 598 |       "TH",
 599 |     ],
 600 |     "endIndex": 57,
 601 |     "startIndex": 56,
 602 |   },
 603 |   {
 604 |     "$event": "closetag",
 605 |     "data": [
 606 |       "th",
 607 |       true,
 608 |     ],
 609 |     "endIndex": 61,
 610 |     "startIndex": 58,
 611 |   },
 612 |   {
 613 |     "$event": "opentagname",
 614 |     "data": [
 615 |       "td",
 616 |     ],
 617 |     "endIndex": 61,
 618 |     "startIndex": 58,
 619 |   },
 620 |   {
 621 |     "$event": "attribute",
 622 |     "data": [
 623 |       "colspan",
 624 |       "2",
 625 |       null,
 626 |     ],
 627 |     "endIndex": 71,
 628 |     "startIndex": 62,
 629 |   },
 630 |   {
 631 |     "$event": "opentag",
 632 |     "data": [
 633 |       "td",
 634 |       {
 635 |         "colspan": "2",
 636 |       },
 637 |       false,
 638 |     ],
 639 |     "endIndex": 71,
 640 |     "startIndex": 58,
 641 |   },
 642 |   {
 643 |     "$event": "opentagname",
 644 |     "data": [
 645 |       "h3",
 646 |     ],
 647 |     "endIndex": 75,
 648 |     "startIndex": 72,
 649 |   },
 650 |   {
 651 |     "$event": "opentag",
 652 |     "data": [
 653 |       "h3",
 654 |       {},
 655 |       false,
 656 |     ],
 657 |     "endIndex": 75,
 658 |     "startIndex": 72,
 659 |   },
 660 |   {
 661 |     "$event": "text",
 662 |     "data": [
 663 |       "Heading",
 664 |     ],
 665 |     "endIndex": 82,
 666 |     "startIndex": 76,
 667 |   },
 668 |   {
 669 |     "$event": "closetag",
 670 |     "data": [
 671 |       "h3",
 672 |       false,
 673 |     ],
 674 |     "endIndex": 87,
 675 |     "startIndex": 83,
 676 |   },
 677 |   {
 678 |     "$event": "closetag",
 679 |     "data": [
 680 |       "td",
 681 |       true,
 682 |     ],
 683 |     "endIndex": 91,
 684 |     "startIndex": 88,
 685 |   },
 686 |   {
 687 |     "$event": "closetag",
 688 |     "data": [
 689 |       "tr",
 690 |       true,
 691 |     ],
 692 |     "endIndex": 91,
 693 |     "startIndex": 88,
 694 |   },
 695 |   {
 696 |     "$event": "opentagname",
 697 |     "data": [
 698 |       "tr",
 699 |     ],
 700 |     "endIndex": 91,
 701 |     "startIndex": 88,
 702 |   },
 703 |   {
 704 |     "$event": "opentag",
 705 |     "data": [
 706 |       "tr",
 707 |       {},
 708 |       false,
 709 |     ],
 710 |     "endIndex": 91,
 711 |     "startIndex": 88,
 712 |   },
 713 |   {
 714 |     "$event": "opentagname",
 715 |     "data": [
 716 |       "td",
 717 |     ],
 718 |     "endIndex": 95,
 719 |     "startIndex": 92,
 720 |   },
 721 |   {
 722 |     "$event": "opentag",
 723 |     "data": [
 724 |       "td",
 725 |       {},
 726 |       false,
 727 |     ],
 728 |     "endIndex": 95,
 729 |     "startIndex": 92,
 730 |   },
 731 |   {
 732 |     "$event": "opentagname",
 733 |     "data": [
 734 |       "div",
 735 |     ],
 736 |     "endIndex": 100,
 737 |     "startIndex": 96,
 738 |   },
 739 |   {
 740 |     "$event": "opentag",
 741 |     "data": [
 742 |       "div",
 743 |       {},
 744 |       false,
 745 |     ],
 746 |     "endIndex": 100,
 747 |     "startIndex": 96,
 748 |   },
 749 |   {
 750 |     "$event": "text",
 751 |     "data": [
 752 |       "Div",
 753 |     ],
 754 |     "endIndex": 103,
 755 |     "startIndex": 101,
 756 |   },
 757 |   {
 758 |     "$event": "closetag",
 759 |     "data": [
 760 |       "div",
 761 |       false,
 762 |     ],
 763 |     "endIndex": 109,
 764 |     "startIndex": 104,
 765 |   },
 766 |   {
 767 |     "$event": "closetag",
 768 |     "data": [
 769 |       "td",
 770 |       true,
 771 |     ],
 772 |     "endIndex": 113,
 773 |     "startIndex": 110,
 774 |   },
 775 |   {
 776 |     "$event": "opentagname",
 777 |     "data": [
 778 |       "td",
 779 |     ],
 780 |     "endIndex": 113,
 781 |     "startIndex": 110,
 782 |   },
 783 |   {
 784 |     "$event": "opentag",
 785 |     "data": [
 786 |       "td",
 787 |       {},
 788 |       false,
 789 |     ],
 790 |     "endIndex": 113,
 791 |     "startIndex": 110,
 792 |   },
 793 |   {
 794 |     "$event": "opentagname",
 795 |     "data": [
 796 |       "div",
 797 |     ],
 798 |     "endIndex": 118,
 799 |     "startIndex": 114,
 800 |   },
 801 |   {
 802 |     "$event": "opentag",
 803 |     "data": [
 804 |       "div",
 805 |       {},
 806 |       false,
 807 |     ],
 808 |     "endIndex": 118,
 809 |     "startIndex": 114,
 810 |   },
 811 |   {
 812 |     "$event": "text",
 813 |     "data": [
 814 |       "Div2",
 815 |     ],
 816 |     "endIndex": 122,
 817 |     "startIndex": 119,
 818 |   },
 819 |   {
 820 |     "$event": "closetag",
 821 |     "data": [
 822 |       "div",
 823 |       false,
 824 |     ],
 825 |     "endIndex": 128,
 826 |     "startIndex": 123,
 827 |   },
 828 |   {
 829 |     "$event": "closetag",
 830 |     "data": [
 831 |       "td",
 832 |       true,
 833 |     ],
 834 |     "endIndex": 136,
 835 |     "startIndex": 129,
 836 |   },
 837 |   {
 838 |     "$event": "closetag",
 839 |     "data": [
 840 |       "tr",
 841 |       true,
 842 |     ],
 843 |     "endIndex": 136,
 844 |     "startIndex": 129,
 845 |   },
 846 |   {
 847 |     "$event": "closetag",
 848 |     "data": [
 849 |       "table",
 850 |       false,
 851 |     ],
 852 |     "endIndex": 136,
 853 |     "startIndex": 129,
 854 |   },
 855 |   {
 856 |     "$event": "closetag",
 857 |     "data": [
 858 |       "div",
 859 |       false,
 860 |     ],
 861 |     "endIndex": 142,
 862 |     "startIndex": 137,
 863 |   },
 864 |   {
 865 |     "$event": "closetag",
 866 |     "data": [
 867 |       "li",
 868 |       true,
 869 |     ],
 870 |     "endIndex": 146,
 871 |     "startIndex": 143,
 872 |   },
 873 |   {
 874 |     "$event": "opentagname",
 875 |     "data": [
 876 |       "li",
 877 |     ],
 878 |     "endIndex": 146,
 879 |     "startIndex": 143,
 880 |   },
 881 |   {
 882 |     "$event": "opentag",
 883 |     "data": [
 884 |       "li",
 885 |       {},
 886 |       false,
 887 |     ],
 888 |     "endIndex": 146,
 889 |     "startIndex": 143,
 890 |   },
 891 |   {
 892 |     "$event": "opentagname",
 893 |     "data": [
 894 |       "div",
 895 |     ],
 896 |     "endIndex": 151,
 897 |     "startIndex": 147,
 898 |   },
 899 |   {
 900 |     "$event": "opentag",
 901 |     "data": [
 902 |       "div",
 903 |       {},
 904 |       false,
 905 |     ],
 906 |     "endIndex": 151,
 907 |     "startIndex": 147,
 908 |   },
 909 |   {
 910 |     "$event": "opentagname",
 911 |     "data": [
 912 |       "h3",
 913 |     ],
 914 |     "endIndex": 155,
 915 |     "startIndex": 152,
 916 |   },
 917 |   {
 918 |     "$event": "opentag",
 919 |     "data": [
 920 |       "h3",
 921 |       {},
 922 |       false,
 923 |     ],
 924 |     "endIndex": 155,
 925 |     "startIndex": 152,
 926 |   },
 927 |   {
 928 |     "$event": "text",
 929 |     "data": [
 930 |       "Heading 2",
 931 |     ],
 932 |     "endIndex": 164,
 933 |     "startIndex": 156,
 934 |   },
 935 |   {
 936 |     "$event": "closetag",
 937 |     "data": [
 938 |       "h3",
 939 |       false,
 940 |     ],
 941 |     "endIndex": 169,
 942 |     "startIndex": 165,
 943 |   },
 944 |   {
 945 |     "$event": "closetag",
 946 |     "data": [
 947 |       "div",
 948 |       false,
 949 |     ],
 950 |     "endIndex": 175,
 951 |     "startIndex": 170,
 952 |   },
 953 |   {
 954 |     "$event": "closetag",
 955 |     "data": [
 956 |       "li",
 957 |       false,
 958 |     ],
 959 |     "endIndex": 180,
 960 |     "startIndex": 176,
 961 |   },
 962 |   {
 963 |     "$event": "closetag",
 964 |     "data": [
 965 |       "ol",
 966 |       false,
 967 |     ],
 968 |     "endIndex": 185,
 969 |     "startIndex": 181,
 970 |   },
 971 |   {
 972 |     "$event": "opentagname",
 973 |     "data": [
 974 |       "p",
 975 |     ],
 976 |     "endIndex": 188,
 977 |     "startIndex": 186,
 978 |   },
 979 |   {
 980 |     "$event": "opentag",
 981 |     "data": [
 982 |       "p",
 983 |       {},
 984 |       false,
 985 |     ],
 986 |     "endIndex": 188,
 987 |     "startIndex": 186,
 988 |   },
 989 |   {
 990 |     "$event": "text",
 991 |     "data": [
 992 |       "Para",
 993 |     ],
 994 |     "endIndex": 192,
 995 |     "startIndex": 189,
 996 |   },
 997 |   {
 998 |     "$event": "closetag",
 999 |     "data": [
1000 |       "p",
1001 |       true,
1002 |     ],
1003 |     "endIndex": 196,
1004 |     "startIndex": 193,
1005 |   },
1006 |   {
1007 |     "$event": "opentagname",
1008 |     "data": [
1009 |       "h4",
1010 |     ],
1011 |     "endIndex": 196,
1012 |     "startIndex": 193,
1013 |   },
1014 |   {
1015 |     "$event": "opentag",
1016 |     "data": [
1017 |       "h4",
1018 |       {},
1019 |       false,
1020 |     ],
1021 |     "endIndex": 196,
1022 |     "startIndex": 193,
1023 |   },
1024 |   {
1025 |     "$event": "text",
1026 |     "data": [
1027 |       "Heading 4",
1028 |     ],
1029 |     "endIndex": 205,
1030 |     "startIndex": 197,
1031 |   },
1032 |   {
1033 |     "$event": "closetag",
1034 |     "data": [
1035 |       "h4",
1036 |       false,
1037 |     ],
1038 |     "endIndex": 210,
1039 |     "startIndex": 206,
1040 |   },
1041 |   {
1042 |     "$event": "opentagname",
1043 |     "data": [
1044 |       "p",
1045 |     ],
1046 |     "endIndex": 213,
1047 |     "startIndex": 211,
1048 |   },
1049 |   {
1050 |     "$event": "opentag",
1051 |     "data": [
1052 |       "p",
1053 |       {},
1054 |       false,
1055 |     ],
1056 |     "endIndex": 213,
1057 |     "startIndex": 211,
1058 |   },
1059 |   {
1060 |     "$event": "closetag",
1061 |     "data": [
1062 |       "p",
1063 |       true,
1064 |     ],
1065 |     "endIndex": 217,
1066 |     "startIndex": 214,
1067 |   },
1068 |   {
1069 |     "$event": "opentagname",
1070 |     "data": [
1071 |       "ul",
1072 |     ],
1073 |     "endIndex": 217,
1074 |     "startIndex": 214,
1075 |   },
1076 |   {
1077 |     "$event": "opentag",
1078 |     "data": [
1079 |       "ul",
1080 |       {},
1081 |       false,
1082 |     ],
1083 |     "endIndex": 217,
1084 |     "startIndex": 214,
1085 |   },
1086 |   {
1087 |     "$event": "opentagname",
1088 |     "data": [
1089 |       "li",
1090 |     ],
1091 |     "endIndex": 221,
1092 |     "startIndex": 218,
1093 |   },
1094 |   {
1095 |     "$event": "opentag",
1096 |     "data": [
1097 |       "li",
1098 |       {},
1099 |       false,
1100 |     ],
1101 |     "endIndex": 221,
1102 |     "startIndex": 218,
1103 |   },
1104 |   {
1105 |     "$event": "text",
1106 |     "data": [
1107 |       "Hi",
1108 |     ],
1109 |     "endIndex": 223,
1110 |     "startIndex": 222,
1111 |   },
1112 |   {
1113 |     "$event": "closetag",
1114 |     "data": [
1115 |       "li",
1116 |       true,
1117 |     ],
1118 |     "endIndex": 227,
1119 |     "startIndex": 224,
1120 |   },
1121 |   {
1122 |     "$event": "opentagname",
1123 |     "data": [
1124 |       "li",
1125 |     ],
1126 |     "endIndex": 227,
1127 |     "startIndex": 224,
1128 |   },
1129 |   {
1130 |     "$event": "opentag",
1131 |     "data": [
1132 |       "li",
1133 |       {},
1134 |       false,
1135 |     ],
1136 |     "endIndex": 227,
1137 |     "startIndex": 224,
1138 |   },
1139 |   {
1140 |     "$event": "text",
1141 |     "data": [
1142 |       "bye",
1143 |     ],
1144 |     "endIndex": 230,
1145 |     "startIndex": 228,
1146 |   },
1147 |   {
1148 |     "$event": "closetag",
1149 |     "data": [
1150 |       "li",
1151 |       true,
1152 |     ],
1153 |     "endIndex": 235,
1154 |     "startIndex": 231,
1155 |   },
1156 |   {
1157 |     "$event": "closetag",
1158 |     "data": [
1159 |       "ul",
1160 |       false,
1161 |     ],
1162 |     "endIndex": 235,
1163 |     "startIndex": 231,
1164 |   },
1165 | ]
1166 | `;
1167 | 
1168 | exports[`Events > Implicit open p and br tags 1`] = `
1169 | [
1170 |   {
1171 |     "$event": "opentagname",
1172 |     "data": [
1173 |       "div",
1174 |     ],
1175 |     "endIndex": 4,
1176 |     "startIndex": 0,
1177 |   },
1178 |   {
1179 |     "$event": "opentag",
1180 |     "data": [
1181 |       "div",
1182 |       {},
1183 |       false,
1184 |     ],
1185 |     "endIndex": 4,
1186 |     "startIndex": 0,
1187 |   },
1188 |   {
1189 |     "$event": "text",
1190 |     "data": [
1191 |       "Hallo",
1192 |     ],
1193 |     "endIndex": 9,
1194 |     "startIndex": 5,
1195 |   },
1196 |   {
1197 |     "$event": "opentagname",
1198 |     "data": [
1199 |       "p",
1200 |     ],
1201 |     "endIndex": 13,
1202 |     "startIndex": 10,
1203 |   },
1204 |   {
1205 |     "$event": "opentag",
1206 |     "data": [
1207 |       "p",
1208 |       {},
1209 |       true,
1210 |     ],
1211 |     "endIndex": 13,
1212 |     "startIndex": 10,
1213 |   },
1214 |   {
1215 |     "$event": "closetag",
1216 |     "data": [
1217 |       "p",
1218 |       false,
1219 |     ],
1220 |     "endIndex": 13,
1221 |     "startIndex": 10,
1222 |   },
1223 |   {
1224 |     "$event": "text",
1225 |     "data": [
1226 |       "World",
1227 |     ],
1228 |     "endIndex": 18,
1229 |     "startIndex": 14,
1230 |   },
1231 |   {
1232 |     "$event": "opentagname",
1233 |     "data": [
1234 |       "br",
1235 |     ],
1236 |     "endIndex": 23,
1237 |     "startIndex": 19,
1238 |   },
1239 |   {
1240 |     "$event": "opentag",
1241 |     "data": [
1242 |       "br",
1243 |       {},
1244 |       true,
1245 |     ],
1246 |     "endIndex": 23,
1247 |     "startIndex": 19,
1248 |   },
1249 |   {
1250 |     "$event": "closetag",
1251 |     "data": [
1252 |       "br",
1253 |       false,
1254 |     ],
1255 |     "endIndex": 23,
1256 |     "startIndex": 19,
1257 |   },
1258 |   {
1259 |     "$event": "closetag",
1260 |     "data": [
1261 |       "div",
1262 |       false,
1263 |     ],
1264 |     "endIndex": 38,
1265 |     "startIndex": 33,
1266 |   },
1267 |   {
1268 |     "$event": "opentagname",
1269 |     "data": [
1270 |       "p",
1271 |     ],
1272 |     "endIndex": 42,
1273 |     "startIndex": 39,
1274 |   },
1275 |   {
1276 |     "$event": "opentag",
1277 |     "data": [
1278 |       "p",
1279 |       {},
1280 |       true,
1281 |     ],
1282 |     "endIndex": 42,
1283 |     "startIndex": 39,
1284 |   },
1285 |   {
1286 |     "$event": "closetag",
1287 |     "data": [
1288 |       "p",
1289 |       false,
1290 |     ],
1291 |     "endIndex": 42,
1292 |     "startIndex": 39,
1293 |   },
1294 |   {
1295 |     "$event": "opentagname",
1296 |     "data": [
1297 |       "br",
1298 |     ],
1299 |     "endIndex": 47,
1300 |     "startIndex": 43,
1301 |   },
1302 |   {
1303 |     "$event": "opentag",
1304 |     "data": [
1305 |       "br",
1306 |       {},
1307 |       true,
1308 |     ],
1309 |     "endIndex": 47,
1310 |     "startIndex": 43,
1311 |   },
1312 |   {
1313 |     "$event": "closetag",
1314 |     "data": [
1315 |       "br",
1316 |       false,
1317 |     ],
1318 |     "endIndex": 47,
1319 |     "startIndex": 43,
1320 |   },
1321 | ]
1322 | `;
1323 | 
1324 | exports[`Events > Long CDATA ending 1`] = `
1325 | [
1326 |   {
1327 |     "$event": "opentagname",
1328 |     "data": [
1329 |       "before",
1330 |     ],
1331 |     "endIndex": 7,
1332 |     "startIndex": 0,
1333 |   },
1334 |   {
1335 |     "$event": "opentag",
1336 |     "data": [
1337 |       "before",
1338 |       {},
1339 |       false,
1340 |     ],
1341 |     "endIndex": 9,
1342 |     "startIndex": 0,
1343 |   },
1344 |   {
1345 |     "$event": "closetag",
1346 |     "data": [
1347 |       "before",
1348 |       true,
1349 |     ],
1350 |     "endIndex": 9,
1351 |     "startIndex": 0,
1352 |   },
1353 |   {
1354 |     "$event": "opentagname",
1355 |     "data": [
1356 |       "tag",
1357 |     ],
1358 |     "endIndex": 14,
1359 |     "startIndex": 10,
1360 |   },
1361 |   {
1362 |     "$event": "opentag",
1363 |     "data": [
1364 |       "tag",
1365 |       {},
1366 |       false,
1367 |     ],
1368 |     "endIndex": 14,
1369 |     "startIndex": 10,
1370 |   },
1371 |   {
1372 |     "$event": "cdatastart",
1373 |     "data": [],
1374 |     "endIndex": 33,
1375 |     "startIndex": 15,
1376 |   },
1377 |   {
1378 |     "$event": "text",
1379 |     "data": [
1380 |       " text ]",
1381 |     ],
1382 |     "endIndex": 33,
1383 |     "startIndex": 15,
1384 |   },
1385 |   {
1386 |     "$event": "cdataend",
1387 |     "data": [],
1388 |     "endIndex": 33,
1389 |     "startIndex": 15,
1390 |   },
1391 |   {
1392 |     "$event": "closetag",
1393 |     "data": [
1394 |       "tag",
1395 |       false,
1396 |     ],
1397 |     "endIndex": 39,
1398 |     "startIndex": 34,
1399 |   },
1400 |   {
1401 |     "$event": "opentagname",
1402 |     "data": [
1403 |       "after",
1404 |     ],
1405 |     "endIndex": 46,
1406 |     "startIndex": 40,
1407 |   },
1408 |   {
1409 |     "$event": "opentag",
1410 |     "data": [
1411 |       "after",
1412 |       {},
1413 |       false,
1414 |     ],
1415 |     "endIndex": 48,
1416 |     "startIndex": 40,
1417 |   },
1418 |   {
1419 |     "$event": "closetag",
1420 |     "data": [
1421 |       "after",
1422 |       true,
1423 |     ],
1424 |     "endIndex": 48,
1425 |     "startIndex": 40,
1426 |   },
1427 | ]
1428 | `;
1429 | 
1430 | exports[`Events > Long comment ending 1`] = `
1431 | [
1432 |   {
1433 |     "$event": "opentagname",
1434 |     "data": [
1435 |       "meta",
1436 |     ],
1437 |     "endIndex": 5,
1438 |     "startIndex": 0,
1439 |   },
1440 |   {
1441 |     "$event": "attribute",
1442 |     "data": [
1443 |       "id",
1444 |       "before",
1445 |       "'",
1446 |     ],
1447 |     "endIndex": 17,
1448 |     "startIndex": 6,
1449 |   },
1450 |   {
1451 |     "$event": "opentag",
1452 |     "data": [
1453 |       "meta",
1454 |       {
1455 |         "id": "before",
1456 |       },
1457 |       false,
1458 |     ],
1459 |     "endIndex": 17,
1460 |     "startIndex": 0,
1461 |   },
1462 |   {
1463 |     "$event": "closetag",
1464 |     "data": [
1465 |       "meta",
1466 |       true,
1467 |     ],
1468 |     "endIndex": 17,
1469 |     "startIndex": 0,
1470 |   },
1471 |   {
1472 |     "$event": "comment",
1473 |     "data": [
1474 |       " text -",
1475 |     ],
1476 |     "endIndex": 31,
1477 |     "startIndex": 18,
1478 |   },
1479 |   {
1480 |     "$event": "commentend",
1481 |     "data": [],
1482 |     "endIndex": 31,
1483 |     "startIndex": 18,
1484 |   },
1485 |   {
1486 |     "$event": "opentagname",
1487 |     "data": [
1488 |       "meta",
1489 |     ],
1490 |     "endIndex": 37,
1491 |     "startIndex": 32,
1492 |   },
1493 |   {
1494 |     "$event": "attribute",
1495 |     "data": [
1496 |       "id",
1497 |       "after",
1498 |       "'",
1499 |     ],
1500 |     "endIndex": 48,
1501 |     "startIndex": 38,
1502 |   },
1503 |   {
1504 |     "$event": "opentag",
1505 |     "data": [
1506 |       "meta",
1507 |       {
1508 |         "id": "after",
1509 |       },
1510 |       false,
1511 |     ],
1512 |     "endIndex": 48,
1513 |     "startIndex": 32,
1514 |   },
1515 |   {
1516 |     "$event": "closetag",
1517 |     "data": [
1518 |       "meta",
1519 |       true,
1520 |     ],
1521 |     "endIndex": 48,
1522 |     "startIndex": 32,
1523 |   },
1524 | ]
1525 | `;
1526 | 
1527 | exports[`Events > Lowercase tags 1`] = `
1528 | [
1529 |   {
1530 |     "$event": "opentagname",
1531 |     "data": [
1532 |       "h1",
1533 |     ],
1534 |     "endIndex": 3,
1535 |     "startIndex": 0,
1536 |   },
1537 |   {
1538 |     "$event": "attribute",
1539 |     "data": [
1540 |       "class",
1541 |       "test",
1542 |       null,
1543 |     ],
1544 |     "endIndex": 14,
1545 |     "startIndex": 4,
1546 |   },
1547 |   {
1548 |     "$event": "opentag",
1549 |     "data": [
1550 |       "h1",
1551 |       {
1552 |         "class": "test",
1553 |       },
1554 |       false,
1555 |     ],
1556 |     "endIndex": 14,
1557 |     "startIndex": 0,
1558 |   },
1559 |   {
1560 |     "$event": "text",
1561 |     "data": [
1562 |       "adsf",
1563 |     ],
1564 |     "endIndex": 18,
1565 |     "startIndex": 15,
1566 |   },
1567 |   {
1568 |     "$event": "closetag",
1569 |     "data": [
1570 |       "h1",
1571 |       false,
1572 |     ],
1573 |     "endIndex": 23,
1574 |     "startIndex": 19,
1575 |   },
1576 | ]
1577 | `;
1578 | 
1579 | exports[`Events > Multi-byte entity 1`] = `
1580 | [
1581 |   {
1582 |     "$event": "text",
1583 |     "data": [
1584 |       "≧̸",
1585 |     ],
1586 |     "endIndex": 20,
1587 |     "startIndex": 0,
1588 |   },
1589 | ]
1590 | `;
1591 | 
1592 | exports[`Events > Not quite closed 1`] = `
1593 | [
1594 |   {
1595 |     "$event": "opentagname",
1596 |     "data": [
1597 |       "foo",
1598 |     ],
1599 |     "endIndex": 4,
1600 |     "startIndex": 0,
1601 |   },
1602 |   {
1603 |     "$event": "attribute",
1604 |     "data": [
1605 |       "bar",
1606 |       "",
1607 |     ],
1608 |     "endIndex": 9,
1609 |     "startIndex": 6,
1610 |   },
1611 |   {
1612 |     "$event": "opentag",
1613 |     "data": [
1614 |       "foo",
1615 |       {
1616 |         "bar": "",
1617 |       },
1618 |       false,
1619 |     ],
1620 |     "endIndex": 9,
1621 |     "startIndex": 0,
1622 |   },
1623 |   {
1624 |     "$event": "closetag",
1625 |     "data": [
1626 |       "foo",
1627 |       false,
1628 |     ],
1629 |     "endIndex": 15,
1630 |     "startIndex": 10,
1631 |   },
1632 | ]
1633 | `;
1634 | 
1635 | exports[`Events > Scripts creating other scripts 1`] = `
1636 | [
1637 |   {
1638 |     "$event": "opentagname",
1639 |     "data": [
1640 |       "p",
1641 |     ],
1642 |     "endIndex": 2,
1643 |     "startIndex": 0,
1644 |   },
1645 |   {
1646 |     "$event": "opentag",
1647 |     "data": [
1648 |       "p",
1649 |       {},
1650 |       false,
1651 |     ],
1652 |     "endIndex": 2,
1653 |     "startIndex": 0,
1654 |   },
1655 |   {
1656 |     "$event": "opentagname",
1657 |     "data": [
1658 |       "script",
1659 |     ],
1660 |     "endIndex": 10,
1661 |     "startIndex": 3,
1662 |   },
1663 |   {
1664 |     "$event": "opentag",
1665 |     "data": [
1666 |       "script",
1667 |       {},
1668 |       false,
1669 |     ],
1670 |     "endIndex": 10,
1671 |     "startIndex": 3,
1672 |   },
1673 |   {
1674 |     "$event": "text",
1675 |     "data": [
1676 |       "var str = '<script></'+'script>';",
1677 |     ],
1678 |     "endIndex": 43,
1679 |     "startIndex": 11,
1680 |   },
1681 |   {
1682 |     "$event": "closetag",
1683 |     "data": [
1684 |       "script",
1685 |       false,
1686 |     ],
1687 |     "endIndex": 52,
1688 |     "startIndex": 44,
1689 |   },
1690 |   {
1691 |     "$event": "closetag",
1692 |     "data": [
1693 |       "p",
1694 |       false,
1695 |     ],
1696 |     "endIndex": 56,
1697 |     "startIndex": 53,
1698 |   },
1699 | ]
1700 | `;
1701 | 
1702 | exports[`Events > Scripts ending with < 1`] = `
1703 | [
1704 |   {
1705 |     "$event": "opentagname",
1706 |     "data": [
1707 |       "script",
1708 |     ],
1709 |     "endIndex": 7,
1710 |     "startIndex": 0,
1711 |   },
1712 |   {
1713 |     "$event": "opentag",
1714 |     "data": [
1715 |       "script",
1716 |       {},
1717 |       false,
1718 |     ],
1719 |     "endIndex": 7,
1720 |     "startIndex": 0,
1721 |   },
1722 |   {
1723 |     "$event": "text",
1724 |     "data": [
1725 |       "<",
1726 |     ],
1727 |     "endIndex": 8,
1728 |     "startIndex": 8,
1729 |   },
1730 |   {
1731 |     "$event": "closetag",
1732 |     "data": [
1733 |       "script",
1734 |       false,
1735 |     ],
1736 |     "endIndex": 17,
1737 |     "startIndex": 9,
1738 |   },
1739 | ]
1740 | `;
1741 | 
1742 | exports[`Events > Self-closing indices (#941) 1`] = `
1743 | [
1744 |   {
1745 |     "$event": "opentagname",
1746 |     "data": [
1747 |       "xml",
1748 |     ],
1749 |     "endIndex": 4,
1750 |     "startIndex": 0,
1751 |   },
1752 |   {
1753 |     "$event": "opentag",
1754 |     "data": [
1755 |       "xml",
1756 |       {},
1757 |       false,
1758 |     ],
1759 |     "endIndex": 4,
1760 |     "startIndex": 0,
1761 |   },
1762 |   {
1763 |     "$event": "opentagname",
1764 |     "data": [
1765 |       "a",
1766 |     ],
1767 |     "endIndex": 7,
1768 |     "startIndex": 5,
1769 |   },
1770 |   {
1771 |     "$event": "opentag",
1772 |     "data": [
1773 |       "a",
1774 |       {},
1775 |       false,
1776 |     ],
1777 |     "endIndex": 8,
1778 |     "startIndex": 5,
1779 |   },
1780 |   {
1781 |     "$event": "closetag",
1782 |     "data": [
1783 |       "a",
1784 |       true,
1785 |     ],
1786 |     "endIndex": 8,
1787 |     "startIndex": 5,
1788 |   },
1789 |   {
1790 |     "$event": "opentagname",
1791 |     "data": [
1792 |       "b",
1793 |     ],
1794 |     "endIndex": 11,
1795 |     "startIndex": 9,
1796 |   },
1797 |   {
1798 |     "$event": "opentag",
1799 |     "data": [
1800 |       "b",
1801 |       {},
1802 |       false,
1803 |     ],
1804 |     "endIndex": 12,
1805 |     "startIndex": 9,
1806 |   },
1807 |   {
1808 |     "$event": "closetag",
1809 |     "data": [
1810 |       "b",
1811 |       true,
1812 |     ],
1813 |     "endIndex": 12,
1814 |     "startIndex": 9,
1815 |   },
1816 |   {
1817 |     "$event": "closetag",
1818 |     "data": [
1819 |       "xml",
1820 |       false,
1821 |     ],
1822 |     "endIndex": 18,
1823 |     "startIndex": 13,
1824 |   },
1825 | ]
1826 | `;
1827 | 
1828 | exports[`Events > Special special tags 1`] = `
1829 | [
1830 |   {
1831 |     "$event": "opentagname",
1832 |     "data": [
1833 |       "title",
1834 |     ],
1835 |     "endIndex": 6,
1836 |     "startIndex": 0,
1837 |   },
1838 |   {
1839 |     "$event": "opentag",
1840 |     "data": [
1841 |       "title",
1842 |       {},
1843 |       false,
1844 |     ],
1845 |     "endIndex": 6,
1846 |     "startIndex": 0,
1847 |   },
1848 |   {
1849 |     "$event": "text",
1850 |     "data": [
1851 |       "<b>foo</b><title>",
1852 |     ],
1853 |     "endIndex": 23,
1854 |     "startIndex": 7,
1855 |   },
1856 |   {
1857 |     "$event": "closetag",
1858 |     "data": [
1859 |       "title",
1860 |       false,
1861 |     ],
1862 |     "endIndex": 31,
1863 |     "startIndex": 24,
1864 |   },
1865 |   {
1866 |     "$event": "opentagname",
1867 |     "data": [
1868 |       "sitle",
1869 |     ],
1870 |     "endIndex": 38,
1871 |     "startIndex": 32,
1872 |   },
1873 |   {
1874 |     "$event": "opentag",
1875 |     "data": [
1876 |       "sitle",
1877 |       {},
1878 |       false,
1879 |     ],
1880 |     "endIndex": 38,
1881 |     "startIndex": 32,
1882 |   },
1883 |   {
1884 |     "$event": "opentagname",
1885 |     "data": [
1886 |       "b",
1887 |     ],
1888 |     "endIndex": 41,
1889 |     "startIndex": 39,
1890 |   },
1891 |   {
1892 |     "$event": "opentag",
1893 |     "data": [
1894 |       "b",
1895 |       {},
1896 |       false,
1897 |     ],
1898 |     "endIndex": 41,
1899 |     "startIndex": 39,
1900 |   },
1901 |   {
1902 |     "$event": "closetag",
1903 |     "data": [
1904 |       "b",
1905 |       false,
1906 |     ],
1907 |     "endIndex": 45,
1908 |     "startIndex": 42,
1909 |   },
1910 |   {
1911 |     "$event": "closetag",
1912 |     "data": [
1913 |       "sitle",
1914 |       false,
1915 |     ],
1916 |     "endIndex": 53,
1917 |     "startIndex": 46,
1918 |   },
1919 |   {
1920 |     "$event": "opentagname",
1921 |     "data": [
1922 |       "ttyle",
1923 |     ],
1924 |     "endIndex": 60,
1925 |     "startIndex": 54,
1926 |   },
1927 |   {
1928 |     "$event": "opentag",
1929 |     "data": [
1930 |       "ttyle",
1931 |       {},
1932 |       false,
1933 |     ],
1934 |     "endIndex": 60,
1935 |     "startIndex": 54,
1936 |   },
1937 |   {
1938 |     "$event": "opentagname",
1939 |     "data": [
1940 |       "b",
1941 |     ],
1942 |     "endIndex": 63,
1943 |     "startIndex": 61,
1944 |   },
1945 |   {
1946 |     "$event": "opentag",
1947 |     "data": [
1948 |       "b",
1949 |       {},
1950 |       false,
1951 |     ],
1952 |     "endIndex": 63,
1953 |     "startIndex": 61,
1954 |   },
1955 |   {
1956 |     "$event": "closetag",
1957 |     "data": [
1958 |       "b",
1959 |       false,
1960 |     ],
1961 |     "endIndex": 67,
1962 |     "startIndex": 64,
1963 |   },
1964 |   {
1965 |     "$event": "closetag",
1966 |     "data": [
1967 |       "ttyle",
1968 |       false,
1969 |     ],
1970 |     "endIndex": 75,
1971 |     "startIndex": 68,
1972 |   },
1973 |   {
1974 |     "$event": "opentagname",
1975 |     "data": [
1976 |       "script",
1977 |     ],
1978 |     "endIndex": 83,
1979 |     "startIndex": 76,
1980 |   },
1981 |   {
1982 |     "$event": "opentag",
1983 |     "data": [
1984 |       "script",
1985 |       {},
1986 |       false,
1987 |     ],
1988 |     "endIndex": 83,
1989 |     "startIndex": 76,
1990 |   },
1991 |   {
1992 |     "$event": "text",
1993 |     "data": [
1994 |       "</scripter</soo",
1995 |     ],
1996 |     "endIndex": 98,
1997 |     "startIndex": 84,
1998 |   },
1999 |   {
2000 |     "$event": "closetag",
2001 |     "data": [
2002 |       "script",
2003 |       false,
2004 |     ],
2005 |     "endIndex": 107,
2006 |     "startIndex": 99,
2007 |   },
2008 |   {
2009 |     "$event": "opentagname",
2010 |     "data": [
2011 |       "style",
2012 |     ],
2013 |     "endIndex": 114,
2014 |     "startIndex": 108,
2015 |   },
2016 |   {
2017 |     "$event": "opentag",
2018 |     "data": [
2019 |       "style",
2020 |       {},
2021 |       false,
2022 |     ],
2023 |     "endIndex": 114,
2024 |     "startIndex": 108,
2025 |   },
2026 |   {
2027 |     "$event": "text",
2028 |     "data": [
2029 |       "</styler",
2030 |     ],
2031 |     "endIndex": 122,
2032 |     "startIndex": 115,
2033 |   },
2034 |   {
2035 |     "$event": "closetag",
2036 |     "data": [
2037 |       "style",
2038 |       false,
2039 |     ],
2040 |     "endIndex": 130,
2041 |     "startIndex": 123,
2042 |   },
2043 |   {
2044 |     "$event": "opentagname",
2045 |     "data": [
2046 |       "scipt",
2047 |     ],
2048 |     "endIndex": 137,
2049 |     "startIndex": 131,
2050 |   },
2051 |   {
2052 |     "$event": "opentag",
2053 |     "data": [
2054 |       "scipt",
2055 |       {},
2056 |       false,
2057 |     ],
2058 |     "endIndex": 137,
2059 |     "startIndex": 131,
2060 |   },
2061 |   {
2062 |     "$event": "opentagname",
2063 |     "data": [
2064 |       "stylee",
2065 |     ],
2066 |     "endIndex": 145,
2067 |     "startIndex": 138,
2068 |   },
2069 |   {
2070 |     "$event": "opentag",
2071 |     "data": [
2072 |       "stylee",
2073 |       {},
2074 |       false,
2075 |     ],
2076 |     "endIndex": 145,
2077 |     "startIndex": 138,
2078 |   },
2079 |   {
2080 |     "$event": "opentagname",
2081 |     "data": [
2082 |       "scriptee",
2083 |     ],
2084 |     "endIndex": 155,
2085 |     "startIndex": 146,
2086 |   },
2087 |   {
2088 |     "$event": "opentag",
2089 |     "data": [
2090 |       "scriptee",
2091 |       {},
2092 |       false,
2093 |     ],
2094 |     "endIndex": 155,
2095 |     "startIndex": 146,
2096 |   },
2097 |   {
2098 |     "$event": "opentagname",
2099 |     "data": [
2100 |       "soo",
2101 |     ],
2102 |     "endIndex": 160,
2103 |     "startIndex": 156,
2104 |   },
2105 |   {
2106 |     "$event": "opentag",
2107 |     "data": [
2108 |       "soo",
2109 |       {},
2110 |       false,
2111 |     ],
2112 |     "endIndex": 160,
2113 |     "startIndex": 156,
2114 |   },
2115 |   {
2116 |     "$event": "closetag",
2117 |     "data": [
2118 |       "soo",
2119 |       true,
2120 |     ],
2121 |     "endIndex": 161,
2122 |     "startIndex": 161,
2123 |   },
2124 |   {
2125 |     "$event": "closetag",
2126 |     "data": [
2127 |       "scriptee",
2128 |       true,
2129 |     ],
2130 |     "endIndex": 161,
2131 |     "startIndex": 161,
2132 |   },
2133 |   {
2134 |     "$event": "closetag",
2135 |     "data": [
2136 |       "stylee",
2137 |       true,
2138 |     ],
2139 |     "endIndex": 161,
2140 |     "startIndex": 161,
2141 |   },
2142 |   {
2143 |     "$event": "closetag",
2144 |     "data": [
2145 |       "scipt",
2146 |       true,
2147 |     ],
2148 |     "endIndex": 161,
2149 |     "startIndex": 161,
2150 |   },
2151 | ]
2152 | `;
2153 | 
2154 | exports[`Events > Start & end indices from domhandler 1`] = `
2155 | [
2156 |   {
2157 |     "$event": "processinginstruction",
2158 |     "data": [
2159 |       "!doctype",
2160 |       "!DOCTYPE html",
2161 |     ],
2162 |     "endIndex": 14,
2163 |     "startIndex": 0,
2164 |   },
2165 |   {
2166 |     "$event": "text",
2167 |     "data": [
2168 |       " ",
2169 |     ],
2170 |     "endIndex": 15,
2171 |     "startIndex": 15,
2172 |   },
2173 |   {
2174 |     "$event": "opentagname",
2175 |     "data": [
2176 |       "html",
2177 |     ],
2178 |     "endIndex": 21,
2179 |     "startIndex": 16,
2180 |   },
2181 |   {
2182 |     "$event": "opentag",
2183 |     "data": [
2184 |       "html",
2185 |       {},
2186 |       false,
2187 |     ],
2188 |     "endIndex": 21,
2189 |     "startIndex": 16,
2190 |   },
2191 |   {
2192 |     "$event": "text",
2193 |     "data": [
2194 |       " ",
2195 |     ],
2196 |     "endIndex": 22,
2197 |     "startIndex": 22,
2198 |   },
2199 |   {
2200 |     "$event": "opentagname",
2201 |     "data": [
2202 |       "title",
2203 |     ],
2204 |     "endIndex": 29,
2205 |     "startIndex": 23,
2206 |   },
2207 |   {
2208 |     "$event": "opentag",
2209 |     "data": [
2210 |       "title",
2211 |       {},
2212 |       false,
2213 |     ],
2214 |     "endIndex": 29,
2215 |     "startIndex": 23,
2216 |   },
2217 |   {
2218 |     "$event": "text",
2219 |     "data": [
2220 |       "The Title",
2221 |     ],
2222 |     "endIndex": 38,
2223 |     "startIndex": 30,
2224 |   },
2225 |   {
2226 |     "$event": "closetag",
2227 |     "data": [
2228 |       "title",
2229 |       false,
2230 |     ],
2231 |     "endIndex": 46,
2232 |     "startIndex": 39,
2233 |   },
2234 |   {
2235 |     "$event": "text",
2236 |     "data": [
2237 |       " ",
2238 |     ],
2239 |     "endIndex": 47,
2240 |     "startIndex": 47,
2241 |   },
2242 |   {
2243 |     "$event": "opentagname",
2244 |     "data": [
2245 |       "body",
2246 |     ],
2247 |     "endIndex": 53,
2248 |     "startIndex": 48,
2249 |   },
2250 |   {
2251 |     "$event": "attribute",
2252 |     "data": [
2253 |       "class",
2254 |       "foo",
2255 |       "'",
2256 |     ],
2257 |     "endIndex": 65,
2258 |     "startIndex": 54,
2259 |   },
2260 |   {
2261 |     "$event": "opentag",
2262 |     "data": [
2263 |       "body",
2264 |       {
2265 |         "class": "foo",
2266 |       },
2267 |       false,
2268 |     ],
2269 |     "endIndex": 65,
2270 |     "startIndex": 48,
2271 |   },
2272 |   {
2273 |     "$event": "text",
2274 |     "data": [
2275 |       "Hello world ",
2276 |     ],
2277 |     "endIndex": 77,
2278 |     "startIndex": 66,
2279 |   },
2280 |   {
2281 |     "$event": "opentagname",
2282 |     "data": [
2283 |       "p",
2284 |     ],
2285 |     "endIndex": 80,
2286 |     "startIndex": 78,
2287 |   },
2288 |   {
2289 |     "$event": "opentag",
2290 |     "data": [
2291 |       "p",
2292 |       {},
2293 |       false,
2294 |     ],
2295 |     "endIndex": 80,
2296 |     "startIndex": 78,
2297 |   },
2298 |   {
2299 |     "$event": "closetag",
2300 |     "data": [
2301 |       "p",
2302 |       false,
2303 |     ],
2304 |     "endIndex": 84,
2305 |     "startIndex": 81,
2306 |   },
2307 |   {
2308 |     "$event": "closetag",
2309 |     "data": [
2310 |       "body",
2311 |       false,
2312 |     ],
2313 |     "endIndex": 91,
2314 |     "startIndex": 85,
2315 |   },
2316 |   {
2317 |     "$event": "text",
2318 |     "data": [
2319 |       " ",
2320 |     ],
2321 |     "endIndex": 92,
2322 |     "startIndex": 92,
2323 |   },
2324 |   {
2325 |     "$event": "comment",
2326 |     "data": [
2327 |       " the comment ",
2328 |     ],
2329 |     "endIndex": 112,
2330 |     "startIndex": 93,
2331 |   },
2332 |   {
2333 |     "$event": "commentend",
2334 |     "data": [],
2335 |     "endIndex": 112,
2336 |     "startIndex": 93,
2337 |   },
2338 |   {
2339 |     "$event": "text",
2340 |     "data": [
2341 |       " ",
2342 |     ],
2343 |     "endIndex": 113,
2344 |     "startIndex": 113,
2345 |   },
2346 |   {
2347 |     "$event": "closetag",
2348 |     "data": [
2349 |       "html",
2350 |       false,
2351 |     ],
2352 |     "endIndex": 120,
2353 |     "startIndex": 114,
2354 |   },
2355 |   {
2356 |     "$event": "text",
2357 |     "data": [
2358 |       " ",
2359 |     ],
2360 |     "endIndex": 121,
2361 |     "startIndex": 121,
2362 |   },
2363 | ]
2364 | `;
2365 | 
2366 | exports[`Events > Template script tags 1`] = `
2367 | [
2368 |   {
2369 |     "$event": "opentagname",
2370 |     "data": [
2371 |       "p",
2372 |     ],
2373 |     "endIndex": 2,
2374 |     "startIndex": 0,
2375 |   },
2376 |   {
2377 |     "$event": "opentag",
2378 |     "data": [
2379 |       "p",
2380 |       {},
2381 |       false,
2382 |     ],
2383 |     "endIndex": 2,
2384 |     "startIndex": 0,
2385 |   },
2386 |   {
2387 |     "$event": "opentagname",
2388 |     "data": [
2389 |       "script",
2390 |     ],
2391 |     "endIndex": 10,
2392 |     "startIndex": 3,
2393 |   },
2394 |   {
2395 |     "$event": "attribute",
2396 |     "data": [
2397 |       "type",
2398 |       "text/template",
2399 |       """,
2400 |     ],
2401 |     "endIndex": 31,
2402 |     "startIndex": 11,
2403 |   },
2404 |   {
2405 |     "$event": "opentag",
2406 |     "data": [
2407 |       "script",
2408 |       {
2409 |         "type": "text/template",
2410 |       },
2411 |       false,
2412 |     ],
2413 |     "endIndex": 31,
2414 |     "startIndex": 3,
2415 |   },
2416 |   {
2417 |     "$event": "text",
2418 |     "data": [
2419 |       "<h1>Heading1</h1>",
2420 |     ],
2421 |     "endIndex": 48,
2422 |     "startIndex": 32,
2423 |   },
2424 |   {
2425 |     "$event": "closetag",
2426 |     "data": [
2427 |       "script",
2428 |       false,
2429 |     ],
2430 |     "endIndex": 57,
2431 |     "startIndex": 49,
2432 |   },
2433 |   {
2434 |     "$event": "closetag",
2435 |     "data": [
2436 |       "p",
2437 |       false,
2438 |     ],
2439 |     "endIndex": 61,
2440 |     "startIndex": 58,
2441 |   },
2442 | ]
2443 | `;
2444 | 
2445 | exports[`Events > Trailing legacy entity 1`] = `
2446 | [
2447 |   {
2448 |     "$event": "text",
2449 |     "data": [
2450 |       "⨱×bar",
2451 |     ],
2452 |     "endIndex": 18,
2453 |     "startIndex": 0,
2454 |   },
2455 | ]
2456 | `;
2457 | 
2458 | exports[`Events > Trailing numeric entity 1`] = `
2459 | [
2460 |   {
2461 |     "$event": "text",
2462 |     "data": [
2463 |       "55",
2464 |     ],
2465 |     "endIndex": 7,
2466 |     "startIndex": 0,
2467 |   },
2468 | ]
2469 | `;
2470 | 
2471 | exports[`Events > XML tags 1`] = `
2472 | [
2473 |   {
2474 |     "$event": "opentagname",
2475 |     "data": [
2476 |       ":foo",
2477 |     ],
2478 |     "endIndex": 5,
2479 |     "startIndex": 0,
2480 |   },
2481 |   {
2482 |     "$event": "opentag",
2483 |     "data": [
2484 |       ":foo",
2485 |       {},
2486 |       false,
2487 |     ],
2488 |     "endIndex": 5,
2489 |     "startIndex": 0,
2490 |   },
2491 |   {
2492 |     "$event": "opentagname",
2493 |     "data": [
2494 |       "_bar",
2495 |     ],
2496 |     "endIndex": 11,
2497 |     "startIndex": 6,
2498 |   },
2499 |   {
2500 |     "$event": "opentag",
2501 |     "data": [
2502 |       "_bar",
2503 |       {},
2504 |       false,
2505 |     ],
2506 |     "endIndex": 11,
2507 |     "startIndex": 6,
2508 |   },
2509 |   {
2510 |     "$event": "closetag",
2511 |     "data": [
2512 |       "_bar",
2513 |       true,
2514 |     ],
2515 |     "endIndex": 12,
2516 |     "startIndex": 12,
2517 |   },
2518 |   {
2519 |     "$event": "closetag",
2520 |     "data": [
2521 |       ":foo",
2522 |       true,
2523 |     ],
2524 |     "endIndex": 12,
2525 |     "startIndex": 12,
2526 |   },
2527 | ]
2528 | `;
2529 | 
2530 | exports[`Events > attributes (no white space, no value, no quotes) 1`] = `
2531 | [
2532 |   {
2533 |     "$event": "opentagname",
2534 |     "data": [
2535 |       "button",
2536 |     ],
2537 |     "endIndex": 7,
2538 |     "startIndex": 0,
2539 |   },
2540 |   {
2541 |     "$event": "attribute",
2542 |     "data": [
2543 |       "class",
2544 |       "test0",
2545 |       """,
2546 |     ],
2547 |     "endIndex": 21,
2548 |     "startIndex": 8,
2549 |   },
2550 |   {
2551 |     "$event": "attribute",
2552 |     "data": [
2553 |       "title",
2554 |       "test1",
2555 |       """,
2556 |     ],
2557 |     "endIndex": 34,
2558 |     "startIndex": 21,
2559 |   },
2560 |   {
2561 |     "$event": "attribute",
2562 |     "data": [
2563 |       "disabled",
2564 |       "",
2565 |     ],
2566 |     "endIndex": 43,
2567 |     "startIndex": 35,
2568 |   },
2569 |   {
2570 |     "$event": "attribute",
2571 |     "data": [
2572 |       "value",
2573 |       "test2",
2574 |       null,
2575 |     ],
2576 |     "endIndex": 55,
2577 |     "startIndex": 44,
2578 |   },
2579 |   {
2580 |     "$event": "opentag",
2581 |     "data": [
2582 |       "button",
2583 |       {
2584 |         "class": "test0",
2585 |         "disabled": "",
2586 |         "title": "test1",
2587 |         "value": "test2",
2588 |       },
2589 |       false,
2590 |     ],
2591 |     "endIndex": 55,
2592 |     "startIndex": 0,
2593 |   },
2594 |   {
2595 |     "$event": "text",
2596 |     "data": [
2597 |       "adsf",
2598 |     ],
2599 |     "endIndex": 59,
2600 |     "startIndex": 56,
2601 |   },
2602 |   {
2603 |     "$event": "closetag",
2604 |     "data": [
2605 |       "button",
2606 |       false,
2607 |     ],
2608 |     "endIndex": 68,
2609 |     "startIndex": 60,
2610 |   },
2611 | ]
2612 | `;
2613 | 
2614 | exports[`Events > crazy attribute 1`] = `
2615 | [
2616 |   {
2617 |     "$event": "opentagname",
2618 |     "data": [
2619 |       "p",
2620 |     ],
2621 |     "endIndex": 2,
2622 |     "startIndex": 0,
2623 |   },
2624 |   {
2625 |     "$event": "attribute",
2626 |     "data": [
2627 |       "<",
2628 |       "",
2629 |       "'",
2630 |     ],
2631 |     "endIndex": 9,
2632 |     "startIndex": 3,
2633 |   },
2634 |   {
2635 |     "$event": "attribute",
2636 |     "data": [
2637 |       "fail",
2638 |       "",
2639 |     ],
2640 |     "endIndex": 14,
2641 |     "startIndex": 10,
2642 |   },
2643 |   {
2644 |     "$event": "opentag",
2645 |     "data": [
2646 |       "p",
2647 |       {
2648 |         "<": "",
2649 |         "fail": "",
2650 |       },
2651 |       false,
2652 |     ],
2653 |     "endIndex": 14,
2654 |     "startIndex": 0,
2655 |   },
2656 |   {
2657 |     "$event": "text",
2658 |     "data": [
2659 |       "stuff",
2660 |     ],
2661 |     "endIndex": 19,
2662 |     "startIndex": 15,
2663 |   },
2664 |   {
2665 |     "$event": "closetag",
2666 |     "data": [
2667 |       "p",
2668 |       false,
2669 |     ],
2670 |     "endIndex": 23,
2671 |     "startIndex": 20,
2672 |   },
2673 | ]
2674 | `;
2675 | 
2676 | exports[`Events > double attribute 1`] = `
2677 | [
2678 |   {
2679 |     "$event": "opentagname",
2680 |     "data": [
2681 |       "h1",
2682 |     ],
2683 |     "endIndex": 3,
2684 |     "startIndex": 0,
2685 |   },
2686 |   {
2687 |     "$event": "attribute",
2688 |     "data": [
2689 |       "class",
2690 |       "test",
2691 |       null,
2692 |     ],
2693 |     "endIndex": 14,
2694 |     "startIndex": 4,
2695 |   },
2696 |   {
2697 |     "$event": "attribute",
2698 |     "data": [
2699 |       "class",
2700 |       "boo",
2701 |       null,
2702 |     ],
2703 |     "endIndex": 24,
2704 |     "startIndex": 15,
2705 |   },
2706 |   {
2707 |     "$event": "opentag",
2708 |     "data": [
2709 |       "h1",
2710 |       {
2711 |         "class": "test",
2712 |       },
2713 |       false,
2714 |     ],
2715 |     "endIndex": 24,
2716 |     "startIndex": 0,
2717 |   },
2718 |   {
2719 |     "$event": "closetag",
2720 |     "data": [
2721 |       "h1",
2722 |       false,
2723 |     ],
2724 |     "endIndex": 29,
2725 |     "startIndex": 25,
2726 |   },
2727 | ]
2728 | `;
2729 | 
2730 | exports[`Events > double brackets 1`] = `
2731 | [
2732 |   {
2733 |     "$event": "text",
2734 |     "data": [
2735 |       "<",
2736 |     ],
2737 |     "endIndex": 0,
2738 |     "startIndex": 0,
2739 |   },
2740 |   {
2741 |     "$event": "opentagname",
2742 |     "data": [
2743 |       "princess-purpose",
2744 |     ],
2745 |     "endIndex": 18,
2746 |     "startIndex": 1,
2747 |   },
2748 |   {
2749 |     "$event": "opentag",
2750 |     "data": [
2751 |       "princess-purpose",
2752 |       {},
2753 |       false,
2754 |     ],
2755 |     "endIndex": 18,
2756 |     "startIndex": 1,
2757 |   },
2758 |   {
2759 |     "$event": "text",
2760 |     "data": [
2761 |       ">testing",
2762 |     ],
2763 |     "endIndex": 26,
2764 |     "startIndex": 19,
2765 |   },
2766 |   {
2767 |     "$event": "closetag",
2768 |     "data": [
2769 |       "princess-purpose",
2770 |       false,
2771 |     ],
2772 |     "endIndex": 45,
2773 |     "startIndex": 27,
2774 |   },
2775 | ]
2776 | `;
2777 | 
2778 | exports[`Events > end slash: as part of attrib value of non-void element 1`] = `
2779 | [
2780 |   {
2781 |     "$event": "opentagname",
2782 |     "data": [
2783 |       "a",
2784 |     ],
2785 |     "endIndex": 2,
2786 |     "startIndex": 0,
2787 |   },
2788 |   {
2789 |     "$event": "attribute",
2790 |     "data": [
2791 |       "href",
2792 |       "http://test.com/",
2793 |       null,
2794 |     ],
2795 |     "endIndex": 24,
2796 |     "startIndex": 3,
2797 |   },
2798 |   {
2799 |     "$event": "opentag",
2800 |     "data": [
2801 |       "a",
2802 |       {
2803 |         "href": "http://test.com/",
2804 |       },
2805 |       false,
2806 |     ],
2807 |     "endIndex": 24,
2808 |     "startIndex": 0,
2809 |   },
2810 |   {
2811 |     "$event": "text",
2812 |     "data": [
2813 |       "Foo",
2814 |     ],
2815 |     "endIndex": 27,
2816 |     "startIndex": 25,
2817 |   },
2818 |   {
2819 |     "$event": "closetag",
2820 |     "data": [
2821 |       "a",
2822 |       false,
2823 |     ],
2824 |     "endIndex": 31,
2825 |     "startIndex": 28,
2826 |   },
2827 |   {
2828 |     "$event": "opentagname",
2829 |     "data": [
2830 |       "p",
2831 |     ],
2832 |     "endIndex": 34,
2833 |     "startIndex": 32,
2834 |   },
2835 |   {
2836 |     "$event": "opentag",
2837 |     "data": [
2838 |       "p",
2839 |       {},
2840 |       false,
2841 |     ],
2842 |     "endIndex": 34,
2843 |     "startIndex": 32,
2844 |   },
2845 |   {
2846 |     "$event": "text",
2847 |     "data": [
2848 |       "Hold the line.",
2849 |     ],
2850 |     "endIndex": 48,
2851 |     "startIndex": 35,
2852 |   },
2853 |   {
2854 |     "$event": "closetag",
2855 |     "data": [
2856 |       "p",
2857 |       true,
2858 |     ],
2859 |     "endIndex": 49,
2860 |     "startIndex": 49,
2861 |   },
2862 | ]
2863 | `;
2864 | 
2865 | exports[`Events > end slash: as part of attrib value of void element 1`] = `
2866 | [
2867 |   {
2868 |     "$event": "opentagname",
2869 |     "data": [
2870 |       "img",
2871 |     ],
2872 |     "endIndex": 4,
2873 |     "startIndex": 0,
2874 |   },
2875 |   {
2876 |     "$event": "attribute",
2877 |     "data": [
2878 |       "src",
2879 |       "gif.com/123/",
2880 |       null,
2881 |     ],
2882 |     "endIndex": 21,
2883 |     "startIndex": 5,
2884 |   },
2885 |   {
2886 |     "$event": "opentag",
2887 |     "data": [
2888 |       "img",
2889 |       {
2890 |         "src": "gif.com/123/",
2891 |       },
2892 |       false,
2893 |     ],
2894 |     "endIndex": 21,
2895 |     "startIndex": 0,
2896 |   },
2897 |   {
2898 |     "$event": "closetag",
2899 |     "data": [
2900 |       "img",
2901 |       true,
2902 |     ],
2903 |     "endIndex": 21,
2904 |     "startIndex": 0,
2905 |   },
2906 |   {
2907 |     "$event": "opentagname",
2908 |     "data": [
2909 |       "p",
2910 |     ],
2911 |     "endIndex": 24,
2912 |     "startIndex": 22,
2913 |   },
2914 |   {
2915 |     "$event": "opentag",
2916 |     "data": [
2917 |       "p",
2918 |       {},
2919 |       false,
2920 |     ],
2921 |     "endIndex": 24,
2922 |     "startIndex": 22,
2923 |   },
2924 |   {
2925 |     "$event": "text",
2926 |     "data": [
2927 |       "Hold the line.",
2928 |     ],
2929 |     "endIndex": 38,
2930 |     "startIndex": 25,
2931 |   },
2932 |   {
2933 |     "$event": "closetag",
2934 |     "data": [
2935 |       "p",
2936 |       true,
2937 |     ],
2938 |     "endIndex": 39,
2939 |     "startIndex": 39,
2940 |   },
2941 | ]
2942 | `;
2943 | 
2944 | exports[`Events > end slash: non-void element ending with /> 1`] = `
2945 | [
2946 |   {
2947 |     "$event": "opentagname",
2948 |     "data": [
2949 |       "xx",
2950 |     ],
2951 |     "endIndex": 3,
2952 |     "startIndex": 0,
2953 |   },
2954 |   {
2955 |     "$event": "opentag",
2956 |     "data": [
2957 |       "xx",
2958 |       {},
2959 |       false,
2960 |     ],
2961 |     "endIndex": 6,
2962 |     "startIndex": 0,
2963 |   },
2964 |   {
2965 |     "$event": "opentagname",
2966 |     "data": [
2967 |       "p",
2968 |     ],
2969 |     "endIndex": 9,
2970 |     "startIndex": 7,
2971 |   },
2972 |   {
2973 |     "$event": "opentag",
2974 |     "data": [
2975 |       "p",
2976 |       {},
2977 |       false,
2978 |     ],
2979 |     "endIndex": 9,
2980 |     "startIndex": 7,
2981 |   },
2982 |   {
2983 |     "$event": "text",
2984 |     "data": [
2985 |       "Hold the line.",
2986 |     ],
2987 |     "endIndex": 23,
2988 |     "startIndex": 10,
2989 |   },
2990 |   {
2991 |     "$event": "closetag",
2992 |     "data": [
2993 |       "p",
2994 |       true,
2995 |     ],
2996 |     "endIndex": 24,
2997 |     "startIndex": 24,
2998 |   },
2999 |   {
3000 |     "$event": "closetag",
3001 |     "data": [
3002 |       "xx",
3003 |       true,
3004 |     ],
3005 |     "endIndex": 24,
3006 |     "startIndex": 24,
3007 |   },
3008 | ]
3009 | `;
3010 | 
3011 | exports[`Events > end slash: non-void element ending with />, recognizeSelfClosing=true 1`] = `
3012 | [
3013 |   {
3014 |     "$event": "opentagname",
3015 |     "data": [
3016 |       "xx",
3017 |     ],
3018 |     "endIndex": 3,
3019 |     "startIndex": 0,
3020 |   },
3021 |   {
3022 |     "$event": "opentag",
3023 |     "data": [
3024 |       "xx",
3025 |       {},
3026 |       false,
3027 |     ],
3028 |     "endIndex": 6,
3029 |     "startIndex": 0,
3030 |   },
3031 |   {
3032 |     "$event": "closetag",
3033 |     "data": [
3034 |       "xx",
3035 |       true,
3036 |     ],
3037 |     "endIndex": 6,
3038 |     "startIndex": 0,
3039 |   },
3040 |   {
3041 |     "$event": "opentagname",
3042 |     "data": [
3043 |       "p",
3044 |     ],
3045 |     "endIndex": 9,
3046 |     "startIndex": 7,
3047 |   },
3048 |   {
3049 |     "$event": "opentag",
3050 |     "data": [
3051 |       "p",
3052 |       {},
3053 |       false,
3054 |     ],
3055 |     "endIndex": 9,
3056 |     "startIndex": 7,
3057 |   },
3058 |   {
3059 |     "$event": "text",
3060 |     "data": [
3061 |       "Hold the line.",
3062 |     ],
3063 |     "endIndex": 23,
3064 |     "startIndex": 10,
3065 |   },
3066 |   {
3067 |     "$event": "closetag",
3068 |     "data": [
3069 |       "p",
3070 |       true,
3071 |     ],
3072 |     "endIndex": 24,
3073 |     "startIndex": 24,
3074 |   },
3075 | ]
3076 | `;
3077 | 
3078 | exports[`Events > end slash: non-void element ending with />, xmlMode=true 1`] = `
3079 | [
3080 |   {
3081 |     "$event": "opentagname",
3082 |     "data": [
3083 |       "xx",
3084 |     ],
3085 |     "endIndex": 3,
3086 |     "startIndex": 0,
3087 |   },
3088 |   {
3089 |     "$event": "opentag",
3090 |     "data": [
3091 |       "xx",
3092 |       {},
3093 |       false,
3094 |     ],
3095 |     "endIndex": 6,
3096 |     "startIndex": 0,
3097 |   },
3098 |   {
3099 |     "$event": "closetag",
3100 |     "data": [
3101 |       "xx",
3102 |       true,
3103 |     ],
3104 |     "endIndex": 6,
3105 |     "startIndex": 0,
3106 |   },
3107 |   {
3108 |     "$event": "opentagname",
3109 |     "data": [
3110 |       "p",
3111 |     ],
3112 |     "endIndex": 9,
3113 |     "startIndex": 7,
3114 |   },
3115 |   {
3116 |     "$event": "opentag",
3117 |     "data": [
3118 |       "p",
3119 |       {},
3120 |       false,
3121 |     ],
3122 |     "endIndex": 9,
3123 |     "startIndex": 7,
3124 |   },
3125 |   {
3126 |     "$event": "text",
3127 |     "data": [
3128 |       "Hold the line.",
3129 |     ],
3130 |     "endIndex": 23,
3131 |     "startIndex": 10,
3132 |   },
3133 |   {
3134 |     "$event": "closetag",
3135 |     "data": [
3136 |       "p",
3137 |       true,
3138 |     ],
3139 |     "endIndex": 24,
3140 |     "startIndex": 24,
3141 |   },
3142 | ]
3143 | `;
3144 | 
3145 | exports[`Events > end slash: void element ending with /> 1`] = `
3146 | [
3147 |   {
3148 |     "$event": "opentagname",
3149 |     "data": [
3150 |       "hr",
3151 |     ],
3152 |     "endIndex": 3,
3153 |     "startIndex": 0,
3154 |   },
3155 |   {
3156 |     "$event": "opentag",
3157 |     "data": [
3158 |       "hr",
3159 |       {},
3160 |       false,
3161 |     ],
3162 |     "endIndex": 6,
3163 |     "startIndex": 0,
3164 |   },
3165 |   {
3166 |     "$event": "closetag",
3167 |     "data": [
3168 |       "hr",
3169 |       true,
3170 |     ],
3171 |     "endIndex": 6,
3172 |     "startIndex": 0,
3173 |   },
3174 |   {
3175 |     "$event": "opentagname",
3176 |     "data": [
3177 |       "p",
3178 |     ],
3179 |     "endIndex": 9,
3180 |     "startIndex": 7,
3181 |   },
3182 |   {
3183 |     "$event": "opentag",
3184 |     "data": [
3185 |       "p",
3186 |       {},
3187 |       false,
3188 |     ],
3189 |     "endIndex": 9,
3190 |     "startIndex": 7,
3191 |   },
3192 |   {
3193 |     "$event": "text",
3194 |     "data": [
3195 |       "Hold the line.",
3196 |     ],
3197 |     "endIndex": 23,
3198 |     "startIndex": 10,
3199 |   },
3200 |   {
3201 |     "$event": "closetag",
3202 |     "data": [
3203 |       "p",
3204 |       true,
3205 |     ],
3206 |     "endIndex": 24,
3207 |     "startIndex": 24,
3208 |   },
3209 | ]
3210 | `;
3211 | 
3212 | exports[`Events > end slash: void element ending with > 1`] = `
3213 | [
3214 |   {
3215 |     "$event": "opentagname",
3216 |     "data": [
3217 |       "hr",
3218 |     ],
3219 |     "endIndex": 3,
3220 |     "startIndex": 0,
3221 |   },
3222 |   {
3223 |     "$event": "opentag",
3224 |     "data": [
3225 |       "hr",
3226 |       {},
3227 |       false,
3228 |     ],
3229 |     "endIndex": 6,
3230 |     "startIndex": 0,
3231 |   },
3232 |   {
3233 |     "$event": "closetag",
3234 |     "data": [
3235 |       "hr",
3236 |       true,
3237 |     ],
3238 |     "endIndex": 6,
3239 |     "startIndex": 0,
3240 |   },
3241 |   {
3242 |     "$event": "opentagname",
3243 |     "data": [
3244 |       "p",
3245 |     ],
3246 |     "endIndex": 9,
3247 |     "startIndex": 7,
3248 |   },
3249 |   {
3250 |     "$event": "opentag",
3251 |     "data": [
3252 |       "p",
3253 |       {},
3254 |       false,
3255 |     ],
3256 |     "endIndex": 9,
3257 |     "startIndex": 7,
3258 |   },
3259 |   {
3260 |     "$event": "text",
3261 |     "data": [
3262 |       "Hold the line.",
3263 |     ],
3264 |     "endIndex": 23,
3265 |     "startIndex": 10,
3266 |   },
3267 |   {
3268 |     "$event": "closetag",
3269 |     "data": [
3270 |       "p",
3271 |       true,
3272 |     ],
3273 |     "endIndex": 24,
3274 |     "startIndex": 24,
3275 |   },
3276 | ]
3277 | `;
3278 | 
3279 | exports[`Events > end slash: void element ending with >, xmlMode=true 1`] = `
3280 | [
3281 |   {
3282 |     "$event": "opentagname",
3283 |     "data": [
3284 |       "hr",
3285 |     ],
3286 |     "endIndex": 3,
3287 |     "startIndex": 0,
3288 |   },
3289 |   {
3290 |     "$event": "opentag",
3291 |     "data": [
3292 |       "hr",
3293 |       {},
3294 |       false,
3295 |     ],
3296 |     "endIndex": 6,
3297 |     "startIndex": 0,
3298 |   },
3299 |   {
3300 |     "$event": "opentagname",
3301 |     "data": [
3302 |       "p",
3303 |     ],
3304 |     "endIndex": 9,
3305 |     "startIndex": 7,
3306 |   },
3307 |   {
3308 |     "$event": "opentag",
3309 |     "data": [
3310 |       "p",
3311 |       {},
3312 |       false,
3313 |     ],
3314 |     "endIndex": 9,
3315 |     "startIndex": 7,
3316 |   },
3317 |   {
3318 |     "$event": "text",
3319 |     "data": [
3320 |       "Hold the line.",
3321 |     ],
3322 |     "endIndex": 23,
3323 |     "startIndex": 10,
3324 |   },
3325 |   {
3326 |     "$event": "closetag",
3327 |     "data": [
3328 |       "p",
3329 |       true,
3330 |     ],
3331 |     "endIndex": 24,
3332 |     "startIndex": 24,
3333 |   },
3334 |   {
3335 |     "$event": "closetag",
3336 |     "data": [
3337 |       "hr",
3338 |       true,
3339 |     ],
3340 |     "endIndex": 24,
3341 |     "startIndex": 24,
3342 |   },
3343 | ]
3344 | `;
3345 | 
3346 | exports[`Events > entity in attribute (#276) 1`] = `
3347 | [
3348 |   {
3349 |     "$event": "opentagname",
3350 |     "data": [
3351 |       "img",
3352 |     ],
3353 |     "endIndex": 4,
3354 |     "startIndex": 0,
3355 |   },
3356 |   {
3357 |     "$event": "attribute",
3358 |     "data": [
3359 |       "src",
3360 |       "?&image_uri=1&ℑ=2&image=3",
3361 |       """,
3362 |     ],
3363 |     "endIndex": 42,
3364 |     "startIndex": 5,
3365 |   },
3366 |   {
3367 |     "$event": "opentag",
3368 |     "data": [
3369 |       "img",
3370 |       {
3371 |         "src": "?&image_uri=1&ℑ=2&image=3",
3372 |       },
3373 |       false,
3374 |     ],
3375 |     "endIndex": 43,
3376 |     "startIndex": 0,
3377 |   },
3378 |   {
3379 |     "$event": "closetag",
3380 |     "data": [
3381 |       "img",
3382 |       true,
3383 |     ],
3384 |     "endIndex": 43,
3385 |     "startIndex": 0,
3386 |   },
3387 |   {
3388 |     "$event": "text",
3389 |     "data": [
3390 |       "?&image_uri=1&ℑ=2&image=3",
3391 |     ],
3392 |     "endIndex": 74,
3393 |     "startIndex": 44,
3394 |   },
3395 | ]
3396 | `;
3397 | 
3398 | exports[`Events > entity in attribute 1`] = `
3399 | [
3400 |   {
3401 |     "$event": "opentagname",
3402 |     "data": [
3403 |       "a",
3404 |     ],
3405 |     "endIndex": 2,
3406 |     "startIndex": 0,
3407 |   },
3408 |   {
3409 |     "$event": "attribute",
3410 |     "data": [
3411 |       "href",
3412 |       "http://example.com/pa#x61ge?param=value&param2&param3=<val&; & &",
3413 |       "'",
3414 |     ],
3415 |     "endIndex": 82,
3416 |     "startIndex": 3,
3417 |   },
3418 |   {
3419 |     "$event": "opentag",
3420 |     "data": [
3421 |       "a",
3422 |       {
3423 |         "href": "http://example.com/pa#x61ge?param=value&param2&param3=<val&; & &",
3424 |       },
3425 |       false,
3426 |     ],
3427 |     "endIndex": 82,
3428 |     "startIndex": 0,
3429 |   },
3430 |   {
3431 |     "$event": "closetag",
3432 |     "data": [
3433 |       "a",
3434 |       true,
3435 |     ],
3436 |     "endIndex": 83,
3437 |     "startIndex": 83,
3438 |   },
3439 | ]
3440 | `;
3441 | 
3442 | exports[`Events > entity in title (#592) 1`] = `
3443 | [
3444 |   {
3445 |     "$event": "opentagname",
3446 |     "data": [
3447 |       "title",
3448 |     ],
3449 |     "endIndex": 6,
3450 |     "startIndex": 0,
3451 |   },
3452 |   {
3453 |     "$event": "opentag",
3454 |     "data": [
3455 |       "title",
3456 |       {},
3457 |       false,
3458 |     ],
3459 |     "endIndex": 6,
3460 |     "startIndex": 0,
3461 |   },
3462 |   {
3463 |     "$event": "text",
3464 |     "data": [
3465 |       "the "title"",
3466 |     ],
3467 |     "endIndex": 26,
3468 |     "startIndex": 7,
3469 |   },
3470 |   {
3471 |     "$event": "closetag",
3472 |     "data": [
3473 |       "title",
3474 |       true,
3475 |     ],
3476 |     "endIndex": 27,
3477 |     "startIndex": 27,
3478 |   },
3479 | ]
3480 | `;
3481 | 
3482 | exports[`Events > entity in title - decodeEntities=false (#592) 1`] = `
3483 | [
3484 |   {
3485 |     "$event": "opentagname",
3486 |     "data": [
3487 |       "title",
3488 |     ],
3489 |     "endIndex": 6,
3490 |     "startIndex": 0,
3491 |   },
3492 |   {
3493 |     "$event": "opentag",
3494 |     "data": [
3495 |       "title",
3496 |       {},
3497 |       false,
3498 |     ],
3499 |     "endIndex": 6,
3500 |     "startIndex": 0,
3501 |   },
3502 |   {
3503 |     "$event": "text",
3504 |     "data": [
3505 |       "the &quot;title&quot;",
3506 |     ],
3507 |     "endIndex": 27,
3508 |     "startIndex": 7,
3509 |   },
3510 |   {
3511 |     "$event": "closetag",
3512 |     "data": [
3513 |       "title",
3514 |       true,
3515 |     ],
3516 |     "endIndex": 28,
3517 |     "startIndex": 28,
3518 |   },
3519 | ]
3520 | `;
3521 | 
3522 | exports[`Events > leading lt 1`] = `
3523 | [
3524 |   {
3525 |     "$event": "text",
3526 |     "data": [
3527 |       ">a>",
3528 |     ],
3529 |     "endIndex": 2,
3530 |     "startIndex": 0,
3531 |   },
3532 | ]
3533 | `;
3534 | 
3535 | exports[`Events > legacy entities 1`] = `
3536 | [
3537 |   {
3538 |     "$event": "text",
3539 |     "data": [
3540 |       "&elíe&eer;s<er&sum",
3541 |     ],
3542 |     "endIndex": 31,
3543 |     "startIndex": 0,
3544 |   },
3545 | ]
3546 | `;
3547 | 
3548 | exports[`Events > legacy entities fail 1`] = `
3549 | [
3550 |   {
3551 |     "$event": "text",
3552 |     "data": [
3553 |       "M&M",
3554 |     ],
3555 |     "endIndex": 2,
3556 |     "startIndex": 0,
3557 |   },
3558 | ]
3559 | `;
3560 | 
3561 | exports[`Events > lt followed by whitespace 1`] = `
3562 | [
3563 |   {
3564 |     "$event": "text",
3565 |     "data": [
3566 |       "a < b",
3567 |     ],
3568 |     "endIndex": 4,
3569 |     "startIndex": 0,
3570 |   },
3571 | ]
3572 | `;
3573 | 
3574 | exports[`Events > named entities 1`] = `
3575 | [
3576 |   {
3577 |     "$event": "text",
3578 |     "data": [
3579 |       "&el<er∳foo&bar",
3580 |     ],
3581 |     "endIndex": 52,
3582 |     "startIndex": 0,
3583 |   },
3584 | ]
3585 | `;
3586 | 
3587 | exports[`Events > numeric entities 1`] = `
3588 | [
3589 |   {
3590 |     "$event": "text",
3591 |     "data": [
3592 |       "abcdfg&#x;h",
3593 |     ],
3594 |     "endIndex": 35,
3595 |     "startIndex": 0,
3596 |   },
3597 | ]
3598 | `;
3599 | 
3600 | exports[`Events > open-implies-close case of (non-br) void close tag in non-XML mode 1`] = `
3601 | [
3602 |   {
3603 |     "$event": "opentagname",
3604 |     "data": [
3605 |       "select",
3606 |     ],
3607 |     "endIndex": 7,
3608 |     "startIndex": 0,
3609 |   },
3610 |   {
3611 |     "$event": "opentag",
3612 |     "data": [
3613 |       "select",
3614 |       {},
3615 |       false,
3616 |     ],
3617 |     "endIndex": 7,
3618 |     "startIndex": 0,
3619 |   },
3620 |   {
3621 |     "$event": "closetag",
3622 |     "data": [
3623 |       "select",
3624 |       true,
3625 |     ],
3626 |     "endIndex": 14,
3627 |     "startIndex": 8,
3628 |   },
3629 |   {
3630 |     "$event": "opentagname",
3631 |     "data": [
3632 |       "input",
3633 |     ],
3634 |     "endIndex": 14,
3635 |     "startIndex": 8,
3636 |   },
3637 |   {
3638 |     "$event": "opentag",
3639 |     "data": [
3640 |       "input",
3641 |       {},
3642 |       false,
3643 |     ],
3644 |     "endIndex": 14,
3645 |     "startIndex": 8,
3646 |   },
3647 |   {
3648 |     "$event": "closetag",
3649 |     "data": [
3650 |       "input",
3651 |       true,
3652 |     ],
3653 |     "endIndex": 14,
3654 |     "startIndex": 8,
3655 |   },
3656 | ]
3657 | `;
3658 | 
3659 | exports[`Events > simple 1`] = `
3660 | [
3661 |   {
3662 |     "$event": "opentagname",
3663 |     "data": [
3664 |       "h1",
3665 |     ],
3666 |     "endIndex": 3,
3667 |     "startIndex": 0,
3668 |   },
3669 |   {
3670 |     "$event": "attribute",
3671 |     "data": [
3672 |       "class",
3673 |       "test",
3674 |       null,
3675 |     ],
3676 |     "endIndex": 14,
3677 |     "startIndex": 4,
3678 |   },
3679 |   {
3680 |     "$event": "opentag",
3681 |     "data": [
3682 |       "h1",
3683 |       {
3684 |         "class": "test",
3685 |       },
3686 |       false,
3687 |     ],
3688 |     "endIndex": 14,
3689 |     "startIndex": 0,
3690 |   },
3691 |   {
3692 |     "$event": "text",
3693 |     "data": [
3694 |       "adsf",
3695 |     ],
3696 |     "endIndex": 18,
3697 |     "startIndex": 15,
3698 |   },
3699 |   {
3700 |     "$event": "closetag",
3701 |     "data": [
3702 |       "h1",
3703 |       false,
3704 |     ],
3705 |     "endIndex": 23,
3706 |     "startIndex": 19,
3707 |   },
3708 | ]
3709 | `;
3710 | 
3711 | exports[`Events > tag names are not ASCII alpha 1`] = `
3712 | [
3713 |   {
3714 |     "$event": "text",
3715 |     "data": [
3716 |       "<12>text",
3717 |     ],
3718 |     "endIndex": 7,
3719 |     "startIndex": 0,
3720 |   },
3721 |   {
3722 |     "$event": "comment",
3723 |     "data": [
3724 |       "12",
3725 |     ],
3726 |     "endIndex": 12,
3727 |     "startIndex": 8,
3728 |   },
3729 |   {
3730 |     "$event": "commentend",
3731 |     "data": [],
3732 |     "endIndex": 12,
3733 |     "startIndex": 8,
3734 |   },
3735 | ]
3736 | `;
3737 | 
3738 | exports[`Events > xml entities 1`] = `
3739 | [
3740 |   {
3741 |     "$event": "text",
3742 |     "data": [
3743 |       "&>&amp<&uuml;a&#x62c&#100&#101",
3744 |     ],
3745 |     "endIndex": 48,
3746 |     "startIndex": 0,
3747 |   },
3748 | ]
3749 | `;
3750 | 


--------------------------------------------------------------------------------
/src/__snapshots__/Tokenizer.spec.ts.snap:
--------------------------------------------------------------------------------
  1 | // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
  2 | 
  3 | exports[`Tokenizer > should correctly mark attributes > for double quotes attribute 1`] = `
  4 | [
  5 |   [
  6 |     "onopentagname",
  7 |     1,
  8 |     4,
  9 |   ],
 10 |   [
 11 |     "onattribname",
 12 |     5,
 13 |     8,
 14 |   ],
 15 |   [
 16 |     "onattribdata",
 17 |     10,
 18 |     11,
 19 |   ],
 20 |   [
 21 |     "onattribend",
 22 |     3,
 23 |     12,
 24 |   ],
 25 |   [
 26 |     "onopentagend",
 27 |     13,
 28 |   ],
 29 |   [
 30 |     "onend",
 31 |   ],
 32 | ]
 33 | `;
 34 | 
 35 | exports[`Tokenizer > should correctly mark attributes > for no quotes attribute 1`] = `
 36 | [
 37 |   [
 38 |     "onopentagname",
 39 |     1,
 40 |     4,
 41 |   ],
 42 |   [
 43 |     "onattribname",
 44 |     5,
 45 |     8,
 46 |   ],
 47 |   [
 48 |     "onattribdata",
 49 |     9,
 50 |     12,
 51 |   ],
 52 |   [
 53 |     "onattribend",
 54 |     1,
 55 |     12,
 56 |   ],
 57 |   [
 58 |     "onopentagend",
 59 |     13,
 60 |   ],
 61 |   [
 62 |     "onend",
 63 |   ],
 64 | ]
 65 | `;
 66 | 
 67 | exports[`Tokenizer > should correctly mark attributes > for no value attribute 1`] = `
 68 | [
 69 |   [
 70 |     "onopentagname",
 71 |     1,
 72 |     4,
 73 |   ],
 74 |   [
 75 |     "onattribname",
 76 |     5,
 77 |     12,
 78 |   ],
 79 |   [
 80 |     "onattribend",
 81 |     0,
 82 |     12,
 83 |   ],
 84 |   [
 85 |     "onopentagend",
 86 |     13,
 87 |   ],
 88 |   [
 89 |     "onend",
 90 |   ],
 91 | ]
 92 | `;
 93 | 
 94 | exports[`Tokenizer > should correctly mark attributes > for single quotes attribute 1`] = `
 95 | [
 96 |   [
 97 |     "onopentagname",
 98 |     1,
 99 |     4,
100 |   ],
101 |   [
102 |     "onattribname",
103 |     5,
104 |     8,
105 |   ],
106 |   [
107 |     "onattribdata",
108 |     10,
109 |     11,
110 |   ],
111 |   [
112 |     "onattribend",
113 |     2,
114 |     12,
115 |   ],
116 |   [
117 |     "onopentagend",
118 |     13,
119 |   ],
120 |   [
121 |     "onend",
122 |   ],
123 | ]
124 | `;
125 | 
126 | exports[`Tokenizer > should handle entities > for XML entities 1`] = `
127 | [
128 |   [
129 |     "ontextentity",
130 |     38,
131 |     5,
132 |   ],
133 |   [
134 |     "ontextentity",
135 |     62,
136 |     9,
137 |   ],
138 |   [
139 |     "ontext",
140 |     9,
141 |     13,
142 |   ],
143 |   [
144 |     "ontextentity",
145 |     60,
146 |     17,
147 |   ],
148 |   [
149 |     "ontext",
150 |     17,
151 |     23,
152 |   ],
153 |   [
154 |     "ontextentity",
155 |     97,
156 |     29,
157 |   ],
158 |   [
159 |     "ontext",
160 |     29,
161 |     34,
162 |   ],
163 |   [
164 |     "ontextentity",
165 |     99,
166 |     39,
167 |   ],
168 |   [
169 |     "ontext",
170 |     39,
171 |     49,
172 |   ],
173 |   [
174 |     "onend",
175 |   ],
176 | ]
177 | `;
178 | 
179 | exports[`Tokenizer > should handle entities > for entities in attributes (#276) 1`] = `
180 | [
181 |   [
182 |     "onopentagname",
183 |     1,
184 |     4,
185 |   ],
186 |   [
187 |     "onattribname",
188 |     5,
189 |     8,
190 |   ],
191 |   [
192 |     "onattribdata",
193 |     10,
194 |     24,
195 |   ],
196 |   [
197 |     "onattribentity",
198 |     8465,
199 |   ],
200 |   [
201 |     "onattribdata",
202 |     31,
203 |     41,
204 |   ],
205 |   [
206 |     "onattribend",
207 |     3,
208 |     42,
209 |   ],
210 |   [
211 |     "onselfclosingtag",
212 |     43,
213 |   ],
214 |   [
215 |     "ontext",
216 |     44,
217 |     58,
218 |   ],
219 |   [
220 |     "ontextentity",
221 |     8465,
222 |     65,
223 |   ],
224 |   [
225 |     "ontext",
226 |     65,
227 |     75,
228 |   ],
229 |   [
230 |     "onend",
231 |   ],
232 | ]
233 | `;
234 | 
235 | exports[`Tokenizer > should handle entities > for multi-byte entities 1`] = `
236 | [
237 |   [
238 |     "ontextentity",
239 |     8807,
240 |     21,
241 |   ],
242 |   [
243 |     "ontextentity",
244 |     824,
245 |     21,
246 |   ],
247 |   [
248 |     "onend",
249 |   ],
250 | ]
251 | `;
252 | 
253 | exports[`Tokenizer > should handle entities > for trailing legacy entity 1`] = `
254 | [
255 |   [
256 |     "ontextentity",
257 |     10801,
258 |     10,
259 |   ],
260 |   [
261 |     "ontextentity",
262 |     215,
263 |     16,
264 |   ],
265 |   [
266 |     "ontext",
267 |     16,
268 |     19,
269 |   ],
270 |   [
271 |     "onend",
272 |   ],
273 | ]
274 | `;
275 | 
276 | exports[`Tokenizer > should not break after special tag followed by an entity > for normal special tag 1`] = `
277 | [
278 |   [
279 |     "onopentagname",
280 |     1,
281 |     6,
282 |   ],
283 |   [
284 |     "onopentagend",
285 |     6,
286 |   ],
287 |   [
288 |     "ontext",
289 |     7,
290 |     10,
291 |   ],
292 |   [
293 |     "onclosetag",
294 |     12,
295 |     17,
296 |   ],
297 |   [
298 |     "ontextentity",
299 |     39,
300 |     24,
301 |   ],
302 |   [
303 |     "onopentagname",
304 |     25,
305 |     27,
306 |   ],
307 |   [
308 |     "onselfclosingtag",
309 |     28,
310 |   ],
311 |   [
312 |     "onend",
313 |   ],
314 | ]
315 | `;
316 | 
317 | exports[`Tokenizer > should not break after special tag followed by an entity > for self-closing special tag 1`] = `
318 | [
319 |   [
320 |     "onopentagname",
321 |     1,
322 |     6,
323 |   ],
324 |   [
325 |     "onselfclosingtag",
326 |     8,
327 |   ],
328 |   [
329 |     "ontextentity",
330 |     39,
331 |     15,
332 |   ],
333 |   [
334 |     "onopentagname",
335 |     16,
336 |     18,
337 |   ],
338 |   [
339 |     "onselfclosingtag",
340 |     19,
341 |   ],
342 |   [
343 |     "onend",
344 |   ],
345 | ]
346 | `;
347 | 
348 | exports[`Tokenizer > should not lose data when pausing 1`] = `
349 | [
350 |   [
351 |     "ontextentity",
352 |     38,
353 |     5,
354 |   ],
355 |   [
356 |     "ontext",
357 |     5,
358 |     12,
359 |   ],
360 |   [
361 |     "onend",
362 |   ],
363 | ]
364 | `;
365 | 
366 | exports[`Tokenizer > should support self-closing special tags > for self-closing script tag 1`] = `
367 | [
368 |   [
369 |     "onopentagname",
370 |     1,
371 |     7,
372 |   ],
373 |   [
374 |     "onselfclosingtag",
375 |     9,
376 |   ],
377 |   [
378 |     "onopentagname",
379 |     11,
380 |     14,
381 |   ],
382 |   [
383 |     "onopentagend",
384 |     14,
385 |   ],
386 |   [
387 |     "onclosetag",
388 |     17,
389 |     20,
390 |   ],
391 |   [
392 |     "onend",
393 |   ],
394 | ]
395 | `;
396 | 
397 | exports[`Tokenizer > should support self-closing special tags > for self-closing style tag 1`] = `
398 | [
399 |   [
400 |     "onopentagname",
401 |     1,
402 |     6,
403 |   ],
404 |   [
405 |     "onselfclosingtag",
406 |     8,
407 |   ],
408 |   [
409 |     "onopentagname",
410 |     10,
411 |     13,
412 |   ],
413 |   [
414 |     "onopentagend",
415 |     13,
416 |   ],
417 |   [
418 |     "onclosetag",
419 |     16,
420 |     19,
421 |   ],
422 |   [
423 |     "onend",
424 |   ],
425 | ]
426 | `;
427 | 
428 | exports[`Tokenizer > should support self-closing special tags > for self-closing textarea tag 1`] = `
429 | [
430 |   [
431 |     "onopentagname",
432 |     1,
433 |     9,
434 |   ],
435 |   [
436 |     "onselfclosingtag",
437 |     11,
438 |   ],
439 |   [
440 |     "onopentagname",
441 |     13,
442 |     16,
443 |   ],
444 |   [
445 |     "onopentagend",
446 |     16,
447 |   ],
448 |   [
449 |     "onclosetag",
450 |     19,
451 |     22,
452 |   ],
453 |   [
454 |     "onend",
455 |   ],
456 | ]
457 | `;
458 | 
459 | exports[`Tokenizer > should support self-closing special tags > for self-closing title tag 1`] = `
460 | [
461 |   [
462 |     "onopentagname",
463 |     1,
464 |     6,
465 |   ],
466 |   [
467 |     "onselfclosingtag",
468 |     8,
469 |   ],
470 |   [
471 |     "onopentagname",
472 |     10,
473 |     13,
474 |   ],
475 |   [
476 |     "onopentagend",
477 |     13,
478 |   ],
479 |   [
480 |     "onclosetag",
481 |     16,
482 |     19,
483 |   ],
484 |   [
485 |     "onend",
486 |   ],
487 | ]
488 | `;
489 | 
490 | exports[`Tokenizer > should support self-closing special tags > for self-closing xmp tag 1`] = `
491 | [
492 |   [
493 |     "onopentagname",
494 |     1,
495 |     4,
496 |   ],
497 |   [
498 |     "onselfclosingtag",
499 |     6,
500 |   ],
501 |   [
502 |     "onopentagname",
503 |     8,
504 |     11,
505 |   ],
506 |   [
507 |     "onopentagend",
508 |     11,
509 |   ],
510 |   [
511 |     "onclosetag",
512 |     14,
513 |     17,
514 |   ],
515 |   [
516 |     "onend",
517 |   ],
518 | ]
519 | `;
520 | 
521 | exports[`Tokenizer > should support standard special tags > for normal script tag 1`] = `
522 | [
523 |   [
524 |     "onopentagname",
525 |     1,
526 |     7,
527 |   ],
528 |   [
529 |     "onopentagend",
530 |     7,
531 |   ],
532 |   [
533 |     "onclosetag",
534 |     10,
535 |     16,
536 |   ],
537 |   [
538 |     "onopentagname",
539 |     18,
540 |     21,
541 |   ],
542 |   [
543 |     "onopentagend",
544 |     21,
545 |   ],
546 |   [
547 |     "onclosetag",
548 |     24,
549 |     27,
550 |   ],
551 |   [
552 |     "onend",
553 |   ],
554 | ]
555 | `;
556 | 
557 | exports[`Tokenizer > should support standard special tags > for normal sitle tag 1`] = `
558 | [
559 |   [
560 |     "onopentagname",
561 |     1,
562 |     6,
563 |   ],
564 |   [
565 |     "onopentagend",
566 |     6,
567 |   ],
568 |   [
569 |     "onclosetag",
570 |     9,
571 |     14,
572 |   ],
573 |   [
574 |     "onopentagname",
575 |     16,
576 |     19,
577 |   ],
578 |   [
579 |     "onopentagend",
580 |     19,
581 |   ],
582 |   [
583 |     "onclosetag",
584 |     22,
585 |     25,
586 |   ],
587 |   [
588 |     "onend",
589 |   ],
590 | ]
591 | `;
592 | 
593 | exports[`Tokenizer > should support standard special tags > for normal style tag 1`] = `
594 | [
595 |   [
596 |     "onopentagname",
597 |     1,
598 |     6,
599 |   ],
600 |   [
601 |     "onopentagend",
602 |     6,
603 |   ],
604 |   [
605 |     "onclosetag",
606 |     9,
607 |     14,
608 |   ],
609 |   [
610 |     "onopentagname",
611 |     16,
612 |     19,
613 |   ],
614 |   [
615 |     "onopentagend",
616 |     19,
617 |   ],
618 |   [
619 |     "onclosetag",
620 |     22,
621 |     25,
622 |   ],
623 |   [
624 |     "onend",
625 |   ],
626 | ]
627 | `;
628 | 
629 | exports[`Tokenizer > should support standard special tags > for normal textarea tag 1`] = `
630 | [
631 |   [
632 |     "onopentagname",
633 |     1,
634 |     9,
635 |   ],
636 |   [
637 |     "onopentagend",
638 |     9,
639 |   ],
640 |   [
641 |     "onclosetag",
642 |     12,
643 |     20,
644 |   ],
645 |   [
646 |     "onopentagname",
647 |     22,
648 |     25,
649 |   ],
650 |   [
651 |     "onopentagend",
652 |     25,
653 |   ],
654 |   [
655 |     "onclosetag",
656 |     28,
657 |     31,
658 |   ],
659 |   [
660 |     "onend",
661 |   ],
662 | ]
663 | `;
664 | 
665 | exports[`Tokenizer > should support standard special tags > for normal xmp tag 1`] = `
666 | [
667 |   [
668 |     "onopentagname",
669 |     1,
670 |     4,
671 |   ],
672 |   [
673 |     "onopentagend",
674 |     4,
675 |   ],
676 |   [
677 |     "onclosetag",
678 |     7,
679 |     10,
680 |   ],
681 |   [
682 |     "onopentagname",
683 |     12,
684 |     15,
685 |   ],
686 |   [
687 |     "onopentagend",
688 |     15,
689 |   ],
690 |   [
691 |     "onclosetag",
692 |     18,
693 |     21,
694 |   ],
695 |   [
696 |     "onend",
697 |   ],
698 | ]
699 | `;
700 | 
701 | exports[`Tokenizer > should treat html inside special tags as text > for div inside script tag 1`] = `
702 | [
703 |   [
704 |     "onopentagname",
705 |     1,
706 |     7,
707 |   ],
708 |   [
709 |     "onopentagend",
710 |     7,
711 |   ],
712 |   [
713 |     "ontext",
714 |     8,
715 |     19,
716 |   ],
717 |   [
718 |     "onclosetag",
719 |     21,
720 |     27,
721 |   ],
722 |   [
723 |     "onend",
724 |   ],
725 | ]
726 | `;
727 | 
728 | exports[`Tokenizer > should treat html inside special tags as text > for div inside style tag 1`] = `
729 | [
730 |   [
731 |     "onopentagname",
732 |     1,
733 |     6,
734 |   ],
735 |   [
736 |     "onopentagend",
737 |     6,
738 |   ],
739 |   [
740 |     "ontext",
741 |     7,
742 |     18,
743 |   ],
744 |   [
745 |     "onclosetag",
746 |     20,
747 |     25,
748 |   ],
749 |   [
750 |     "onend",
751 |   ],
752 | ]
753 | `;
754 | 
755 | exports[`Tokenizer > should treat html inside special tags as text > for div inside textarea tag 1`] = `
756 | [
757 |   [
758 |     "onopentagname",
759 |     1,
760 |     9,
761 |   ],
762 |   [
763 |     "onopentagend",
764 |     9,
765 |   ],
766 |   [
767 |     "ontext",
768 |     10,
769 |     21,
770 |   ],
771 |   [
772 |     "onclosetag",
773 |     23,
774 |     31,
775 |   ],
776 |   [
777 |     "onend",
778 |   ],
779 | ]
780 | `;
781 | 
782 | exports[`Tokenizer > should treat html inside special tags as text > for div inside title tag 1`] = `
783 | [
784 |   [
785 |     "onopentagname",
786 |     1,
787 |     6,
788 |   ],
789 |   [
790 |     "onopentagend",
791 |     6,
792 |   ],
793 |   [
794 |     "ontext",
795 |     7,
796 |     18,
797 |   ],
798 |   [
799 |     "onclosetag",
800 |     20,
801 |     25,
802 |   ],
803 |   [
804 |     "onend",
805 |   ],
806 | ]
807 | `;
808 | 
809 | exports[`Tokenizer > should treat html inside special tags as text > for div inside xmp tag 1`] = `
810 | [
811 |   [
812 |     "onopentagname",
813 |     1,
814 |     4,
815 |   ],
816 |   [
817 |     "onopentagend",
818 |     4,
819 |   ],
820 |   [
821 |     "ontext",
822 |     5,
823 |     16,
824 |   ],
825 |   [
826 |     "onclosetag",
827 |     18,
828 |     21,
829 |   ],
830 |   [
831 |     "onend",
832 |   ],
833 | ]
834 | `;
835 | 


--------------------------------------------------------------------------------
/src/__snapshots__/index.spec.ts.snap:
--------------------------------------------------------------------------------
 1 | // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 2 | 
 3 | exports[`Index > createDocumentStream 1`] = `
 4 | Document {
 5 |   "children": [
 6 |     &This is text,
 7 |     <!-- and comments -->,
 8 |     <tags />,
 9 |   ],
10 |   "endIndex": null,
11 |   "next": null,
12 |   "parent": null,
13 |   "prev": null,
14 |   "startIndex": null,
15 |   "type": "root",
16 | }
17 | `;
18 | 
19 | exports[`Index > createDomStream 1`] = `
20 | [
21 |   &This is text,
22 |   <!-- and comments -->,
23 |   <tags />,
24 | ]
25 | `;
26 | 
27 | exports[`Index > parseDOM 1`] = `
28 | [
29 |   <a
30 |     foo=""
31 |   >
32 |     <b>
33 |       <c>
34 |         ProcessingInstruction {
35 |           "data": "?foo",
36 |           "endIndex": null,
37 |           "name": "?foo",
38 |           "next": Yay!,
39 |           "parent": <c>
40 |             [Circular]
41 |             Yay!
42 |           </c>,
43 |           "prev": null,
44 |           "startIndex": null,
45 |           "type": "directive",
46 |         }
47 |         Yay!
48 |       </c>
49 |     </b>
50 |   </a>,
51 | ]
52 | `;
53 | 
54 | exports[`Index > parseDocument 1`] = `
55 | Document {
56 |   "children": [
57 |     <a
58 |       foo=""
59 |     >
60 |       <b>
61 |         <c>
62 |           ProcessingInstruction {
63 |             "data": "?foo",
64 |             "endIndex": null,
65 |             "name": "?foo",
66 |             "next": Yay!,
67 |             "parent": <c>
68 |               [Circular]
69 |               Yay!
70 |             </c>,
71 |             "prev": null,
72 |             "startIndex": null,
73 |             "type": "directive",
74 |           }
75 |           Yay!
76 |         </c>
77 |       </b>
78 |     </a>,
79 |   ],
80 |   "endIndex": null,
81 |   "next": null,
82 |   "parent": null,
83 |   "prev": null,
84 |   "startIndex": null,
85 |   "type": "root",
86 | }
87 | `;
88 | 


--------------------------------------------------------------------------------
/src/index.spec.ts:
--------------------------------------------------------------------------------
 1 | import { describe, it, expect } from "vitest";
 2 | import {
 3 |     parseDocument,
 4 |     parseDOM,
 5 |     createDocumentStream,
 6 |     createDomStream,
 7 |     DomHandler,
 8 |     DefaultHandler,
 9 |     type Parser,
10 | } from "./index.js";
11 | import { Element } from "domhandler";
12 | 
13 | /*
14 |  * Expose a DOM-style `attributes` list on `Element`, which lets the Vitest snapshot serializer print nodes as markup.
15 |  */
16 | Object.defineProperty(Element.prototype, "attributes", {
17 |     enumerable: false,
18 |     configurable: true,
19 |     get() {
20 |         const entries = Object.entries(this.attribs);
21 |         return entries.map(([name, value]) => ({ name, value }));
22 |     },
23 | });
24 | 
25 | describe("Index", () => {
26 |     it("parseDocument", () => {
27 |         expect(parseDocument("<a foo><b><c><?foo>Yay!")).toMatchSnapshot();
28 |     });
29 | 
30 |     it("parseDOM", () => {
31 |         expect(parseDOM("<a foo><b><c><?foo>Yay!")).toMatchSnapshot();
32 |     });
33 | 
34 |     it("createDocumentStream", async () => {
35 |         let parser!: Parser;
36 |         // Create the parser inside the executor, so its callback can settle the promise.
37 |         const parsed = new Promise((resolve, reject) => {
38 |             parser = createDocumentStream((error, root) => {
39 |                 if (error) {
40 |                     reject(error);
41 |                 } else {
42 |                     resolve(root);
43 |                 }
44 |             });
45 |         });
46 |         // Write the input one character at a time, so the parser receives many small chunks.
47 |         for (const char of "&amp;This is text<!-- and comments --><tags>") {
48 |             parser.write(char);
49 |         }
50 |         parser.end();
51 | 
52 |         await expect(parsed).resolves.toMatchSnapshot();
53 |     });
54 | 
55 |     it("createDomStream", async () => {
56 |         let parser!: Parser;
57 |         const parsed = new Promise((resolve, reject) => {
58 |             parser = createDomStream((error, nodes) => {
59 |                 if (error) {
60 |                     reject(error);
61 |                 } else {
62 |                     resolve(nodes);
63 |                 }
64 |             });
65 |         });
66 |         for (const char of "&amp;This is text<!-- and comments --><tags>") {
67 |             parser.write(char);
68 |         }
69 |         parser.end();
70 | 
71 |         await expect(parsed).resolves.toMatchSnapshot();
72 |     });
73 | 
74 |     describe("API", () => {
75 |         it("should export the appropriate APIs", () => {
76 |             expect(DomHandler).toEqual(DefaultHandler);
77 |         });
78 |     });
79 | });
80 | 


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
  1 | import { Parser, type ParserOptions } from "./Parser.js";
  2 | export type { Handler, ParserOptions } from "./Parser.js";
  3 | export { Parser } from "./Parser.js";
  4 | 
  5 | import {
  6 |     DomHandler,
  7 |     type DomHandlerOptions,
  8 |     type ChildNode,
  9 |     type Element,
 10 |     type Document,
 11 | } from "domhandler";
 12 | 
 13 | export {
 14 |     DomHandler,
 15 |     // Old name for DomHandler
 16 |     DomHandler as DefaultHandler,
 17 |     type DomHandlerOptions,
 18 | } from "domhandler";
 19 | 
 20 | export type Options = ParserOptions & DomHandlerOptions;
 21 | 
 22 | // Helper methods
 23 | 
 24 | /**
 25 |  * Parses the complete `data` string and returns the resulting document.
 26 |  * @param data The markup to parse.
 27 |  * @param options Optional settings, passed to both the DOM handler and the parser.
 28 |  */
 29 | export function parseDocument(data: string, options?: Options): Document {
 30 |     const domHandler = new DomHandler(undefined, options);
 31 |     const parser = new Parser(domHandler, options);
 32 |     parser.end(data);
 33 |     return domHandler.root;
 34 | }
 35 | /**
 36 |  * Parses `data` and returns the root-level nodes as an array.
 37 |  *
 38 |  * Each returned node still has a `Document` as its parent; call `parseDocument` if you need that node.
 39 |  *
 40 |  * @param data The markup to parse.
 41 |  * @param options Optional settings for the parser and the DOM handler.
 42 |  * @deprecated Use `parseDocument` instead.
 43 |  */
 44 | export function parseDOM(data: string, options?: Options): ChildNode[] {
 45 |     const { children } = parseDocument(data, options);
 46 |     return children;
 47 | }
 48 | /**
 49 |  * Builds a `Parser` whose DOM handler reports the finished `Document` to `callback`.
 50 |  * @param callback Called once parsing has completed, with an error (or `null`) and the document.
 51 |  * @param options Optional settings for the parser and the DOM handler.
 52 |  * @param elementCallback Optional; called each time a tag has been completed inside of the DOM.
 53 |  */
 54 | export function createDocumentStream(
 55 |     callback: (error: Error | null, document: Document) => void,
 56 |     options?: Options,
 57 |     elementCallback?: (element: Element) => void,
 58 | ): Parser {
 59 |     // The callback reads `domHandler` from inside its own initializer, hence the explicit type.
 60 |     const domHandler: DomHandler = new DomHandler(
 61 |         (error: Error | null) => callback(error, domHandler.root),
 62 |         options,
 63 |         elementCallback,
 64 |     );
 65 |     return new Parser(domHandler, options);
 66 | }
 67 | /**
 68 |  * Builds a `Parser` whose DOM handler reports the root nodes to `callback`.
 69 |  * @param callback Called once parsing has completed, with an error (or `null`) and the array of root nodes.
 70 |  * @param options Optional settings for the parser and the DOM handler.
 71 |  * @param elementCallback Optional; called each time a tag has been completed inside of the DOM.
 72 |  * @deprecated Use `createDocumentStream` instead.
 73 |  */
 74 | export function createDomStream(
 75 |     callback: (error: Error | null, dom: ChildNode[]) => void,
 76 |     options?: Options,
 77 |     elementCallback?: (element: Element) => void,
 78 | ): Parser {
 79 |     const domHandler = new DomHandler(callback, options, elementCallback);
 80 |     const parser = new Parser(domHandler, options);
 81 |     return parser;
 82 | }
 83 | 
 84 | export {
 85 |     default as Tokenizer,
 86 |     type Callbacks as TokenizerCallbacks,
 87 |     QuoteType,
 88 | } from "./Tokenizer.js";
 89 | 
 90 | /*
 91 |  * All of the following exports exist for backwards-compatibility.
 92 |  * They should probably be removed eventually.
 93 |  */
 94 | export * as ElementType from "domelementtype";
 95 | 
 96 | import { getFeed, type Feed } from "domutils";
 97 | 
 98 | export { getFeed, type Feed } from "domutils";
 99 | 
100 | // Options used by `parseFeed` when the caller does not pass any.
101 | const parseFeedDefaultOptions = { xmlMode: true };
102 | 
103 | /**
104 |  * Parses `feed` into a DOM and hands the root nodes to `getFeed`.
105 |  * @param feed The feed to parse, as a string.
106 |  * @param options Parser and DOM handler options. If you pass your own, you should set `xmlMode` to `true`.
107 |  */
108 | export function parseFeed(
109 |     feed: string,
110 |     options: Options = parseFeedDefaultOptions,
111 | ): Feed | null {
112 |     return getFeed(parseDocument(feed, options).children);
113 | }
114 | 
115 | export * as DomUtils from "domutils";
116 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "compilerOptions": {
 3 |         /* Basic Options */
 4 |         "target": "es2019",
 5 |         "module": "nodenext",
 6 |         "declaration": true,
 7 |         "declarationMap": true,
 8 |         "sourceMap": true,
 9 | 
10 |         /* Strict Type-Checking Options */
11 |         "strict": true,
12 | 
13 |         /* Additional Checks */
14 |         "exactOptionalPropertyTypes": true,
15 |         "forceConsistentCasingInFileNames": true,
16 |         "isolatedModules": true,
17 |         "isolatedDeclarations": true,
18 |         "noFallthroughCasesInSwitch": true,
19 |         "noImplicitOverride": true,
20 |         "noImplicitReturns": true,
21 |         "noPropertyAccessFromIndexSignature": true,
22 |         "noUnusedLocals": true,
23 |         "noUnusedParameters": true,
24 | 
25 |         /* Module Resolution Options */
26 |         "esModuleInterop": true,
27 |         "resolveJsonModule": true
28 |     }
29 | }
30 | 


--------------------------------------------------------------------------------