',
31 | )
32 | .map((test) => ({
33 | html: test.input,
34 | fragmentContext: test.fragmentContext,
35 | }));
36 |
// Runs every micro benchmark input through the given parser: fragment
// parsing when the test supplies a fragment context, document parsing otherwise.
globalThis.runMicro = function (parser) {
    for (const { fragmentContext, html } of microTests) {
        if (fragmentContext) {
            parser.parseFragment(fragmentContext, html);
        } else {
            parser.parse(html);
        }
    }
};
46 |
// Pages data: source HTML of each SAX-parser test page.
const pages = loadSAXParserTestData().map(({ src }) => src);

// Parses every real-world test page with the given parser.
globalThis.runPages = function (parser) {
    for (const page of pages) {
        parser.parse(page);
    }
};
55 |
// Stream data: absolute path to each SAX test directory's source HTML file.
globalThis.files = readdirSync(saxPath).map((dirName) => new URL(`${dirName}/src.html`, saxPath).pathname);
58 |
// Utils

/**
 * Looks up the ops/sec (`hz`) recorded for the named benchmark in a suite.
 * Returns `undefined` when no benchmark with that name exists.
 */
function getHz(suite, testName) {
    for (let index = 0; index < suite.length; index++) {
        const benchmark = suite[index];

        if (benchmark.name === testName) {
            return benchmark.hz;
        }
    }

    return undefined;
}
67 |
/**
 * Runs a two-way Benchmark.js suite comparing the working copy against the
 * published upstream build, then logs the speed ratio between them.
 */
function runBench({ name, workingCopyFn, upstreamFn, defer = false }) {
    const suite = new Benchmark.Suite(name);

    suite.add('Working copy', workingCopyFn, { defer });
    suite.add('Upstream', upstreamFn, { defer });
    suite.on('start', () => console.log(name));
    suite.on('cycle', (event) => console.log(String(event.target)));
    suite.on('complete', () => {
        const workingCopyHz = getHz(suite, 'Working copy');
        const upstreamHz = getHz(suite, 'Upstream');
        const isFaster = workingCopyHz > upstreamHz;
        const ratio = isFaster ? workingCopyHz / upstreamHz : upstreamHz / workingCopyHz;

        console.log(`Working copy is ${ratio.toFixed(2)}x ${isFaster ? 'faster' : 'slower'}.\n`);
    });
    suite.run();
}
88 |
// Benchmarks

// MICRO: many small inputs — dominated by per-parse setup/teardown cost.
runBench({
    name: 'parse5 regression benchmark - MICRO',
    workingCopyFn: () => runMicro(workingCopy),
    upstreamFn: () => runMicro(upstreamParser),
});

// HUGE: one very large document — dominated by steady-state throughput.
runBench({
    name: 'parse5 regression benchmark - HUGE',
    workingCopyFn: () => workingCopy.parse(hugePage),
    upstreamFn: () => upstreamParser.parse(hugePage),
});

// PAGES: a set of real-world pages.
runBench({
    name: 'parse5 regression benchmark - PAGES',
    workingCopyFn: () => runPages(workingCopy),
    upstreamFn: () => runPages(upstreamParser),
});

// STREAM: streaming parse of the SAX test files. `defer: true` because both
// runners are async and signal completion via `deferred.resolve()`.
runBench({
    name: 'parse5 regression benchmark - STREAM',
    defer: true,
    workingCopyFn: async (deferred) => {
        // The working copy is exercised through its ParserStream directly.
        const parsePromises = files.map((fileName) => {
            const stream = createReadStream(fileName, 'utf8');
            const parserStream = new WorkingCopyParserStream();

            stream.pipe(parserStream);
            return finished(parserStream);
        });

        await Promise.all(parsePromises);
        deferred.resolve();
    },
    upstreamFn: async (deferred) => {
        // NOTE(review): upstream is driven through a stub writable that buffers
        // the file and then parses it in one go — presumably to keep the
        // comparison stream-driven on both sides; confirm against bench setup.
        const parsePromises = files.map(async (fileName) => {
            const stream = createReadStream(fileName, 'utf8');
            const writable = new WritableStreamStub();

            stream.pipe(writable);

            await finished(writable);

            upstreamParser.parse(writable.writtenData);
        });

        await Promise.all(parsePromises);
        deferred.resolve();
    },
});
139 |
--------------------------------------------------------------------------------
/docs/list-of-packages.md:
--------------------------------------------------------------------------------
1 | # List of parse5 toolset packages
2 |
3 | - [parse5](https://github.com/inikulin/parse5/tree/master/packages/parse5) - HTML parser and serializer.
4 | - [parse5-htmlparser2-tree-adapter](https://github.com/inikulin/parse5/tree/master/packages/parse5-htmlparser2-tree-adapter) - [htmlparser2](https://github.com/fb55/htmlparser2) tree adapter.
5 | - [parse5-parser-stream](https://github.com/inikulin/parse5/tree/master/packages/parse5-parser-stream) - streaming HTML parser with scripting support.
6 | - [parse5-plain-text-conversion-stream](https://github.com/inikulin/parse5/tree/master/packages/parse5-plain-text-conversion-stream) - stream that converts plain text files into HTML documents.
7 | - [parse5-sax-parser](https://github.com/inikulin/parse5/tree/master/packages/parse5-sax-parser) - streaming SAX-style HTML parser.
8 | - [parse5-html-rewriting-stream](https://github.com/inikulin/parse5/tree/master/packages/parse5-html-rewriting-stream) - streaming HTML rewriter.
9 |
--------------------------------------------------------------------------------
/docs/version-history.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | See [GitHub Releases](https://github.com/inikulin/parse5/releases) for the changelog.
--------------------------------------------------------------------------------
/eslint.config.js:
--------------------------------------------------------------------------------
1 | import eslintjs from '@eslint/js';
2 | import eslintConfigPrettier from 'eslint-config-prettier';
3 | import { configs as tseslintConfigs } from 'typescript-eslint';
4 | import globals from 'globals';
5 | import eslintUnicorn from 'eslint-plugin-unicorn';
6 |
const { configs: eslintConfigs } = eslintjs;

// Globs for shipped source code: benchmarks, build scripts, package sources.
const sourceFiles = ['bench/**/*.js', 'scripts/**/*.ts', 'packages/*/lib/**/*.ts'];
// Globs for test code, which gets slightly relaxed rules below.
const testFiles = ['test/**/*.{ts,js}', '**/*.test.ts'];
// Vendored test data and build artifacts that must never be linted.
const ignoreFiles = [
    'test/data/html5lib-tests',
    'test/data/html5lib-tests-fork',
    'packages/*/dist/',
    'test/dist/',
    'docs/build/',
    'coverage/',
];
const allFiles = [...sourceFiles, ...testFiles];

export default [
    {
        files: allFiles,
    },
    {
        ignores: ignoreFiles,
    },
    {
        // Node.js (ESM) + ES2019 globals for every linted file.
        languageOptions: {
            globals: {
                ...globals.nodeBuiltin,
                ...globals.es2019,
            },
        },
    },
    eslintConfigs.recommended,
    ...tseslintConfigs.recommended,
    {
        // Project-wide style rules.
        rules: {
            'no-console': 'error',
            curly: ['error', 'all'],
            'prefer-arrow-callback': 'error',
            'one-var': ['error', 'never'],
            'no-var': 'error',
            'prefer-const': 'error',
            'object-shorthand': 'error',
            'prefer-destructuring': [
                'error',
                {
                    object: true,
                    array: false,
                },
            ],
            'prefer-template': 'error',
            'arrow-body-style': ['error', 'as-needed'],
        },
    },
    {
        // TypeScript-only rules.
        files: ['**/*.ts'],
        rules: {
            '@typescript-eslint/no-unsafe-declaration-merging': 'off',
            '@typescript-eslint/no-non-null-assertion': 'warn',
            '@typescript-eslint/explicit-function-return-type': 'error',
            '@typescript-eslint/consistent-type-imports': 'error',

            '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
        },
    },
    {
        // Tests may use non-null assertions freely.
        files: testFiles,
        rules: {
            '@typescript-eslint/no-non-null-assertion': 'off',
        },
    },
    eslintConfigPrettier,
    eslintUnicorn.configs.recommended,
    {
        // Unicorn rules that conflict with this codebase's conventions.
        rules: {
            'unicorn/no-null': 'off',
            'unicorn/prevent-abbreviations': 'off',
            'unicorn/prefer-string-slice': 'off',
            'unicorn/prefer-code-point': 'off',
            'unicorn/no-array-push-push': 'off',
            'unicorn/no-for-loop': 'off',
            'unicorn/consistent-destructuring': 'off',
            'unicorn/prefer-string-replace-all': 'off',
            'unicorn/prefer-at': 'off',
            'unicorn/number-literal-case': 'off',
            'unicorn/no-nested-ternary': 'off',
            'unicorn/consistent-function-scoping': 'off',
            'unicorn/prefer-switch': ['error', { emptyDefaultCase: 'do-nothing-comment' }],
            'unicorn/prefer-single-call': 'off',
        },
    },
];
96 |
--------------------------------------------------------------------------------
/media/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/inikulin/parse5/0e9be1ce4033c0b8faf3d1e84da9076207c3316c/media/logo.png
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "parse5-build-scripts",
3 | "private": true,
4 | "type": "module",
5 | "workspaces": [
6 | "packages/*",
7 | "bench",
8 | "test"
9 | ],
10 | "devDependencies": {
11 | "@eslint/js": "^9.28.0",
12 | "@vitest/coverage-v8": "^3.1.4",
13 | "eslint": "^9.28.0",
14 | "eslint-config-prettier": "^10.1.5",
15 | "eslint-plugin-unicorn": "^59.0.1",
16 | "globals": "^16.2.0",
17 | "husky": "^9.1.7",
18 | "nano-staged": "^0.8.0",
19 | "outdent": "^0.8.0",
20 | "prettier": "^3.5.3",
21 | "ts-node": "^10.9.2",
22 | "typedoc": "^0.28.5",
23 | "typescript": "^5.8.3",
24 | "typescript-eslint": "^8.33.0",
25 | "vitest": "^3.0.1"
26 | },
27 | "scripts": {
28 | "build": "npm run build:esm && npm run build:cjs --workspaces --if-present",
29 | "build:esm": "tsc --build packages/* test",
30 | "build:docs": "typedoc",
31 | "prettier": "prettier '**/*.{js,ts,md,json,yml}' --log-level warn",
32 | "format": "npm run format:es && npm run format:prettier",
33 | "format:es": "npm run lint:es -- --fix",
34 | "format:prettier": "npm run prettier -- --write",
35 | "lint": "npm run lint:es && npm run lint:prettier",
36 | "lint:es": "eslint .",
37 | "lint:prettier": "npm run prettier -- --check",
38 | "unit-tests": "vitest run",
39 | "unit-tests-coverage": "vitest run --coverage",
40 | "test": "npm run lint && npm run unit-tests",
41 | "generate-feedback-tests": "node --loader ts-node/esm scripts/generate-parser-feedback-test/index.ts test/data/html5lib-tests/tree-construction/*.dat",
42 | "bench-perf": "npm run build && node bench/perf/index.js",
43 | "bench-memory-sax": "npm run build && node bench/memory/sax-parser.js",
44 | "preversion": "npm test",
45 | "pre-commit": "nano-staged",
46 | "publish": "npm publish --workspaces",
47 | "prepare": "husky install",
48 | "prepublish": "npm run build"
49 | },
50 | "nano-staged": {
51 | "*.{js,ts}": [
52 | "prettier --write",
53 | "eslint --fix"
54 | ],
55 | "*.{md,json,yml}": [
56 | "prettier --write"
57 | ]
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/packages/parse5-html-rewriting-stream/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/packages/parse5-html-rewriting-stream/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
parse5-html-rewriting-stream
9 | Streaming HTML rewriter.
10 |
11 |
12 |
13 |
14 | npm install --save parse5-html-rewriting-stream
15 |
16 |
17 |
18 |
19 | 📖 Documentation 📖
20 |
21 |
22 | ---
23 |
24 |
25 | List of parse5 toolset packages
26 |
27 |
28 |
29 | GitHub
30 |
31 |
32 |
33 | Changelog
34 |
35 |
--------------------------------------------------------------------------------
/packages/parse5-html-rewriting-stream/lib/index.ts:
--------------------------------------------------------------------------------
1 | import { html, type Token } from 'parse5';
2 | import {
3 | SAXParser,
4 | type EndTag,
5 | type StartTag,
6 | type Doctype,
7 | type Text,
8 | type Comment,
9 | type SaxToken,
10 | } from 'parse5-sax-parser';
11 | import { escapeText, escapeAttribute } from 'entities/escape';
12 |
13 | /**
14 | * Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter.
15 | * A [transform stream](https://nodejs.org/api/stream.html#stream_class_stream_transform) (which means you can pipe _through_ it, see example).
16 | *
17 | * The rewriter uses the raw source representation of tokens if they are not modified by the user. Therefore, the resulting
18 | * HTML is not affected by parser error-recovery mechanisms as in a classical parsing-serialization roundtrip.
19 | *
20 | * @example
21 | *
22 | * ```js
23 | * const RewritingStream = require('parse5-html-rewriting-stream');
24 | * const http = require('http');
25 | * const fs = require('fs');
26 | *
27 | * const file = fs.createWriteStream('/home/google.com.html');
28 | * const rewriter = new RewritingStream();
29 | *
 30 |  * // Replace spans with divs
31 | * rewriter.on('startTag', startTag => {
32 | * if (startTag.tagName === 'span') {
33 | * startTag.tagName = 'div';
34 | * }
35 | *
36 | * rewriter.emitStartTag(startTag);
37 | * });
38 | *
39 | * rewriter.on('endTag', endTag => {
40 | * if (endTag.tagName === 'span') {
41 | * endTag.tagName = 'div';
42 | * }
43 | *
44 | * rewriter.emitEndTag(endTag);
45 | * });
46 | *
 47 |  * // Wrap all text nodes with an <i> tag
 48 |  * rewriter.on('text', (_, raw) => {
 49 |  *     // Use the raw representation of text without HTML entities decoding
 50 |  *     rewriter.emitRaw(`<i>${raw}</i>`);
51 | * });
52 | *
53 | * http.get('http://google.com', res => {
54 | * // Assumes response is UTF-8.
55 | * res.setEncoding('utf8');
56 | * // `RewritingStream` is a `Transform` stream, which means you can pipe
57 | * // through it.
58 | * res.pipe(rewriter).pipe(file);
59 | * });
60 | * ```
61 | */
62 | export class RewritingStream extends SAXParser {
63 | /** Note: `sourceCodeLocationInfo` is always enabled. */
64 | constructor() {
65 | super({ sourceCodeLocationInfo: true });
66 | }
67 |
68 | override _transformChunk(chunk: string): string {
69 | // NOTE: ignore upstream return values as we want to push to
70 | // the `Writable` part of the `Transform` stream ourselves.
71 | super._transformChunk(chunk);
72 | return '';
73 | }
74 |
75 | private _getRawHtml(location: Token.Location): string {
76 | const { droppedBufferSize, html } = this.tokenizer.preprocessor;
77 | const start = location.startOffset - droppedBufferSize;
78 | const end = location.endOffset - droppedBufferSize;
79 |
80 | return html.slice(start, end);
81 | }
82 |
83 | // Events
84 | protected override emitIfListenerExists(eventName: string, token: SaxToken): boolean {
85 | if (!super.emitIfListenerExists(eventName, token)) {
86 | this.emitRaw(this._getRawHtml(token.sourceCodeLocation!));
87 | }
88 |
89 | // NOTE: don't skip new lines after `` and other tags,
90 | // otherwise we'll have incorrect raw data.
91 | this.parserFeedbackSimulator.skipNextNewLine = false;
92 | return true;
93 | }
94 |
95 | // Emitter API
96 | protected override _emitToken(eventName: string, token: SaxToken): void {
97 | this.emit(eventName, token, this._getRawHtml(token.sourceCodeLocation!));
98 | }
99 |
100 | /** Emits a serialized document type token into the output stream. */
101 | public emitDoctype(token: Doctype): void {
102 | let res = `';
115 |
116 | this.push(res);
117 | }
118 |
119 | /** Emits a serialized start tag token into the output stream. */
120 | public emitStartTag(token: StartTag): void {
121 | let res = `<${token.tagName}`;
122 |
123 | for (const attr of token.attrs) {
124 | res += ` ${attr.name}="${escapeAttribute(attr.value)}"`;
125 | }
126 |
127 | res += token.selfClosing ? '/>' : '>';
128 |
129 | this.push(res);
130 | }
131 |
132 | /** Emits a serialized end tag token into the output stream. */
133 | public emitEndTag(token: EndTag): void {
134 | this.push(`${token.tagName}>`);
135 | }
136 |
137 | /** Emits a serialized text token into the output stream. */
138 | public emitText({ text }: Text): void {
139 | this.push(
140 | !this.parserFeedbackSimulator.inForeignContent &&
141 | html.hasUnescapedText(this.tokenizer.lastStartTagName, true)
142 | ? text
143 | : escapeText(text),
144 | );
145 | }
146 |
147 | /** Emits a serialized comment token into the output stream. */
148 | public emitComment(token: Comment): void {
149 | this.push(``);
150 | }
151 |
152 | /** Emits a raw HTML string into the output stream. */
153 | public emitRaw(html: string): void {
154 | this.push(html);
155 | }
156 | }
157 |
export interface RewritingStream {
    /** Raised when the rewriter encounters a start tag. */
    on(event: 'startTag', listener: (startTag: StartTag, rawHtml: string) => void): this;
    /** Raised when the rewriter encounters an end tag. */
    on(event: 'endTag', listener: (endTag: EndTag, rawHtml: string) => void): this;
    /** Raised when the rewriter encounters a comment. */
    on(event: 'comment', listener: (comment: Comment, rawHtml: string) => void): this;
    /** Raised when the rewriter encounters text content. */
    on(event: 'text', listener: (text: Text, rawHtml: string) => void): this;
    /** Raised when the rewriter encounters a [document type declaration](https://en.wikipedia.org/wiki/Document_type_declaration). */
    on(event: 'doctype', listener: (doctype: Doctype, rawHtml: string) => void): this;

    /**
     * Base event handler.
     *
     * @param event Name of the event
     * @param handler Event handler
     */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    on(event: string, handler: (...args: any[]) => void): this;
}
179 |
--------------------------------------------------------------------------------
/packages/parse5-html-rewriting-stream/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "parse5-html-rewriting-stream",
3 | "type": "module",
4 | "description": "Streaming HTML rewriter.",
5 | "version": "7.1.0",
6 | "author": "Ivan Nikulin (https://github.com/inikulin)",
7 | "contributors": "https://github.com/inikulin/parse5/graphs/contributors",
8 | "homepage": "https://parse5.js.org",
9 | "funding": "https://github.com/inikulin/parse5?sponsor=1",
10 | "keywords": [
11 | "parse5",
12 | "parser",
13 | "stream",
14 | "streaming",
 15 |         "rewriter",
16 | "rewrite",
17 | "HTML"
18 | ],
19 | "license": "MIT",
20 | "main": "dist/index.js",
21 | "module": "dist/index.js",
22 | "types": "dist/index.d.ts",
23 | "exports": "./dist/index.js",
24 | "dependencies": {
25 | "entities": "^6.0.0",
26 | "parse5": "^7.0.0",
27 | "parse5-sax-parser": "^7.0.0"
28 | },
29 | "repository": {
30 | "type": "git",
31 | "url": "git://github.com/inikulin/parse5.git"
32 | },
33 | "files": [
34 | "dist/**/*.js",
35 | "dist/**/*.d.ts"
36 | ]
37 | }
38 |
--------------------------------------------------------------------------------
/packages/parse5-html-rewriting-stream/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "../../tsconfig.json",
3 | "compilerOptions": {
4 | "rootDir": "lib",
5 | "outDir": "dist"
6 | },
7 | "include": ["**/*.ts"],
8 | "exclude": ["**/*.test.ts", "dist"],
9 | "references": [{ "path": "../parse5/tsconfig.json" }, { "path": "../parse5-sax-parser/tsconfig.json" }]
10 | }
11 |
--------------------------------------------------------------------------------
/packages/parse5-html-rewriting-stream/typedoc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": ["../../typedoc.base.json"],
3 | "entryPoints": ["lib/index.ts"]
4 | }
5 |
--------------------------------------------------------------------------------
/packages/parse5-htmlparser2-tree-adapter/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/packages/parse5-htmlparser2-tree-adapter/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
11 |
12 |
13 |
14 | npm install --save parse5-htmlparser2-tree-adapter
15 |
16 |
17 |
18 |
19 | 📖 Documentation 📖
20 |
21 |
22 | ---
23 |
24 |
25 | List of parse5 toolset packages
26 |
27 |
28 |
29 | GitHub
30 |
31 |
32 |
33 | Changelog
34 |
35 |
--------------------------------------------------------------------------------
/packages/parse5-htmlparser2-tree-adapter/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "parse5-htmlparser2-tree-adapter",
3 | "type": "module",
4 | "description": "htmlparser2 tree adapter for parse5.",
5 | "version": "7.1.0",
6 | "author": "Ivan Nikulin (https://github.com/inikulin)",
7 | "contributors": "https://github.com/inikulin/parse5/graphs/contributors",
8 | "homepage": "https://parse5.js.org",
9 | "funding": "https://github.com/inikulin/parse5?sponsor=1",
10 | "keywords": [
11 | "parse5",
12 | "parser",
13 | "tree adapter",
14 | "htmlparser2"
15 | ],
16 | "license": "MIT",
17 | "main": "dist/cjs/index.js",
18 | "module": "dist/index.js",
19 | "types": "dist/index.d.ts",
20 | "exports": {
21 | "import": "./dist/index.js",
22 | "require": "./dist/cjs/index.js"
23 | },
24 | "dependencies": {
25 | "domhandler": "^5.0.3",
26 | "parse5": "^7.0.0"
27 | },
28 | "scripts": {
29 | "build:cjs": "tsc --noCheck --moduleResolution node10 --module CommonJS --target ES6 --outDir dist/cjs && echo '{\"type\":\"commonjs\"}' > dist/cjs/package.json"
30 | },
31 | "repository": {
32 | "type": "git",
33 | "url": "git://github.com/inikulin/parse5.git"
34 | },
35 | "files": [
36 | "dist/cjs/package.json",
37 | "dist/**/*.js",
38 | "dist/**/*.d.ts"
39 | ]
40 | }
41 |
--------------------------------------------------------------------------------
/packages/parse5-htmlparser2-tree-adapter/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "../../tsconfig.json",
3 | "compilerOptions": {
4 | "rootDir": "lib",
5 | "outDir": "dist"
6 | },
7 | "include": ["**/*.ts"],
8 | "exclude": ["**/*.test.ts", "dist"]
9 | }
10 |
--------------------------------------------------------------------------------
/packages/parse5-htmlparser2-tree-adapter/typedoc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": ["../../typedoc.base.json"],
3 | "entryPoints": ["lib/index.ts"]
4 | }
5 |
--------------------------------------------------------------------------------
/packages/parse5-parser-stream/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/packages/parse5-parser-stream/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
parse5-parser-stream
9 | Streaming HTML parser with scripting support.
10 |
11 |
12 |
13 |
14 | npm install --save parse5-parser-stream
15 |
16 |
17 |
18 |
19 | 📖 Documentation 📖
20 |
21 |
22 | ---
23 |
24 |
25 | List of parse5 toolset packages
26 |
27 |
28 |
29 | GitHub
30 |
31 |
32 |
33 | Changelog
34 |
35 |
--------------------------------------------------------------------------------
/packages/parse5-parser-stream/lib/index.ts:
--------------------------------------------------------------------------------
1 | import { Writable } from 'node:stream';
2 | import { Parser, type ParserOptions, type TreeAdapterTypeMap, type DefaultTreeAdapterMap } from 'parse5';
3 |
4 | /**
5 | * Streaming HTML parser with scripting support.
6 | * A [writable stream](https://nodejs.org/api/stream.html#stream_class_stream_writable).
7 | *
8 | * @example
9 | *
10 | * ```js
11 | * const ParserStream = require('parse5-parser-stream');
12 | * const http = require('http');
13 | * const { finished } = require('node:stream');
14 | *
15 | * // Fetch the page content and obtain it's node
16 | * http.get('http://inikulin.github.io/parse5/', res => {
17 | * const parser = new ParserStream();
18 | *
19 | * finished(parser, () => {
20 | * console.log(parser.document.childNodes[1].childNodes[0].tagName); //> 'head'
21 | * });
22 | *
23 | * res.pipe(parser);
24 | * });
25 | * ```
26 | *
27 | */
28 | export class ParserStream extends Writable {
29 | static getFragmentStream(
30 | fragmentContext?: T['parentNode'] | null,
31 | options?: ParserOptions,
32 | ): ParserStream {
33 | const parser = Parser.getFragmentParser(fragmentContext, options);
34 | const stream = new ParserStream(options, parser);
35 | return stream;
36 | }
37 |
38 | private lastChunkWritten = false;
39 | private writeCallback: undefined | (() => void) = undefined;
40 |
41 | private pendingHtmlInsertions: string[] = [];
42 | /** The resulting document node. */
43 | public get document(): T['document'] {
44 | return this.parser.document;
45 | }
46 | public getFragment(): T['documentFragment'] {
47 | return this.parser.getFragment();
48 | }
49 |
50 | /**
51 | * @param options Parsing options.
52 | */
53 | constructor(
54 | options?: ParserOptions,
55 | public parser: Parser = new Parser(options),
56 | ) {
57 | super({ decodeStrings: false });
58 |
59 | const resume = (): void => {
60 | for (let i = this.pendingHtmlInsertions.length - 1; i >= 0; i--) {
61 | this.parser.tokenizer.insertHtmlAtCurrentPos(this.pendingHtmlInsertions[i]);
62 | }
63 |
64 | this.pendingHtmlInsertions.length = 0;
65 |
66 | //NOTE: keep parsing if we don't wait for the next input chunk
67 | this.parser.tokenizer.resume(this.writeCallback);
68 | };
69 |
70 | const documentWrite = (html: string): void => {
71 | if (!this.parser.stopped) {
72 | this.pendingHtmlInsertions.push(html);
73 | }
74 | };
75 |
76 | const scriptHandler = (scriptElement: T['element']): void => {
77 | if (this.listenerCount('script') > 0) {
78 | this.parser.tokenizer.pause();
79 | this.emit('script', scriptElement, documentWrite, resume);
80 | }
81 | };
82 |
83 | this.parser.scriptHandler = scriptHandler;
84 | }
85 |
86 | //WritableStream implementation
87 | override _write(chunk: string, _encoding: string, callback: () => void): void {
88 | if (typeof chunk !== 'string') {
89 | throw new TypeError('Parser can work only with string streams.');
90 | }
91 |
92 | this.writeCallback = callback;
93 | this.parser.tokenizer.write(chunk, this.lastChunkWritten, this.writeCallback);
94 | }
95 |
96 | // TODO [engine:node@>=16]: Due to issues with Node < 16, we are overriding `end` instead of `_final`.
97 |
98 | // eslint-disable-next-line @typescript-eslint/no-explicit-any
99 | override end(chunk?: any, encoding?: any, callback?: any): any {
100 | this.lastChunkWritten = true;
101 | super.end(chunk || '', encoding, callback);
102 | }
103 | }
104 |
105 | export interface ParserStream {
106 | /**
107 | * Raised when parser encounters a `');
132 | * ```
133 | *
134 | * @param event Name of the event
135 | * @param handler
136 | */
137 | on(
138 | event: 'script',
139 | handler: (scriptElement: T['element'], documentWrite: (html: string) => void, resume: () => void) => void,
140 | ): void;
141 | /**
142 | * Base event handler.
143 | *
144 | * @param event Name of the event
145 | * @param handler Event handler
146 | */
147 | // eslint-disable-next-line @typescript-eslint/no-explicit-any
148 | on(event: string, handler: (...args: any[]) => void): this;
149 | }
150 |
--------------------------------------------------------------------------------
/packages/parse5-parser-stream/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "parse5-parser-stream",
3 | "type": "module",
4 | "description": "Streaming HTML parser with scripting support.",
5 | "version": "7.1.2",
6 | "author": "Ivan Nikulin (https://github.com/inikulin)",
7 | "contributors": "https://github.com/inikulin/parse5/graphs/contributors",
8 | "homepage": "https://parse5.js.org",
9 | "funding": "https://github.com/inikulin/parse5?sponsor=1",
10 | "keywords": [
11 | "parse5",
12 | "parser",
13 | "stream",
14 | "streaming"
15 | ],
16 | "license": "MIT",
17 | "main": "dist/cjs/index.js",
18 | "module": "dist/index.js",
19 | "types": "dist/index.d.ts",
20 | "exports": {
21 | "import": "./dist/index.js",
22 | "require": "./dist/cjs/index.js"
23 | },
24 | "dependencies": {
25 | "parse5": "^7.0.0"
26 | },
27 | "scripts": {
28 | "build:cjs": "tsc --noCheck --moduleResolution node10 --module CommonJS --target ES6 --outDir dist/cjs && echo '{\"type\":\"commonjs\"}' > dist/cjs/package.json"
29 | },
30 | "repository": {
31 | "type": "git",
32 | "url": "git://github.com/inikulin/parse5.git"
33 | },
34 | "files": [
35 | "dist/cjs/package.json",
36 | "dist/**/*.js",
37 | "dist/**/*.d.ts"
38 | ]
39 | }
40 |
--------------------------------------------------------------------------------
/packages/parse5-parser-stream/test/location-info.test.ts:
--------------------------------------------------------------------------------
import { it, assert } from 'vitest';
import { generateLocationInfoParserTests } from 'parse5-test-utils/utils/generate-location-info-parser-tests.js';
import { generateTestsForEachTreeAdapter } from 'parse5-test-utils/utils/common.js';
import { parseChunked } from './utils/parse-chunked.js';

// Run the shared location-info suite through the chunked ParserStream wrapper.
generateLocationInfoParserTests('location-info', (input, opts) =>
    // NOTE: because of performance use bigger chunks here
    parseChunked({ input }, opts, 100, 400),
);

generateTestsForEachTreeAdapter('location-info', (treeAdapter) => {
    // NOTE(review): the test title and the `html` literal below lost their markup
    // during extraction (text between '<' and '>' was stripped). Upstream this test
    // parses markup whose <html>/<head>/<body> elements are implicitly generated —
    // confirm the literal against the original repository before relying on it.
    it('Regression - location info for the implicitly generated , and (GH-44)', () => {
        const html = '
';

        const opts = {
            treeAdapter,
            sourceCodeLocationInfo: true,
        };

        // Parse and grab the implicitly generated html/head/body elements.
        const document = parseChunked({ input: html }, opts).node;
        const htmlEl = treeAdapter.getChildNodes(document)[0];
        const headEl = treeAdapter.getChildNodes(htmlEl)[0];
        const bodyEl = treeAdapter.getChildNodes(htmlEl)[1];

        // Implicitly generated elements must not carry source locations.
        assert.strictEqual(treeAdapter.getNodeSourceCodeLocation(htmlEl), null);
        assert.strictEqual(treeAdapter.getNodeSourceCodeLocation(headEl), null);
        assert.strictEqual(treeAdapter.getNodeSourceCodeLocation(bodyEl), null);
    });
});
30 |
--------------------------------------------------------------------------------
/packages/parse5-parser-stream/test/parser-stream.test.ts:
--------------------------------------------------------------------------------
1 | import { it, assert, describe } from 'vitest';
2 | import { ParserStream } from '../lib/index.js';
3 | import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js';
4 | import { parseChunked } from './utils/parse-chunked.js';
5 | import { finished } from 'parse5-test-utils/utils/common.js';
6 |
7 | generateParsingTests(
8 | 'ParserStream',
9 | 'ParserStream',
10 | {
11 | expectErrors: [
12 | //TODO(GH-448): Foreign content behaviour was updated in the HTML spec.
13 | //The old test suite still tests the old behaviour.
14 | '0.foreign-fragment',
15 | '1.foreign-fragment',
16 | '38.foreign-fragment',
17 | '40.foreign-fragment',
18 | '47.foreign-fragment',
19 | '48.foreign-fragment',
20 | ],
21 | },
22 | (test, opts) => parseChunked(test, opts),
23 | );
24 |
25 | describe('ParserStream', () => {
26 | it('Fix empty stream parsing with ParserStream (GH-196)', async () => {
27 | const parser = new ParserStream();
28 |
29 | parser.end();
30 |
31 | await finished(parser);
32 |
33 | assert.ok(parser.document.childNodes.length > 0);
34 | });
35 |
36 | it('Should not accept binary input (GH-269)', () => {
37 | const stream = new ParserStream();
38 | const buf = Buffer.from('test');
39 |
40 | assert.throws(() => stream.write(buf), TypeError);
41 | });
42 | });
43 |
--------------------------------------------------------------------------------
/packages/parse5-parser-stream/test/scripting.test.ts:
--------------------------------------------------------------------------------
import { it } from 'vitest';
import { ParserStream } from '../lib/index.js';
import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js';
import { makeChunks, generateTestsForEachTreeAdapter, finished } from 'parse5-test-utils/utils/common.js';
import { runInNewContext } from 'node:vm';

// Small delay used to emulate asynchronous script execution / async input arrival.
// NOTE(review): the return annotation was presumably `Promise<void>` before the
// generic argument was lost in extraction — confirm against the original file.
function pause(): Promise {
    return new Promise((resolve) => setTimeout(resolve, 5));
}

const suitePath = new URL('../../../test/data/tree-construction-scripting', import.meta.url);

// Tree-construction suite with scripting enabled: every script element is
// executed in a fresh VM context that can call document.write().
generateParsingTests(
    'ParserStream - Scripting',
    'ParserStream - Scripting',
    {
        withoutErrors: true,
        suitePath,
    },
    async (test, opts) => {
        const chunks = makeChunks(test.input);
        const parser = test.fragmentContext
            ? ParserStream.getFragmentStream(test.fragmentContext, opts)
            : new ParserStream(opts);

        parser.on('script', async (scriptElement, documentWrite, resume) => {
            // The script source lives in the element's single text child (if any).
            const scriptTextNode = opts.treeAdapter.getChildNodes(scriptElement)[0];
            const script = scriptTextNode ? opts.treeAdapter.getTextNodeContent(scriptTextNode) : '';

            //NOTE: emulate postponed script execution
            await pause();

            try {
                runInNewContext(script, { document: { write: documentWrite } });
                resume();
            } catch (error) {
                // Surface script runtime failures as stream errors.
                parser.emit('error', error);
            }
        });

        //NOTE: emulate async input stream behavior
        for (const chunk of chunks) {
            parser.write(chunk);
            await pause();
        }

        parser.end();

        await finished(parser);

        return {
            node: test.fragmentContext ? parser.getFragment() : parser.document,
            chunks,
        };
    },
);

generateTestsForEachTreeAdapter('ParserStream', (treeAdapter) => {
    it('Regression - Synchronously calling resume() leads to crash (GH-98)', async () => {
        const parser = new ParserStream({ treeAdapter });

        // Resume synchronously from inside the script handler.
        parser.on('script', (_el, _docWrite, resume) => resume());

        // NOTE(review): this argument was presumably a '<script>…</script>' literal
        // whose markup was stripped during extraction — verify against upstream.
        parser.end('');

        await new Promise((resolve) => {
            process.nextTick(resolve);
        });
    });

    it('Regression - Parsing loop lock causes accidental hang ups (GH-101)', () => {
        const parser = new ParserStream({ treeAdapter });

        // Resume on the next tick to exercise the parsing-loop lock.
        parser.on('script', (_scriptElement, _documentWrite, resume) => process.nextTick(resume));

        // NOTE(review): these literals appear to have lost '<script>…</script>'
        // markup during extraction — verify against upstream before relying on them.
        parser.write('');
        parser.end('dawg');

        return finished(parser);
    });
});
82 |
--------------------------------------------------------------------------------
/packages/parse5-parser-stream/test/utils/parse-chunked.ts:
--------------------------------------------------------------------------------
1 | import type { ParserOptions, TreeAdapterTypeMap } from 'parse5';
2 | import { ParserStream } from '../../lib/index.js';
3 | import { makeChunks } from 'parse5-test-utils/utils/common.js';
4 |
5 | export function parseChunked(
6 | test: { input: string; fragmentContext?: T['parentNode'] },
7 | opts: ParserOptions,
8 | minChunkSize?: number,
9 | maxChunkSize?: number,
10 | ): { node: TreeAdapterTypeMap['document']; chunks: string[] } {
11 | const parserStream = test.fragmentContext
12 | ? ParserStream.getFragmentStream(test.fragmentContext, opts)
13 | : new ParserStream(opts);
14 | const chunks = makeChunks(test.input, minChunkSize, maxChunkSize);
15 |
16 | // NOTE: set small waterline for testing purposes
17 | parserStream.parser.tokenizer.preprocessor.bufferWaterline = 8;
18 |
19 | for (let i = 0; i < chunks.length - 1; i++) {
20 | parserStream.write(chunks[i]);
21 | }
22 |
23 | parserStream.end(chunks[chunks.length - 1]);
24 |
25 | return {
26 | node: test.fragmentContext ? parserStream.getFragment() : parserStream.document,
27 | chunks,
28 | };
29 | }
30 |
--------------------------------------------------------------------------------
/packages/parse5-parser-stream/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "../../tsconfig.json",
3 | "compilerOptions": {
4 | "rootDir": "lib",
5 | "outDir": "dist"
6 | },
7 | "include": ["**/*.ts"],
8 | "exclude": ["**/*.test.ts", "dist", "test"]
9 | }
10 |
--------------------------------------------------------------------------------
/packages/parse5-parser-stream/typedoc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": ["../../typedoc.base.json"],
3 | "entryPoints": ["lib/index.ts"]
4 | }
5 |
--------------------------------------------------------------------------------
/packages/parse5-plain-text-conversion-stream/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/packages/parse5-plain-text-conversion-stream/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
parse5-plain-text-conversion-stream
9 |
Stream that converts plain text into an HTML document as required by the HTML specification.
10 |
11 |
12 |
13 |
14 | npm install --save parse5-plain-text-conversion-stream
15 |
16 |
17 |
18 |
19 | 📖 Documentation 📖
20 |
21 |
22 | ---
23 |
24 |
25 | List of parse5 toolset packages
26 |
27 |
28 |
29 | GitHub
30 |
31 |
32 |
33 | Changelog
34 |
35 |
--------------------------------------------------------------------------------
/packages/parse5-plain-text-conversion-stream/lib/index.ts:
--------------------------------------------------------------------------------
1 | import { type ParserOptions, type TreeAdapterTypeMap, html } from 'parse5';
2 | import { ParserStream } from 'parse5-parser-stream';
3 |
4 | const { TAG_ID: $, TAG_NAMES: TN } = html;
5 |
6 | /**
7 | * Converts plain text files into HTML document as required by [HTML specification](https://html.spec.whatwg.org/#read-text).
8 | * A [writable stream](https://nodejs.org/api/stream.html#stream_class_stream_writable).
9 | *
10 | * @example
11 | *
12 | * ```js
13 | * const PlainTextConversionStream = require('parse5-plain-text-conversion-stream');
14 | * const fs = require('fs');
15 | * const { finished } = require('node:stream');
16 | *
17 | * const file = fs.createReadStream('war_and_peace.txt');
18 | * const converter = new PlainTextConversionStream();
19 | *
20 | * finished(converter, () => {
21 | * console.log(converter.document.childNodes[1].childNodes[0].tagName); //> 'head'
22 | * });
23 | *
24 | * file.pipe(converter);
25 | * ```
26 | */
27 | export class PlainTextConversionStream extends ParserStream {
28 | constructor(options?: ParserOptions) {
29 | super(options);
30 |
31 | // NOTE: see https://html.spec.whatwg.org/#read-text
32 | this.parser._insertFakeElement(TN.HTML, $.HTML);
33 | this.parser._insertFakeElement(TN.HEAD, $.HEAD);
34 | this.parser.openElements.pop();
35 | this.parser._insertFakeElement(TN.BODY, $.BODY);
36 | this.parser._insertFakeElement(TN.PRE, $.PRE);
37 | this.parser.treeAdapter.insertText(this.parser.openElements.current, '\n');
38 | this.parser.switchToPlaintextParsing();
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/packages/parse5-plain-text-conversion-stream/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "parse5-plain-text-conversion-stream",
3 | "type": "module",
4 | "description": "Stream that converts plain text files into HTML document.",
5 | "version": "7.0.0",
6 | "author": "Ivan Nikulin (https://github.com/inikulin)",
7 | "contributors": "https://github.com/inikulin/parse5/graphs/contributors",
8 | "homepage": "https://parse5.js.org",
9 | "funding": "https://github.com/inikulin/parse5?sponsor=1",
10 | "keywords": [
11 | "parse5",
12 | "parser",
13 | "stream",
14 | "streaming",
15 | "plain",
16 | "text",
17 | "plain text"
18 | ],
19 | "license": "MIT",
20 | "main": "dist/index.js",
21 | "module": "dist/index.js",
22 | "types": "dist/index.d.ts",
23 | "exports": "./dist/index.js",
24 | "dependencies": {
25 | "parse5": "^7.0.0",
26 | "parse5-parser-stream": "^7.0.0"
27 | },
28 | "repository": {
29 | "type": "git",
30 | "url": "git://github.com/inikulin/parse5.git"
31 | },
32 | "files": [
33 | "dist/**/*.js",
34 | "dist/**/*.d.ts"
35 | ]
36 | }
37 |
--------------------------------------------------------------------------------
/packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.ts:
--------------------------------------------------------------------------------
import { it, assert, describe } from 'vitest';
import { serialize } from 'parse5';
import { PlainTextConversionStream } from '../lib/index.js';
import { generateTestsForEachTreeAdapter } from 'parse5-test-utils/utils/common.js';

// NOTE(review): the suite name reads 'plain-test-…' — likely a typo for
// 'plain-text-…' (compare the describe() below); confirm before renaming,
// since the name may be referenced by test tooling.
generateTestsForEachTreeAdapter('plain-test-conversion-stream', (treeAdapter) => {
    it('Plain text conversion stream', () => {
        const converter = new PlainTextConversionStream({ treeAdapter });

        // CRLF must normalize to LF and NULL must become U+FFFD in the output.
        converter.write('Hey');
        converter.write('\r\nyo');
        converter.write('\u0000');
        converter.end('');

        const result = serialize(converter.document, { treeAdapter });

        // NOTE(review): this expected literal appears mangled by extraction —
        // per the read-text algorithm the serialization should be the full
        // html/head/body/pre wrapper around the text; verify against upstream.
        assert.strictEqual(
            result,
            '\nHey\nyo\uFFFD<html><head><body> ',
        );
    });
});

describe('plain-text-conversion-stream', () => {
    it('Should not accept binary input (GH-269)', () => {
        const stream = new PlainTextConversionStream();
        const buf = Buffer.from('test');

        // The stream works on decoded strings only; Buffers must be rejected.
        assert.throws(() => stream.write(buf), TypeError);
    });
});
32 |
--------------------------------------------------------------------------------
/packages/parse5-plain-text-conversion-stream/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "../../tsconfig.json",
3 | "compilerOptions": {
4 | "rootDir": "lib",
5 | "outDir": "dist"
6 | },
7 | "include": ["**/*.ts"],
8 | "exclude": ["**/*.test.ts", "dist", "test"]
9 | }
10 |
--------------------------------------------------------------------------------
/packages/parse5-plain-text-conversion-stream/typedoc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": ["../../typedoc.base.json"],
3 | "entryPoints": ["lib/index.ts"]
4 | }
5 |
--------------------------------------------------------------------------------
/packages/parse5-sax-parser/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/packages/parse5-sax-parser/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
parse5-sax-parser
9 |
Streaming SAX-style HTML parser.
10 |
11 |
12 |
13 |
14 | npm install --save parse5-sax-parser
15 |
16 |
17 |
18 |
19 | 📖 Documentation 📖
20 |
21 |
22 | ---
23 |
24 |
25 | List of parse5 toolset packages
26 |
27 |
28 |
29 | GitHub
30 |
31 |
32 |
33 | Changelog
34 |
35 |
--------------------------------------------------------------------------------
/packages/parse5-sax-parser/lib/dev-null-stream.ts:
--------------------------------------------------------------------------------
1 | import { Writable } from 'node:stream';
2 |
3 | export class DevNullStream extends Writable {
4 | override _write(_chunk: string, _encoding: string, cb: () => void): void {
5 | cb();
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts:
--------------------------------------------------------------------------------
1 | import {
2 | Tokenizer,
3 | type TokenizerOptions,
4 | TokenizerMode,
5 | type TokenHandler,
6 | Token,
7 | foreignContent,
8 | html,
9 | } from 'parse5';
10 |
11 | const $ = html.TAG_ID;
12 |
13 | const REPLACEMENT_CHARACTER = '\uFFFD';
14 | const LINE_FEED_CODE_POINT = 0x0a;
15 |
16 | /**
17 | * Simulates adjustments of the Tokenizer which are performed by the standard parser during tree construction.
18 | */
19 | export class ParserFeedbackSimulator implements TokenHandler {
20 | private namespaceStack: html.NS[] = [];
21 | public inForeignContent = false;
22 | public skipNextNewLine = false;
23 | public tokenizer: Tokenizer;
24 |
25 | constructor(
26 | options: TokenizerOptions,
27 | private handler: TokenHandler,
28 | ) {
29 | this.tokenizer = new Tokenizer(options, this);
30 | this._enterNamespace(html.NS.HTML);
31 | }
32 |
33 | /** @internal */
34 | onNullCharacter(token: Token.CharacterToken): void {
35 | this.skipNextNewLine = false;
36 |
37 | if (this.inForeignContent) {
38 | this.handler.onCharacter({
39 | type: Token.TokenType.CHARACTER,
40 | chars: REPLACEMENT_CHARACTER,
41 | location: token.location,
42 | });
43 | } else {
44 | this.handler.onNullCharacter(token);
45 | }
46 | }
47 |
48 | /** @internal */
49 | onWhitespaceCharacter(token: Token.CharacterToken): void {
50 | if (this.skipNextNewLine && token.chars.charCodeAt(0) === LINE_FEED_CODE_POINT) {
51 | this.skipNextNewLine = false;
52 |
53 | if (token.chars.length === 1) {
54 | return;
55 | }
56 |
57 | token.chars = token.chars.substr(1);
58 | }
59 |
60 | this.handler.onWhitespaceCharacter(token);
61 | }
62 |
63 | /** @internal */
64 | onCharacter(token: Token.CharacterToken): void {
65 | this.skipNextNewLine = false;
66 | this.handler.onCharacter(token);
67 | }
68 |
69 | /** @internal */
70 | onComment(token: Token.CommentToken): void {
71 | this.skipNextNewLine = false;
72 | this.handler.onComment(token);
73 | }
74 |
75 | /** @internal */
76 | onDoctype(token: Token.DoctypeToken): void {
77 | this.skipNextNewLine = false;
78 | this.handler.onDoctype(token);
79 | }
80 |
81 | /** @internal */
82 | onEof(token: Token.EOFToken): void {
83 | this.skipNextNewLine = false;
84 | this.handler.onEof(token);
85 | }
86 |
87 | //Namespace stack mutations
88 | private _enterNamespace(namespace: html.NS): void {
89 | this.namespaceStack.unshift(namespace);
90 | this.inForeignContent = namespace !== html.NS.HTML;
91 | this.tokenizer.inForeignNode = this.inForeignContent;
92 | }
93 |
94 | private _leaveCurrentNamespace(): void {
95 | this.namespaceStack.shift();
96 | this.inForeignContent = this.namespaceStack[0] !== html.NS.HTML;
97 | this.tokenizer.inForeignNode = this.inForeignContent;
98 | }
99 |
100 | //Token handlers
101 | private _ensureTokenizerMode(tn: html.TAG_ID): void {
102 | switch (tn) {
103 | case $.TEXTAREA:
104 | case $.TITLE: {
105 | this.tokenizer.state = TokenizerMode.RCDATA;
106 | break;
107 | }
108 | case $.PLAINTEXT: {
109 | this.tokenizer.state = TokenizerMode.PLAINTEXT;
110 | break;
111 | }
112 | case $.SCRIPT: {
113 | this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
114 | break;
115 | }
116 | case $.STYLE:
117 | case $.IFRAME:
118 | case $.XMP:
119 | case $.NOEMBED:
120 | case $.NOFRAMES:
121 | case $.NOSCRIPT: {
122 | this.tokenizer.state = TokenizerMode.RAWTEXT;
123 | break;
124 | }
125 | default:
126 | // Do nothing
127 | }
128 | }
129 |
130 | /** @internal */
131 | onStartTag(token: Token.TagToken): void {
132 | let tn = token.tagID;
133 |
134 | switch (tn) {
135 | case $.SVG: {
136 | this._enterNamespace(html.NS.SVG);
137 | break;
138 | }
139 | case $.MATH: {
140 | this._enterNamespace(html.NS.MATHML);
141 | break;
142 | }
143 | default:
144 | // Do nothing
145 | }
146 |
147 | if (this.inForeignContent) {
148 | if (foreignContent.causesExit(token)) {
149 | this._leaveCurrentNamespace();
150 | } else {
151 | const currentNs = this.namespaceStack[0];
152 |
153 | if (currentNs === html.NS.MATHML) {
154 | foreignContent.adjustTokenMathMLAttrs(token);
155 | } else if (currentNs === html.NS.SVG) {
156 | foreignContent.adjustTokenSVGTagName(token);
157 | foreignContent.adjustTokenSVGAttrs(token);
158 | }
159 |
160 | foreignContent.adjustTokenXMLAttrs(token);
161 |
162 | tn = token.tagID;
163 |
164 | if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
165 | this._enterNamespace(html.NS.HTML);
166 | }
167 | }
168 | } else {
169 | switch (tn) {
170 | case $.PRE:
171 | case $.TEXTAREA:
172 | case $.LISTING: {
173 | this.skipNextNewLine = true;
174 | break;
175 | }
176 | case $.IMAGE: {
177 | token.tagName = html.TAG_NAMES.IMG;
178 | token.tagID = $.IMG;
179 | break;
180 | }
181 | default:
182 | // Do nothing
183 | }
184 |
185 | this._ensureTokenizerMode(tn);
186 | }
187 |
188 | this.handler.onStartTag(token);
189 | }
190 |
191 | /** @internal */
192 | onEndTag(token: Token.TagToken): void {
193 | let tn = token.tagID;
194 |
195 | if (!this.inForeignContent) {
196 | const previousNs = this.namespaceStack[1];
197 |
198 | if (previousNs === html.NS.SVG) {
199 | const adjustedTagName = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);
200 |
201 | if (adjustedTagName) {
202 | tn = html.getTagID(adjustedTagName);
203 | }
204 | }
205 |
206 | //NOTE: check for exit from integration point
207 | if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs)) {
208 | this._leaveCurrentNamespace();
209 | }
210 | } else if (
211 | (tn === $.SVG && this.namespaceStack[0] === html.NS.SVG) ||
212 | (tn === $.MATH && this.namespaceStack[0] === html.NS.MATHML)
213 | ) {
214 | this._leaveCurrentNamespace();
215 | }
216 |
217 | // NOTE: adjust end tag name as well for consistency
218 | if (this.namespaceStack[0] === html.NS.SVG) {
219 | foreignContent.adjustTokenSVGTagName(token);
220 | }
221 |
222 | this.handler.onEndTag(token);
223 | }
224 | }
225 |
--------------------------------------------------------------------------------
/packages/parse5-sax-parser/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "parse5-sax-parser",
3 | "type": "module",
4 | "description": "Streaming SAX-style HTML parser.",
5 | "version": "7.0.0",
6 | "author": "Ivan Nikulin (https://github.com/inikulin)",
7 | "contributors": "https://github.com/inikulin/parse5/graphs/contributors",
8 | "homepage": "https://parse5.js.org",
9 | "funding": "https://github.com/inikulin/parse5?sponsor=1",
10 | "keywords": [
11 | "parse5",
12 | "parser",
13 | "stream",
14 | "streaming",
15 | "SAX"
16 | ],
17 | "license": "MIT",
18 | "main": "dist/index.js",
19 | "module": "dist/index.js",
20 | "types": "dist/index.d.ts",
21 | "exports": "./dist/index.js",
22 | "dependencies": {
23 | "parse5": "^7.0.0"
24 | },
25 | "repository": {
26 | "type": "git",
27 | "url": "git://github.com/inikulin/parse5.git"
28 | },
29 | "files": [
30 | "dist/**/*.js",
31 | "dist/**/*.d.ts"
32 | ]
33 | }
34 |
--------------------------------------------------------------------------------
/packages/parse5-sax-parser/test/location-info.test.ts:
--------------------------------------------------------------------------------
1 | import { it, assert, describe } from 'vitest';
2 | import { SAXParser } from '../lib/index.js';
3 | import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js';
4 | import { writeChunkedToStream } from 'parse5-test-utils/utils/common.js';
5 | import type { Token } from 'parse5';
6 |
7 | function assertLocation({ sourceCodeLocation }: { sourceCodeLocation: Token.Location }): void {
8 | assert.strictEqual(typeof sourceCodeLocation.startLine, 'number');
9 | assert.strictEqual(typeof sourceCodeLocation.startCol, 'number');
10 | assert.strictEqual(typeof sourceCodeLocation.startOffset, 'number');
11 | assert.strictEqual(typeof sourceCodeLocation.endOffset, 'number');
12 | assert.ok(sourceCodeLocation.startOffset < sourceCodeLocation.endOffset);
13 | }
14 |
15 | describe('location-info', () => {
16 | it('Location info (SAX)', () => {
17 | for (const test of loadSAXParserTestData()) {
18 | //NOTE: we've already tested the correctness of the location info with the Tokenizer tests.
19 | //So here we just check that SAXParser provides this info in the handlers.
20 | const parser = new SAXParser({ sourceCodeLocationInfo: true });
21 |
22 | parser.on('startTag', assertLocation);
23 | parser.on('endTag', assertLocation);
24 | parser.on('doctype', assertLocation);
25 | parser.on('comment', assertLocation);
26 | parser.on('text', assertLocation);
27 |
28 | writeChunkedToStream(test.src, parser);
29 | }
30 | });
31 |
32 | it('Regression - location info for text (GH-153, GH-266)', () => {
33 | const html = 'Here is a title ';
34 | const parser = new SAXParser({ sourceCodeLocationInfo: true });
35 |
36 | parser.on('text', ({ sourceCodeLocation }) => {
37 | assert.deepStrictEqual(sourceCodeLocation, {
38 | startLine: 1,
39 | startCol: 35,
40 | startOffset: 34,
41 | endLine: 1,
42 | endCol: 50,
43 | endOffset: 49,
44 | });
45 | });
46 |
47 | parser.end(html);
48 | });
49 | });
50 |
--------------------------------------------------------------------------------
/packages/parse5-sax-parser/test/parser-feedback-simulator.test.ts:
--------------------------------------------------------------------------------
1 | import { generateTokenizationTests } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
2 | import { ParserFeedbackSimulator } from '../lib/parser-feedback-simulator.js';
3 |
4 | const feedbackPath = new URL('../../../test/data/parser-feedback', import.meta.url);
5 |
6 | generateTokenizationTests(
7 | 'ParserFeedbackSimulator',
8 | feedbackPath.pathname,
9 | (handler) => new ParserFeedbackSimulator({}, handler).tokenizer,
10 | );
11 |
--------------------------------------------------------------------------------
/packages/parse5-sax-parser/test/sax-parser.test.ts:
--------------------------------------------------------------------------------
import { it, assert, describe } from 'vitest';
import * as fs from 'node:fs';
import type { SAXParserOptions } from '../lib/index.js';
import { SAXParser } from '../lib/index.js';
import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js';
import {
    finished,
    getStringDiffMsg,
    writeChunkedToStream,
    removeNewLines,
    WritableStreamStub,
} from 'parse5-test-utils/utils/common.js';

// Normalizes serialized HTML so equivalent outputs compare equal
// (strip whitespace/newlines, unify quotes, lowercase).
function sanitizeForComparison(str: string): string {
    return removeNewLines(str).replace(/\s/g, '').replace(/'/g, '"').toLowerCase();
}

// Builds a test that re-serializes `html` from SAXParser events and compares
// the result against `expected`.
// NOTE(review): the return annotation was presumably `Promise<void>` before
// the generic argument was lost in extraction — confirm against upstream.
function createBasicTest(html: string, expected: string, options?: SAXParserOptions) {
    return async function (): Promise {
        //NOTE: the idea of the test is to serialize back given HTML using SAXParser handlers
        let actual = '';
        const parser = new SAXParser(options);

        // NOTE(review): the body of this handler (original lines 25-37, which
        // rebuilt the '<!DOCTYPE …>' string from name/publicId/systemId) was
        // lost during extraction; the dangling template literal below is the
        // surviving fragment — restore from the original repository.
        parser.on('doctype', ({ name, publicId, systemId }) => {
            actual += `';
        });

        parser.on('startTag', ({ tagName, attrs, selfClosing }) => {
            actual += `<${tagName}`;
            for (const attr of attrs) {
                actual += ` ${attr.name}="${attr.value}"`;
            }
            actual += selfClosing ? '/>' : '>';
        });

        // NOTE(review): presumably `</${tagName}>` before the '</' was stripped
        // during extraction — confirm against upstream.
        parser.on('endTag', ({ tagName }) => {
            actual += `${tagName}>`;
        });

        parser.on('text', ({ text }) => {
            actual += text;
        });

        // NOTE(review): presumably `<!--${text}-->` before the comment markup
        // was stripped during extraction — confirm against upstream.
        parser.on('comment', ({ text }) => {
            actual += ``;
        });

        writeChunkedToStream(html, parser);

        await finished(parser);

        expected = sanitizeForComparison(expected);
        actual = sanitizeForComparison(actual);

        //NOTE: use ok assertion, so output will not be polluted by the whole content of the strings
        assert.ok(actual === expected, getStringDiffMsg(actual, expected));
    };
}

const hugePage = new URL('../../../test/data/huge-page/huge-page.html', import.meta.url);

describe('SAX parser', () => {
    //Basic tests
    for (const [idx, data] of loadSAXParserTestData().entries())
        it(`${idx + 1}.${data.name}`, createBasicTest(data.src, data.expected));

    it('Piping and .stop()', async () => {
        const parser = new SAXParser();
        const writable = new WritableStreamStub();
        let handlerCallCount = 0;

        // Stop the parser after the 10th emitted token; piping must continue
        // to pass the raw input through untouched.
        function handler(): void {
            handlerCallCount++;

            if (handlerCallCount === 10) {
                parser.stop();
            }
        }

        fs.createReadStream(hugePage, 'utf8').pipe(parser).pipe(writable);

        parser.on('startTag', handler);
        parser.on('endTag', handler);
        parser.on('doctype', handler);
        parser.on('comment', handler);
        parser.on('text', handler);

        await finished(writable);

        const expected = fs.readFileSync(hugePage).toString();

        assert.strictEqual(handlerCallCount, 10);
        assert.strictEqual(writable.writtenData, expected);
    });

    it('Parser silently exits on big files (GH-97)', () => {
        const parser = new SAXParser();

        fs.createReadStream(hugePage, 'utf8').pipe(parser);

        //NOTE: This is a smoke test - in case of regression it will fail with timeout.
        return finished(parser);
    });

    it('Last text chunk must be flushed (GH-271)', async () => {
        const parser = new SAXParser();
        let foundText = false;

        parser.on('text', ({ text }) => {
            foundText = true;
            assert.strictEqual(text, 'text');
        });

        parser.write('text');
        parser.end();

        await finished(parser);

        assert.ok(foundText);
    });

    it('Should not accept binary input (GH-269)', () => {
        const stream = new SAXParser();
        const buf = Buffer.from('test');

        // The stream works on decoded strings only; Buffers must be rejected.
        assert.throws(() => stream.write(buf), TypeError);
    });

    it('Should treat NULL characters as normal text', async () => {
        const parser = new SAXParser();
        let foundText = false;

        parser.on('text', ({ text }) => {
            foundText = true;
            assert.strictEqual(text, '\0');
        });

        parser.write('\0');
        parser.end();

        await finished(parser);

        assert.strictEqual(foundText, true);
    });
});
158 |
--------------------------------------------------------------------------------
/packages/parse5-sax-parser/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "../../tsconfig.json",
3 | "compilerOptions": {
4 | "rootDir": "lib",
5 | "outDir": "dist"
6 | },
7 | "include": ["**/*.ts"],
8 | "exclude": ["**/*.test.ts", "dist", "test"]
9 | }
10 |
--------------------------------------------------------------------------------
/packages/parse5-sax-parser/typedoc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": ["../../typedoc.base.json"],
3 | "entryPoints": ["lib/index.ts"]
4 | }
5 |
--------------------------------------------------------------------------------
/packages/parse5/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/packages/parse5/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
parse5
9 | HTML parser and serializer.
10 |
11 |
12 |
13 |
14 | npm install --save parse5
15 |
16 |
17 |
18 |
19 | 📖 Documentation 📖
20 |
21 |
22 | ---
23 |
24 |
25 | List of parse5 toolset packages
26 |
27 |
28 |
29 | GitHub
30 |
31 |
32 |
33 | Online playground
34 |
35 |
36 |
37 | Changelog
38 |
39 |
--------------------------------------------------------------------------------
/packages/parse5/lib/common/doctype.ts:
--------------------------------------------------------------------------------
1 | import { DOCUMENT_MODE } from './html.js';
2 | import type { DoctypeToken } from './token.js';
3 |
//Const
const VALID_DOCTYPE_NAME = 'html';
const VALID_SYSTEM_ID = 'about:legacy-compat';
const QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd';

//NOTE: public ID prefixes that trigger quirks mode. Compared against the
//lowercased public identifier (see getDocumentMode below).
const QUIRKS_MODE_PUBLIC_ID_PREFIXES = [
    '+//silmaril//dtd html pro v0r11 19970101//',
    '-//as//dtd html 3.0 aswedit + extensions//',
    '-//advasoft ltd//dtd html 3.0 aswedit + extensions//',
    '-//ietf//dtd html 2.0 level 1//',
    '-//ietf//dtd html 2.0 level 2//',
    '-//ietf//dtd html 2.0 strict level 1//',
    '-//ietf//dtd html 2.0 strict level 2//',
    '-//ietf//dtd html 2.0 strict//',
    '-//ietf//dtd html 2.0//',
    '-//ietf//dtd html 2.1e//',
    '-//ietf//dtd html 3.0//',
    '-//ietf//dtd html 3.2 final//',
    '-//ietf//dtd html 3.2//',
    '-//ietf//dtd html 3//',
    '-//ietf//dtd html level 0//',
    '-//ietf//dtd html level 1//',
    '-//ietf//dtd html level 2//',
    '-//ietf//dtd html level 3//',
    '-//ietf//dtd html strict level 0//',
    '-//ietf//dtd html strict level 1//',
    '-//ietf//dtd html strict level 2//',
    '-//ietf//dtd html strict level 3//',
    '-//ietf//dtd html strict//',
    '-//ietf//dtd html//',
    '-//metrius//dtd metrius presentational//',
    '-//microsoft//dtd internet explorer 2.0 html strict//',
    '-//microsoft//dtd internet explorer 2.0 html//',
    '-//microsoft//dtd internet explorer 2.0 tables//',
    '-//microsoft//dtd internet explorer 3.0 html strict//',
    '-//microsoft//dtd internet explorer 3.0 html//',
    '-//microsoft//dtd internet explorer 3.0 tables//',
    '-//netscape comm. corp.//dtd html//',
    '-//netscape comm. corp.//dtd strict html//',
    "-//o'reilly and associates//dtd html 2.0//",
    "-//o'reilly and associates//dtd html extended 1.0//",
    "-//o'reilly and associates//dtd html extended relaxed 1.0//",
    '-//sq//dtd html 2.0 hotmetal + extensions//',
    '-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//',
    '-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//',
    '-//spyglass//dtd html 2.0 extended//',
    '-//sun microsystems corp.//dtd hotjava html//',
    '-//sun microsystems corp.//dtd hotjava strict html//',
    '-//w3c//dtd html 3 1995-03-24//',
    '-//w3c//dtd html 3.2 draft//',
    '-//w3c//dtd html 3.2 final//',
    '-//w3c//dtd html 3.2//',
    '-//w3c//dtd html 3.2s draft//',
    '-//w3c//dtd html 4.0 frameset//',
    '-//w3c//dtd html 4.0 transitional//',
    '-//w3c//dtd html experimental 19960712//',
    '-//w3c//dtd html experimental 970421//',
    '-//w3c//dtd w3 html//',
    '-//w3o//dtd w3 html 3.0//',
    '-//webtechs//dtd mozilla html 2.0//',
    '-//webtechs//dtd mozilla html//',
];

//NOTE: the two HTML 4.01 prefixes below trigger quirks mode only when the
//doctype has no system identifier (see the prefix selection in getDocumentMode).
const QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = [
    ...QUIRKS_MODE_PUBLIC_ID_PREFIXES,
    '-//w3c//dtd html 4.01 frameset//',
    '-//w3c//dtd html 4.01 transitional//',
];

//NOTE: exact (lowercased) public identifiers that trigger quirks mode.
const QUIRKS_MODE_PUBLIC_IDS = new Set([
    '-//w3o//dtd w3 html strict 3.0//en//',
    '-/w3c/dtd html 4.0 transitional/en',
    'html',
]);
const LIMITED_QUIRKS_PUBLIC_ID_PREFIXES = ['-//w3c//dtd xhtml 1.0 frameset//', '-//w3c//dtd xhtml 1.0 transitional//'];

//NOTE: the HTML 4.01 prefixes additionally trigger limited-quirks mode, but only
//when a system identifier is present (see getDocumentMode).
const LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES = [
    ...LIMITED_QUIRKS_PUBLIC_ID_PREFIXES,
    '-//w3c//dtd html 4.01 frameset//',
    '-//w3c//dtd html 4.01 transitional//',
];
85 |
86 | //Utils
87 | function hasPrefix(publicId: string, prefixes: string[]): boolean {
88 | return prefixes.some((prefix) => publicId.startsWith(prefix));
89 | }
90 |
91 | //API
92 | export function isConforming(token: DoctypeToken): boolean {
93 | return (
94 | token.name === VALID_DOCTYPE_NAME &&
95 | token.publicId === null &&
96 | (token.systemId === null || token.systemId === VALID_SYSTEM_ID)
97 | );
98 | }
99 |
100 | export function getDocumentMode(token: DoctypeToken): DOCUMENT_MODE {
101 | if (token.name !== VALID_DOCTYPE_NAME) {
102 | return DOCUMENT_MODE.QUIRKS;
103 | }
104 |
105 | const { systemId } = token;
106 |
107 | if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) {
108 | return DOCUMENT_MODE.QUIRKS;
109 | }
110 |
111 | let { publicId } = token;
112 |
113 | if (publicId !== null) {
114 | publicId = publicId.toLowerCase();
115 |
116 | if (QUIRKS_MODE_PUBLIC_IDS.has(publicId)) {
117 | return DOCUMENT_MODE.QUIRKS;
118 | }
119 |
120 | let prefixes = systemId === null ? QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES : QUIRKS_MODE_PUBLIC_ID_PREFIXES;
121 |
122 | if (hasPrefix(publicId, prefixes)) {
123 | return DOCUMENT_MODE.QUIRKS;
124 | }
125 |
126 | prefixes =
127 | systemId === null ? LIMITED_QUIRKS_PUBLIC_ID_PREFIXES : LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES;
128 |
129 | if (hasPrefix(publicId, prefixes)) {
130 | return DOCUMENT_MODE.LIMITED_QUIRKS;
131 | }
132 | }
133 |
134 | return DOCUMENT_MODE.NO_QUIRKS;
135 | }
136 |
--------------------------------------------------------------------------------
/packages/parse5/lib/common/error-codes.ts:
--------------------------------------------------------------------------------
1 | import type { Location } from './token.js';
2 |
/** A parse error together with the location at which it occurred. */
export interface ParserError extends Location {
    code: ERR;
}

/** Callback invoked for each error reported during parsing. */
export type ParserErrorHandler = (error: ParserError) => void;

/**
 * Parse error codes.
 *
 * NOTE(review): the string values appear to follow the parse-error names from
 * the WHATWG HTML spec — confirm against the spec's error list when adding codes.
 */
export enum ERR {
    controlCharacterInInputStream = 'control-character-in-input-stream',
    noncharacterInInputStream = 'noncharacter-in-input-stream',
    surrogateInInputStream = 'surrogate-in-input-stream',
    nonVoidHtmlElementStartTagWithTrailingSolidus = 'non-void-html-element-start-tag-with-trailing-solidus',
    endTagWithAttributes = 'end-tag-with-attributes',
    endTagWithTrailingSolidus = 'end-tag-with-trailing-solidus',
    unexpectedSolidusInTag = 'unexpected-solidus-in-tag',
    unexpectedNullCharacter = 'unexpected-null-character',
    unexpectedQuestionMarkInsteadOfTagName = 'unexpected-question-mark-instead-of-tag-name',
    invalidFirstCharacterOfTagName = 'invalid-first-character-of-tag-name',
    unexpectedEqualsSignBeforeAttributeName = 'unexpected-equals-sign-before-attribute-name',
    missingEndTagName = 'missing-end-tag-name',
    unexpectedCharacterInAttributeName = 'unexpected-character-in-attribute-name',
    unknownNamedCharacterReference = 'unknown-named-character-reference',
    missingSemicolonAfterCharacterReference = 'missing-semicolon-after-character-reference',
    unexpectedCharacterAfterDoctypeSystemIdentifier = 'unexpected-character-after-doctype-system-identifier',
    unexpectedCharacterInUnquotedAttributeValue = 'unexpected-character-in-unquoted-attribute-value',
    eofBeforeTagName = 'eof-before-tag-name',
    eofInTag = 'eof-in-tag',
    missingAttributeValue = 'missing-attribute-value',
    missingWhitespaceBetweenAttributes = 'missing-whitespace-between-attributes',
    missingWhitespaceAfterDoctypePublicKeyword = 'missing-whitespace-after-doctype-public-keyword',
    missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers = 'missing-whitespace-between-doctype-public-and-system-identifiers',
    missingWhitespaceAfterDoctypeSystemKeyword = 'missing-whitespace-after-doctype-system-keyword',
    missingQuoteBeforeDoctypePublicIdentifier = 'missing-quote-before-doctype-public-identifier',
    missingQuoteBeforeDoctypeSystemIdentifier = 'missing-quote-before-doctype-system-identifier',
    missingDoctypePublicIdentifier = 'missing-doctype-public-identifier',
    missingDoctypeSystemIdentifier = 'missing-doctype-system-identifier',
    abruptDoctypePublicIdentifier = 'abrupt-doctype-public-identifier',
    abruptDoctypeSystemIdentifier = 'abrupt-doctype-system-identifier',
    cdataInHtmlContent = 'cdata-in-html-content',
    incorrectlyOpenedComment = 'incorrectly-opened-comment',
    eofInScriptHtmlCommentLikeText = 'eof-in-script-html-comment-like-text',
    eofInDoctype = 'eof-in-doctype',
    nestedComment = 'nested-comment',
    abruptClosingOfEmptyComment = 'abrupt-closing-of-empty-comment',
    eofInComment = 'eof-in-comment',
    incorrectlyClosedComment = 'incorrectly-closed-comment',
    eofInCdata = 'eof-in-cdata',
    absenceOfDigitsInNumericCharacterReference = 'absence-of-digits-in-numeric-character-reference',
    nullCharacterReference = 'null-character-reference',
    surrogateCharacterReference = 'surrogate-character-reference',
    characterReferenceOutsideUnicodeRange = 'character-reference-outside-unicode-range',
    controlCharacterReference = 'control-character-reference',
    noncharacterCharacterReference = 'noncharacter-character-reference',
    missingWhitespaceBeforeDoctypeName = 'missing-whitespace-before-doctype-name',
    missingDoctypeName = 'missing-doctype-name',
    invalidCharacterSequenceAfterDoctypeName = 'invalid-character-sequence-after-doctype-name',
    duplicateAttribute = 'duplicate-attribute',
    nonConformingDoctype = 'non-conforming-doctype',
    missingDoctype = 'missing-doctype',
    misplacedDoctype = 'misplaced-doctype',
    endTagWithoutMatchingOpenElement = 'end-tag-without-matching-open-element',
    closingOfElementWithOpenChildElements = 'closing-of-element-with-open-child-elements',
    disallowedContentInNoscriptInHead = 'disallowed-content-in-noscript-in-head',
    openElementsLeftAfterEof = 'open-elements-left-after-eof',
    abandonedHeadElementChild = 'abandoned-head-element-child',
    misplacedStartTagForHeadElement = 'misplaced-start-tag-for-head-element',
    nestedNoscriptInHead = 'nested-noscript-in-head',
    eofInElementThatCanContainOnlyText = 'eof-in-element-that-can-contain-only-text',
}
71 |
--------------------------------------------------------------------------------
/packages/parse5/lib/common/token.ts:
--------------------------------------------------------------------------------
1 | import type { TAG_ID } from './html.js';
2 |
/** The kinds of tokens the tokenizer emits. */
export enum TokenType {
    CHARACTER,
    NULL_CHARACTER,
    WHITESPACE_CHARACTER,
    START_TAG,
    END_TAG,
    COMMENT,
    DOCTYPE,
    EOF,
    //NOTE(review): presumably signals that the tokenizer suspended mid-input
    //(streaming) — confirm against the tokenizer implementation.
    HIBERNATION,
}

export interface Location {
    /** One-based line index of the first character. */
    startLine: number;
    /** One-based column index of the first character. */
    startCol: number;
    /** Zero-based first character index. */
    startOffset: number;
    /** One-based line index of the last character. */
    endLine: number;
    /** One-based column index of the last character. Points directly *after* the last character. */
    endCol: number;
    /** Zero-based last character index. Points directly *after* the last character. */
    endOffset: number;
}
29 |
30 | export interface LocationWithAttributes extends Location {
31 | /** Start tag attributes' location info. */
32 | attrs?: Record;
33 | }
34 |
export interface ElementLocation extends LocationWithAttributes {
    /** Element's start tag location info. */
    startTag?: Location;
    /**
     * Element's end tag location info.
     * This property is undefined, if the element has no closing tag.
     */
    endTag?: Location;
}

/** Fields common to every token. */
interface TokenBase {
    readonly type: TokenType;
    //NOTE(review): presumably `null` when location tracking is disabled — confirm.
    location: Location | null;
}

export interface DoctypeToken extends TokenBase {
    readonly type: TokenType.DOCTYPE;
    name: string | null;
    forceQuirks: boolean;
    publicId: string | null;
    systemId: string | null;
}

export interface Attribute {
    /** The name of the attribute. */
    name: string;
    /** The namespace of the attribute. */
    namespace?: string;
    /** The namespace-related prefix of the attribute. */
    prefix?: string;
    /** The value of the attribute. */
    value: string;
}

export interface TagToken extends TokenBase {
    readonly type: TokenType.START_TAG | TokenType.END_TAG;
    tagName: string;
    /** Used to cache the ID of the tag name. */
    tagID: TAG_ID;
    selfClosing: boolean;
    ackSelfClosing: boolean;
    attrs: Attribute[];
    location: LocationWithAttributes | null;
}
79 |
80 | export function getTokenAttr(token: TagToken, attrName: string): string | null {
81 | for (let i = token.attrs.length - 1; i >= 0; i--) {
82 | if (token.attrs[i].name === attrName) {
83 | return token.attrs[i].value;
84 | }
85 | }
86 |
87 | return null;
88 | }
89 |
export interface CommentToken extends TokenBase {
    readonly type: TokenType.COMMENT;
    data: string;
}

export interface EOFToken extends TokenBase {
    readonly type: TokenType.EOF;
}

export interface CharacterToken extends TokenBase {
    //NOTE: unlike the other token interfaces, `type` is mutable here —
    //presumably so character tokens can be reclassified between the three
    //character kinds; confirm in the tokenizer.
    type: TokenType.CHARACTER | TokenType.NULL_CHARACTER | TokenType.WHITESPACE_CHARACTER;
    chars: string;
}

/** Union of all token shapes produced by the tokenizer. */
export type Token = DoctypeToken | TagToken | CommentToken | EOFToken | CharacterToken;
105 |
--------------------------------------------------------------------------------
/packages/parse5/lib/common/unicode.ts:
--------------------------------------------------------------------------------
//NOTE: the per-plane U+xxFFFE/U+xxFFFF noncharacter code points
//(consumed by isUndefinedCodePoint below).
const UNDEFINED_CODE_POINTS = new Set([
    0xff_fe, 0xff_ff, 0x1_ff_fe, 0x1_ff_ff, 0x2_ff_fe, 0x2_ff_ff, 0x3_ff_fe, 0x3_ff_ff, 0x4_ff_fe, 0x4_ff_ff, 0x5_ff_fe,
    0x5_ff_ff, 0x6_ff_fe, 0x6_ff_ff, 0x7_ff_fe, 0x7_ff_ff, 0x8_ff_fe, 0x8_ff_ff, 0x9_ff_fe, 0x9_ff_ff, 0xa_ff_fe,
    0xa_ff_ff, 0xb_ff_fe, 0xb_ff_ff, 0xc_ff_fe, 0xc_ff_ff, 0xd_ff_fe, 0xd_ff_ff, 0xe_ff_fe, 0xe_ff_ff, 0xf_ff_fe,
    0xf_ff_ff, 0x10_ff_fe, 0x10_ff_ff,
]);

export const REPLACEMENT_CHARACTER = '\uFFFD';

/** Code points of special significance to the tokenizer; EOF is modelled as -1. */
export enum CODE_POINTS {
    EOF = -1,
    NULL = 0x00,
    TABULATION = 0x09,
    CARRIAGE_RETURN = 0x0d,
    LINE_FEED = 0x0a,
    FORM_FEED = 0x0c,
    SPACE = 0x20,
    EXCLAMATION_MARK = 0x21,
    QUOTATION_MARK = 0x22,
    AMPERSAND = 0x26,
    APOSTROPHE = 0x27,
    HYPHEN_MINUS = 0x2d,
    SOLIDUS = 0x2f,
    DIGIT_0 = 0x30,
    DIGIT_9 = 0x39,
    SEMICOLON = 0x3b,
    LESS_THAN_SIGN = 0x3c,
    EQUALS_SIGN = 0x3d,
    GREATER_THAN_SIGN = 0x3e,
    QUESTION_MARK = 0x3f,
    LATIN_CAPITAL_A = 0x41,
    LATIN_CAPITAL_Z = 0x5a,
    RIGHT_SQUARE_BRACKET = 0x5d,
    GRAVE_ACCENT = 0x60,
    LATIN_SMALL_A = 0x61,
    LATIN_SMALL_Z = 0x7a,
}

/** Multi-character sequences the tokenizer matches against the input. */
export const SEQUENCES = {
    DASH_DASH: '--',
    CDATA_START: '[CDATA[',
    DOCTYPE: 'doctype',
    SCRIPT: 'script',
    PUBLIC: 'public',
    SYSTEM: 'system',
};
47 |
48 | //Surrogates
49 | export function isSurrogate(cp: number): boolean {
50 | return cp >= 0xd8_00 && cp <= 0xdf_ff;
51 | }
52 |
53 | export function isSurrogatePair(cp: number): boolean {
54 | return cp >= 0xdc_00 && cp <= 0xdf_ff;
55 | }
56 |
57 | export function getSurrogatePairCodePoint(cp1: number, cp2: number): number {
58 | return (cp1 - 0xd8_00) * 0x4_00 + 0x24_00 + cp2;
59 | }
60 |
61 | //NOTE: excluding NULL and ASCII whitespace
62 | export function isControlCodePoint(cp: number): boolean {
63 | return (
64 | (cp !== 0x20 && cp !== 0x0a && cp !== 0x0d && cp !== 0x09 && cp !== 0x0c && cp >= 0x01 && cp <= 0x1f) ||
65 | (cp >= 0x7f && cp <= 0x9f)
66 | );
67 | }
68 |
69 | export function isUndefinedCodePoint(cp: number): boolean {
70 | return (cp >= 0xfd_d0 && cp <= 0xfd_ef) || UNDEFINED_CODE_POINTS.has(cp);
71 | }
72 |
--------------------------------------------------------------------------------
/packages/parse5/lib/index.ts:
--------------------------------------------------------------------------------
1 | import { Parser, type ParserOptions } from './parser/index.js';
2 |
3 | import type { DefaultTreeAdapterMap } from './tree-adapters/default.js';
4 | import type { TreeAdapterTypeMap } from './tree-adapters/interface.js';
5 |
6 | export { type DefaultTreeAdapterMap, defaultTreeAdapter } from './tree-adapters/default.js';
7 | import type * as DefaultTreeAdapter from './tree-adapters/default.js';
8 | // eslint-disable-next-line @typescript-eslint/no-namespace
9 | export namespace DefaultTreeAdapterTypes {
10 | export type Document = DefaultTreeAdapter.Document;
11 | export type DocumentFragment = DefaultTreeAdapter.DocumentFragment;
12 | export type Element = DefaultTreeAdapter.Element;
13 | export type CommentNode = DefaultTreeAdapter.CommentNode;
14 | export type TextNode = DefaultTreeAdapter.TextNode;
15 | export type Template = DefaultTreeAdapter.Template;
16 | export type DocumentType = DefaultTreeAdapter.DocumentType;
17 | export type ParentNode = DefaultTreeAdapter.ParentNode;
18 | export type ChildNode = DefaultTreeAdapter.ChildNode;
19 | export type Node = DefaultTreeAdapter.Node;
20 | export type DefaultTreeAdapterMap = DefaultTreeAdapter.DefaultTreeAdapterMap;
21 | }
22 | export type { TreeAdapter, TreeAdapterTypeMap } from './tree-adapters/interface.js';
23 | export { type ParserOptions, /** @internal */ Parser } from './parser/index.js';
24 | export { serialize, serializeOuter, type SerializerOptions } from './serializer/index.js';
25 | export { ERR as ErrorCodes, type ParserError, type ParserErrorHandler } from './common/error-codes.js';
26 |
27 | /** @internal */
28 | export * as foreignContent from './common/foreign-content.js';
29 | export * as html from './common/html.js';
30 | export * as Token from './common/token.js';
31 | /** @internal */
32 | export { Tokenizer, type TokenizerOptions, TokenizerMode, type TokenHandler } from './tokenizer/index.js';
33 |
34 | // Shorthands
35 |
36 | /**
37 | * Parses an HTML string.
38 | *
39 | * @param html Input HTML string.
40 | * @param options Parsing options.
41 | * @returns Document
42 | *
43 | * @example
44 | *
45 | * ```js
46 | * const parse5 = require('parse5');
47 | *
48 | * const document = parse5.parse('Hi there!');
49 | *
50 | * console.log(document.childNodes[1].tagName); //> 'html'
51 | *```
52 | */
53 | export function parse(
54 | html: string,
55 | options?: ParserOptions,
56 | ): T['document'] {
57 | return Parser.parse(html, options);
58 | }
59 |
60 | /**
61 | * Parses an HTML fragment.
62 | *
63 | * @example
64 | *
65 | * ```js
66 | * const parse5 = require('parse5');
67 | *
68 | * const documentFragment = parse5.parseFragment('');
69 | *
70 | * console.log(documentFragment.childNodes[0].tagName); //> 'table'
71 | *
72 | * // Parses the html fragment in the context of the parsed element.
73 | * const trFragment = parse5.parseFragment(documentFragment.childNodes[0], 'Shake it, baby ');
74 | *
75 | * console.log(trFragment.childNodes[0].childNodes[0].tagName); //> 'td'
76 | * ```
77 | *
78 | * @param fragmentContext Parsing context element. If specified, given fragment will be parsed as if it was set to the context element's `innerHTML` property.
79 | * @param html Input HTML fragment string.
80 | * @param options Parsing options.
81 | * @returns DocumentFragment
82 | */
83 | export function parseFragment(
84 | fragmentContext: T['parentNode'] | null,
85 | html: string,
86 | options: ParserOptions,
87 | ): T['documentFragment'];
88 | export function parseFragment(
89 | html: string,
90 | options?: ParserOptions,
91 | ): T['documentFragment'];
92 | export function parseFragment(
93 | fragmentContext: T['parentNode'] | null | string,
94 | html?: string | ParserOptions,
95 | options?: ParserOptions,
96 | ): T['documentFragment'] {
97 | if (typeof fragmentContext === 'string') {
98 | options = html as ParserOptions;
99 | html = fragmentContext;
100 | fragmentContext = null;
101 | }
102 |
103 | const parser = Parser.getFragmentParser(fragmentContext, options);
104 |
105 | parser.tokenizer.write(html as string, true);
106 |
107 | return parser.getFragment();
108 | }
109 |
--------------------------------------------------------------------------------
/packages/parse5/lib/parser/formatting-element-list.ts:
--------------------------------------------------------------------------------
1 | import type { Attribute, TagToken } from '../common/token.js';
2 | import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface.js';
3 |
4 | //Const
5 | const NOAH_ARK_CAPACITY = 3;
6 |
7 | export enum EntryType {
8 | Marker,
9 | Element,
10 | }
11 |
12 | interface MarkerEntry {
13 | type: EntryType.Marker;
14 | }
15 |
16 | export interface ElementEntry {
17 | type: EntryType.Element;
18 | element: T['element'];
19 | token: TagToken;
20 | }
21 |
22 | export type Entry = MarkerEntry | ElementEntry;
23 |
24 | const MARKER: MarkerEntry = { type: EntryType.Marker };
25 |
26 | //List of formatting elements
27 | export class FormattingElementList {
28 | entries: Entry[] = [];
29 | bookmark: Entry | null = null;
30 |
31 | constructor(private treeAdapter: TreeAdapter) {}
32 |
33 | //Noah Ark's condition
34 | //OPTIMIZATION: at first we try to find possible candidates for exclusion using
35 | //lightweight heuristics without thorough attributes check.
36 | private _getNoahArkConditionCandidates(
37 | newElement: T['element'],
38 | neAttrs: Attribute[],
39 | ): { idx: number; attrs: Attribute[] }[] {
40 | const candidates = [];
41 |
42 | const neAttrsLength = neAttrs.length;
43 | const neTagName = this.treeAdapter.getTagName(newElement);
44 | const neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement);
45 |
46 | for (let i = 0; i < this.entries.length; i++) {
47 | const entry = this.entries[i];
48 |
49 | if (entry.type === EntryType.Marker) {
50 | break;
51 | }
52 |
53 | const { element } = entry;
54 |
55 | if (
56 | this.treeAdapter.getTagName(element) === neTagName &&
57 | this.treeAdapter.getNamespaceURI(element) === neNamespaceURI
58 | ) {
59 | const elementAttrs = this.treeAdapter.getAttrList(element);
60 |
61 | if (elementAttrs.length === neAttrsLength) {
62 | candidates.push({ idx: i, attrs: elementAttrs });
63 | }
64 | }
65 | }
66 |
67 | return candidates;
68 | }
69 |
70 | private _ensureNoahArkCondition(newElement: T['element']): void {
71 | if (this.entries.length < NOAH_ARK_CAPACITY) return;
72 |
73 | const neAttrs = this.treeAdapter.getAttrList(newElement);
74 | const candidates = this._getNoahArkConditionCandidates(newElement, neAttrs);
75 |
76 | if (candidates.length < NOAH_ARK_CAPACITY) return;
77 |
78 | //NOTE: build attrs map for the new element, so we can perform fast lookups
79 | const neAttrsMap = new Map(neAttrs.map((neAttr: Attribute) => [neAttr.name, neAttr.value]));
80 | let validCandidates = 0;
81 |
82 | //NOTE: remove bottommost candidates, until Noah's Ark condition will not be met
83 | for (let i = 0; i < candidates.length; i++) {
84 | const candidate = candidates[i];
85 |
86 | // We know that `candidate.attrs.length === neAttrs.length`
87 | if (candidate.attrs.every((cAttr) => neAttrsMap.get(cAttr.name) === cAttr.value)) {
88 | validCandidates += 1;
89 |
90 | if (validCandidates >= NOAH_ARK_CAPACITY) {
91 | this.entries.splice(candidate.idx, 1);
92 | }
93 | }
94 | }
95 | }
96 |
97 | //Mutations
98 | insertMarker(): void {
99 | this.entries.unshift(MARKER);
100 | }
101 |
102 | pushElement(element: T['element'], token: TagToken): void {
103 | this._ensureNoahArkCondition(element);
104 |
105 | this.entries.unshift({
106 | type: EntryType.Element,
107 | element,
108 | token,
109 | });
110 | }
111 |
112 | insertElementAfterBookmark(element: T['element'], token: TagToken): void {
113 | const bookmarkIdx = this.entries.indexOf(this.bookmark!);
114 |
115 | this.entries.splice(bookmarkIdx, 0, {
116 | type: EntryType.Element,
117 | element,
118 | token,
119 | });
120 | }
121 |
122 | removeEntry(entry: Entry): void {
123 | const entryIndex = this.entries.indexOf(entry);
124 |
125 | if (entryIndex !== -1) {
126 | this.entries.splice(entryIndex, 1);
127 | }
128 | }
129 |
130 | /**
131 | * Clears the list of formatting elements up to the last marker.
132 | *
133 | * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker
134 | */
135 | clearToLastMarker(): void {
136 | const markerIdx = this.entries.indexOf(MARKER);
137 |
138 | if (markerIdx === -1) {
139 | this.entries.length = 0;
140 | } else {
141 | this.entries.splice(0, markerIdx + 1);
142 | }
143 | }
144 |
145 | //Search
146 | getElementEntryInScopeWithTagName(tagName: string): ElementEntry | null {
147 | const entry = this.entries.find(
148 | (entry) => entry.type === EntryType.Marker || this.treeAdapter.getTagName(entry.element) === tagName,
149 | );
150 |
151 | return entry && entry.type === EntryType.Element ? entry : null;
152 | }
153 |
154 | getElementEntry(element: T['element']): ElementEntry | undefined {
155 | return this.entries.find(
156 | (entry): entry is ElementEntry => entry.type === EntryType.Element && entry.element === element,
157 | );
158 | }
159 | }
160 |
--------------------------------------------------------------------------------
/packages/parse5/lib/parser/index.test.ts:
--------------------------------------------------------------------------------
1 | import { it, assert, describe, beforeEach, afterEach, vi, expect } from 'vitest';
2 | import { parseFragment, parse } from 'parse5';
3 | import type { Element, TextNode } from '../tree-adapters/default.js';
4 | import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js';
5 | import { treeAdapters } from 'parse5-test-utils/utils/common.js';
6 | import type { Htmlparser2TreeAdapterMap } from 'parse5-htmlparser2-tree-adapter';
7 |
// Run the shared tree-construction suite against the local test data. Fragment
// tests parse with the provided context element; the rest parse a full document.
generateParsingTests(
    'parser',
    'Parser',
    {
        expectErrors: [
            //TODO(GH-448): Foreign content behaviour was updated in the HTML spec.
            //The old test suite still tests the old behaviour.
            '0.foreign-fragment',
            '1.foreign-fragment',
            '38.foreign-fragment',
            '40.foreign-fragment',
            '47.foreign-fragment',
            '48.foreign-fragment',
        ],
    },
    (test, opts) => ({
        node: test.fragmentContext ? parseFragment(test.fragmentContext, test.input, opts) : parse(test.input, opts),
    }),
);

// Same runner against the upstream html5lib-tests suite; error assertions are
// disabled there (`withoutErrors`).
generateParsingTests(
    'parser upstream',
    'Parser',
    {
        withoutErrors: true,
        suitePath: new URL('../../../../test/data/html5lib-tests/tree-construction', import.meta.url),
    },
    (test, opts) => ({
        node: test.fragmentContext ? parseFragment(test.fragmentContext, test.input, opts) : parse(test.input, opts),
    }),
);
39 |
40 | describe('parser', () => {
41 | it('Regression - HTML5 Legacy Doctype Misparsed with htmlparser2 tree adapter (GH-45)', () => {
42 | const html = 'Hi there!';
43 | const document = parse(html, { treeAdapter: treeAdapters.htmlparser2 });
44 |
45 | assert.ok(treeAdapters.htmlparser2.isDocumentTypeNode(document.childNodes[0]));
46 | assert.strictEqual(
47 | (document.childNodes[0] as Htmlparser2TreeAdapterMap['documentType']).data,
48 | '!DOCTYPE html SYSTEM "about:legacy-compat"',
49 | );
50 | });
51 |
52 | describe("Regression - Don't inherit from Object when creating collections (GH-119)", () => {
53 | beforeEach(() => {
54 | // @ts-expect-error Adding unknown prototype method
55 | Object.prototype.heyYo = 123;
56 | });
57 |
58 | afterEach(() => {
59 | // @ts-expect-error Deleting unknown prototype property
60 | delete Object.prototype.heyYo;
61 | });
62 |
63 | it('parses correctly', () => {
64 | const fragment = parseFragment('
",
151 | "input": "",
152 | "output": [
153 | [
154 | "StartTag",
155 | "math",
156 | {}
157 | ],
158 | [
159 | "StartTag",
160 | "tbody",
161 | {}
162 | ],
163 | [
164 | "StartTag",
165 | "mo",
166 | {}
167 | ],
168 | [
169 | "EndTag",
170 | "table"
171 | ]
172 | ]
173 | },
174 | {
175 | "fragmentContext": "tbody",
176 | "description": "",
177 | "input": "",
178 | "output": [
179 | [
180 | "StartTag",
181 | "math",
182 | {}
183 | ],
184 | [
185 | "StartTag",
186 | "thead",
187 | {}
188 | ],
189 | [
190 | "StartTag",
191 | "mo",
192 | {}
193 | ],
194 | [
195 | "EndTag",
196 | "table"
197 | ]
198 | ]
199 | },
200 | {
201 | "fragmentContext": "tbody",
202 | "description": "",
203 | "input": "",
204 | "output": [
205 | [
206 | "StartTag",
207 | "math",
208 | {}
209 | ],
210 | [
211 | "StartTag",
212 | "tfoot",
213 | {}
214 | ],
215 | [
216 | "StartTag",
217 | "mo",
218 | {}
219 | ],
220 | [
221 | "EndTag",
222 | "table"
223 | ]
224 | ]
225 | }
226 | ]
227 | }
--------------------------------------------------------------------------------
/test/data/parser-feedback/namespace-sensitivity.test:
--------------------------------------------------------------------------------
1 | {
2 | "tests": [
3 | {
4 | "fragmentContext": null,
5 | "description": " Foo",
6 | "input": " Foo",
7 | "output": [
8 | [
9 | "StartTag",
10 | "body",
11 | {}
12 | ],
13 | [
14 | "StartTag",
15 | "table",
16 | {}
17 | ],
18 | [
19 | "StartTag",
20 | "tr",
21 | {}
22 | ],
23 | [
24 | "StartTag",
25 | "td",
26 | {}
27 | ],
28 | [
29 | "StartTag",
30 | "svg",
31 | {}
32 | ],
33 | [
34 | "StartTag",
35 | "td",
36 | {}
37 | ],
38 | [
39 | "StartTag",
40 | "foreignObject",
41 | {}
42 | ],
43 | [
44 | "StartTag",
45 | "span",
46 | {}
47 | ],
48 | [
49 | "EndTag",
50 | "td"
51 | ],
52 | [
53 | "Character",
54 | "Foo"
55 | ]
56 | ]
57 | }
58 | ]
59 | }
--------------------------------------------------------------------------------
/test/data/parser-feedback/pending-spec-changes-plain-text-unsafe.test:
--------------------------------------------------------------------------------
1 | {
2 | "tests": [
3 | {
4 | "fragmentContext": null,
5 | "description": "\\u0000filler\\u0000text\\u0000",
6 | "input": "\u0000filler\u0000text\u0000",
7 | "output": [
8 | [
9 | "StartTag",
10 | "body",
11 | {}
12 | ],
13 | [
14 | "StartTag",
15 | "table",
16 | {}
17 | ],
18 | [
19 | "Character",
20 | "\u0000filler\u0000text\u0000"
21 | ]
22 | ]
23 | }
24 | ]
25 | }
--------------------------------------------------------------------------------
/test/data/parser-feedback/pending-spec-changes.test:
--------------------------------------------------------------------------------
1 | {
2 | "tests": [
3 | {
4 | "fragmentContext": null,
5 | "description": "",
6 | "input": "",
7 | "output": [
8 | [
9 | "StartTag",
10 | "input",
11 | {
12 | "type": "hidden"
13 | }
14 | ],
15 | [
16 | "StartTag",
17 | "frameset",
18 | {}
19 | ]
20 | ]
21 | },
22 | {
23 | "fragmentContext": null,
24 | "description": "bar",
25 | "input": "bar",
26 | "output": [
27 | [
28 | "DOCTYPE",
29 | "html",
30 | null,
31 | null,
32 | true
33 | ],
34 | [
35 | "StartTag",
36 | "table",
37 | {}
38 | ],
39 | [
40 | "StartTag",
41 | "caption",
42 | {}
43 | ],
44 | [
45 | "StartTag",
46 | "svg",
47 | {}
48 | ],
49 | [
50 | "Character",
51 | "foo"
52 | ],
53 | [
54 | "EndTag",
55 | "table"
56 | ],
57 | [
58 | "Character",
59 | "bar"
60 | ]
61 | ]
62 | },
63 | {
64 | "fragmentContext": null,
65 | "description": "",
152 | "output": [
153 | [
154 | "StartTag",
155 | "svg",
156 | {}
157 | ],
158 | [
159 | "StartTag",
160 | "tbody",
161 | {}
162 | ],
163 | [
164 | "StartTag",
165 | "title",
166 | {}
167 | ],
168 | [
169 | "EndTag",
170 | "table"
171 | ]
172 | ]
173 | },
174 | {
175 | "fragmentContext": "tbody",
176 | "description": "
",
177 | "input": "
",
178 | "output": [
179 | [
180 | "StartTag",
181 | "svg",
182 | {}
183 | ],
184 | [
185 | "StartTag",
186 | "thead",
187 | {}
188 | ],
189 | [
190 | "StartTag",
191 | "title",
192 | {}
193 | ],
194 | [
195 | "EndTag",
196 | "table"
197 | ]
198 | ]
199 | },
200 | {
201 | "fragmentContext": "tbody",
202 | "description": "
",
203 | "input": "
",
204 | "output": [
205 | [
206 | "StartTag",
207 | "svg",
208 | {}
209 | ],
210 | [
211 | "StartTag",
212 | "tfoot",
213 | {}
214 | ],
215 | [
216 | "StartTag",
217 | "title",
218 | {}
219 | ],
220 | [
221 | "EndTag",
222 | "table"
223 | ]
224 | ]
225 | }
226 | ]
227 | }
--------------------------------------------------------------------------------
/test/data/parser-feedback/tests14.test:
--------------------------------------------------------------------------------
1 | {
2 | "tests": [
3 | {
4 | "fragmentContext": null,
5 | "description": " ",
6 | "input": " ",
7 | "output": [
8 | [
9 | "DOCTYPE",
10 | "html",
11 | null,
12 | null,
13 | true
14 | ],
15 | [
16 | "StartTag",
17 | "html",
18 | {}
19 | ],
20 | [
21 | "StartTag",
22 | "body",
23 | {}
24 | ],
25 | [
26 | "StartTag",
27 | "xyz:abc",
28 | {}
29 | ],
30 | [
31 | "EndTag",
32 | "xyz:abc"
33 | ]
34 | ]
35 | },
36 | {
37 | "fragmentContext": null,
38 | "description": " ",
39 | "input": " ",
40 | "output": [
41 | [
42 | "DOCTYPE",
43 | "html",
44 | null,
45 | null,
46 | true
47 | ],
48 | [
49 | "StartTag",
50 | "html",
51 | {}
52 | ],
53 | [
54 | "StartTag",
55 | "body",
56 | {}
57 | ],
58 | [
59 | "StartTag",
60 | "xyz:abc",
61 | {}
62 | ],
63 | [
64 | "EndTag",
65 | "xyz:abc"
66 | ],
67 | [
68 | "StartTag",
69 | "span",
70 | {}
71 | ],
72 | [
73 | "EndTag",
74 | "span"
75 | ]
76 | ]
77 | },
78 | {
79 | "fragmentContext": null,
80 | "description": " ",
81 | "input": " ",
82 | "output": [
83 | [
84 | "DOCTYPE",
85 | "html",
86 | null,
87 | null,
88 | true
89 | ],
90 | [
91 | "StartTag",
92 | "html",
93 | {}
94 | ],
95 | [
96 | "StartTag",
97 | "html",
98 | {
99 | "abc:def": "gh"
100 | }
101 | ],
102 | [
103 | "StartTag",
104 | "xyz:abc",
105 | {}
106 | ],
107 | [
108 | "EndTag",
109 | "xyz:abc"
110 | ]
111 | ]
112 | },
113 | {
114 | "fragmentContext": null,
115 | "description": "",
116 | "input": "",
117 | "output": [
118 | [
119 | "DOCTYPE",
120 | "html",
121 | null,
122 | null,
123 | true
124 | ],
125 | [
126 | "StartTag",
127 | "html",
128 | {
129 | "xml:lang": "bar"
130 | }
131 | ],
132 | [
133 | "StartTag",
134 | "html",
135 | {
136 | "xml:lang": "foo"
137 | }
138 | ]
139 | ]
140 | },
141 | {
142 | "fragmentContext": null,
143 | "description": "",
144 | "input": "",
145 | "output": [
146 | [
147 | "DOCTYPE",
148 | "html",
149 | null,
150 | null,
151 | true
152 | ],
153 | [
154 | "StartTag",
155 | "html",
156 | {
157 | "123": "456"
158 | }
159 | ]
160 | ]
161 | },
162 | {
163 | "fragmentContext": null,
164 | "description": "",
165 | "input": "",
166 | "output": [
167 | [
168 | "DOCTYPE",
169 | "html",
170 | null,
171 | null,
172 | true
173 | ],
174 | [
175 | "StartTag",
176 | "html",
177 | {
178 | "123": "456"
179 | }
180 | ],
181 | [
182 | "StartTag",
183 | "html",
184 | {
185 | "789": "012"
186 | }
187 | ]
188 | ]
189 | },
190 | {
191 | "fragmentContext": null,
192 | "description": "",
193 | "input": "",
194 | "output": [
195 | [
196 | "DOCTYPE",
197 | "html",
198 | null,
199 | null,
200 | true
201 | ],
202 | [
203 | "StartTag",
204 | "html",
205 | {}
206 | ],
207 | [
208 | "StartTag",
209 | "body",
210 | {
211 | "789": "012"
212 | }
213 | ]
214 | ]
215 | }
216 | ]
217 | }
--------------------------------------------------------------------------------
/test/data/parser-feedback/tests24.test:
--------------------------------------------------------------------------------
1 | {
2 | "tests": [
3 | {
4 | "fragmentContext": null,
5 | "description": "≂̸",
6 | "input": "≂̸",
7 | "output": [
8 | [
9 | "DOCTYPE",
10 | "html",
11 | null,
12 | null,
13 | true
14 | ],
15 | [
16 | "Character",
17 | "≂̸"
18 | ]
19 | ]
20 | },
21 | {
22 | "fragmentContext": null,
23 | "description": "≂̸A",
24 | "input": "≂̸A",
25 | "output": [
26 | [
27 | "DOCTYPE",
28 | "html",
29 | null,
30 | null,
31 | true
32 | ],
33 | [
34 | "Character",
35 | "≂̸A"
36 | ]
37 | ]
38 | },
39 | {
40 | "fragmentContext": null,
41 | "description": "  ",
42 | "input": "  ",
43 | "output": [
44 | [
45 | "DOCTYPE",
46 | "html",
47 | null,
48 | null,
49 | true
50 | ],
51 | [
52 | "Character",
53 | " "
54 | ]
55 | ]
56 | },
57 | {
58 | "fragmentContext": null,
59 | "description": "  A",
60 | "input": "  A",
61 | "output": [
62 | [
63 | "DOCTYPE",
64 | "html",
65 | null,
66 | null,
67 | true
68 | ],
69 | [
70 | "Character",
71 | " A"
72 | ]
73 | ]
74 | },
75 | {
76 | "fragmentContext": null,
77 | "description": "⊂⃒",
78 | "input": "⊂⃒",
79 | "output": [
80 | [
81 | "DOCTYPE",
82 | "html",
83 | null,
84 | null,
85 | true
86 | ],
87 | [
88 | "Character",
89 | "⊂⃒"
90 | ]
91 | ]
92 | },
93 | {
94 | "fragmentContext": null,
95 | "description": "⊂⃒A",
96 | "input": "⊂⃒A",
97 | "output": [
98 | [
99 | "DOCTYPE",
100 | "html",
101 | null,
102 | null,
103 | true
104 | ],
105 | [
106 | "Character",
107 | "⊂⃒A"
108 | ]
109 | ]
110 | },
111 | {
112 | "fragmentContext": null,
113 | "description": "𝔾",
114 | "input": "𝔾",
115 | "output": [
116 | [
117 | "DOCTYPE",
118 | "html",
119 | null,
120 | null,
121 | true
122 | ],
123 | [
124 | "Character",
125 | "𝔾"
126 | ]
127 | ]
128 | },
129 | {
130 | "fragmentContext": null,
131 | "description": "𝔾A",
132 | "input": "𝔾A",
133 | "output": [
134 | [
135 | "DOCTYPE",
136 | "html",
137 | null,
138 | null,
139 | true
140 | ],
141 | [
142 | "Character",
143 | "𝔾A"
144 | ]
145 | ]
146 | }
147 | ]
148 | }
--------------------------------------------------------------------------------
/test/data/parser-feedback/tests4.test:
--------------------------------------------------------------------------------
1 | {
2 | "tests": [
3 | {
4 | "fragmentContext": "div",
5 | "description": "direct div content",
6 | "input": "direct div content",
7 | "output": [
8 | [
9 | "Character",
10 | "direct div content"
11 | ]
12 | ]
13 | },
14 | {
15 | "fragmentContext": "textarea",
16 | "description": "direct textarea content",
17 | "input": "direct textarea content",
18 | "output": [
19 | [
20 | "Character",
21 | "direct textarea content"
22 | ]
23 | ]
24 | },
25 | {
26 | "fragmentContext": "textarea",
27 | "description": "textarea content with pseudo markup",
28 | "input": "textarea content with pseudo markup",
29 | "output": [
30 | [
31 | "Character",
32 | "textarea content with "
33 | ],
34 | [
35 | "StartTag",
36 | "em",
37 | {}
38 | ],
39 | [
40 | "Character",
41 | "pseudo"
42 | ],
43 | [
44 | "EndTag",
45 | "em"
46 | ],
47 | [
48 | "Character",
49 | " "
50 | ],
51 | [
52 | "StartTag",
53 | "foo",
54 | {}
55 | ],
56 | [
57 | "Character",
58 | "markup"
59 | ]
60 | ]
61 | },
62 | {
63 | "fragmentContext": "style",
64 | "description": "this is CDATA inside a