├── .commitlintrc.json
├── .github
    ├── CODEOWNERS
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   ├── feature_request.md
    │   └── question.md
    ├── PULL_REQUEST_TEMPLATE.md
    ├── SECURITY.md
    ├── dependabot.yml
    ├── mergify.yml
    └── workflows
    │   ├── assign-reviewer.yml
    │   ├── build.yml
    │   ├── commitlint.yml
    │   ├── lint.yml
    │   ├── release-please.yml
    │   ├── size-limit.yml
    │   └── test.yml
├── .gitignore
├── .husky
    ├── commit-msg
    └── pre-commit
├── .lintstagedrc.json
├── .mocharc.json
├── .nvmrc
├── .prettierrc.json
├── .size-limit.json
├── CHANGELOG.md
├── LICENSE
├── README.md
├── eslint.config.mjs
├── esm
    ├── client
    │   ├── html-to-dom.d.mts
    │   └── html-to-dom.mjs
    ├── index.d.mts
    ├── index.mjs
    └── server
    │   ├── html-to-dom.d.mts
    │   └── html-to-dom.mjs
├── examples
    └── index.html
├── karma.conf.js
├── package-lock.json
├── package.json
├── rollup.config.mjs
├── src
    ├── client
    │   ├── constants.ts
    │   ├── domparser.ts
    │   ├── html-to-dom.ts
    │   └── utilities.ts
    ├── index.ts
    ├── server
    │   ├── html-to-dom.ts
    │   └── utilities.ts
    └── types.ts
├── test
    ├── cases
    │   ├── complex.html
    │   ├── complex.svg
    │   ├── html.js
    │   ├── index.d.ts
    │   └── index.js
    ├── client
    │   └── index.js
    ├── esm
    │   ├── client.test.mjs
    │   ├── index.test.mjs
    │   └── server.test.mjs
    ├── helpers
    │   ├── cycle.js
    │   ├── index.d.ts
    │   ├── index.js
    │   ├── run-tests.js
    │   ├── test-case-sensitive-tags.js
    │   └── throw-errors.js
    ├── server
    │   ├── client.test.ts
    │   ├── index.test.ts
    │   └── server.test.ts
    └── types
    │   └── index.test.ts
└── tsconfig.json
/.commitlintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": ["@commitlint/config-conventional"],
3 | "rules": {
4 | "body-max-line-length": [1, "always", 100]
5 | }
6 | }
7 |
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @remarkablemark
2 |
3 | /package.json
4 | /package-lock.json
5 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug Report
3 | about: Report a bug
4 | labels: bug
5 | assignees: remarkablemark
6 | ---
7 |
8 | ## Expected Behavior
9 |
10 |
11 |
12 | ## Actual Behavior
13 |
14 |
15 |
16 | ## Steps to Reproduce
17 |
18 |
19 |
20 | ## Reproducible Demo
21 |
22 |
28 |
29 | ## Environment
30 |
31 | - Version:
32 | - Platform:
33 | - Browser:
34 | - OS:
35 |
36 | ## Keywords
37 |
38 |
39 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature Request
3 | about: Suggest improvements or new features
4 | labels: feature
5 | assignees: remarkablemark
6 | ---
7 |
8 | ## Problem
9 |
10 |
11 |
12 | ## Suggested Solution
13 |
14 |
15 |
16 | ## Keywords
17 |
18 |
19 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Question
3 | about: Ask a question
4 | labels: question
5 | assignees: remarkablemark
6 | ---
7 |
8 | ## Question
9 |
10 |
11 |
12 | ## Keywords
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
4 |
5 | ## What is the motivation for this pull request?
6 |
7 |
8 |
9 | ## What is the current behavior?
10 |
11 |
12 |
13 | ## What is the new behavior?
14 |
15 |
16 |
17 | ## Checklist:
18 |
19 |
23 |
24 | - [ ] [Conventional Commits](https://www.conventionalcommits.org/)
25 | - [ ] Tests
26 | - [ ] [Types](https://arethetypeswrong.github.io/)
27 | - [ ] Documentation
28 |
29 |
32 |
--------------------------------------------------------------------------------
/.github/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | Only the current version is supported. Please make sure to update to the latest release.
6 |
7 | ## Reporting a Vulnerability
8 |
9 | To report a security vulnerability, please use the [Tidelift security contact](https://tidelift.com/security). Tidelift will coordinate the fix and disclosure.
10 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: 'npm'
9 | directory: '/'
10 | schedule:
11 | interval: 'daily'
12 | groups:
13 | commitlint:
14 | patterns:
15 | - '@commitlint/*'
16 | eslint:
17 | patterns:
18 | - '@eslint/*'
19 | prettier:
20 | patterns:
21 | - prettier
22 | - eslint-plugin-prettier
23 | typescript-eslint:
24 | patterns:
25 | - '@typescript-eslint/*'
26 |
27 | - package-ecosystem: 'github-actions'
28 | directory: '/'
29 | schedule:
30 | interval: 'daily'
31 |
--------------------------------------------------------------------------------
/.github/mergify.yml:
--------------------------------------------------------------------------------
1 | pull_request_rules:
2 | - name: automatic merge for Dependabot pull requests
3 | conditions:
4 | - author=dependabot[bot]
5 | - check-success=build
6 | - check-success=commitlint
7 | - check-success=lint
8 | - check-success=size
9 | - check-success=test
10 | - 'title~=^build\(deps-dev\): bump '
11 | actions:
12 | merge:
13 | method: squash
14 |
--------------------------------------------------------------------------------
/.github/workflows/assign-reviewer.yml:
--------------------------------------------------------------------------------
1 | name: Assign Reviewer
2 | on: pull_request_target
3 |
4 | jobs:
5 | assign-reviewer:
6 | runs-on: ubuntu-latest
7 | steps:
8 | - name: Assign reviewer
9 | if: >
10 | startsWith(github.event.pull_request.title, 'build(deps-dev): bump ') == false &&
11 | contains(github.event.action, 'opened')
12 | run: >
13 | gh pr edit ${{ github.event.pull_request.html_url }}
14 | --add-assignee ${{ github.event.pull_request.user.login }}
15 | --add-reviewer remarkablemark
16 | env:
17 | GITHUB_TOKEN: ${{ github.token }}
18 | continue-on-error: true
19 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: build
2 | on: [push, pull_request]
3 |
4 | jobs:
5 | build:
6 | runs-on: ubuntu-latest
7 | steps:
8 | - name: Checkout repository
9 | uses: actions/checkout@v4
10 |
11 | - name: Use Node.js
12 | uses: actions/setup-node@v4
13 | with:
14 | cache: npm
15 | node-version-file: .nvmrc
16 |
17 | - name: Install dependencies
18 | run: npm ci --prefer-offline
19 |
20 | - name: Build package
21 | run: npm run build
22 |
23 | - name: Lint package
24 | run: npm run lint:package
25 |
--------------------------------------------------------------------------------
/.github/workflows/commitlint.yml:
--------------------------------------------------------------------------------
1 | name: commitlint
2 | on: [push, pull_request]
3 |
4 | jobs:
5 | commitlint:
6 | runs-on: ubuntu-latest
7 | steps:
8 | - name: Checkout repository
9 | uses: actions/checkout@v4
10 | with:
11 | fetch-depth: 0
12 |
13 | - name: Use Node.js
14 | uses: actions/setup-node@v4
15 | with:
16 | cache: npm
17 | node-version-file: .nvmrc
18 |
19 | - name: Install dependencies
20 | run: npm ci --prefer-offline
21 |
22 | - name: Lint commit message
23 | run: npx commitlint --from=HEAD~1
24 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: lint
2 | on: [push, pull_request]
3 |
4 | jobs:
5 | lint:
6 | runs-on: ubuntu-latest
7 | steps:
8 | - name: Checkout repository
9 | uses: actions/checkout@v4
10 |
11 | - name: Use Node.js
12 | uses: actions/setup-node@v4
13 | with:
14 | cache: npm
15 | node-version-file: .nvmrc
16 |
17 | - name: Install dependencies
18 | run: npm ci --prefer-offline
19 |
20 | - name: Run ESLint
21 | run: npm run lint
22 |
23 | - name: Type check
24 | run: npm run lint:tsc
25 |
--------------------------------------------------------------------------------
/.github/workflows/release-please.yml:
--------------------------------------------------------------------------------
1 | name: release-please
2 | on:
3 | push:
4 | branches:
5 | - master
6 |
7 | jobs:
8 | release-please:
9 | runs-on: ubuntu-latest
10 | outputs:
11 | release_created: ${{ steps.release.outputs.release_created }}
12 |
13 | steps:
14 | - name: Release Please
15 | uses: googleapis/release-please-action@v4
16 | id: release
17 | with:
18 | release-type: node
19 |
20 | publish:
21 | runs-on: ubuntu-latest
22 | permissions:
23 | contents: read
24 | id-token: write
25 | needs: release-please
26 | if: ${{ needs.release-please.outputs.release_created }}
27 |
28 | steps:
29 | - name: Checkout repository
30 | uses: actions/checkout@v4
31 |
32 | - name: Use Node.js
33 | uses: actions/setup-node@v4
34 | with:
35 | node-version-file: .nvmrc
36 | registry-url: https://registry.npmjs.org
37 |
38 | - name: Install dependencies
39 | run: npm ci --prefer-offline
40 |
41 | - name: Publish
42 | run: npm publish --provenance --access public
43 | env:
44 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
45 |
--------------------------------------------------------------------------------
/.github/workflows/size-limit.yml:
--------------------------------------------------------------------------------
1 | name: size
2 | on:
3 | pull_request:
4 | branches:
5 | - master
6 | jobs:
7 | size:
8 | runs-on: ubuntu-latest
9 | env:
10 | CI_JOB_NUMBER: 1
11 | steps:
12 | - uses: actions/checkout@v4
13 | - uses: andresz1/size-limit-action@v1
14 | with:
15 | github_token: ${{ secrets.GITHUB_TOKEN }}
16 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: test
2 | on: [push, pull_request]
3 |
4 | jobs:
5 | test:
6 | runs-on: ubuntu-latest
7 | steps:
8 | - name: Checkout repository
9 | uses: actions/checkout@v4
10 |
11 | - name: Use Node.js
12 | uses: actions/setup-node@v4
13 | with:
14 | cache: npm
15 | node-version-file: .nvmrc
16 |
17 | - name: Install dependencies
18 | run: npm ci --prefer-offline
19 |
20 | - name: Run server test
21 | run: npm run test:server
22 |
23 | - name: Generate coverage report
24 | run: |
25 | mkdir -p coverage
26 | npx nyc report --reporter=text-lcov > coverage/lcov.info
27 |
28 | - name: Upload coverage reports to Codecov
29 | uses: codecov/codecov-action@v5
30 | with:
31 | token: ${{ secrets.CODECOV_TOKEN }}
32 |
33 | - name: Run module tests
34 | run: npm run test:esm
35 |
36 | - name: Run client test
37 | uses: coactions/setup-xvfb@v1
38 | with:
39 | run: npm run test:client
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Build directory
2 | dist/
3 | esm/types.ts
4 | lib/
5 |
6 | # Logs
7 | logs
8 | *.log
9 | npm-debug.log*
10 |
11 | # Runtime data
12 | pids
13 | *.pid
14 | *.seed
15 | *.pid.lock
16 |
17 | # Directory for instrumented libs generated by jscoverage/JSCover
18 | lib-cov/
19 |
20 | # Coverage directory used by tools like istanbul
21 | coverage/
22 |
23 | # nyc test coverage
24 | .nyc_output/
25 |
26 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
27 | .grunt/
28 |
29 | # node-waf configuration
30 | .lock-wscript
31 |
32 | # Compiled binary addons (http://nodejs.org/api/addons.html)
33 | build/Release
34 |
35 | # Dependency directories
36 | node_modules/
37 | jspm_packages/
38 |
39 | # Optional npm cache directory
40 | .npm/
41 |
42 | # Optional eslint cache
43 | .eslintcache
44 |
45 | # Optional REPL history
46 | .node_repl_history
47 |
48 | # Vim swap files
49 | *.swp
50 |
51 | # Mac OS
52 | .DS_Store
53 |
--------------------------------------------------------------------------------
/.husky/commit-msg:
--------------------------------------------------------------------------------
1 | npx commitlint --edit $1
2 |
--------------------------------------------------------------------------------
/.husky/pre-commit:
--------------------------------------------------------------------------------
1 | npm test
2 | npm run test:esm
3 | npx lint-staged
4 |
--------------------------------------------------------------------------------
/.lintstagedrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "*.{js,mjs,ts}": "eslint --fix",
3 | "*.{css,html,json,md,yml}": "prettier --write"
4 | }
5 |
--------------------------------------------------------------------------------
/.mocharc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extensions": ["js", "ts"],
3 | "spec": ["test/server/*.test.ts"],
4 | "require": "ts-node/register",
5 | "recursive": true,
6 | "ignore": "esm"
7 | }
8 |
--------------------------------------------------------------------------------
/.nvmrc:
--------------------------------------------------------------------------------
1 | 20
2 |
--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "singleQuote": true
3 | }
4 |
--------------------------------------------------------------------------------
/.size-limit.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "path": "dist/html-dom-parser.min.js",
4 | "limit": "4 KB"
5 | }
6 | ]
7 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4 |
5 | ## [5.1.1](https://github.com/remarkablemark/html-dom-parser/compare/v5.1.0...v5.1.1) (2025-05-09)
6 |
7 |
8 | ### Documentation
9 |
10 | * **readme:** fix build status badge ([192c097](https://github.com/remarkablemark/html-dom-parser/commit/192c097325c04f303bcd21121e2701b2a333a3da))
11 |
12 | ## [5.1.0](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.13...v5.1.0) (2025-05-08)
13 |
14 |
15 | ### Features
16 |
17 | * add `browser` and `default` conditions to `"."` ([aa0ca64](https://github.com/remarkablemark/html-dom-parser/commit/aa0ca64653d30449e0d539f1f88fbf15c80f0984))
18 | * use `default` over `import` condition ([13b08f6](https://github.com/remarkablemark/html-dom-parser/commit/13b08f63af874e0bf73b922a4b34be77a67f609b))
19 |
20 | ## [5.0.13](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.12...v5.0.13) (2024-12-25)
21 |
22 |
23 | ### Build System
24 |
25 | * **deps:** bump htmlparser2 from 9.1.0 to 10.0.0 ([#929](https://github.com/remarkablemark/html-dom-parser/issues/929)) ([2d15abe](https://github.com/remarkablemark/html-dom-parser/commit/2d15abe0d022e3fb513b901d3f42b9b56bcdeb57))
26 |
27 | ## [5.0.12](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.11...v5.0.12) (2024-12-16)
28 |
29 |
30 | ### Bug Fixes
31 |
32 | * **client:** don't break LaTeX when replacing carriage returns ([d69bc66](https://github.com/remarkablemark/html-dom-parser/commit/d69bc662f4470cd8ebf551af873cd7badfa6ebca)), closes [#917](https://github.com/remarkablemark/html-dom-parser/issues/917)
33 |
34 | ## [5.0.11](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.10...v5.0.11) (2024-12-04)
35 |
36 | ### Bug Fixes
37 |
38 | - enable client parser to retain carriage return characters ([#902](https://github.com/remarkablemark/html-dom-parser/issues/902)) ([fe2e993](https://github.com/remarkablemark/html-dom-parser/commit/fe2e993d69c45e44696224985e558efca79db181)), closes [#420](https://github.com/remarkablemark/html-dom-parser/issues/420)
39 |
40 | ## [5.0.10](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.9...v5.0.10) (2024-08-28)
41 |
42 | ### Continuous Integration
43 |
44 | - **github:** publish package to npm registry with provenance ([e023fe8](https://github.com/remarkablemark/html-dom-parser/commit/e023fe8535c73c5b01837a114d6336c409e2ab17))
45 |
46 | ## [5.0.9](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.8...v5.0.9) (2024-07-18)
47 |
48 | ### Bug Fixes
49 |
50 | - exports field includes package.json ([c373a92](https://github.com/remarkablemark/html-dom-parser/commit/c373a92567272712300270ed721e298e46c4f929))
51 |
52 | ## [5.0.8](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.7...v5.0.8) (2024-02-12)
53 |
54 | ### Bug Fixes
55 |
56 | - **esm:** fix exported types ([b6918ae](https://github.com/remarkablemark/html-dom-parser/commit/b6918ae7090308e8812847588f410c96fb6075da))
57 |
58 | ## [5.0.7](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.6...v5.0.7) (2024-01-13)
59 |
60 | ### Build System
61 |
62 | - **deps:** bump htmlparser2 from 9.0.0 to 9.1.0 ([#631](https://github.com/remarkablemark/html-dom-parser/issues/631)) ([6816800](https://github.com/remarkablemark/html-dom-parser/commit/681680074c59cb1ab61758b06dc794476d8a7fe0))
63 |
64 | ## [5.0.6](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.5...v5.0.6) (2023-12-19)
65 |
66 | ### Bug Fixes
67 |
68 | - re-export types correctly for verbatimModuleSyntax ([#612](https://github.com/remarkablemark/html-dom-parser/issues/612)) ([782b675](https://github.com/remarkablemark/html-dom-parser/commit/782b6750223b8b5a556c714ca04e0e682a490248))
69 |
70 | ## [5.0.5](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.4...v5.0.5) (2023-12-16)
71 |
72 | ### Bug Fixes
73 |
74 | - **esm:** fix ESM types by adding .mts declaration files ([96a1cfc](https://github.com/remarkablemark/html-dom-parser/commit/96a1cfcd899f4b4c6030e75d13438fc3c05b3be2))
75 |
76 | ## [5.0.4](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.3...v5.0.4) (2023-10-31)
77 |
78 | ### Bug Fixes
79 |
80 | - **esm:** support vite bundler ([c9e510f](https://github.com/remarkablemark/html-dom-parser/commit/c9e510f7c4e7c0dded3d80a540a313c5c8e3000e))
81 |
82 | ## [5.0.3](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.2...v5.0.3) (2023-10-22)
83 |
84 | ### Miscellaneous Chores
85 |
86 | - export types from index.ts ([8ed55e2](https://github.com/remarkablemark/html-dom-parser/commit/8ed55e23df5c50e19c6df5d0e4856f52ee890a0c))
87 |
88 | ## [5.0.2](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.1...v5.0.2) (2023-10-19)
89 |
90 | ### Bug Fixes
91 |
92 | - **package:** add "/src" to files to fix source map warning ([7082c50](https://github.com/remarkablemark/html-dom-parser/commit/7082c50c56f825dcab896f6847c60b0dcdd9b211))
93 |
94 | ## [5.0.1](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.0...v5.0.1) (2023-10-17)
95 |
96 | ### Bug Fixes
97 |
98 | - **package:** add types to exports in package.json ([df08df3](https://github.com/remarkablemark/html-dom-parser/commit/df08df3c8478dfbe12eeee9e3bc90c8959da454c))
99 |
100 | ## [5.0.0](https://github.com/remarkablemark/html-dom-parser/compare/v4.0.1...v5.0.0) (2023-10-16)
101 |
102 | ### ⚠ BREAKING CHANGES
103 |
104 | - CommonJS imports require the `.default` key.
105 |
106 | ### Code Refactoring
107 |
108 | - migrate to TypeScript ([5915b08](https://github.com/remarkablemark/html-dom-parser/commit/5915b084ce7f3cf59e486da998c203f87a45bda1))
109 |
110 | ## [4.0.1](https://github.com/remarkablemark/html-dom-parser/compare/v4.0.0...v4.0.1) (2023-10-15)
111 |
112 | ### Miscellaneous Chores
113 |
114 | - **index:** set TypeScript Version to 5.2 in index.d.ts ([#525](https://github.com/remarkablemark/html-dom-parser/issues/525)) ([8219338](https://github.com/remarkablemark/html-dom-parser/commit/82193387c3fa05f30d5d6f1d88739d92a4a7156c))
115 |
116 | ## [4.0.0](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.7...v4.0.0) (2023-05-31)
117 |
118 | ### ⚠ BREAKING CHANGES
119 |
120 | - **deps:** bump htmlparser2 from 8.0.2 to 9.0.0
121 |
122 | ### Build System
123 |
124 | - **deps:** bump htmlparser2 from 8.0.2 to 9.0.0 ([467bbaa](https://github.com/remarkablemark/html-dom-parser/commit/467bbaa4d7373635b35a4e7a92ca6b56aec74b09)), closes [#459](https://github.com/remarkablemark/html-dom-parser/issues/459)
125 |
126 | ## [3.1.7](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.6...v3.1.7) (2023-03-25)
127 |
128 | ### Build System
129 |
130 | - **deps:** bump htmlparser2 from 8.0.1 to 8.0.2 ([4fbe117](https://github.com/remarkablemark/html-dom-parser/commit/4fbe1171f1f7dfb87f05b7f9c410eaea0c6e8405)), closes [#433](https://github.com/remarkablemark/html-dom-parser/issues/433)
131 |
132 | ## [3.1.6](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.5...v3.1.6) (2023-03-22)
133 |
134 | ### Bug Fixes
135 |
136 | - **client:** correct spelling of feGaussianBlur ([9e28250](https://github.com/remarkablemark/html-dom-parser/commit/9e282504b0bef656b19249c324b748aa2190cb42)), closes [#429](https://github.com/remarkablemark/html-dom-parser/issues/429)
137 |
138 | ## [3.1.5](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.4...v3.1.5) (2023-03-06)
139 |
140 | ### Bug Fixes
141 |
142 | - **client:** check for "template" in utilities formatDOM ([748cf27](https://github.com/remarkablemark/html-dom-parser/commit/748cf2763565554874e337f8af7b8e6d147a9d94)), closes [#417](https://github.com/remarkablemark/html-dom-parser/issues/417)
143 |
144 | ## [3.1.4](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.3...v3.1.4) (2023-03-04)
145 |
146 | ### Bug Fixes
147 |
148 | - **client:** get template content childNodes in utilities formatDOM ([c2c0bed](https://github.com/remarkablemark/html-dom-parser/commit/c2c0bedbc2f3ae8a3bdb4cdad05df007a2d6e870)), closes [#414](https://github.com/remarkablemark/html-dom-parser/issues/414)
149 |
150 | ## [3.1.3](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.2...v3.1.3) (2023-01-17)
151 |
152 | ### Bug Fixes
153 |
154 | - **package:** specify types in package.json and exports field ([21fb028](https://github.com/remarkablemark/html-dom-parser/commit/21fb02813597c786e29a3d47de89efca1451ee62))
155 |
156 | ## [3.1.2](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.1...v3.1.2) (2022-08-23)
157 |
158 | ### Bug Fixes
159 |
160 | - **client:** fix import in html-to-dom.mjs ([78a7607](https://github.com/remarkablemark/html-dom-parser/commit/78a7607f8b4c421297cc0569d9f4e49647a0eb63)), closes [#337](https://github.com/remarkablemark/html-dom-parser/issues/337)
161 |
162 | ## [3.1.1](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.0...v3.1.1) (2022-08-20)
163 |
164 | ### Bug Fixes
165 |
166 | - **client:** correct ECMAScript export in client html-to-dom.mjs ([7de506c](https://github.com/remarkablemark/html-dom-parser/commit/7de506cb7dfb6e48e4886659283f118265464e92)), closes [#334](https://github.com/remarkablemark/html-dom-parser/issues/334)
167 |
168 | ## [3.1.0](https://github.com/remarkablemark/html-dom-parser/compare/v3.0.1...v3.1.0) (2022-08-16)
169 |
170 | ### Features
171 |
172 | - add esm for client ([0c4c2b6](https://github.com/remarkablemark/html-dom-parser/commit/0c4c2b6d86c75cb3dc924afc6fd8501c5d30a876))
173 |
174 | ## [3.0.1](https://github.com/remarkablemark/html-dom-parser/compare/v3.0.0...v3.0.1) (2022-07-10)
175 |
176 | ### Bug Fixes
177 |
178 | - **client:** ensure head and body with newline are parsed correctly ([b26b645](https://github.com/remarkablemark/html-dom-parser/commit/b26b645c9de708a6b85b61b66bbdb8e75eebb121)), closes [#317](https://github.com/remarkablemark/html-dom-parser/issues/317)
179 |
180 | ## [3.0.0](https://github.com/remarkablemark/html-dom-parser/compare/v2.0.0...v3.0.0) (2022-07-05)
181 |
182 | ### ⚠ BREAKING CHANGES
183 |
184 | - htmlparser2 7.2.0 → 8.0.1
185 |
186 | ### Build System
187 |
188 | - upgrade domhandler to 5.0.3 and htmlparser2 to 8.0.1 ([e80a69c](https://github.com/remarkablemark/html-dom-parser/commit/e80a69c83e07b0ae2f48a78dbd6adb7d0a71b0f3))
189 |
190 | ## [2.0.0](https://github.com/remarkablemark/html-dom-parser/compare/v1.2.0...v2.0.0) (2022-06-18)
191 |
192 | ### ⚠ BREAKING CHANGES
193 |
194 | - **client:** remove Internet Explorer (IE11) support
195 |
196 | ### Features
197 |
198 | - **client:** remove Internet Explorer (IE11) support ([b34cbe1](https://github.com/remarkablemark/html-dom-parser/commit/b34cbe1310c0e223ae05bca00945c70faca374ab)), closes [#225](https://github.com/remarkablemark/html-dom-parser/issues/225)
199 |
200 | ## [1.2.0](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.1.1...v1.2.0) (2022-04-14)
201 |
202 | ### Features
203 |
204 | - add compatibility for react-native ([4a4a974](https://www.github.com/remarkablemark/html-dom-parser/commit/4a4a974c01ad0beaf0591fe169d68afe66ea6e2a))
205 |
206 | ## [1.1.1](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.1.0...v1.1.1) (2022-03-20)
207 |
208 | ### Build System
209 |
210 | - **package:** upgrade domhandler from 4.3.0 to 4.3.1 ([c2e8a82](https://www.github.com/remarkablemark/html-dom-parser/commit/c2e8a82035957bd991a969ef12f3dcd114679a2a))
211 |
212 | ## [1.1.0](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.0.4...v1.1.0) (2022-02-05)
213 |
214 | ### Features
215 |
216 | - add ES Module support ([b8436b1](https://www.github.com/remarkablemark/html-dom-parser/commit/b8436b19147e0146f50982a225e787fd0a53d97d))
217 |
218 | ## [1.0.4](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.0.3...v1.0.4) (2021-12-06)
219 |
220 | ### Build System
221 |
222 | - **deps:** bump domhandler from 4.2.2 to 4.3.0 ([cb49258](https://www.github.com/remarkablemark/html-dom-parser/commit/cb49258f9cfcd0c6bce1fa5d5e9dc52b2bdeb389))
223 |
224 | ## [1.0.3](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.0.2...v1.0.3) (2021-11-27)
225 |
226 | ### Performance Improvements
227 |
228 | - upgrade dependency htmlparser2 to v7.2.0 ([7819211](https://www.github.com/remarkablemark/html-dom-parser/commit/78192117edbf8bcbdd23b6534c66dce515b408de))
229 |
230 | ## [1.0.2](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.0.1...v1.0.2) (2021-09-06)
231 |
232 | ### Build System
233 |
234 | - **deps:** bump domhandler from 4.2.0 to 4.2.2 ([ab46792](https://www.github.com/remarkablemark/html-dom-parser/commit/ab46792a32ba78644748a940748a87124730b24f))
235 |
236 | ## [1.0.1](https://github.com/remarkablemark/html-dom-parser/compare/v1.0.0...v1.0.1) (2021-06-13)
237 |
238 | ## [1.0.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.5.0...v1.0.0) (2020-12-25)
239 |
240 | ### Build System
241 |
242 | - **package:** upgrade `domhandler` to v4 and `htmlparser2` to v6 ([ec5673e](https://github.com/remarkablemark/html-dom-parser/commit/ec5673ef38050f808ce49e2e4ee165d30492b190))
243 |
244 | ### Performance Improvements
245 |
246 | - **client:** deprecate Internet Explorer 9 (IE9) ([d42ea4e](https://github.com/remarkablemark/html-dom-parser/commit/d42ea4ed5df9f44abf16528e36b3cfbb4c7fdd08))
247 | - **utilities:** continue if nodeType is not element, text, comment ([793ff0c](https://github.com/remarkablemark/html-dom-parser/commit/793ff0c7a84ea05ff061e24984595f6e143362e6))
248 |
249 | ### BREAKING CHANGES
250 |
251 | - **package:** upgrade `domhandler` to v4 and `htmlparser2` to v6
252 |
253 | domhandler 3.3.0 → 4.0.0
254 | htmlparser2 4.1.0 → 6.0.0
255 |
256 | domhandler:
257 |
258 | - https://github.com/fb55/domhandler/releases/tag/v4.0.0
259 |
260 | htmlparser2:
261 |
262 | - https://github.com/fb55/htmlparser2/releases/tag/v5.0.0
263 | - https://github.com/fb55/htmlparser2/releases/tag/v5.0.1
264 | - https://github.com/fb55/htmlparser2/releases/tag/v6.0.0
265 |
266 | `decodeEntities` option now defaults to true. `<br>` is
267 | parsed correctly. Remove root parent node to keep parser
268 | backwards compatible.
269 |
270 | ## [0.5.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.4.0...v0.5.0) (2020-12-13)
271 |
272 | ### Features
273 |
274 | - upgrade `domhandler` to 3.3.0 and `htmlparser2` to 4.1.0 ([2a748b8](https://github.com/remarkablemark/html-dom-parser/commit/2a748b8d4cc06660a18636e018706508f4fc19f4))
275 |
276 | ## [0.4.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.3.1...v0.4.0) (2020-12-13)
277 |
278 | ### Features
279 |
280 | - upgrade `domhandler` to 3.0.0 and `htmlparser2` to 4.0.0 ([44dba5e](https://github.com/remarkablemark/html-dom-parser/commit/44dba5efb5cc89668fc064d844c09079560029e1))
281 |
282 | ## [0.3.1](https://github.com/remarkablemark/html-dom-parser/compare/v0.3.0...v0.3.1) (2020-12-13)
283 |
284 |
285 |
286 | ## [0.3.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.2.3...v0.3.0) (2020-06-02)
287 |
288 | ### Features
289 |
290 | - **lib:** throw error if browser does not support parsing methods ([de327af](https://github.com/remarkablemark/html-dom-parser/commit/de327af))
291 |
292 | ### Performance Improvements
293 |
294 | - **lib:** return `[]` if empty string is passed to server parser ([9850d05](https://github.com/remarkablemark/html-dom-parser/commit/9850d05))
295 |
296 | ## [0.2.3](https://github.com/remarkablemark/html-dom-parser/compare/v0.2.2...v0.2.3) (2019-11-04)
297 |
298 | ### Bug Fixes
299 |
300 | - **lib:** improve head and body regex in `domparser.js` ([457bb58](https://github.com/remarkablemark/html-dom-parser/commit/457bb58)), closes [#18](https://github.com/remarkablemark/html-dom-parser/issues/18)
301 |
302 | ### Build System
303 |
304 | - **package:** save commitlint, husky, and lint-staged to devDeps ([3b0ce91](https://github.com/remarkablemark/html-dom-parser/commit/3b0ce91))
305 | - **package:** update `eslint` and install `prettier` and plugin ([b7a6b81](https://github.com/remarkablemark/html-dom-parser/commit/b7a6b81))
306 | - **package:** update `webpack` and save `webpack-cli` ([908e56d](https://github.com/remarkablemark/html-dom-parser/commit/908e56d))
307 | - **package:** update dependencies and devDependencies ([a9016be](https://github.com/remarkablemark/html-dom-parser/commit/a9016be))
308 |
309 | ### Tests
310 |
311 | - **server:** remove skipped test ([a4c1057](https://github.com/remarkablemark/html-dom-parser/commit/a4c1057))
312 | - refactor tests to ES6 ([d5255a5](https://github.com/remarkablemark/html-dom-parser/commit/d5255a5))
313 | - **cases:** add empty string test case to `html.js` ([25d7e8a](https://github.com/remarkablemark/html-dom-parser/commit/25d7e8a))
314 | - **cases:** add more special test cases to `html.js` ([6fdf2ea](https://github.com/remarkablemark/html-dom-parser/commit/6fdf2ea))
315 | - **cases:** refactor test cases and move html data to its own file ([e4fcb09](https://github.com/remarkablemark/html-dom-parser/commit/e4fcb09))
316 | - **cases:** remove unnecessary try/catch wrapper to fix lint error ([ca8175e](https://github.com/remarkablemark/html-dom-parser/commit/ca8175e))
317 | - **cases:** skip html test cases that PhantomJS does not support ([d095d29](https://github.com/remarkablemark/html-dom-parser/commit/d095d29))
318 | - **cases:** update `complex.html` ([1418775](https://github.com/remarkablemark/html-dom-parser/commit/1418775))
319 | - **client:** add tests for client parser that will be run by karma ([a0c58aa](https://github.com/remarkablemark/html-dom-parser/commit/a0c58aa))
320 | - **helpers:** create `index.js` which exports helpers ([a9255d5](https://github.com/remarkablemark/html-dom-parser/commit/a9255d5))
321 | - **helpers:** move helper that tests for errors to separate file ([f2e6312](https://github.com/remarkablemark/html-dom-parser/commit/f2e6312))
322 | - **helpers:** refactor and move `runTests` to its own file ([8e30784](https://github.com/remarkablemark/html-dom-parser/commit/8e30784))
323 | - **server:** add tests that spy and mock htmlparser2 and domhandler ([61075a1](https://github.com/remarkablemark/html-dom-parser/commit/61075a1))
324 | - **server:** move `html-to-dom-server.js` to `server` directory ([3684dac](https://github.com/remarkablemark/html-dom-parser/commit/3684dac))
325 |
326 | ## [0.2.2](https://github.com/remarkablemark/html-dom-parser/compare/v0.2.1...v0.2.2) (2019-06-07)
327 |
328 | ### Bug Fixes
329 |
330 | - **utilities:** do not lowercase case-sensitive SVG tags ([4083004](https://github.com/remarkablemark/html-dom-parser/commit/4083004))
331 |
332 | ### Performance Improvements
333 |
334 | - **utilities:** optimize case-sensitive tag replace with hash map ([6aa06ee](https://github.com/remarkablemark/html-dom-parser/commit/6aa06ee))
335 |
336 | ## [0.2.1](https://github.com/remarkablemark/html-dom-parser/compare/v0.2.0...v0.2.1) (2019-04-03)
337 |
338 | ## [0.2.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.1.3...v0.2.0) (2019-04-01)
339 |
340 | ### Features
341 |
342 | - **types:** add TypeScript declarations ([b52d52f](https://github.com/remarkablemark/html-dom-parser/commit/b52d52f))
343 |
344 | ## [0.1.3](https://github.com/remarkablemark/html-dom-parser/compare/v0.1.2...v0.1.3) - 2018-02-20
345 |
346 | ### Fixed
347 |
348 | - Fix regular expression vulnerability (#8)
349 | - Regex has potential for catastrophic backtracking
350 | - Credit goes to @davisjam for discovering it
351 |
352 | ### Changed
353 |
354 | - Refactored and updated tests (#8)
355 |
356 | ## [0.1.2](https://github.com/remarkablemark/html-dom-parser/compare/v0.1.1...v0.1.2) - 2017-09-30
357 |
358 | ### Added
359 |
360 | - Create helper `isIE()` in utilities (#7)
361 |
362 | ### Fixed
363 |
364 | - Fix client parser in IE/IE9 (#6, #7)
365 |
366 | ### Changed
367 |
368 | - Upgrade `mocha@3.4.2` and `webpack@2.6.1` (#5)
369 | - npm script `build` runs both `build:min` and `build:unmin` (#5)
370 |
371 | ## [0.1.1](https://github.com/remarkablemark/html-dom-parser/compare/v0.1.0...v0.1.1) - 2017-06-26
372 |
373 | ### Added
374 |
375 | - CHANGELOG with previous releases backfilled
376 |
377 | ### Fixed
378 |
379 | - Fix client parser on IE by specifying required parameter for `createHTMLDocument` (#4)
380 |
381 | ## [0.1.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.0.2...v0.1.0) - 2017-06-17
382 |
383 | ### Changed
384 |
385 | - Improve, refactor, and optimize client parser
386 | - Use `template`, `DOMImplementation`, and/or `DOMParser`
387 |
388 | ## [0.0.2](https://github.com/remarkablemark/html-dom-parser/compare/v0.0.1...v0.0.2) - 2016-10-10
389 |
390 | ### Added
391 |
392 | - Create npm scripts for prepublish
393 |
394 | ### Changed
395 |
396 | - Change webpack to build to UMD target
397 | - Update README installation and usage instructions
398 |
399 | ## [0.0.1](https://github.com/remarkablemark/html-dom-parser/tree/v0.0.1) - 2016-10-10
400 |
401 | ### Added
402 |
403 | - Server parser
404 | - Wrapper for `htmlparser2.parseDOM`
405 | - Client parser
406 | - Uses DOM API to mimic server parser output
407 | - Build client library with webpack
408 | - Add README, tests, and other necessary files
409 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Menglin "Mark" Xu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining
6 | a copy of this software and associated documentation files (the
7 | "Software"), to deal in the Software without restriction, including
8 | without limitation the rights to use, copy, modify, merge, publish,
9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # html-dom-parser
2 |
3 | [](https://nodei.co/npm/html-dom-parser/)
4 |
5 | [](https://www.npmjs.com/package/html-dom-parser)
6 | [](https://bundlephobia.com/package/html-dom-parser)
7 | [](https://github.com/remarkablemark/html-dom-parser/actions/workflows/build.yml)
8 | [](https://codecov.io/gh/remarkablemark/html-dom-parser)
9 | [](https://www.npmjs.com/package/html-dom-parser)
10 |
11 | HTML to DOM parser that works on both the server (Node.js) and the client (browser):
12 |
13 | ```
14 | HTMLDOMParser(string[, options])
15 | ```
16 |
17 | The parser converts an HTML string to a JavaScript object that describes the DOM tree.
18 |
19 | #### Example
20 |
21 | ```js
22 | import parse from 'html-dom-parser';
23 |
24 | parse('<p>Hello, World!</p>');
25 | ```
26 |
27 |
28 | Output
29 |
30 |
31 | ```js
32 | [
33 | Element {
34 | type: 'tag',
35 | parent: null,
36 | prev: null,
37 | next: null,
38 | startIndex: null,
39 | endIndex: null,
40 | children: [
41 | Text {
42 | type: 'text',
43 | parent: [Circular],
44 | prev: null,
45 | next: null,
46 | startIndex: null,
47 | endIndex: null,
48 | data: 'Hello, World!'
49 | }
50 | ],
51 | name: 'p',
52 | attribs: {}
53 | }
54 | ]
55 | ```
56 |
57 |
58 |
59 |
60 | [Replit](https://replit.com/@remarkablemark/html-dom-parser) | [JSFiddle](https://jsfiddle.net/remarkablemark/ff9yg1yz/) | [Examples](https://github.com/remarkablemark/html-dom-parser/tree/master/examples)
61 |
62 | ## Install
63 |
64 | [NPM](https://www.npmjs.com/package/html-dom-parser):
65 |
66 | ```sh
67 | npm install html-dom-parser --save
68 | ```
69 |
70 | [Yarn](https://yarnpkg.com/package/html-dom-parser):
71 |
72 | ```sh
73 | yarn add html-dom-parser
74 | ```
75 |
76 | [CDN](https://unpkg.com/html-dom-parser/):
77 |
78 | ```html
79 |
80 |
83 | ```
84 |
85 | ## Usage
86 |
87 | Import with ES Modules:
88 |
89 | ```js
90 | import parse from 'html-dom-parser';
91 | ```
92 |
93 | Require with CommonJS:
94 |
95 | ```js
96 | const parse = require('html-dom-parser').default;
97 | ```
98 |
99 | Parse empty string:
100 |
101 | ```js
102 | parse('');
103 | ```
104 |
105 | Output:
106 |
107 |
108 | ```js
109 | []
110 | ```
111 |
112 | Parse string:
113 |
114 | ```js
115 | parse('Hello, World!');
116 | ```
117 |
118 |
119 | Output
120 |
121 |
122 | ```js
123 | [
124 | Text {
125 | type: 'text',
126 | parent: null,
127 | prev: null,
128 | next: null,
129 | startIndex: null,
130 | endIndex: null,
131 | data: 'Hello, World!'
132 | }
133 | ]
134 | ```
135 |
136 |
137 |
138 |
139 | Parse element with attributes:
140 |
141 | ```js
142 | parse('<p class="foo" style="color: #bada55">Hello, <em>world</em>!</p>');
143 | ```
144 |
145 |
146 | Output
147 |
148 |
149 | ```js
150 | [
151 | Element {
152 | type: 'tag',
153 | parent: null,
154 | prev: null,
155 | next: null,
156 | startIndex: null,
157 | endIndex: null,
158 | children: [ [Text], [Element], [Text] ],
159 | name: 'p',
160 | attribs: { class: 'foo', style: 'color: #bada55' }
161 | }
162 | ]
163 | ```
164 |
165 |
166 |
167 |
168 | The server parser is a wrapper of [htmlparser2](https://github.com/fb55/htmlparser2) `parseDOM` but with the root parent node excluded. The next section shows the available options you can use with the server parser.
169 |
170 | The client parser mimics the server parser by using the [DOM](https://developer.mozilla.org/docs/Web/API/Document_Object_Model/Introduction) API to parse the HTML string.
171 |
172 | ## Options (server only)
173 |
174 | Because the server parser is a wrapper of [htmlparser2](https://github.com/fb55/htmlparser2), which implements [domhandler](https://github.com/fb55/domhandler), you can alter how the server parser parses your code with the following options:
175 |
176 | ```js
177 | /**
178 | * These are the default options being used if you omit the optional options object.
179 | * htmlparser2 will use the same options object for its domhandler so the options
180 | * should be combined into a single object like so:
181 | */
182 | const options = {
183 | /**
184 | * Options for the domhandler class.
185 | * https://github.com/fb55/domhandler/blob/master/src/index.ts#L16
186 | */
187 | withStartIndices: false,
188 | withEndIndices: false,
189 | xmlMode: false,
190 | /**
191 | * Options for the htmlparser2 class.
192 | * https://github.com/fb55/htmlparser2/blob/master/src/Parser.ts#L104
193 | */
194 | xmlMode: false, // Will overwrite what is used for the domhandler, otherwise inherited.
195 | decodeEntities: true,
196 | lowerCaseTags: true, // !xmlMode by default
197 | lowerCaseAttributeNames: true, // !xmlMode by default
198 | recognizeCDATA: false, // xmlMode by default
199 | recognizeSelfClosing: false, // xmlMode by default
200 | Tokenizer: Tokenizer,
201 | };
202 | ```
203 |
204 | If you're parsing SVG, you can set `lowerCaseTags` to `false` without having to enable `xmlMode`. This preserves the original case of tag names (e.g., camelCase SVG tags such as `clipPath`) instead of lowercasing them per the HTML standard.
205 |
206 | > [!NOTE]
207 | > If you're parsing code client-side (in-browser), you cannot control the parsing options. Client-side parsing automatically handles returning some HTML tags in camelCase, such as specific SVG elements, but returns all other tags lowercased according to the HTML standard.
208 |
209 | ## Migration
210 |
211 | ### v5
212 |
213 | Migrated to TypeScript. CommonJS imports require the `.default` key:
214 |
215 | ```js
216 | const parse = require('html-dom-parser').default;
217 | ```
218 |
219 | ### v4
220 |
221 | Upgraded [htmlparser2](https://github.com/fb55/htmlparser2) to v9.
222 |
223 | ### v3
224 |
225 | Upgraded [domhandler](https://github.com/fb55/domhandler) to v5. [Parser options](https://github.com/fb55/htmlparser2/wiki/Parser-options) like `normalizeWhitespace` have been removed.
226 |
227 | ### v2
228 |
229 | Removed Internet Explorer (IE11) support.
230 |
231 | ### v1
232 |
233 | Upgraded `domhandler` to v4 and `htmlparser2` to v6.
234 |
235 | ## Release
236 |
237 | Release and publish are automated by [Release Please](https://github.com/googleapis/release-please).
238 |
239 | ## Special Thanks
240 |
241 | - [Contributors](https://github.com/remarkablemark/html-dom-parser/graphs/contributors)
242 | - [htmlparser2](https://github.com/fb55/htmlparser2)
243 | - [domhandler](https://github.com/fb55/domhandler)
244 |
245 | ## License
246 |
247 | [MIT](https://github.com/remarkablemark/html-dom-parser/blob/master/LICENSE)
248 |
--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
1 | import path from 'node:path';
2 | import { fileURLToPath } from 'node:url';
3 |
4 | import { includeIgnoreFile } from '@eslint/compat';
5 | import { FlatCompat } from '@eslint/eslintrc';
6 | import js from '@eslint/js';
7 | import typescriptEslint from '@typescript-eslint/eslint-plugin';
8 | import tsParser from '@typescript-eslint/parser';
9 | import prettier from 'eslint-plugin-prettier';
10 | import simpleImportSort from 'eslint-plugin-simple-import-sort';
11 | import globals from 'globals';
12 |
13 | const __filename = fileURLToPath(import.meta.url);
14 | const __dirname = path.dirname(__filename);
15 | const gitignorePath = path.resolve(__dirname, '.gitignore');
16 |
17 | const compat = new FlatCompat({
18 | baseDirectory: __dirname,
19 | recommendedConfig: js.configs.recommended,
20 | allConfig: js.configs.all,
21 | });
22 |
23 | export default [
24 | includeIgnoreFile(gitignorePath),
25 |
26 | ...compat.extends(
27 | 'eslint:recommended',
28 | 'plugin:@typescript-eslint/recommended',
29 | ),
30 |
31 | {
32 | plugins: {
33 | '@typescript-eslint': typescriptEslint,
34 | 'simple-import-sort': simpleImportSort,
35 | prettier,
36 | },
37 |
38 | languageOptions: {
39 | globals: {
40 | ...globals.browser,
41 | ...globals.mocha,
42 | ...globals.node,
43 | },
44 | parser: tsParser,
45 | },
46 |
47 | rules: {
48 | '@typescript-eslint/ban-ts-comment': 'off',
49 | '@typescript-eslint/no-unused-vars': 'error',
50 | '@typescript-eslint/no-var-requires': 'off',
51 | 'no-console': 'error',
52 | 'no-debugger': 'error',
53 | 'prettier/prettier': 'error',
54 | 'simple-import-sort/exports': 'error',
55 | 'simple-import-sort/imports': 'error',
56 | },
57 | },
58 | {
59 | files: ['test/**'],
60 |
61 | rules: {
62 | '@typescript-eslint/no-require-imports': 'off',
63 | },
64 | },
65 | ];
66 |
--------------------------------------------------------------------------------
/esm/client/html-to-dom.d.mts:
--------------------------------------------------------------------------------
1 | import type { DOMNode } from '../types';
2 |
3 | export default function HTMLDOMParser(html: string): DOMNode[];
4 |
--------------------------------------------------------------------------------
/esm/client/html-to-dom.mjs:
--------------------------------------------------------------------------------
1 | import ClientParser from '../../lib/client/html-to-dom.js';
2 |
3 | export default ClientParser.default || ClientParser;
4 |
--------------------------------------------------------------------------------
/esm/index.d.mts:
--------------------------------------------------------------------------------
1 | export { default } from './server/html-to-dom.mjs';
2 | export type * from './types.ts';
3 |
--------------------------------------------------------------------------------
/esm/index.mjs:
--------------------------------------------------------------------------------
1 | import HTMLDOMParser from '../lib/index.js';
2 |
3 | export default HTMLDOMParser.default || HTMLDOMParser;
4 |
--------------------------------------------------------------------------------
/esm/server/html-to-dom.d.mts:
--------------------------------------------------------------------------------
1 | import type { ParserOptions } from 'htmlparser2';
2 |
3 | import type { DOMNode } from '../types';
4 |
5 | export default function HTMLDOMParser(
6 | html: string,
7 | options?: ParserOptions,
8 | ): DOMNode[];
9 |
--------------------------------------------------------------------------------
/esm/server/html-to-dom.mjs:
--------------------------------------------------------------------------------
1 | import ServerParser from '../../lib/server/html-to-dom.js';
2 |
3 | export default ServerParser.default || ServerParser;
4 |
--------------------------------------------------------------------------------
/examples/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
35 |
--------------------------------------------------------------------------------
/karma.conf.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @see https://karma-runner.github.io/6.4/config/configuration-file.html
3 | */
4 | module.exports = (config) => {
5 | config.set({
6 | // base path that will be used to resolve all patterns (eg. files, exclude)
7 | basePath: '',
8 |
9 | // frameworks to use
10 | // available frameworks: https://npmjs.org/browse/keyword/karma-adapter
11 | frameworks: ['mocha', 'chai', 'commonjs'],
12 |
13 | // list of files / patterns to load in the browser
14 | files: [
15 | 'dist/*.js',
16 | 'lib/client/constants.js',
17 | 'lib/server/utilities.js',
18 | 'test/cases/html.js',
19 | 'test/client/*.js',
20 | 'test/helpers/*.js',
21 | ],
22 |
23 | // list of files / patterns to exclude
24 | exclude: [],
25 |
26 | // preprocess matching files before serving them to the browser
27 | // available preprocessors: https://npmjs.org/browse/keyword/karma-preprocessor
28 | preprocessors: {
29 | 'dist/*.js': ['commonjs'],
30 | 'lib/**/*.js': ['commonjs'],
31 | 'test/**/*.js': ['commonjs'],
32 | },
33 |
34 | // test results reporter to use
35 | // possible values: 'dots', 'progress'
36 | // available reporters: https://npmjs.org/browse/keyword/karma-reporter
37 | reporters: ['mocha', 'progress'],
38 |
39 | // web server port
40 | port: 9876,
41 |
42 | // enable / disable colors in the output (reporters and logs)
43 | colors: true,
44 |
45 | // level of logging
46 | // possible values: config.LOG_DISABLE || config.LOG_ERROR || config.LOG_WARN || config.LOG_INFO || config.LOG_DEBUG
47 | logLevel: config.LOG_INFO,
48 |
49 | // enable / disable watching file and executing tests whenever any file changes
50 | autoWatch: true,
51 |
52 | // Start these browsers, currently available:
53 | // - Chrome
54 | // - ChromeCanary
55 | // - Firefox
56 | // - Opera (has to be installed with `npm install karma-opera-launcher`)
57 | // - Safari (only Mac; has to be installed with `npm install karma-safari-launcher`)
58 | // - PhantomJS
59 | // - IE (only Windows; has to be installed with `npm install karma-ie-launcher`)
60 | // available browser launchers: https://npmjs.org/browse/keyword/karma-launcher
61 | browsers: ['Chrome'],
62 |
63 | // If browser does not capture in given timeout [ms], kill it
64 | captureTimeout: 60000,
65 |
66 | // Continuous Integration mode
67 | // if true, Karma captures browsers, runs the tests and exits
68 | singleRun: false,
69 |
70 | // Concurrency level
71 | // how many browser should be started simultaneous
72 | concurrency: Infinity,
73 |
74 | // Client options
75 | client: {
76 | mocha: {
77 | // change Karma's `debug.html` to the Mocha web reporter
78 | reporter: 'html',
79 | },
80 | },
81 |
82 | // Mocha reporter options
83 | // https://www.npmjs.com/package/karma-mocha-reporter
84 | mochaReporter: {
85 | showDiff: true,
86 | },
87 | });
88 | };
89 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "html-dom-parser",
3 | "version": "5.1.1",
4 | "description": "HTML to DOM parser.",
5 | "author": "Mark ",
6 | "main": "./lib/index.js",
7 | "module": "./esm/index.mjs",
8 | "exports": {
9 | ".": {
10 | "browser": {
11 | "require": "./lib/client/html-to-dom.js",
12 | "default": "./esm/client/html-to-dom.mjs"
13 | },
14 | "default": {
15 | "require": "./lib/index.js",
16 | "default": "./esm/index.mjs"
17 | }
18 | },
19 | "./lib/client/*": {
20 | "require": "./lib/client/*.js",
21 | "default": "./esm/client/*.mjs"
22 | },
23 | "./lib/server/*": {
24 | "require": "./lib/server/*.js",
25 | "default": "./esm/server/*.mjs"
26 | },
27 | "./package.json": "./package.json"
28 | },
29 | "scripts": {
30 | "build": "run-s build:*",
31 | "build:cjs": "tsc",
32 | "build:esm": "awk '!/sourceMappingURL/' lib/types.d.ts > esm/types.ts",
33 | "build:umd": "rollup --config --failAfterWarnings",
34 | "clean": "rm -rf .nyc_output coverage dist lib",
35 | "lint": "eslint .",
36 | "lint:fix": "npm run lint -- --fix",
37 | "lint:package": "publint",
38 | "lint:tsc": "tsc --noEmit",
39 | "prepare": "husky",
40 | "prepublishOnly": "run-s lint lint:tsc test:server clean build lint:package",
41 | "size-limit": "size-limit",
42 | "test": "run-s test:server test:client",
43 | "test:client": "npm run test:client:watch -- --single-run",
44 | "test:client:build": "NODE_ENV=test npm run build",
45 | "test:client:watch": "npm run test:client:build && karma start",
46 | "test:esm": "npm run build:cjs && node --test test/esm",
47 | "test:server": "npm run build:cjs && nyc mocha"
48 | },
49 | "repository": {
50 | "type": "git",
51 | "url": "git+https://github.com/remarkablemark/html-dom-parser.git"
52 | },
53 | "bugs": {
54 | "url": "https://github.com/remarkablemark/html-dom-parser/issues"
55 | },
56 | "keywords": [
57 | "html-dom-parser",
58 | "html",
59 | "dom",
60 | "parser",
61 | "htmlparser2",
62 | "pojo"
63 | ],
64 | "dependencies": {
65 | "domhandler": "5.0.3",
66 | "htmlparser2": "10.0.0"
67 | },
68 | "devDependencies": {
69 | "@commitlint/cli": "19.8.1",
70 | "@commitlint/config-conventional": "19.8.1",
71 | "@eslint/compat": "1.2.9",
72 | "@eslint/eslintrc": "3.3.1",
73 | "@eslint/js": "9.28.0",
74 | "@rollup/plugin-alias": "5.1.1",
75 | "@rollup/plugin-commonjs": "28.0.3",
76 | "@rollup/plugin-node-resolve": "16.0.1",
77 | "@rollup/plugin-terser": "0.4.4",
78 | "@rollup/plugin-typescript": "12.1.2",
79 | "@size-limit/preset-big-lib": "11.2.0",
80 | "@types/chai": "4.3.16",
81 | "@types/estree": "1.0.8",
82 | "@types/mocha": "10.0.10",
83 | "@types/node": "22.15.30",
84 | "@typescript-eslint/eslint-plugin": "8.33.1",
85 | "@typescript-eslint/parser": "8.33.1",
86 | "chai": "4.5.0",
87 | "eslint": "9.28.0",
88 | "eslint-plugin-prettier": "5.4.1",
89 | "eslint-plugin-simple-import-sort": "12.1.1",
90 | "globals": "16.2.0",
91 | "html-minifier": "4.0.0",
92 | "husky": "9.1.7",
93 | "jsdomify": "3.1.1",
94 | "karma": "6.4.4",
95 | "karma-chai": "0.1.0",
96 | "karma-chrome-launcher": "3.2.0",
97 | "karma-commonjs": "1.0.0",
98 | "karma-mocha": "2.0.1",
99 | "karma-mocha-reporter": "2.2.5",
100 | "lint-staged": "16.1.0",
101 | "mocha": "11.5.0",
102 | "npm-run-all2": "8.0.4",
103 | "nyc": "17.1.0",
104 | "prettier": "3.5.3",
105 | "publint": "0.3.12",
106 | "rollup": "4.41.1",
107 | "size-limit": "11.2.0",
108 | "ts-node": "10.9.2",
109 | "typescript": "5.8.3"
110 | },
111 | "files": [
112 | "/dist",
113 | "/esm",
114 | "/lib",
115 | "/src"
116 | ],
117 | "browser": {
118 | "./lib/index.js": "./lib/client/html-to-dom.js",
119 | "./esm/index.mjs": "./esm/client/html-to-dom.mjs"
120 | },
121 | "react-native": {
122 | "./lib/index.js": "./lib/server/html-to-dom.js"
123 | },
124 | "license": "MIT"
125 | }
126 |
--------------------------------------------------------------------------------
/rollup.config.mjs:
--------------------------------------------------------------------------------
1 | import alias from '@rollup/plugin-alias';
2 | import commonjs from '@rollup/plugin-commonjs';
3 | import resolve from '@rollup/plugin-node-resolve';
4 | import terser from '@rollup/plugin-terser';
5 | import typescript from '@rollup/plugin-typescript';
6 |
7 | const getConfig = (minify = false) => ({
8 | input: 'src/index.ts',
9 |
10 | output: {
11 | file: `dist/html-dom-parser${minify ? '.min' : ''}.js`,
12 | format: 'umd',
13 | name: 'HTMLDOMParser',
14 | sourcemap: true,
15 | },
16 |
17 | plugins: [
18 | alias({
19 | entries: [
20 | {
21 | find: './server/html-to-dom',
22 | replacement: './client/html-to-dom',
23 | },
24 | ],
25 | }),
26 |
27 | typescript({
28 | declaration: false,
29 | declarationMap: false,
30 | module: 'esnext',
31 | compilerOptions: {
32 | outDir: 'dist',
33 | },
34 | }),
35 |
36 | commonjs(),
37 | resolve({ browser: true }),
38 | minify && terser(),
39 | ],
40 | });
41 |
42 | const configs = [getConfig(), getConfig(true)];
43 |
44 | if (process.env.NODE_ENV === 'test') {
45 | configs.push({
46 | input: 'node_modules/htmlparser2',
47 | output: {
48 | file: 'dist/htmlparser2.js',
49 | format: 'umd',
50 | name: 'htmlparser2',
51 | sourcemap: true,
52 | },
53 | plugins: [commonjs(), resolve({ browser: true })],
54 | });
55 | }
56 |
57 | export default configs;
58 |
--------------------------------------------------------------------------------
/src/client/constants.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * SVG elements are case-sensitive.
3 | *
4 | * @see https://developer.mozilla.org/docs/Web/SVG/Element#svg_elements_a_to_z
5 | */
6 | export const CASE_SENSITIVE_TAG_NAMES = [
7 | 'animateMotion',
8 | 'animateTransform',
9 | 'clipPath',
10 | 'feBlend',
11 | 'feColorMatrix',
12 | 'feComponentTransfer',
13 | 'feComposite',
14 | 'feConvolveMatrix',
15 | 'feDiffuseLighting',
16 | 'feDisplacementMap',
17 | 'feDropShadow',
18 | 'feFlood',
19 | 'feFuncA',
20 | 'feFuncB',
21 | 'feFuncG',
22 | 'feFuncR',
23 | 'feGaussianBlur',
24 | 'feImage',
25 | 'feMerge',
26 | 'feMergeNode',
27 | 'feMorphology',
28 | 'feOffset',
29 | 'fePointLight',
30 | 'feSpecularLighting',
31 | 'feSpotLight',
32 | 'feTile',
33 | 'feTurbulence',
34 | 'foreignObject',
35 | 'linearGradient',
36 | 'radialGradient',
37 | 'textPath',
38 | ] as const;
39 |
40 | export const CASE_SENSITIVE_TAG_NAMES_MAP = CASE_SENSITIVE_TAG_NAMES.reduce(
41 |   (accumulator, tagName) => {
42 |     accumulator[tagName.toLowerCase()] = tagName; // lowercase name -> case-sensitive name
43 |     return accumulator;
44 |   },
45 |   {} as Record<string, string>,
46 | );
47 |
48 | export const CARRIAGE_RETURN = '\r';
49 | export const CARRIAGE_RETURN_REGEX = new RegExp(CARRIAGE_RETURN, 'g');
50 | export const CARRIAGE_RETURN_PLACEHOLDER = `__HTML_DOM_PARSER_CARRIAGE_RETURN_PLACEHOLDER_${Date.now()}__`;
51 | export const CARRIAGE_RETURN_PLACEHOLDER_REGEX = new RegExp(
52 | CARRIAGE_RETURN_PLACEHOLDER,
53 | 'g',
54 | );
55 |
--------------------------------------------------------------------------------
/src/client/domparser.ts:
--------------------------------------------------------------------------------
1 | import { escapeSpecialCharacters } from './utilities';
2 |
3 | // constants
4 | const HTML = 'html';
5 | const HEAD = 'head';
6 | const BODY = 'body';
7 | const FIRST_TAG_REGEX = /<([a-zA-Z]+[0-9]?)/; // e.g., <h1>
8 |
9 | // `[^]` matches any character, newlines included (DOTALL), so multi-line opening tags are detected
10 | const HEAD_TAG_REGEX = /<head[^]*>/i;
11 | const BODY_TAG_REGEX = /<body[^]*>/i;
12 |
13 | // falls back to `parseFromString` if `createHTMLDocument` cannot be used
14 | // eslint-disable-next-line @typescript-eslint/no-unused-vars
15 | let parseFromDocument = (html: string, tagName?: string): Document => {
16 | /* istanbul ignore next */
17 | throw new Error(
18 | 'This browser does not support `document.implementation.createHTMLDocument`',
19 | );
20 | };
21 |
22 | // eslint-disable-next-line @typescript-eslint/no-unused-vars
23 | let parseFromString = (html: string, tagName?: string): Document => {
24 | /* istanbul ignore next */
25 | throw new Error(
26 | 'This browser does not support `DOMParser.prototype.parseFromString`',
27 | );
28 | };
29 |
30 | const DOMParser = typeof window === 'object' && window.DOMParser;
31 |
32 | /**
33 | * DOMParser (performance: slow).
34 | *
35 | * @see https://developer.mozilla.org/docs/Web/API/DOMParser#Parsing_an_SVG_or_HTML_document
36 | */
37 | if (typeof DOMParser === 'function') {
38 | const domParser = new DOMParser();
39 | const mimeType = 'text/html';
40 |
41 | /**
42 |  * Creates an HTML document using `DOMParser.parseFromString`.
43 |  *
44 |  * @param html - The HTML string.
45 |  * @param tagName - Optional tag name; when given, the HTML is wrapped in `<tagName>…</tagName>` before parsing.
46 |  * @returns - Document.
47 |  */
48 | parseFromString = (html: string, tagName?: string): Document => {
49 |   if (tagName) {
50 |     /* istanbul ignore next */
51 |     html = `<${tagName}>${html}</${tagName}>`;
52 |   }
53 |
54 |   return domParser.parseFromString(html, mimeType);
55 | };
56 |
57 | parseFromDocument = parseFromString;
58 | }
59 |
60 | /**
61 | * DOMImplementation (performance: fair).
62 | *
63 | * @see https://developer.mozilla.org/docs/Web/API/DOMImplementation/createHTMLDocument
64 | */
65 | if (typeof document === 'object' && document.implementation) {
66 | const htmlDocument = document.implementation.createHTMLDocument();
67 |
68 | /**
69 | * Use HTML document created by `document.implementation.createHTMLDocument`.
70 | *
71 | * @param html - The HTML string.
72 | * @param tagName - The element to render the HTML (with 'body' as fallback).
73 | * @returns - Document
74 | */
75 | parseFromDocument = function (html: string, tagName?: string): Document {
76 |   // no target tag: replace the markup of the whole document element
77 |   if (!tagName) {
78 |     htmlDocument.documentElement.innerHTML = html;
79 |     return htmlDocument;
80 |   }
81 |
82 |   // otherwise render into the matching element, when the document has one
83 |   const target = htmlDocument.documentElement.querySelector(tagName);
84 |   if (target) {
85 |     target.innerHTML = html;
86 |   }
87 |   return htmlDocument;
88 | };
89 | }
90 |
91 | /**
92 | * Template (performance: fast).
93 | *
94 | * @see https://developer.mozilla.org/docs/Web/HTML/Element/template
95 | */
96 | const template =
97 | typeof document === 'object' && document.createElement('template');
98 |
99 | let parseFromTemplate: (html: string) => NodeList;
100 |
101 | if (template && template.content) {
102 | /**
103 | * Uses a template element (content fragment) to parse HTML.
104 | *
105 | * @param html - HTML string.
106 | * @returns - Nodes.
107 | */
108 | parseFromTemplate = (html: string): NodeList => {
109 | template.innerHTML = html;
110 | return template.content.childNodes;
111 | };
112 | }
113 |
114 | /**
115 | * Parses HTML string to DOM nodes.
116 | *
117 | * @param html - HTML markup.
118 | * @returns - DOM nodes.
119 | */
120 | export default function domparser(html: string): NodeList {
121 |   // special characters are escaped before the markup reaches any parser
122 |   html = escapeSpecialCharacters(html);
123 |
124 |   const firstTag = html.match(FIRST_TAG_REGEX);
125 |   const firstTagName = firstTag && firstTag[1] ? firstTag[1].toLowerCase() : '';
126 |
127 |   // full document
128 |   if (firstTagName === HTML) {
129 |     const doc = parseFromString(html);
130 |
131 |     // the parser may add filler head/body elements; drop whichever one
132 |     // the original markup did not contain
133 |     if (!HEAD_TAG_REGEX.test(html)) {
134 |       const headElement = doc.querySelector(HEAD);
135 |       headElement?.parentNode?.removeChild(headElement);
136 |     }
137 |
138 |     if (!BODY_TAG_REGEX.test(html)) {
139 |       const bodyElement = doc.querySelector(BODY);
140 |       bodyElement?.parentNode?.removeChild(bodyElement);
141 |     }
142 |
143 |     return doc.querySelectorAll(HTML);
144 |   }
145 |
146 |   // head or body fragment
147 |   if (firstTagName === HEAD || firstTagName === BODY) {
148 |     const elements = parseFromDocument(html).querySelectorAll(firstTagName);
149 |     const hasSibling = BODY_TAG_REGEX.test(html) && HEAD_TAG_REGEX.test(html);
150 |
151 |     // when both head and body are present, return the pair of siblings
152 |     return hasSibling ? elements[0].parentNode!.childNodes : elements;
153 |   }
154 |
155 |   // low-level tag or text: use the template parser when it was set up
156 |   if (parseFromTemplate) {
157 |     return parseFromTemplate(html);
158 |   }
159 |
160 |   const container = parseFromDocument(html, BODY).querySelector(BODY);
161 |   return container!.childNodes;
162 | }
168 |
--------------------------------------------------------------------------------
/src/client/html-to-dom.ts:
--------------------------------------------------------------------------------
1 | import domparser from './domparser';
2 | import { formatDOM } from './utilities';
3 |
4 | const DIRECTIVE_REGEX = /<(![a-zA-Z\s]+)>/; // e.g., <!doctype html>
5 |
6 | /**
7 | * Parses HTML string to DOM nodes in browser.
8 | *
9 | * @param html - HTML markup.
10 | * @returns - DOM elements.
11 | */
12 | export default function HTMLDOMParser(html: string) {
13 |   if (typeof html !== 'string') {
14 |     throw new TypeError('First argument must be a string');
15 |   }
16 |
17 |   // an empty string has nothing to parse
18 |   if (html === '') {
19 |     return [];
20 |   }
21 |
22 |   // capture the directive, if any, so it can be handed to the formatter
23 |   const directiveMatch = DIRECTIVE_REGEX.exec(html);
24 |   const directive = directiveMatch ? directiveMatch[1] : undefined;
25 |   return formatDOM(domparser(html), null, directive);
26 | }
27 |
--------------------------------------------------------------------------------
/src/client/utilities.ts:
--------------------------------------------------------------------------------
1 | import { Comment, Element, ProcessingInstruction, Text } from 'domhandler';
2 |
3 | import type { DOMNode } from '../types';
4 | import {
5 | CARRIAGE_RETURN,
6 | CARRIAGE_RETURN_PLACEHOLDER,
7 | CARRIAGE_RETURN_PLACEHOLDER_REGEX,
8 | CARRIAGE_RETURN_REGEX,
9 | CASE_SENSITIVE_TAG_NAMES_MAP,
10 | } from './constants';
11 |
12 | /**
13 | * Looks up the case-sensitive spelling of a tag name in `CASE_SENSITIVE_TAG_NAMES_MAP`.
14 | *
15 | * @param tagName - Tag name in lowercase (used as the lookup key).
16 | * @returns - Case-sensitive tag name, or `undefined` when the map has no entry for it.
17 | */
18 | function getCaseSensitiveTagName(tagName: string): string | undefined {
19 | return CASE_SENSITIVE_TAG_NAMES_MAP[tagName];
20 | }
21 |
22 | /**
23 |  * Formats DOM attributes to a hash map.
24 |  *
25 |  * @param attributes - List of attributes (array-like `NamedNodeMap`).
26 |  * @returns - Map of attribute name to value, with escaped characters reverted.
27 |  */
28 | export function formatAttributes(attributes: NamedNodeMap) {
29 |   const map: Record<string, string> = {};
30 |   let index = 0;
31 |   const attributesLength = attributes.length;
32 |
33 |   // the whole markup is escaped before parsing, so values may hold placeholders
34 |   for (; index < attributesLength; index++) {
35 |     const attribute = attributes[index];
36 |     map[attribute.name] = revertEscapedCharacters(attribute.value);
37 |   }
38 |
39 |   return map;
40 | }
41 |
42 | /**
43 |  * Normalizes a tag name: lowercases it, then swaps in the case-sensitive
44 |  * spelling (SVG) when one is registered; otherwise keeps the lowercase name (HTML).
45 |  *
46 |  * @param tagName - Tag name in any letter case.
47 |  * @returns - Formatted tag name.
48 |  */
49 | function formatTagName(tagName: string): string {
50 |   const lowerCaseTagName = tagName.toLowerCase();
51 |   const caseSensitiveTagName = getCaseSensitiveTagName(lowerCaseTagName);
52 |
53 |   // a missing (or empty) lookup result falls back to the lowercase name
54 |   if (!caseSensitiveTagName) {
55 |     return lowerCaseTagName;
56 |   }
57 |   return caseSensitiveTagName;
58 | }
59 |
60 | /**
61 | * Escapes special characters before parsing by swapping text matched by
62 | * `CARRIAGE_RETURN_REGEX` for `CARRIAGE_RETURN_PLACEHOLDER`.
63 | * @param html - The HTML string.
64 | * @returns - HTML string with escaped special characters.
65 | */
66 | export function escapeSpecialCharacters(html: string): string {
67 | return html.replace(CARRIAGE_RETURN_REGEX, CARRIAGE_RETURN_PLACEHOLDER);
68 | }
69 |
70 | /**
71 | * Reverts escaped special characters back to actual characters by swapping text
72 | * matched by `CARRIAGE_RETURN_PLACEHOLDER_REGEX` for `CARRIAGE_RETURN`.
73 | * @param text - The text with escaped characters.
74 | * @returns - Text with escaped characters reverted.
75 | */
76 | export function revertEscapedCharacters(text: string): string {
77 | return text.replace(CARRIAGE_RETURN_PLACEHOLDER_REGEX, CARRIAGE_RETURN);
78 | }
79 |
80 | /**
81 |  * Transforms DOM nodes to `domhandler` nodes, linking siblings and parent.
82 |  *
83 |  * @param nodes - DOM nodes; only elements, text, and comments are kept.
84 |  * @param parent - Parent node assigned to every returned node.
85 |  * @param directive - Directive (e.g., `!doctype html`) to prepend, if any.
86 |  * @returns - Formatted nodes, in their original order.
87 |  */
88 | export function formatDOM(
89 |   nodes: NodeList,
90 |   parent: DOMNode | null = null,
91 |   directive?: string,
92 | ): DOMNode[] {
93 |   const domNodes = [];
94 |   let current;
95 |   let index = 0;
96 |   const nodesLength = nodes.length;
97 |
98 |   for (; index < nodesLength; index++) {
99 |     const node = nodes[index];
100 |
101 |     // set the node data given the type
102 |     switch (node.nodeType) {
103 |       case 1: {
104 |         const tagName = formatTagName(node.nodeName);
105 |
106 |         // script, style, or tag
107 |         current = new Element(
108 |           tagName,
109 |           formatAttributes((node as HTMLElement).attributes),
110 |         );
111 |
112 |         current.children = formatDOM(
113 |           // template children are on content
114 |           tagName === 'template'
115 |             ? (node as HTMLTemplateElement).content.childNodes
116 |             : node.childNodes,
117 |           current,
118 |         );
119 |
120 |         break;
121 |       }
122 |
123 |       case 3:
124 |         current = new Text(revertEscapedCharacters(node.nodeValue!));
125 |         break;
126 |
127 |       case 8: // comments are escaped along with the rest of the markup
128 |         current = new Comment(revertEscapedCharacters(node.nodeValue!));
129 |         break;
130 |
131 |       default: // unsupported node type: skipped, so `index` may outrun `domNodes`
132 |         continue;
133 |     }
134 |
135 |     // link to the last kept node (not `index - 1`: skipped nodes leave gaps)
136 |     const prev = domNodes[domNodes.length - 1] || null;
137 |     if (prev) {
138 |       prev.next = current;
139 |     }
140 |
141 |     // set properties for current node
142 |     current.parent = parent as Element;
143 |     current.prev = prev;
144 |     current.next = null;
145 |
146 |     domNodes.push(current);
147 |   }
148 |
149 |   if (directive) {
150 |     current = new ProcessingInstruction(
151 |       directive.replace(/\s[\s\S]*$/, '').toLowerCase(), // name ends at first whitespace
152 |       directive,
153 |     );
154 |
155 |     current.next = domNodes[0] || null;
156 |     current.parent = parent as Element;
157 |     domNodes.unshift(current);
158 |
159 |     if (domNodes[1]) {
160 |       domNodes[1].prev = domNodes[0];
161 |     }
162 |   }
163 |
164 |   return domNodes;
165 | }
166 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * When running on Node.js, use the server parser.
3 | * When bundling for the browser, use the client parser.
4 | *
5 | * @see https://github.com/substack/node-browserify#browser-field
6 | */
7 | export { default } from './server/html-to-dom';
8 | export * from './types';
9 |
--------------------------------------------------------------------------------
/src/server/html-to-dom.ts:
--------------------------------------------------------------------------------
1 | import { DomHandler } from 'domhandler';
2 | import type { ParserOptions } from 'htmlparser2';
3 | import { Parser } from 'htmlparser2';
4 |
5 | import { unsetRootParent } from './utilities';
6 |
7 | /**
8 |  * Parses an HTML string into DOM nodes on the server (Node.js).
9 |  *
10 |  * Follows the same steps as `require('htmlparser2').parseDOM`.
11 |  *
12 |  * @see https://github.com/fb55/htmlparser2/blob/v9.0.0/src/index.ts#L44-L46
13 |  * @see https://github.com/fb55/domhandler/tree/v5.0.3#readme
14 |  *
15 |  * @param html - HTML markup; anything other than a string throws `TypeError`.
16 |  * @param options - Options handed to both the handler and the parser.
17 |  * @returns - DOM nodes (an empty array for an empty string).
18 |  */
19 | export default function HTMLDOMParser(html: string, options?: ParserOptions) {
20 |   if (typeof html !== 'string') {
21 |     throw new TypeError('First argument must be a string.');
22 |   }
23 |
24 |   if (html === '') {
25 |     return [];
26 |   }
27 |
28 |   const domHandler = new DomHandler(undefined, options);
29 |   new Parser(domHandler, options).end(html);
30 |   return unsetRootParent(domHandler.dom);
31 | }
32 |
--------------------------------------------------------------------------------
/src/server/utilities.ts:
--------------------------------------------------------------------------------
1 | import type { ChildNode } from 'domhandler';
2 |
3 | import type { DOMNode } from '../types';
4 |
5 | /**
6 |  * Clears the `parent` reference of every node in the list.
7 |  *
8 |  * @param nodes - Nodes to update; the array is modified in place.
9 |  * @returns - The same array that was passed in.
10 |  */
11 | export function unsetRootParent(nodes: ChildNode[]): DOMNode[] {
12 |   // read the length once, then walk the array by position
13 |   const total = nodes.length;
14 |
15 |   for (let position = 0; position < total; position += 1) {
16 |     nodes[position].parent = null;
17 |   }
18 |
19 |   // same array instance, returned under the `DOMNode[]` type
20 |   return nodes as DOMNode[];
21 | }
22 |
--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------
1 | import type { Comment, Element, ProcessingInstruction, Text } from 'domhandler';
2 |
3 | export type { Comment, Element, ProcessingInstruction, Text };
4 | // Union of the four `domhandler` node types imported in this file.
5 | export type DOMNode = Comment | Element | ProcessingInstruction | Text;
6 |
--------------------------------------------------------------------------------
/test/cases/complex.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Page Title
6 |
7 |
8 |
13 |
14 |
15 |
18 | bar
19 |
20 | baz qux
21 | link
22 |
23 |
24 |
28 |
29 |
30 | baz
31 |
32 |
33 |
--------------------------------------------------------------------------------
/test/cases/complex.svg:
--------------------------------------------------------------------------------
1 |
11 |
--------------------------------------------------------------------------------
/test/cases/html.js:
--------------------------------------------------------------------------------
1 | module.exports = [
2 | // html tags
3 | {
4 | name: 'empty html',
5 | data: '',
6 | },
7 | {
8 | name: 'html with attribute',
9 | data: '',
10 | },
11 | {
12 | name: 'html with empty head and body',
13 | data: '',
14 | },
15 | {
16 | name: 'html with empty head',
17 | data: '',
18 | },
19 | {
20 | name: 'html with empty body',
21 | data: '',
22 | },
23 | {
24 | name: 'unclosed html and head tags',
25 | data: '',
26 | },
27 | {
28 | name: 'unclosed html and body tags',
29 | data: '',
30 | },
31 | {
32 | name: 'unclosed html, head, and body tags',
33 | data: '',
34 | },
35 |
36 | // head and body tags
37 | {
38 | name: 'unclosed head',
39 | data: '',
40 | },
41 | {
42 | name: 'empty head',
43 | data: '',
44 | },
45 | {
46 | name: 'head with title',
47 | data: 'Text',
48 | },
49 | {
50 | name: 'empty head and body',
51 | data: '',
52 | },
53 | {
54 | name: 'unclosed head and body',
55 | data: '',
56 | },
57 | {
58 | name: 'unclosed title',
59 | data: '',
60 | },
61 | {
62 | name: 'empty title',
63 | data: '',
64 | },
65 | {
66 | name: 'title with text',
67 | data: 'text',
68 | },
69 | {
70 | name: 'title with text as tags',
71 | data: 'text',
72 | },
73 | {
74 | name: 'unclosed body',
75 | data: '',
76 | },
77 | {
78 | name: 'empty body',
79 | data: '',
80 | },
81 | {
82 | name: 'capitalized body',
83 | data: '',
84 | },
85 | {
86 | name: 'body with paragraph',
87 | data: 'text
',
88 | },
89 | {
90 | name: 'head and body with newline',
91 | data: 'text',
92 | },
93 | {
94 | name: 'head and body with whitespace and newlines',
95 | data: 'hellotext',
96 | },
97 | {
98 | name: 'body with whitespace and newline',
99 | data: 'text',
100 | },
101 |
102 | // common tags
103 | {
104 | name: 'empty div',
105 | data: '',
106 | },
107 | {
108 | name: 'empty paragraph',
109 | data: '',
110 | },
111 | {
112 | name: 'paragraph with text',
113 | data: 'text
',
114 | },
115 | {
116 | name: 'meta with attribute',
117 | data: '',
118 | },
119 | {
120 | name: 'meta with closing tag',
121 | data: '',
122 | },
123 | {
124 | name: 'textarea with value',
125 | data: '',
126 | },
127 | {
128 | name: 'multiple spans',
129 | data: '12',
130 | },
131 |
132 | // void (self-closing) tags
133 | {
134 | name: 'void',
135 | data: '
',
136 | },
137 | {
138 | name: 'self-closing void',
139 | data: '
',
140 | },
141 | {
142 | name: 'input with attributes',
143 | data: '',
144 | },
145 | {
146 | name: 'image',
147 | data: '
',
148 | },
149 | {
150 | name: 'multiple void',
151 | data: '
',
152 | },
153 |
154 | // tag attributes
155 | {
156 | name: 'h1 with id attribute',
157 | data: '',
158 | },
159 | {
160 | name: 'h2 with class attribute',
161 | data: '',
162 | },
163 | {
164 | name: 'em with style attribute',
165 | data: '',
166 | },
167 | {
168 | name: 'data attribute',
169 | data: '',
170 | },
171 | {
172 | name: 'event attribute',
173 | data: '',
174 | },
175 | {
176 | name: 'span with multiple attributes',
177 | data: '',
178 | },
179 | {
180 | name: 'hr with multiple attributes',
181 | data: '
',
182 | },
183 |
184 | // adjacent tags
185 | {
186 | name: 'sibling',
187 | data: 'brothersister',
188 | },
189 |
190 | // nested tags
191 | {
192 | name: 'nested definition list',
193 | data: '- foo
- barbaz
',
194 | },
195 | {
196 | name: 'nested unordered list',
197 | data: '',
198 | },
199 |
200 | // script tag
201 | {
202 | name: 'empty script',
203 | data: '',
204 | },
205 | {
206 | name: 'script',
207 | data: '',
208 | },
209 | {
210 | name: 'script with json',
211 | data: '',
212 | },
213 |
214 | // noscript tag
215 | {
216 | name: 'empty noscript',
217 | data: '',
218 | },
219 | {
220 | name: 'noscript with text',
221 | data: '',
222 | },
223 | {
224 | name: 'noscript with p',
225 | data: '',
226 | get skip() {
227 | // client parser renders noscript incorrectly in jsdom
228 | // template renders noscript children as text instead of nodes
229 | var isJSDOM = typeof window === 'object' && window.name === 'nodejs';
230 | return isJSDOM;
231 | },
232 | },
233 |
234 | // template tag
235 | {
236 | name: 'empty template',
237 | data: '',
238 | },
239 | {
240 | name: 'template with content',
241 | data: 'Test
',
242 | },
243 |
244 | // style tag
245 | {
246 | name: 'empty style',
247 | data: '',
248 | },
249 | {
250 | name: 'style',
251 | data: '',
252 | },
253 |
254 | // html5 tags
255 | {
256 | name: 'audio',
257 | data: '