├── .commitlintrc.json ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── question.md ├── PULL_REQUEST_TEMPLATE.md ├── SECURITY.md ├── dependabot.yml ├── mergify.yml └── workflows │ ├── assign-reviewer.yml │ ├── build.yml │ ├── commitlint.yml │ ├── lint.yml │ ├── release-please.yml │ ├── size-limit.yml │ └── test.yml ├── .gitignore ├── .husky ├── commit-msg └── pre-commit ├── .lintstagedrc.json ├── .mocharc.json ├── .nvmrc ├── .prettierrc.json ├── .size-limit.json ├── CHANGELOG.md ├── LICENSE ├── README.md ├── eslint.config.mjs ├── esm ├── client │ ├── html-to-dom.d.mts │ └── html-to-dom.mjs ├── index.d.mts ├── index.mjs └── server │ ├── html-to-dom.d.mts │ └── html-to-dom.mjs ├── examples └── index.html ├── karma.conf.js ├── package-lock.json ├── package.json ├── rollup.config.mjs ├── src ├── client │ ├── constants.ts │ ├── domparser.ts │ ├── html-to-dom.ts │ └── utilities.ts ├── index.ts ├── server │ ├── html-to-dom.ts │ └── utilities.ts └── types.ts ├── test ├── cases │ ├── complex.html │ ├── complex.svg │ ├── html.js │ ├── index.d.ts │ └── index.js ├── client │ └── index.js ├── esm │ ├── client.test.mjs │ ├── index.test.mjs │ └── server.test.mjs ├── helpers │ ├── cycle.js │ ├── index.d.ts │ ├── index.js │ ├── run-tests.js │ ├── test-case-sensitive-tags.js │ └── throw-errors.js ├── server │ ├── client.test.ts │ ├── index.test.ts │ └── server.test.ts └── types │ └── index.test.ts └── tsconfig.json /.commitlintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["@commitlint/config-conventional"], 3 | "rules": { 4 | "body-max-line-length": [1, "always", 100] 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @remarkablemark 2 | 3 | /package.json 4 | /package-lock.json 5 | 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Report a bug 4 | labels: bug 5 | assignees: remarkablemark 6 | --- 7 | 8 | ## Expected Behavior 9 | 10 | 11 | 12 | ## Actual Behavior 13 | 14 | 15 | 16 | ## Steps to Reproduce 17 | 18 | 19 | 20 | ## Reproducible Demo 21 | 22 | 28 | 29 | ## Environment 30 | 31 | - Version: 32 | - Platform: 33 | - Browser: 34 | - OS: 35 | 36 | ## Keywords 37 | 38 | 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Suggest improvements or new features 4 | labels: feature 5 | assignees: remarkablemark 6 | --- 7 | 8 | ## Problem 9 | 10 | 11 | 12 | ## Suggested Solution 13 | 14 | 15 | 16 | ## Keywords 17 | 18 | 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask a question 4 | labels: question 5 | assignees: remarkablemark 6 | --- 7 | 8 | ## Question 9 | 10 | 11 | 12 | ## Keywords 13 | 14 | 15 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | ## What is the motivation for this pull request? 6 | 7 | 8 | 9 | ## What is the current behavior? 10 | 11 | 12 | 13 | ## What is the new behavior? 
14 | 15 | 16 | 17 | ## Checklist: 18 | 19 | 23 | 24 | - [ ] [Conventional Commits](https://www.conventionalcommits.org/) 25 | - [ ] Tests 26 | - [ ] [Types](https://arethetypeswrong.github.io/) 27 | - [ ] Documentation 28 | 29 | 32 | -------------------------------------------------------------------------------- /.github/SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Only the current version is supported. Please make sure to update to the latest release. 6 | 7 | ## Reporting a Vulnerability 8 | 9 | To report a security vulnerability, please use the [Tidelift security contact](https://tidelift.com/security). Tidelift will coordinate the fix and disclosure. 10 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: 'npm' 9 | directory: '/' 10 | schedule: 11 | interval: 'daily' 12 | groups: 13 | commitlint: 14 | patterns: 15 | - '@commitlint/*' 16 | eslint: 17 | patterns: 18 | - '@eslint/*' 19 | prettier: 20 | patterns: 21 | - prettier 22 | - eslint-plugin-prettier 23 | typescript-eslint: 24 | patterns: 25 | - '@typescript-eslint/*' 26 | 27 | - package-ecosystem: 'github-actions' 28 | directory: '/' 29 | schedule: 30 | interval: 'daily' 31 | -------------------------------------------------------------------------------- /.github/mergify.yml: -------------------------------------------------------------------------------- 1 | pull_request_rules: 2 | - name: automatic merge for Dependabot pull requests 3 | conditions: 4 | - author=dependabot[bot] 5 | - check-success=build 6 | - check-success=commitlint 7 | - check-success=lint 8 | - check-success=size 9 | - check-success=test 10 | - 'title~=^build\(deps-dev\): bump ' 11 | actions: 12 | merge: 13 | method: squash 14 | -------------------------------------------------------------------------------- /.github/workflows/assign-reviewer.yml: -------------------------------------------------------------------------------- 1 | name: Assign Reviewer 2 | on: pull_request_target 3 | 4 | jobs: 5 | assign-reviewer: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - name: Assign reviewer 9 | if: > 10 | startsWith(github.event.pull_request.title, 'build(deps-dev): bump ') == false && 11 | contains(github.event.action, 'opened') 12 | run: > 13 | gh pr edit ${{ github.event.pull_request.html_url }} 14 | --add-assignee ${{ github.event.pull_request.user.login }} 15 | --add-reviewer remarkablemark 16 | env: 17 | GITHUB_TOKEN: ${{ github.token }} 18 | continue-on-error: true 19 | 
-------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | build: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - name: Checkout repository 9 | uses: actions/checkout@v4 10 | 11 | - name: Use Node.js 12 | uses: actions/setup-node@v4 13 | with: 14 | cache: npm 15 | node-version-file: .nvmrc 16 | 17 | - name: Install dependencies 18 | run: npm ci --prefer-offline 19 | 20 | - name: Build package 21 | run: npm run build 22 | 23 | - name: Lint package 24 | run: npm run lint:package 25 | -------------------------------------------------------------------------------- /.github/workflows/commitlint.yml: -------------------------------------------------------------------------------- 1 | name: commitlint 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | commitlint: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - name: Checkout repository 9 | uses: actions/checkout@v4 10 | with: 11 | fetch-depth: 0 12 | 13 | - name: Use Node.js 14 | uses: actions/setup-node@v4 15 | with: 16 | cache: npm 17 | node-version-file: .nvmrc 18 | 19 | - name: Install dependencies 20 | run: npm ci --prefer-offline 21 | 22 | - name: Lint commit message 23 | run: npx commitlint --from=HEAD~1 24 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | lint: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - name: Checkout repository 9 | uses: actions/checkout@v4 10 | 11 | - name: Use Node.js 12 | uses: actions/setup-node@v4 13 | with: 14 | cache: npm 15 | node-version-file: .nvmrc 16 | 17 | - name: Install dependencies 18 | run: npm ci --prefer-offline 19 | 20 | - name: Run ESLint 21 | run: npm run lint 22 | 23 | - name: Type check 24 
| run: npm run lint:tsc 25 | -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- 1 | name: release-please 2 | on: 3 | push: 4 | branches: 5 | - master 6 | 7 | jobs: 8 | release-please: 9 | runs-on: ubuntu-latest 10 | outputs: 11 | release_created: ${{ steps.release.outputs.release_created }} 12 | 13 | steps: 14 | - name: Release Please 15 | uses: googleapis/release-please-action@v4 16 | id: release 17 | with: 18 | release-type: node 19 | 20 | publish: 21 | runs-on: ubuntu-latest 22 | permissions: 23 | contents: read 24 | id-token: write 25 | needs: release-please 26 | if: ${{ needs.release-please.outputs.release_created }} 27 | 28 | steps: 29 | - name: Checkout repository 30 | uses: actions/checkout@v4 31 | 32 | - name: Use Node.js 33 | uses: actions/setup-node@v4 34 | with: 35 | node-version-file: .nvmrc 36 | registry-url: https://registry.npmjs.org 37 | 38 | - name: Install dependencies 39 | run: npm ci --prefer-offline 40 | 41 | - name: Publish 42 | run: npm publish --provenance --access public 43 | env: 44 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 45 | -------------------------------------------------------------------------------- /.github/workflows/size-limit.yml: -------------------------------------------------------------------------------- 1 | name: size 2 | on: 3 | pull_request: 4 | branches: 5 | - master 6 | jobs: 7 | size: 8 | runs-on: ubuntu-latest 9 | env: 10 | CI_JOB_NUMBER: 1 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: andresz1/size-limit-action@v1 14 | with: 15 | github_token: ${{ secrets.GITHUB_TOKEN }} 16 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | test: 6 | runs-on: 
ubuntu-latest 7 | steps: 8 | - name: Checkout repository 9 | uses: actions/checkout@v4 10 | 11 | - name: Use Node.js 12 | uses: actions/setup-node@v4 13 | with: 14 | cache: npm 15 | node-version-file: .nvmrc 16 | 17 | - name: Install dependencies 18 | run: npm ci --prefer-offline 19 | 20 | - name: Run server test 21 | run: npm run test:server 22 | 23 | - name: Generate coverage report 24 | run: | 25 | mkdir -p coverage 26 | npx nyc report --reporter=text-lcov > coverage/lcov.info 27 | 28 | - name: Upload coverage reports to Codecov 29 | uses: codecov/codecov-action@v5 30 | with: 31 | token: ${{ secrets.CODECOV_TOKEN }} 32 | 33 | - name: Run module tests 34 | run: npm run test:esm 35 | 36 | - name: Run client test 37 | uses: coactions/setup-xvfb@v1 38 | with: 39 | run: npm run test:client 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build directory 2 | dist/ 3 | esm/types.ts 4 | lib/ 5 | 6 | # Logs 7 | logs 8 | *.log 9 | npm-debug.log* 10 | 11 | # Runtime data 12 | pids 13 | *.pid 14 | *.seed 15 | *.pid.lock 16 | 17 | # Directory for instrumented libs generated by jscoverage/JSCover 18 | lib-cov/ 19 | 20 | # Coverage directory used by tools like istanbul 21 | coverage/ 22 | 23 | # nyc test coverage 24 | .nyc_output/ 25 | 26 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 27 | .grunt/ 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (http://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # Optional npm cache directory 40 | .npm/ 41 | 42 | # Optional eslint cache 43 | .eslintcache 44 | 45 | # Optional REPL history 46 | .node_repl_history 47 | 48 | # Vim swap files 49 | *.swp 50 | 51 | # Mac OS 52 | .DS_Store 53 | 
-------------------------------------------------------------------------------- /.husky/commit-msg: -------------------------------------------------------------------------------- 1 | npx commitlint --edit $1 2 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | npm test 2 | npm run test:esm 3 | npx lint-staged 4 | -------------------------------------------------------------------------------- /.lintstagedrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "*.{js,mjs,ts}": "eslint --fix", 3 | "*.{css,html,json,md,yml}": "prettier --write" 4 | } 5 | -------------------------------------------------------------------------------- /.mocharc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extensions": ["js", "ts"], 3 | "spec": ["test/server/*.test.ts"], 4 | "require": "ts-node/register", 5 | "recursive": true, 6 | "ignore": "esm" 7 | } 8 | -------------------------------------------------------------------------------- /.nvmrc: -------------------------------------------------------------------------------- 1 | 20 2 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true 3 | } 4 | -------------------------------------------------------------------------------- /.size-limit.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "path": "dist/html-dom-parser.min.js", 4 | "limit": "4 KB" 5 | } 6 | ] 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project 
will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. 4 | 5 | ## [5.1.1](https://github.com/remarkablemark/html-dom-parser/compare/v5.1.0...v5.1.1) (2025-05-09) 6 | 7 | 8 | ### Documentation 9 | 10 | * **readme:** fix build status badge ([192c097](https://github.com/remarkablemark/html-dom-parser/commit/192c097325c04f303bcd21121e2701b2a333a3da)) 11 | 12 | ## [5.1.0](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.13...v5.1.0) (2025-05-08) 13 | 14 | 15 | ### Features 16 | 17 | * add `browser` and `default` conditions to `"."` ([aa0ca64](https://github.com/remarkablemark/html-dom-parser/commit/aa0ca64653d30449e0d539f1f88fbf15c80f0984)) 18 | * use `default` over `import` condition ([13b08f6](https://github.com/remarkablemark/html-dom-parser/commit/13b08f63af874e0bf73b922a4b34be77a67f609b)) 19 | 20 | ## [5.0.13](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.12...v5.0.13) (2024-12-25) 21 | 22 | 23 | ### Build System 24 | 25 | * **deps:** bump htmlparser2 from 9.1.0 to 10.0.0 ([#929](https://github.com/remarkablemark/html-dom-parser/issues/929)) ([2d15abe](https://github.com/remarkablemark/html-dom-parser/commit/2d15abe0d022e3fb513b901d3f42b9b56bcdeb57)) 26 | 27 | ## [5.0.12](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.11...v5.0.12) (2024-12-16) 28 | 29 | 30 | ### Bug Fixes 31 | 32 | * **client:** don't break LaTeX when replacing carriage returns ([d69bc66](https://github.com/remarkablemark/html-dom-parser/commit/d69bc662f4470cd8ebf551af873cd7badfa6ebca)), closes [#917](https://github.com/remarkablemark/html-dom-parser/issues/917) 33 | 34 | ## [5.0.11](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.10...v5.0.11) (2024-12-04) 35 | 36 | ### Bug Fixes 37 | 38 | - enable client parser to retain carriage return characters ([#902](https://github.com/remarkablemark/html-dom-parser/issues/902)) 
([fe2e993](https://github.com/remarkablemark/html-dom-parser/commit/fe2e993d69c45e44696224985e558efca79db181)), closes [#420](https://github.com/remarkablemark/html-dom-parser/issues/420) 39 | 40 | ## [5.0.10](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.9...v5.0.10) (2024-08-28) 41 | 42 | ### Continuous Integration 43 | 44 | - **github:** publish package to npm registry with provenance ([e023fe8](https://github.com/remarkablemark/html-dom-parser/commit/e023fe8535c73c5b01837a114d6336c409e2ab17)) 45 | 46 | ## [5.0.9](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.8...v5.0.9) (2024-07-18) 47 | 48 | ### Bug Fixes 49 | 50 | - exports field includes package.json ([c373a92](https://github.com/remarkablemark/html-dom-parser/commit/c373a92567272712300270ed721e298e46c4f929)) 51 | 52 | ## [5.0.8](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.7...v5.0.8) (2024-02-12) 53 | 54 | ### Bug Fixes 55 | 56 | - **esm:** fix exported types ([b6918ae](https://github.com/remarkablemark/html-dom-parser/commit/b6918ae7090308e8812847588f410c96fb6075da)) 57 | 58 | ## [5.0.7](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.6...v5.0.7) (2024-01-13) 59 | 60 | ### Build System 61 | 62 | - **deps:** bump htmlparser2 from 9.0.0 to 9.1.0 ([#631](https://github.com/remarkablemark/html-dom-parser/issues/631)) ([6816800](https://github.com/remarkablemark/html-dom-parser/commit/681680074c59cb1ab61758b06dc794476d8a7fe0)) 63 | 64 | ## [5.0.6](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.5...v5.0.6) (2023-12-19) 65 | 66 | ### Bug Fixes 67 | 68 | - re-export types correctly for verbatimModuleSyntax ([#612](https://github.com/remarkablemark/html-dom-parser/issues/612)) ([782b675](https://github.com/remarkablemark/html-dom-parser/commit/782b6750223b8b5a556c714ca04e0e682a490248)) 69 | 70 | ## [5.0.5](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.4...v5.0.5) (2023-12-16) 71 | 72 | ### Bug Fixes 73 | 74 | - 
**esm:** fix ESM types by adding .mts declaration files ([96a1cfc](https://github.com/remarkablemark/html-dom-parser/commit/96a1cfcd899f4b4c6030e75d13438fc3c05b3be2)) 75 | 76 | ## [5.0.4](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.3...v5.0.4) (2023-10-31) 77 | 78 | ### Bug Fixes 79 | 80 | - **esm:** support vite bundler ([c9e510f](https://github.com/remarkablemark/html-dom-parser/commit/c9e510f7c4e7c0dded3d80a540a313c5c8e3000e)) 81 | 82 | ## [5.0.3](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.2...v5.0.3) (2023-10-22) 83 | 84 | ### Miscellaneous Chores 85 | 86 | - export types from index.ts ([8ed55e2](https://github.com/remarkablemark/html-dom-parser/commit/8ed55e23df5c50e19c6df5d0e4856f52ee890a0c)) 87 | 88 | ## [5.0.2](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.1...v5.0.2) (2023-10-19) 89 | 90 | ### Bug Fixes 91 | 92 | - **package:** add "/src" to files to fix source map warning ([7082c50](https://github.com/remarkablemark/html-dom-parser/commit/7082c50c56f825dcab896f6847c60b0dcdd9b211)) 93 | 94 | ## [5.0.1](https://github.com/remarkablemark/html-dom-parser/compare/v5.0.0...v5.0.1) (2023-10-17) 95 | 96 | ### Bug Fixes 97 | 98 | - **package:** add types to exports in package.json ([df08df3](https://github.com/remarkablemark/html-dom-parser/commit/df08df3c8478dfbe12eeee9e3bc90c8959da454c)) 99 | 100 | ## [5.0.0](https://github.com/remarkablemark/html-dom-parser/compare/v4.0.1...v5.0.0) (2023-10-16) 101 | 102 | ### ⚠ BREAKING CHANGES 103 | 104 | - CommonJS imports require the `.default` key. 
105 | 106 | ### Code Refactoring 107 | 108 | - migrate to TypeScript ([5915b08](https://github.com/remarkablemark/html-dom-parser/commit/5915b084ce7f3cf59e486da998c203f87a45bda1)) 109 | 110 | ## [4.0.1](https://github.com/remarkablemark/html-dom-parser/compare/v4.0.0...v4.0.1) (2023-10-15) 111 | 112 | ### Miscellaneous Chores 113 | 114 | - **index:** set TypeScript Version to 5.2 in index.d.ts ([#525](https://github.com/remarkablemark/html-dom-parser/issues/525)) ([8219338](https://github.com/remarkablemark/html-dom-parser/commit/82193387c3fa05f30d5d6f1d88739d92a4a7156c)) 115 | 116 | ## [4.0.0](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.7...v4.0.0) (2023-05-31) 117 | 118 | ### ⚠ BREAKING CHANGES 119 | 120 | - **deps:** bump htmlparser2 from 8.0.2 to 9.0.0 121 | 122 | ### Build System 123 | 124 | - **deps:** bump htmlparser2 from 8.0.2 to 9.0.0 ([467bbaa](https://github.com/remarkablemark/html-dom-parser/commit/467bbaa4d7373635b35a4e7a92ca6b56aec74b09)), closes [#459](https://github.com/remarkablemark/html-dom-parser/issues/459) 125 | 126 | ## [3.1.7](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.6...v3.1.7) (2023-03-25) 127 | 128 | ### Build System 129 | 130 | - **deps:** bump htmlparser2 from 8.0.1 to 8.0.2 ([4fbe117](https://github.com/remarkablemark/html-dom-parser/commit/4fbe1171f1f7dfb87f05b7f9c410eaea0c6e8405)), closes [#433](https://github.com/remarkablemark/html-dom-parser/issues/433) 131 | 132 | ## [3.1.6](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.5...v3.1.6) (2023-03-22) 133 | 134 | ### Bug Fixes 135 | 136 | - **client:** correct spelling of feGaussianBlur ([9e28250](https://github.com/remarkablemark/html-dom-parser/commit/9e282504b0bef656b19249c324b748aa2190cb42)), closes [#429](https://github.com/remarkablemark/html-dom-parser/issues/429) 137 | 138 | ## [3.1.5](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.4...v3.1.5) (2023-03-06) 139 | 140 | ### Bug Fixes 141 | 142 | - 
**client:** check for "template" in utilities formatDOM ([748cf27](https://github.com/remarkablemark/html-dom-parser/commit/748cf2763565554874e337f8af7b8e6d147a9d94)), closes [#417](https://github.com/remarkablemark/html-dom-parser/issues/417) 143 | 144 | ## [3.1.4](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.3...v3.1.4) (2023-03-04) 145 | 146 | ### Bug Fixes 147 | 148 | - **client:** get template content childNodes in utilities formatDOM ([c2c0bed](https://github.com/remarkablemark/html-dom-parser/commit/c2c0bedbc2f3ae8a3bdb4cdad05df007a2d6e870)), closes [#414](https://github.com/remarkablemark/html-dom-parser/issues/414) 149 | 150 | ## [3.1.3](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.2...v3.1.3) (2023-01-17) 151 | 152 | ### Bug Fixes 153 | 154 | - **package:** specify types in package.json and exports field ([21fb028](https://github.com/remarkablemark/html-dom-parser/commit/21fb02813597c786e29a3d47de89efca1451ee62)) 155 | 156 | ## [3.1.2](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.1...v3.1.2) (2022-08-23) 157 | 158 | ### Bug Fixes 159 | 160 | - **client:** fix import in html-to-dom.mjs ([78a7607](https://github.com/remarkablemark/html-dom-parser/commit/78a7607f8b4c421297cc0569d9f4e49647a0eb63)), closes [#337](https://github.com/remarkablemark/html-dom-parser/issues/337) 161 | 162 | ## [3.1.1](https://github.com/remarkablemark/html-dom-parser/compare/v3.1.0...v3.1.1) (2022-08-20) 163 | 164 | ### Bug Fixes 165 | 166 | - **client:** correct ECMAScript export in client html-to-dom.mjs ([7de506c](https://github.com/remarkablemark/html-dom-parser/commit/7de506cb7dfb6e48e4886659283f118265464e92)), closes [#334](https://github.com/remarkablemark/html-dom-parser/issues/334) 167 | 168 | ## [3.1.0](https://github.com/remarkablemark/html-dom-parser/compare/v3.0.1...v3.1.0) (2022-08-16) 169 | 170 | ### Features 171 | 172 | - add esm for client 
([0c4c2b6](https://github.com/remarkablemark/html-dom-parser/commit/0c4c2b6d86c75cb3dc924afc6fd8501c5d30a876)) 173 | 174 | ## [3.0.1](https://github.com/remarkablemark/html-dom-parser/compare/v3.0.0...v3.0.1) (2022-07-10) 175 | 176 | ### Bug Fixes 177 | 178 | - **client:** ensure head and body with newline are parsed correctly ([b26b645](https://github.com/remarkablemark/html-dom-parser/commit/b26b645c9de708a6b85b61b66bbdb8e75eebb121)), closes [#317](https://github.com/remarkablemark/html-dom-parser/issues/317) 179 | 180 | ## [3.0.0](https://github.com/remarkablemark/html-dom-parser/compare/v2.0.0...v3.0.0) (2022-07-05) 181 | 182 | ### ⚠ BREAKING CHANGES 183 | 184 | - htmlparser2 7.2.0 → 8.0.1 185 | 186 | ### Build System 187 | 188 | - upgrade domhandler to 5.0.3 and htmlparser2 to 8.0.1 ([e80a69c](https://github.com/remarkablemark/html-dom-parser/commit/e80a69c83e07b0ae2f48a78dbd6adb7d0a71b0f3)) 189 | 190 | ## [2.0.0](https://github.com/remarkablemark/html-dom-parser/compare/v1.2.0...v2.0.0) (2022-06-18) 191 | 192 | ### ⚠ BREAKING CHANGES 193 | 194 | - **client:** remove Internet Explorer (IE11) support 195 | 196 | ### Features 197 | 198 | - **client:** remove Internet Explorer (IE11) support ([b34cbe1](https://github.com/remarkablemark/html-dom-parser/commit/b34cbe1310c0e223ae05bca00945c70faca374ab)), closes [#225](https://github.com/remarkablemark/html-dom-parser/issues/225) 199 | 200 | ## [1.2.0](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.1.1...v1.2.0) (2022-04-14) 201 | 202 | ### Features 203 | 204 | - add compatibility for react-native ([4a4a974](https://www.github.com/remarkablemark/html-dom-parser/commit/4a4a974c01ad0beaf0591fe169d68afe66ea6e2a)) 205 | 206 | ## [1.1.1](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.1.0...v1.1.1) (2022-03-20) 207 | 208 | ### Build System 209 | 210 | - **package:** upgrade domhandler from 4.3.0 to 4.3.1 
([c2e8a82](https://www.github.com/remarkablemark/html-dom-parser/commit/c2e8a82035957bd991a969ef12f3dcd114679a2a)) 211 | 212 | ## [1.1.0](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.0.4...v1.1.0) (2022-02-05) 213 | 214 | ### Features 215 | 216 | - add ES Module support ([b8436b1](https://www.github.com/remarkablemark/html-dom-parser/commit/b8436b19147e0146f50982a225e787fd0a53d97d)) 217 | 218 | ## [1.0.4](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.0.3...v1.0.4) (2021-12-06) 219 | 220 | ### Build System 221 | 222 | - **deps:** bump domhandler from 4.2.2 to 4.3.0 ([cb49258](https://www.github.com/remarkablemark/html-dom-parser/commit/cb49258f9cfcd0c6bce1fa5d5e9dc52b2bdeb389)) 223 | 224 | ## [1.0.3](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.0.2...v1.0.3) (2021-11-27) 225 | 226 | ### Performance Improvements 227 | 228 | - upgrade dependency htmlparser2 to v7.2.0 ([7819211](https://www.github.com/remarkablemark/html-dom-parser/commit/78192117edbf8bcbdd23b6534c66dce515b408de)) 229 | 230 | ## [1.0.2](https://www.github.com/remarkablemark/html-dom-parser/compare/v1.0.1...v1.0.2) (2021-09-06) 231 | 232 | ### Build System 233 | 234 | - **deps:** bump domhandler from 4.2.0 to 4.2.2 ([ab46792](https://www.github.com/remarkablemark/html-dom-parser/commit/ab46792a32ba78644748a940748a87124730b24f)) 235 | 236 | ## [1.0.1](https://github.com/remarkablemark/html-dom-parser/compare/v1.0.0...v1.0.1) (2021-06-13) 237 | 238 | ## [1.0.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.5.0...v1.0.0) (2020-12-25) 239 | 240 | ### Build System 241 | 242 | - **package:** upgrade `domhandler` to v4 and `htmlparser2` to v6 ([ec5673e](https://github.com/remarkablemark/html-dom-parser/commit/ec5673ef38050f808ce49e2e4ee165d30492b190)) 243 | 244 | ### Performance Improvements 245 | 246 | - **client:** deprecate Internet Explorer 9 (IE9) 
([d42ea4e](https://github.com/remarkablemark/html-dom-parser/commit/d42ea4ed5df9f44abf16528e36b3cfbb4c7fdd08)) 247 | - **utilities:** continue if nodeType is not element, text, comment ([793ff0c](https://github.com/remarkablemark/html-dom-parser/commit/793ff0c7a84ea05ff061e24984595f6e143362e6)) 248 | 249 | ### BREAKING CHANGES 250 | 251 | - **package:** upgrade `domhandler` to v4 and `htmlparser2` to v6 252 | 253 | domhandler 3.3.0 → 4.0.0 254 | htmlparser2 4.1.0 → 6.0.0 255 | 256 | domhandler: 257 | 258 | - https://github.com/fb55/domhandler/releases/tag/v4.0.0 259 | 260 | htmlparser2: 261 | 262 | - https://github.com/fb55/htmlparser2/releases/tag/v5.0.0 263 | - https://github.com/fb55/htmlparser2/releases/tag/v5.0.1 264 | - https://github.com/fb55/htmlparser2/releases/tag/v6.0.0 265 | 266 | `decodeEntities` option now defaults to true. `` is 267 | parsed correctly. Remove root parent node to keep parser 268 | backwards compatible. 269 | 270 | ## [0.5.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.4.0...v0.5.0) (2020-12-13) 271 | 272 | ### Features 273 | 274 | - upgrade `domhandler` to 3.3.0 and `htmlparser2` to 4.1.0 ([2a748b8](https://github.com/remarkablemark/html-dom-parser/commit/2a748b8d4cc06660a18636e018706508f4fc19f4)) 275 | 276 | ## [0.4.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.3.1...v0.4.0) (2020-12-13) 277 | 278 | ### Features 279 | 280 | - upgrade `domhandler` to 3.0.0 and `htmlparser2` to 4.0.0 ([44dba5e](https://github.com/remarkablemark/html-dom-parser/commit/44dba5efb5cc89668fc064d844c09079560029e1)) 281 | 282 | ## [0.3.1](https://github.com/remarkablemark/html-dom-parser/compare/v0.3.0...v0.3.1) (2020-12-13) 283 | 284 | <a name="0.3.0"></a> 285 | 286 | ## [0.3.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.2.3...v0.3.0) (2020-06-02) 287 | 288 | ### Features 289 | 290 | - **lib:** throw error if browser does not support parsing methods 
([de327af](https://github.com/remarkablemark/html-dom-parser/commit/de327af)) 291 | 292 | ### Performance Improvements 293 | 294 | - **lib:** return `[]` if empty string is passed to server parser ([9850d05](https://github.com/remarkablemark/html-dom-parser/commit/9850d05)) 295 | 296 | ## [0.2.3](https://github.com/remarkablemark/html-dom-parser/compare/v0.2.2...v0.2.3) (2019-11-04) 297 | 298 | ### Bug Fixes 299 | 300 | - **lib:** improve head and body regex in `domparser.js` ([457bb58](https://github.com/remarkablemark/html-dom-parser/commit/457bb58)), closes [#18](https://github.com/remarkablemark/html-dom-parser/issues/18) 301 | 302 | ### Build System 303 | 304 | - **package:** save commitlint, husky, and lint-staged to devDeps ([3b0ce91](https://github.com/remarkablemark/html-dom-parser/commit/3b0ce91)) 305 | - **package:** update `eslint` and install `prettier` and plugin ([b7a6b81](https://github.com/remarkablemark/html-dom-parser/commit/b7a6b81)) 306 | - **package:** update `webpack` and save `webpack-cli` ([908e56d](https://github.com/remarkablemark/html-dom-parser/commit/908e56d)) 307 | - **package:** update dependencies and devDependencies ([a9016be](https://github.com/remarkablemark/html-dom-parser/commit/a9016be)) 308 | 309 | ### Tests 310 | 311 | - **server:** remove skipped test ([a4c1057](https://github.com/remarkablemark/html-dom-parser/commit/a4c1057)) 312 | - refactor tests to ES6 ([d5255a5](https://github.com/remarkablemark/html-dom-parser/commit/d5255a5)) 313 | - **cases:** add empty string test case to `html.js` ([25d7e8a](https://github.com/remarkablemark/html-dom-parser/commit/25d7e8a)) 314 | - **cases:** add more special test cases to `html.js` ([6fdf2ea](https://github.com/remarkablemark/html-dom-parser/commit/6fdf2ea)) 315 | - **cases:** refactor test cases and move html data to its own file ([e4fcb09](https://github.com/remarkablemark/html-dom-parser/commit/e4fcb09)) 316 | - **cases:** remove unnecessary try/catch wrapper to fix lint 
error ([ca8175e](https://github.com/remarkablemark/html-dom-parser/commit/ca8175e)) 317 | - **cases:** skip html test cases that PhantomJS does not support ([d095d29](https://github.com/remarkablemark/html-dom-parser/commit/d095d29)) 318 | - **cases:** update `complex.html` ([1418775](https://github.com/remarkablemark/html-dom-parser/commit/1418775)) 319 | - **client:** add tests for client parser that will be run by karma ([a0c58aa](https://github.com/remarkablemark/html-dom-parser/commit/a0c58aa)) 320 | - **helpers:** create `index.js` which exports helpers ([a9255d5](https://github.com/remarkablemark/html-dom-parser/commit/a9255d5)) 321 | - **helpers:** move helper that tests for errors to separate file ([f2e6312](https://github.com/remarkablemark/html-dom-parser/commit/f2e6312)) 322 | - **helpers:** refactor and move `runTests` to its own file ([8e30784](https://github.com/remarkablemark/html-dom-parser/commit/8e30784)) 323 | - **server:** add tests that spy and mock htmlparser2 and domhandler ([61075a1](https://github.com/remarkablemark/html-dom-parser/commit/61075a1)) 324 | - **server:** move `html-to-dom-server.js` to `server` directory ([3684dac](https://github.com/remarkablemark/html-dom-parser/commit/3684dac)) 325 | 326 | ## [0.2.2](https://github.com/remarkablemark/html-dom-parser/compare/v0.2.1...v0.2.2) (2019-06-07) 327 | 328 | ### Bug Fixes 329 | 330 | - **utilities:** do not lowercase case-sensitive SVG tags ([4083004](https://github.com/remarkablemark/html-dom-parser/commit/4083004)) 331 | 332 | ### Performance Improvements 333 | 334 | - **utilities:** optimize case-sensitive tag replace with hash map ([6aa06ee](https://github.com/remarkablemark/html-dom-parser/commit/6aa06ee)) 335 | 336 | ## [0.2.1](https://github.com/remarkablemark/html-dom-parser/compare/v0.2.0...v0.2.1) (2019-04-03) 337 | 338 | ## [0.2.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.1.3...v0.2.0) (2019-04-01) 339 | 340 | ### Features 341 | 342 | - **types:** add 
TypeScript declarations ([b52d52f](https://github.com/remarkablemark/html-dom-parser/commit/b52d52f)) 343 | 344 | ## [0.1.3](https://github.com/remarkablemark/html-dom-parser/compare/v0.1.2...v0.1.3) - 2018-02-20 345 | 346 | ### Fixed 347 | 348 | - Fix regular expression vulnerability (#8) 349 | - Regex has potential for catastrophic backtracking 350 | - Credit goes to @davisjam for discovering it 351 | 352 | ### Changed 353 | 354 | - Refactored and updated tests (#8) 355 | 356 | ## [0.1.2](https://github.com/remarkablemark/html-dom-parser/compare/v0.1.1...v0.1.2) - 2017-09-30 357 | 358 | ### Added 359 | 360 | - Create helper `isIE()` in utilities (#7) 361 | 362 | ### Fixed 363 | 364 | - Fix client parser in IE/IE9 (#6, #7) 365 | 366 | ### Changed 367 | 368 | - Upgrade `mocha@3.4.2` and `webpack@2.6.1` (#5) 369 | - npm script `build` runs both `build:min` and `build:unmin` (#5) 370 | 371 | ## [0.1.1](https://github.com/remarkablemark/html-dom-parser/compare/v0.1.0...v0.1.1) - 2017-06-26 372 | 373 | ### Added 374 | 375 | - CHANGELOG with previous releases backfilled 376 | 377 | ### Fixed 378 | 379 | - Fix client parser on IE by specifying required parameter for `createHTMLDocument` (#4) 380 | 381 | ## [0.1.0](https://github.com/remarkablemark/html-dom-parser/compare/v0.0.2...v0.1.0) - 2017-06-17 382 | 383 | ### Changed 384 | 385 | - Improve, refactor, and optimize client parser 386 | - Use `template`, `DOMImplementation`, and/or `DOMParser` 387 | 388 | ## [0.0.2](https://github.com/remarkablemark/html-dom-parser/compare/v0.0.1...v0.0.2) - 2016-10-10 389 | 390 | ### Added 391 | 392 | - Create npm scripts for prepublish 393 | 394 | ### Changed 395 | 396 | - Change webpack to build to UMD target 397 | - Update README installation and usage instructions 398 | 399 | ## [0.0.1](https://github.com/remarkablemark/html-dom-parser/tree/v0.0.1) - 2016-10-10 400 | 401 | ### Added 402 | 403 | - Server parser 404 | - Wrapper for `htmlparser2.parseDOM` 405 | - Client parser 406 | 
- Uses DOM API to mimic server parser output 407 | - Build client library with webpack 408 | - Add README, tests, and other necessary files 409 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Menglin "Mark" Xu <mark@remarkablemark.org> 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # html-dom-parser 2 | 3 | [![NPM](https://nodei.co/npm/html-dom-parser.png)](https://nodei.co/npm/html-dom-parser/) 4 | 5 | [![NPM version](https://badgen.net/npm/v/html-dom-parser)](https://www.npmjs.com/package/html-dom-parser) 6 | [![Bundlephobia minified + gzip](https://badgen.net/bundlephobia/minzip/html-dom-parser)](https://bundlephobia.com/package/html-dom-parser) 7 | [![build](https://github.com/remarkablemark/html-dom-parser/actions/workflows/build.yml/badge.svg)](https://github.com/remarkablemark/html-dom-parser/actions/workflows/build.yml) 8 | [![codecov](https://codecov.io/gh/remarkablemark/html-dom-parser/branch/master/graph/badge.svg?token=6RRL0875TY)](https://codecov.io/gh/remarkablemark/html-dom-parser) 9 | [![NPM downloads](https://badgen.net/npm/dm/html-dom-parser)](https://www.npmjs.com/package/html-dom-parser) 10 | 11 | HTML to DOM parser that works on both the server (Node.js) and the client (browser): 12 | 13 | ``` 14 | HTMLDOMParser(string[, options]) 15 | ``` 16 | 17 | The parser converts an HTML string to a JavaScript object that describes the DOM tree. 18 | 19 | #### Example 20 | 21 | ```js 22 | import parse from 'html-dom-parser'; 23 | 24 | parse('<p>Hello, World!</p>'); 25 | ``` 26 | 27 | <details> 28 | <summary>Output</summary> 29 | <p> 30 | 31 | ```js 32 | [ 33 | Element { 34 | type: 'tag', 35 | parent: null, 36 | prev: null, 37 | next: null, 38 | startIndex: null, 39 | endIndex: null, 40 | children: [ 41 | Text { 42 | type: 'text', 43 | parent: [Circular], 44 | prev: null, 45 | next: null, 46 | startIndex: null, 47 | endIndex: null, 48 | data: 'Hello, World!' 
49 | } 50 | ], 51 | name: 'p', 52 | attribs: {} 53 | } 54 | ] 55 | ``` 56 | 57 | </p> 58 | </details> 59 | 60 | [Replit](https://replit.com/@remarkablemark/html-dom-parser) | [JSFiddle](https://jsfiddle.net/remarkablemark/ff9yg1yz/) | [Examples](https://github.com/remarkablemark/html-dom-parser/tree/master/examples) 61 | 62 | ## Install 63 | 64 | [NPM](https://www.npmjs.com/package/html-dom-parser): 65 | 66 | ```sh 67 | npm install html-dom-parser --save 68 | ``` 69 | 70 | [Yarn](https://yarnpkg.com/package/html-dom-parser): 71 | 72 | ```sh 73 | yarn add html-dom-parser 74 | ``` 75 | 76 | [CDN](https://unpkg.com/html-dom-parser/): 77 | 78 | ```html 79 | <script src="https://unpkg.com/html-dom-parser@latest/dist/html-dom-parser.min.js"></script> 80 | <script> 81 | window.HTMLDOMParser(/* string */); 82 | </script> 83 | ``` 84 | 85 | ## Usage 86 | 87 | Import with ES Modules: 88 | 89 | ```js 90 | import parse from 'html-dom-parser'; 91 | ``` 92 | 93 | Require with CommonJS: 94 | 95 | ```js 96 | const parse = require('html-dom-parser').default; 97 | ``` 98 | 99 | Parse empty string: 100 | 101 | ```js 102 | parse(''); 103 | ``` 104 | 105 | Output: 106 | 107 | <!-- prettier-ignore --> 108 | ```js 109 | [] 110 | ``` 111 | 112 | Parse string: 113 | 114 | ```js 115 | parse('Hello, World!'); 116 | ``` 117 | 118 | <details> 119 | <summary>Output</summary> 120 | <p> 121 | 122 | ```js 123 | [ 124 | Text { 125 | type: 'text', 126 | parent: null, 127 | prev: null, 128 | next: null, 129 | startIndex: null, 130 | endIndex: null, 131 | data: 'Hello, World!' 
132 | } 133 | ] 134 | ``` 135 | 136 | </p> 137 | </details> 138 | 139 | Parse element with attributes: 140 | 141 | ```js 142 | parse('<p class="foo" style="color: #bada55">Hello, <em>world</em>!</p>'); 143 | ``` 144 | 145 | <details> 146 | <summary>Output</summary> 147 | <p> 148 | 149 | ```js 150 | [ 151 | Element { 152 | type: 'tag', 153 | parent: null, 154 | prev: null, 155 | next: null, 156 | startIndex: null, 157 | endIndex: null, 158 | children: [ [Text], [Element], [Text] ], 159 | name: 'p', 160 | attribs: { class: 'foo', style: 'color: #bada55' } 161 | } 162 | ] 163 | ``` 164 | 165 | </p> 166 | </details> 167 | 168 | The server parser is a wrapper of [htmlparser2](https://github.com/fb55/htmlparser2) `parseDOM` but with the root parent node excluded. The next section shows the available options you can use with the server parser. 169 | 170 | The client parser mimics the server parser by using the [DOM](https://developer.mozilla.org/docs/Web/API/Document_Object_Model/Introduction) API to parse the HTML string. 171 | 172 | ## Options (server only) 173 | 174 | Because the server parser is a wrapper of [htmlparser2](https://github.com/fb55/htmlparser2), which implements [domhandler](https://github.com/fb55/domhandler), you can alter how the server parser parses your code with the following options: 175 | 176 | ```js 177 | /** 178 | * These are the default options being used if you omit the optional options object. 179 | * htmlparser2 will use the same options object for its domhandler so the options 180 | * should be combined into a single object like so: 181 | */ 182 | const options = { 183 | /** 184 | * Options for the domhandler class. 185 | * https://github.com/fb55/domhandler/blob/master/src/index.ts#L16 186 | */ 187 | withStartIndices: false, 188 | withEndIndices: false, 189 | xmlMode: false, 190 | /** 191 | * Options for the htmlparser2 class. 
192 | * https://github.com/fb55/htmlparser2/blob/master/src/Parser.ts#L104 193 | */ 194 | xmlMode: false, // Will overwrite what is used for the domhandler, otherwise inherited. 195 | decodeEntities: true, 196 | lowerCaseTags: true, // !xmlMode by default 197 | lowerCaseAttributeNames: true, // !xmlMode by default 198 | recognizeCDATA: false, // xmlMode by default 199 | recognizeSelfClosing: false, // xmlMode by default 200 | Tokenizer: Tokenizer, 201 | }; 202 | ``` 203 | 204 | If you're parsing SVG, you can set `lowerCaseTags` to `false` without having to enable `xmlMode`. This preserves the original casing of tag names (such as camelCase SVG elements) instead of lowercasing them per the HTML standard. 205 | 206 | > [!NOTE] 207 | > If you're parsing code client-side (in-browser), you cannot control the parsing options. Client-side parsing automatically handles returning some HTML tags in camelCase, such as specific SVG elements, but returns all other tags lowercased according to the HTML standard. 208 | 209 | ## Migration 210 | 211 | ### v5 212 | 213 | Migrated to TypeScript. CommonJS imports require the `.default` key: 214 | 215 | ```js 216 | const parse = require('html-dom-parser').default; 217 | ``` 218 | 219 | ### v4 220 | 221 | Upgraded [htmlparser2](https://github.com/fb55/htmlparser2) to v9. 222 | 223 | ### v3 224 | 225 | Upgraded [domhandler](https://github.com/fb55/domhandler) to v5. [Parser options](https://github.com/fb55/htmlparser2/wiki/Parser-options) like `normalizeWhitespace` have been removed. 226 | 227 | ### v2 228 | 229 | Removed Internet Explorer (IE11) support. 230 | 231 | ### v1 232 | 233 | Upgraded `domhandler` to v4 and `htmlparser2` to v6. 234 | 235 | ## Release 236 | 237 | Release and publish are automated by [Release Please](https://github.com/googleapis/release-please). 
238 | 239 | ## Special Thanks 240 | 241 | - [Contributors](https://github.com/remarkablemark/html-dom-parser/graphs/contributors) 242 | - [htmlparser2](https://github.com/fb55/htmlparser2) 243 | - [domhandler](https://github.com/fb55/domhandler) 244 | 245 | ## License 246 | 247 | [MIT](https://github.com/remarkablemark/html-dom-parser/blob/master/LICENSE) 248 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import path from 'node:path'; 2 | import { fileURLToPath } from 'node:url'; 3 | 4 | import { includeIgnoreFile } from '@eslint/compat'; 5 | import { FlatCompat } from '@eslint/eslintrc'; 6 | import js from '@eslint/js'; 7 | import typescriptEslint from '@typescript-eslint/eslint-plugin'; 8 | import tsParser from '@typescript-eslint/parser'; 9 | import prettier from 'eslint-plugin-prettier'; 10 | import simpleImportSort from 'eslint-plugin-simple-import-sort'; 11 | import globals from 'globals'; 12 | 13 | const __filename = fileURLToPath(import.meta.url); 14 | const __dirname = path.dirname(__filename); 15 | const gitignorePath = path.resolve(__dirname, '.gitignore'); 16 | 17 | const compat = new FlatCompat({ 18 | baseDirectory: __dirname, 19 | recommendedConfig: js.configs.recommended, 20 | allConfig: js.configs.all, 21 | }); 22 | 23 | export default [ 24 | includeIgnoreFile(gitignorePath), 25 | 26 | ...compat.extends( 27 | 'eslint:recommended', 28 | 'plugin:@typescript-eslint/recommended', 29 | ), 30 | 31 | { 32 | plugins: { 33 | '@typescript-eslint': typescriptEslint, 34 | 'simple-import-sort': simpleImportSort, 35 | prettier, 36 | }, 37 | 38 | languageOptions: { 39 | globals: { 40 | ...globals.browser, 41 | ...globals.mocha, 42 | ...globals.node, 43 | }, 44 | parser: tsParser, 45 | }, 46 | 47 | rules: { 48 | '@typescript-eslint/ban-ts-comment': 'off', 49 | '@typescript-eslint/no-unused-vars': 'error', 50 | 
'@typescript-eslint/no-var-requires': 'off', 51 | 'no-console': 'error', 52 | 'no-debugger': 'error', 53 | 'prettier/prettier': 'error', 54 | 'simple-import-sort/exports': 'error', 55 | 'simple-import-sort/imports': 'error', 56 | }, 57 | }, 58 | { 59 | files: ['test/**'], 60 | 61 | rules: { 62 | '@typescript-eslint/no-require-imports': 'off', 63 | }, 64 | }, 65 | ]; 66 | -------------------------------------------------------------------------------- /esm/client/html-to-dom.d.mts: -------------------------------------------------------------------------------- 1 | import type { DOMNode } from '../types'; 2 | 3 | export default function HTMLDOMParser(html: string): DOMNode[]; 4 | -------------------------------------------------------------------------------- /esm/client/html-to-dom.mjs: -------------------------------------------------------------------------------- 1 | import ClientParser from '../../lib/client/html-to-dom.js'; 2 | 3 | export default ClientParser.default || ClientParser; 4 | -------------------------------------------------------------------------------- /esm/index.d.mts: -------------------------------------------------------------------------------- 1 | export { default } from './server/html-to-dom.mjs'; 2 | export type * from './types.ts'; 3 | -------------------------------------------------------------------------------- /esm/index.mjs: -------------------------------------------------------------------------------- 1 | import HTMLDOMParser from '../lib/index.js'; 2 | 3 | export default HTMLDOMParser.default || HTMLDOMParser; 4 | -------------------------------------------------------------------------------- /esm/server/html-to-dom.d.mts: -------------------------------------------------------------------------------- 1 | import type { ParserOptions } from 'htmlparser2'; 2 | 3 | import type { DOMNode } from '../types'; 4 | 5 | export default function HTMLDOMParser( 6 | html: string, 7 | options?: ParserOptions, 8 | ): DOMNode[]; 9 | 
-------------------------------------------------------------------------------- /esm/server/html-to-dom.mjs: -------------------------------------------------------------------------------- 1 | import ServerParser from '../../lib/server/html-to-dom.js'; 2 | 3 | export default ServerParser.default || ServerParser; 4 | -------------------------------------------------------------------------------- /examples/index.html: -------------------------------------------------------------------------------- 1 | <textarea cols="50" rows="5"><p>Hello, world!</p></textarea> 2 | <pre><code></code></pre> 3 | <script src="../dist/html-dom-parser.js"></script> 4 | <script> 5 | var code = document.querySelector('code'); 6 | var textarea = document.querySelector('textarea'); 7 | 8 | function renderOutput(event) { 9 | const html = event.target.value; 10 | cache = []; 11 | const output = window.HTMLDOMParser(html); 12 | console.log(output); 13 | code.innerText = JSON.stringify(output, replacer, 2); 14 | } 15 | 16 | textarea.addEventListener('input', renderOutput); 17 | renderOutput({ target: textarea }); 18 | 19 | /** 20 | * @see {@link https://stackoverflow.com/questions/11616630/json-stringify-avoid-typeerror-converting-circular-structure-to-json} 21 | */ 22 | var cache; 23 | function replacer(key, value) { 24 | if (typeof value === 'object' && value !== null) { 25 | if (cache.indexOf(value) !== -1) { 26 | // duplicate reference found 27 | return '[Circular]'; 28 | } 29 | // store value in our collection 30 | cache.push(value); 31 | } 32 | return value; 33 | } 34 | </script> 35 | -------------------------------------------------------------------------------- /karma.conf.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @see https://karma-runner.github.io/6.4/config/configuration-file.html 3 | */ 4 | module.exports = (config) => { 5 | config.set({ 6 | // base path that will be used to resolve all patterns (eg. 
files, exclude) 7 | basePath: '', 8 | 9 | // frameworks to use 10 | // available frameworks: https://npmjs.org/browse/keyword/karma-adapter 11 | frameworks: ['mocha', 'chai', 'commonjs'], 12 | 13 | // list of files / patterns to load in the browser 14 | files: [ 15 | 'dist/*.js', 16 | 'lib/client/constants.js', 17 | 'lib/server/utilities.js', 18 | 'test/cases/html.js', 19 | 'test/client/*.js', 20 | 'test/helpers/*.js', 21 | ], 22 | 23 | // list of files / patterns to exclude 24 | exclude: [], 25 | 26 | // preprocess matching files before serving them to the browser 27 | // available preprocessors: https://npmjs.org/browse/keyword/karma-preprocessor 28 | preprocessors: { 29 | 'dist/*.js': ['commonjs'], 30 | 'lib/**/*.js': ['commonjs'], 31 | 'test/**/*.js': ['commonjs'], 32 | }, 33 | 34 | // test results reporter to use 35 | // possible values: 'dots', 'progress' 36 | // available reporters: https://npmjs.org/browse/keyword/karma-reporter 37 | reporters: ['mocha', 'progress'], 38 | 39 | // web server port 40 | port: 9876, 41 | 42 | // enable / disable colors in the output (reporters and logs) 43 | colors: true, 44 | 45 | // level of logging 46 | // possible values: config.LOG_DISABLE || config.LOG_ERROR || config.LOG_WARN || config.LOG_INFO || config.LOG_DEBUG 47 | logLevel: config.LOG_INFO, 48 | 49 | // enable / disable watching file and executing tests whenever any file changes 50 | autoWatch: true, 51 | 52 | // Start these browsers, currently available: 53 | // - Chrome 54 | // - ChromeCanary 55 | // - Firefox 56 | // - Opera (has to be installed with `npm install karma-opera-launcher`) 57 | // - Safari (only Mac; has to be installed with `npm install karma-safari-launcher`) 58 | // - PhantomJS 59 | // - IE (only Windows; has to be installed with `npm install karma-ie-launcher`) 60 | // available browser launchers: https://npmjs.org/browse/keyword/karma-launcher 61 | browsers: ['Chrome'], 62 | 63 | // If browser does not capture in given timeout [ms], kill it 64 | 
captureTimeout: 60000, 65 | 66 | // Continuous Integration mode 67 | // if true, Karma captures browsers, runs the tests and exits 68 | singleRun: false, 69 | 70 | // Concurrency level 71 | // how many browser should be started simultaneous 72 | concurrency: Infinity, 73 | 74 | // Client options 75 | client: { 76 | mocha: { 77 | // change Karma's `debug.html` to the Mocha web reporter 78 | reporter: 'html', 79 | }, 80 | }, 81 | 82 | // Mocha reporter options 83 | // https://www.npmjs.com/package/karma-mocha-reporter 84 | mochaReporter: { 85 | showDiff: true, 86 | }, 87 | }); 88 | }; 89 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "html-dom-parser", 3 | "version": "5.1.1", 4 | "description": "HTML to DOM parser.", 5 | "author": "Mark <mark@remarkablemark.org>", 6 | "main": "./lib/index.js", 7 | "module": "./esm/index.mjs", 8 | "exports": { 9 | ".": { 10 | "browser": { 11 | "require": "./lib/client/html-to-dom.js", 12 | "default": "./esm/client/html-to-dom.mjs" 13 | }, 14 | "default": { 15 | "require": "./lib/index.js", 16 | "default": "./esm/index.mjs" 17 | } 18 | }, 19 | "./lib/client/*": { 20 | "require": "./lib/client/*.js", 21 | "default": "./esm/client/*.mjs" 22 | }, 23 | "./lib/server/*": { 24 | "require": "./lib/server/*.js", 25 | "default": "./esm/server/*.mjs" 26 | }, 27 | "./package.json": "./package.json" 28 | }, 29 | "scripts": { 30 | "build": "run-s build:*", 31 | "build:cjs": "tsc", 32 | "build:esm": "awk '!/sourceMappingURL/' lib/types.d.ts > esm/types.ts", 33 | "build:umd": "rollup --config --failAfterWarnings", 34 | "clean": "rm -rf .nyc_output coverage dist lib", 35 | "lint": "eslint .", 36 | "lint:fix": "npm run lint -- --fix", 37 | "lint:package": "publint", 38 | "lint:tsc": "tsc --noEmit", 39 | "prepare": "husky", 40 | "prepublishOnly": "run-s lint lint:tsc test:server clean build lint:package", 
41 | "size-limit": "size-limit", 42 | "test": "run-s test:server test:client", 43 | "test:client": "npm run test:client:watch -- --single-run", 44 | "test:client:build": "NODE_ENV=test npm run build", 45 | "test:client:watch": "npm run test:client:build && karma start", 46 | "test:esm": "npm run build:cjs && node --test test/esm", 47 | "test:server": "npm run build:cjs && nyc mocha" 48 | }, 49 | "repository": { 50 | "type": "git", 51 | "url": "git+https://github.com/remarkablemark/html-dom-parser.git" 52 | }, 53 | "bugs": { 54 | "url": "https://github.com/remarkablemark/html-dom-parser/issues" 55 | }, 56 | "keywords": [ 57 | "html-dom-parser", 58 | "html", 59 | "dom", 60 | "parser", 61 | "htmlparser2", 62 | "pojo" 63 | ], 64 | "dependencies": { 65 | "domhandler": "5.0.3", 66 | "htmlparser2": "10.0.0" 67 | }, 68 | "devDependencies": { 69 | "@commitlint/cli": "19.8.1", 70 | "@commitlint/config-conventional": "19.8.1", 71 | "@eslint/compat": "1.2.9", 72 | "@eslint/eslintrc": "3.3.1", 73 | "@eslint/js": "9.28.0", 74 | "@rollup/plugin-alias": "5.1.1", 75 | "@rollup/plugin-commonjs": "28.0.3", 76 | "@rollup/plugin-node-resolve": "16.0.1", 77 | "@rollup/plugin-terser": "0.4.4", 78 | "@rollup/plugin-typescript": "12.1.2", 79 | "@size-limit/preset-big-lib": "11.2.0", 80 | "@types/chai": "4.3.16", 81 | "@types/estree": "1.0.8", 82 | "@types/mocha": "10.0.10", 83 | "@types/node": "22.15.30", 84 | "@typescript-eslint/eslint-plugin": "8.33.1", 85 | "@typescript-eslint/parser": "8.33.1", 86 | "chai": "4.5.0", 87 | "eslint": "9.28.0", 88 | "eslint-plugin-prettier": "5.4.1", 89 | "eslint-plugin-simple-import-sort": "12.1.1", 90 | "globals": "16.2.0", 91 | "html-minifier": "4.0.0", 92 | "husky": "9.1.7", 93 | "jsdomify": "3.1.1", 94 | "karma": "6.4.4", 95 | "karma-chai": "0.1.0", 96 | "karma-chrome-launcher": "3.2.0", 97 | "karma-commonjs": "1.0.0", 98 | "karma-mocha": "2.0.1", 99 | "karma-mocha-reporter": "2.2.5", 100 | "lint-staged": "16.1.0", 101 | "mocha": "11.5.0", 102 | 
"npm-run-all2": "8.0.4", 103 | "nyc": "17.1.0", 104 | "prettier": "3.5.3", 105 | "publint": "0.3.12", 106 | "rollup": "4.41.1", 107 | "size-limit": "11.2.0", 108 | "ts-node": "10.9.2", 109 | "typescript": "5.8.3" 110 | }, 111 | "files": [ 112 | "/dist", 113 | "/esm", 114 | "/lib", 115 | "/src" 116 | ], 117 | "browser": { 118 | "./lib/index.js": "./lib/client/html-to-dom.js", 119 | "./esm/index.mjs": "./esm/client/html-to-dom.mjs" 120 | }, 121 | "react-native": { 122 | "./lib/index.js": "./lib/server/html-to-dom.js" 123 | }, 124 | "license": "MIT" 125 | } 126 | -------------------------------------------------------------------------------- /rollup.config.mjs: -------------------------------------------------------------------------------- 1 | import alias from '@rollup/plugin-alias'; 2 | import commonjs from '@rollup/plugin-commonjs'; 3 | import resolve from '@rollup/plugin-node-resolve'; 4 | import terser from '@rollup/plugin-terser'; 5 | import typescript from '@rollup/plugin-typescript'; 6 | 7 | const getConfig = (minify = false) => ({ 8 | input: 'src/index.ts', 9 | 10 | output: { 11 | file: `dist/html-dom-parser${minify ? 
'.min' : ''}.js`, 12 | format: 'umd', 13 | name: 'HTMLDOMParser', 14 | sourcemap: true, 15 | }, 16 | 17 | plugins: [ 18 | alias({ 19 | entries: [ 20 | { 21 | find: './server/html-to-dom', 22 | replacement: './client/html-to-dom', 23 | }, 24 | ], 25 | }), 26 | 27 | typescript({ 28 | declaration: false, 29 | declarationMap: false, 30 | module: 'esnext', 31 | compilerOptions: { 32 | outDir: 'dist', 33 | }, 34 | }), 35 | 36 | commonjs(), 37 | resolve({ browser: true }), 38 | minify && terser(), 39 | ], 40 | }); 41 | 42 | const configs = [getConfig(), getConfig(true)]; 43 | 44 | if (process.env.NODE_ENV === 'test') { 45 | configs.push({ 46 | input: 'node_modules/htmlparser2', 47 | output: { 48 | file: 'dist/htmlparser2.js', 49 | format: 'umd', 50 | name: 'htmlparser2', 51 | sourcemap: true, 52 | }, 53 | plugins: [commonjs(), resolve({ browser: true })], 54 | }); 55 | } 56 | 57 | export default configs; 58 | -------------------------------------------------------------------------------- /src/client/constants.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * SVG elements are case-sensitive. 
3 | * 4 | * @see https://developer.mozilla.org/docs/Web/SVG/Element#svg_elements_a_to_z 5 | */ 6 | export const CASE_SENSITIVE_TAG_NAMES = [ 7 | 'animateMotion', 8 | 'animateTransform', 9 | 'clipPath', 10 | 'feBlend', 11 | 'feColorMatrix', 12 | 'feComponentTransfer', 13 | 'feComposite', 14 | 'feConvolveMatrix', 15 | 'feDiffuseLighting', 16 | 'feDisplacementMap', 17 | 'feDropShadow', 18 | 'feFlood', 19 | 'feFuncA', 20 | 'feFuncB', 21 | 'feFuncG', 22 | 'feFuncR', 23 | 'feGaussianBlur', 24 | 'feImage', 25 | 'feMerge', 26 | 'feMergeNode', 27 | 'feMorphology', 28 | 'feOffset', 29 | 'fePointLight', 30 | 'feSpecularLighting', 31 | 'feSpotLight', 32 | 'feTile', 33 | 'feTurbulence', 34 | 'foreignObject', 35 | 'linearGradient', 36 | 'radialGradient', 37 | 'textPath', 38 | ] as const; 39 | 40 | export const CASE_SENSITIVE_TAG_NAMES_MAP = CASE_SENSITIVE_TAG_NAMES.reduce( 41 | (accumulator, tagName) => { 42 | accumulator[tagName.toLowerCase()] = tagName; 43 | return accumulator; 44 | }, 45 | {} as Record<string, string>, 46 | ); 47 | 48 | export const CARRIAGE_RETURN = '\r'; 49 | export const CARRIAGE_RETURN_REGEX = new RegExp(CARRIAGE_RETURN, 'g'); 50 | export const CARRIAGE_RETURN_PLACEHOLDER = `__HTML_DOM_PARSER_CARRIAGE_RETURN_PLACEHOLDER_${Date.now()}__`; 51 | export const CARRIAGE_RETURN_PLACEHOLDER_REGEX = new RegExp( 52 | CARRIAGE_RETURN_PLACEHOLDER, 53 | 'g', 54 | ); 55 | -------------------------------------------------------------------------------- /src/client/domparser.ts: -------------------------------------------------------------------------------- 1 | import { escapeSpecialCharacters } from './utilities'; 2 | 3 | // constants 4 | const HTML = 'html'; 5 | const HEAD = 'head'; 6 | const BODY = 'body'; 7 | const FIRST_TAG_REGEX = /<([a-zA-Z]+[0-9]?)/; // e.g., <h1> 8 | 9 | // match-all-characters in case of newlines (DOTALL); the negative lookahead stops longer tag names (e.g., <header>) from being mistaken for <head>/<body> 10 | const HEAD_TAG_REGEX = /<head(?![a-z0-9-])[^]*>/i; 11 | const BODY_TAG_REGEX = /<body(?![a-z0-9-])[^]*>/i; 12 | 13 | // falls back to `parseFromString` if 
`createHTMLDocument` cannot be used
// eslint-disable-next-line @typescript-eslint/no-unused-vars
let parseFromDocument = function (html: string, tagName?: string): Document {
  /* istanbul ignore next */
  throw new Error(
    'This browser does not support `document.implementation.createHTMLDocument`',
  );
};

// eslint-disable-next-line @typescript-eslint/no-unused-vars
let parseFromString = function (html: string, tagName?: string): Document {
  /* istanbul ignore next */
  throw new Error(
    'This browser does not support `DOMParser.prototype.parseFromString`',
  );
};

const DOMParser = typeof window === 'object' && window.DOMParser;

/**
 * DOMParser (performance: slow).
 *
 * When `window.DOMParser` is a function, both `parseFromString` and
 * `parseFromDocument` are replaced by an implementation backed by it.
 *
 * @see https://developer.mozilla.org/docs/Web/API/DOMParser#Parsing_an_SVG_or_HTML_document
 */
if (typeof DOMParser === 'function') {
  const parser = new DOMParser();

  /**
   * Builds an HTML document with `DOMParser.parseFromString`.
   *
   * @param html - The HTML string.
   * @param tagName - Optional element name to wrap the HTML in before parsing.
   * @returns - Document.
   */
  parseFromString = (html: string, tagName?: string): Document => {
    /* istanbul ignore next */
    const markup = tagName ? `<${tagName}>${html}</${tagName}>` : html;
    return parser.parseFromString(markup, 'text/html');
  };

  parseFromDocument = parseFromString;
}

/**
 * DOMImplementation (performance: fair).
 *
 * When available, this overrides the `parseFromDocument` assigned above.
 *
 * @see https://developer.mozilla.org/docs/Web/API/DOMImplementation/createHTMLDocument
 */
if (typeof document === 'object' && document.implementation) {
  // a single document is created once and reused by every call
  const sharedDocument = document.implementation.createHTMLDocument();

  /**
   * Renders the HTML into the document created by
   * `document.implementation.createHTMLDocument`.
   *
   * @param html - The HTML string.
   * @param tagName - Element to render the HTML into; when omitted, the HTML
   *   replaces the content of the document element.
   * @returns - Document
   */
  parseFromDocument = (html: string, tagName?: string): Document => {
    const target = tagName
      ? sharedDocument.documentElement.querySelector(tagName)
      : sharedDocument.documentElement;

    // when `tagName` matches nothing, the document is returned untouched
    if (target) {
      target.innerHTML = html;
    }

    return sharedDocument;
  };
}

/**
 * Template (performance: fast).
 *
 * @see https://developer.mozilla.org/docs/Web/HTML/Element/template
 */
const template =
  typeof document === 'object' && document.createElement('template');

let parseFromTemplate: (html: string) => NodeList;

if (template && template.content) {
  /**
   * Parses HTML through the content fragment of a template element.
   *
   * @param html - HTML string.
   * @returns - Nodes.
   */
  parseFromTemplate = (html: string): NodeList => {
    template.innerHTML = html;
    return template.content.childNodes;
  };
}

/**
 * Removes the first element matching the selector from the document, if any.
 *
 * @param doc - Document to search.
 * @param selector - Selector of the element to remove.
 */
function removeFirstMatch(doc: Document, selector: string): void {
  const element = doc.querySelector(selector);
  element?.parentNode?.removeChild(element);
}

/**
 * Parses HTML string to DOM nodes.
 *
 * The parsing strategy depends on the first tag name found in the markup:
 * an `html` tag goes through `parseFromString`, a `head` or `body` tag goes
 * through `parseFromDocument`, and anything else goes through the template
 * parser when it exists (falling back to `parseFromDocument` with a body).
 *
 * @param html - HTML markup.
 * @returns - DOM nodes.
 */
export default function domparser(html: string): NodeList {
  // Escape special characters before parsing
  const markup = escapeSpecialCharacters(html);

  const firstTagMatch = markup.match(FIRST_TAG_REGEX);
  const firstTagName =
    firstTagMatch && firstTagMatch[1] ? firstTagMatch[1].toLowerCase() : '';

  if (firstTagName === HTML) {
    const doc = parseFromString(markup);

    // the created document may come with filler head/body elements,
    // so make sure to remove them if they don't actually exist
    if (!HEAD_TAG_REGEX.test(markup)) {
      removeFirstMatch(doc, HEAD);
    }

    if (!BODY_TAG_REGEX.test(markup)) {
      removeFirstMatch(doc, BODY);
    }

    return doc.querySelectorAll(HTML);
  }

  if (firstTagName === HEAD || firstTagName === BODY) {
    const elements = parseFromDocument(markup).querySelectorAll(firstTagName);

    // if there's a sibling element, then return both elements
    const hasSibling =
      BODY_TAG_REGEX.test(markup) && HEAD_TAG_REGEX.test(markup);

    return hasSibling ? elements[0].parentNode!.childNodes : elements;
  }

  // low-level tag or text
  if (parseFromTemplate) {
    return parseFromTemplate(markup);
  }

  return parseFromDocument(markup, BODY).querySelector(BODY)!.childNodes;
}

--------------------------------------------------------------------------------
/src/client/html-to-dom.ts:
--------------------------------------------------------------------------------
import domparser from './domparser';
import { formatDOM } from './utilities';

const DIRECTIVE_REGEX = /<(![a-zA-Z\s]+)>/; // e.g., <!doctype html>

/**
 * Parses HTML string to DOM nodes in browser.
 *
 * @param html - HTML markup.
 * @returns - DOM elements.
*/
export default function HTMLDOMParser(html: string) {
  if (typeof html !== 'string') {
    throw new TypeError('First argument must be a string');
  }

  // nothing to parse for an empty string
  if (!html) {
    return [];
  }

  // capture the directive (e.g., `!doctype html`) when the markup has one
  const directive = DIRECTIVE_REGEX.exec(html)?.[1];

  return formatDOM(domparser(html), null, directive);
}

--------------------------------------------------------------------------------
/src/client/utilities.ts:
--------------------------------------------------------------------------------
import { Comment, Element, ProcessingInstruction, Text } from 'domhandler';

import type { DOMNode } from '../types';
import {
  CARRIAGE_RETURN,
  CARRIAGE_RETURN_PLACEHOLDER,
  CARRIAGE_RETURN_PLACEHOLDER_REGEX,
  CARRIAGE_RETURN_REGEX,
  CASE_SENSITIVE_TAG_NAMES_MAP,
} from './constants';

/**
 * Looks up the case-sensitive spelling of a tag name.
 *
 * @param tagName - Tag name in lowercase.
 * @returns - Entry of `CASE_SENSITIVE_TAG_NAMES_MAP` for the tag name, if any.
 */
function getCaseSensitiveTagName(tagName: string): string | undefined {
  return CASE_SENSITIVE_TAG_NAMES_MAP[tagName];
}

/**
 * Converts DOM attributes into a plain object.
 *
 * @param attributes - List of attributes.
 * @returns - Map of attribute name to value.
 */
export function formatAttributes(attributes: NamedNodeMap) {
  const result: Record<string, string> = {};

  // `NamedNodeMap` is array-like, so walk it by position
  for (let position = 0; position < attributes.length; position++) {
    const { name, value } = attributes[position];
    result[name] = value;
  }

  return result;
}

/**
 * Corrects the tag name if it is case-sensitive (SVG).
 * Otherwise, returns the lowercase tag name (HTML).
 *
 * @param tagName - Lowercase tag name.
* @returns - Formatted tag name.
 */
function formatTagName(tagName: string): string {
  const lowerCaseTagName = tagName.toLowerCase();
  return getCaseSensitiveTagName(lowerCaseTagName) || lowerCaseTagName;
}

/**
 * Escapes special characters before parsing.
 *
 * Every match of `CARRIAGE_RETURN_REGEX` is replaced with
 * `CARRIAGE_RETURN_PLACEHOLDER`.
 *
 * @param html - The HTML string.
 * @returns - HTML string with escaped special characters.
 */
export function escapeSpecialCharacters(html: string): string {
  return html.replace(CARRIAGE_RETURN_REGEX, CARRIAGE_RETURN_PLACEHOLDER);
}

/**
 * Reverts escaped special characters back to actual characters.
 *
 * Every match of `CARRIAGE_RETURN_PLACEHOLDER_REGEX` is replaced with
 * `CARRIAGE_RETURN`.
 *
 * @param text - The text with escaped characters.
 * @returns - Text with escaped characters reverted.
 */
export function revertEscapedCharacters(text: string): string {
  return text.replace(CARRIAGE_RETURN_PLACEHOLDER_REGEX, CARRIAGE_RETURN);
}

/**
 * Transforms DOM nodes to `domhandler` nodes.
 *
 * Element (1), text (3) and comment (8) nodes are converted; every other
 * node type is skipped. Converted siblings are linked through `prev`/`next`
 * and all of them receive `parent`. When a directive is given, a
 * `ProcessingInstruction` is prepended to the result.
 *
 * @param nodes - DOM nodes.
 * @param parent - Parent node.
 * @param directive - Directive (e.g., `!doctype html`).
 * @returns - Nodes.
 */
export function formatDOM(
  nodes: NodeList,
  parent: DOMNode | null = null,
  directive?: string,
): DOMNode[] {
  const domNodes: DOMNode[] = [];
  let current;
  let index = 0;
  const nodesLength = nodes.length;

  for (; index < nodesLength; index++) {
    const node = nodes[index];

    // set the node data given the type
    switch (node.nodeType) {
      case 1: {
        const tagName = formatTagName(node.nodeName);

        // script, style, or tag
        current = new Element(
          tagName,
          formatAttributes((node as HTMLElement).attributes),
        );

        current.children = formatDOM(
          // template children are on content
          tagName === 'template'
            ? (node as HTMLTemplateElement).content.childNodes
            : node.childNodes,
          current,
        );

        break;
      }

      case 3:
        current = new Text(revertEscapedCharacters(node.nodeValue!));
        break;

      case 8:
        current = new Comment(node.nodeValue!);
        break;

      default:
        continue;
    }

    // set previous node next
    // The previous sibling is the last node that was actually converted.
    // `index` counts input nodes, so it cannot be used to address `domNodes`
    // once an unsupported node type has been skipped above.
    const prev = domNodes[domNodes.length - 1] || null;
    if (prev) {
      prev.next = current;
    }

    // set properties for current node
    current.parent = parent as Element;
    current.prev = prev;
    current.next = null;

    domNodes.push(current);
  }

  if (directive) {
    // The name is everything before the first whitespace character; a
    // directive without whitespace is used as the name in its entirety
    // (instead of yielding an empty name).
    const nameEnd = directive.search(/\s/);
    const name = nameEnd < 0 ? directive : directive.substring(0, nameEnd);

    current = new ProcessingInstruction(name.toLowerCase(), directive);

    current.next = domNodes[0] || null;
    current.parent = parent as Element;
    domNodes.unshift(current);

    if (domNodes[1]) {
      domNodes[1].prev = domNodes[0];
    }
  }

  return domNodes;
}

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
/**
 * When running on Node.js, use the server parser.
 * When bundling for the browser, use the client parser.
*
 * @see https://github.com/substack/node-browserify#browser-field
 */
export { default } from './server/html-to-dom';
export * from './types';

--------------------------------------------------------------------------------
/src/server/html-to-dom.ts:
--------------------------------------------------------------------------------
import { DomHandler } from 'domhandler';
import type { ParserOptions } from 'htmlparser2';
import { Parser } from 'htmlparser2';

import { unsetRootParent } from './utilities';

/**
 * Parses HTML string to DOM nodes in Node.js.
 *
 * This is the same method as `require('htmlparser2').parseDOM`
 *
 * @see https://github.com/fb55/htmlparser2/blob/v9.0.0/src/index.ts#L44-L46
 * @see https://github.com/fb55/domhandler/tree/v5.0.3#readme
 *
 * @param html - HTML markup.
 * @param options - Parser options, passed to both the handler and the parser.
 * @returns - DOM nodes (an empty array for an empty string).
 * @throws {TypeError} When `html` is not a string.
 */
export default function HTMLDOMParser(html: string, options?: ParserOptions) {
  if (typeof html !== 'string') {
    throw new TypeError('First argument must be a string.');
  }

  // nothing to parse for an empty string
  if (html.length === 0) {
    return [];
  }

  const domHandler = new DomHandler(undefined, options);
  const parser = new Parser(domHandler, options);
  parser.end(html);

  return unsetRootParent(domHandler.dom);
}

--------------------------------------------------------------------------------
/src/server/utilities.ts:
--------------------------------------------------------------------------------
import type { ChildNode } from 'domhandler';

import type { DOMNode } from '../types';

/**
 * Sets root parent to null.
 *
 * @param nodes - Nodes.
 * @returns - Nodes.
*/
export function unsetRootParent(nodes: ChildNode[]): DOMNode[] {
  // the same array is mutated in place and returned
  for (const rootNode of nodes) {
    rootNode.parent = null;
  }

  return nodes as DOMNode[];
}

--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------
import type { Comment, Element, ProcessingInstruction, Text } from 'domhandler';

export type { Comment, Element, ProcessingInstruction, Text };

export type DOMNode = Comment | Element | ProcessingInstruction | Text;

--------------------------------------------------------------------------------
/test/cases/complex.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="utf-8"> 5 | <title>Page Title 6 | 7 | 8 | 13 | 14 | 15 |
16 | 17 |
18 |

bar

19 |

20 | baz qux 21 | link 22 |

23 | 24 | 28 | 29 | 30 | baz 31 | 32 | 33 | -------------------------------------------------------------------------------- /test/cases/complex.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | A 8 | 9 | Your browser does not support inline SVG. 10 | 11 | -------------------------------------------------------------------------------- /test/cases/html.js: -------------------------------------------------------------------------------- 1 | module.exports = [ 2 | // html tags 3 | { 4 | name: 'empty html', 5 | data: '', 6 | }, 7 | { 8 | name: 'html with attribute', 9 | data: '', 10 | }, 11 | { 12 | name: 'html with empty head and body', 13 | data: '', 14 | }, 15 | { 16 | name: 'html with empty head', 17 | data: '', 18 | }, 19 | { 20 | name: 'html with empty body', 21 | data: '', 22 | }, 23 | { 24 | name: 'unclosed html and head tags', 25 | data: '', 26 | }, 27 | { 28 | name: 'unclosed html and body tags', 29 | data: '', 30 | }, 31 | { 32 | name: 'unclosed html, head, and body tags', 33 | data: '', 34 | }, 35 | 36 | // head and body tags 37 | { 38 | name: 'unclosed head', 39 | data: '', 40 | }, 41 | { 42 | name: 'empty head', 43 | data: '', 44 | }, 45 | { 46 | name: 'head with title', 47 | data: 'Text', 48 | }, 49 | { 50 | name: 'empty head and body', 51 | data: '', 52 | }, 53 | { 54 | name: 'unclosed head and body', 55 | data: '', 56 | }, 57 | { 58 | name: 'unclosed title', 59 | data: '', 60 | }, 61 | { 62 | name: 'empty title', 63 | data: '<title>', 64 | }, 65 | { 66 | name: 'title with text', 67 | data: 'text', 68 | }, 69 | { 70 | name: 'title with text as tags', 71 | data: '<b>text</b>', 72 | }, 73 | { 74 | name: 'unclosed body', 75 | data: '', 76 | }, 77 | { 78 | name: 'empty body', 79 | data: '', 80 | }, 81 | { 82 | name: 'capitalized body', 83 | data: '', 84 | }, 85 | { 86 | name: 'body with paragraph', 87 | data: '

text

', 88 | }, 89 | { 90 | name: 'head and body with newline', 91 | data: 'text', 92 | }, 93 | { 94 | name: 'head and body with whitespace and newlines', 95 | data: 'hellotext', 96 | }, 97 | { 98 | name: 'body with whitespace and newline', 99 | data: 'text', 100 | }, 101 | 102 | // common tags 103 | { 104 | name: 'empty div', 105 | data: '
', 106 | }, 107 | { 108 | name: 'empty paragraph', 109 | data: '

', 110 | }, 111 | { 112 | name: 'paragraph with text', 113 | data: '

text

', 114 | }, 115 | { 116 | name: 'meta with attribute', 117 | data: '', 118 | }, 119 | { 120 | name: 'meta with closing tag', 121 | data: '', 122 | }, 123 | { 124 | name: 'textarea with value', 125 | data: '', 126 | }, 127 | { 128 | name: 'multiple spans', 129 | data: '12', 130 | }, 131 | 132 | // void (self-closing) tags 133 | { 134 | name: 'void', 135 | data: '
', 136 | }, 137 | { 138 | name: 'self-closing void', 139 | data: '
', 140 | }, 141 | { 142 | name: 'input with attributes', 143 | data: '', 144 | }, 145 | { 146 | name: 'image', 147 | data: 'Image', 148 | }, 149 | { 150 | name: 'multiple void', 151 | data: '
', 152 | }, 153 | 154 | // tag attributes 155 | { 156 | name: 'h1 with id attribute', 157 | data: '

', 158 | }, 159 | { 160 | name: 'h2 with class attribute', 161 | data: '

', 162 | }, 163 | { 164 | name: 'em with style attribute', 165 | data: '', 166 | }, 167 | { 168 | name: 'data attribute', 169 | data: '
', 170 | }, 171 | { 172 | name: 'event attribute', 173 | data: '
', 174 | }, 175 | { 176 | name: 'span with multiple attributes', 177 | data: '', 178 | }, 179 | { 180 | name: 'hr with multiple attributes', 181 | data: '
', 182 | }, 183 | 184 | // adjacent tags 185 | { 186 | name: 'sibling', 187 | data: '
  • brother
  • sister
  • ', 188 | }, 189 | 190 | // nested tags 191 | { 192 | name: 'nested definition list', 193 | data: '
    foo
    barbaz
    ', 194 | }, 195 | { 196 | name: 'nested unordered list', 197 | data: '', 198 | }, 199 | 200 | // script tag 201 | { 202 | name: 'empty script', 203 | data: '', 204 | }, 205 | { 206 | name: 'script', 207 | data: '', 208 | }, 209 | { 210 | name: 'script with json', 211 | data: '', 212 | }, 213 | 214 | // noscript tag 215 | { 216 | name: 'empty noscript', 217 | data: '', 218 | }, 219 | { 220 | name: 'noscript with text', 221 | data: '', 222 | }, 223 | { 224 | name: 'noscript with p', 225 | data: '', 226 | get skip() { 227 | // client parser renders noscript incorrectly in jsdom 228 | // template renders noscript children as text instead of nodes 229 | var isJSDOM = typeof window === 'object' && window.name === 'nodejs'; 230 | return isJSDOM; 231 | }, 232 | }, 233 | 234 | // template tag 235 | { 236 | name: 'empty template', 237 | data: '', 238 | }, 239 | { 240 | name: 'template with content', 241 | data: '', 242 | }, 243 | 244 | // style tag 245 | { 246 | name: 'empty style', 247 | data: '', 248 | }, 249 | { 250 | name: 'style', 251 | data: '', 252 | }, 253 | 254 | // html5 tags 255 | { 256 | name: 'audio', 257 | data: '