├── .eslintrc.json ├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── codeql-analysis.yml │ ├── dependabot-automerge.yml │ ├── nodejs-test.yml │ └── site.yml ├── .gitignore ├── .npmignore ├── LICENSE ├── package-lock.json ├── package.json ├── readme.md ├── src ├── __fixtures__ │ ├── 01-basic.json │ ├── 02-single_tag_1.json │ ├── 03-single_tag_2.json │ ├── 04-unescaped_in_script.json │ ├── 05-tags_in_comment.json │ ├── 06-comment_in_script.json │ ├── 07-unescaped_in_style.json │ ├── 08-extra_spaces_in_tag.json │ ├── 09-unquoted_attrib.json │ ├── 10-singular_attribute.json │ ├── 11-text_outside_tags.json │ ├── 12-text_only.json │ ├── 13-comment_in_text.json │ ├── 14-comment_in_text_in_script.json │ ├── 15-non-verbose.json │ ├── 17-xml_namespace.json │ ├── 18-enforce_empty_tags.json │ ├── 19-ignore_empty_tags.json │ ├── 20-template_script_tags.json │ ├── 21-conditional_comments.json │ ├── 22-lowercase_tags.json │ ├── 23-dom-lvl1.json │ ├── 24-with-start-indices.json │ ├── 25-with-end-indices.json │ ├── 26-root-level-text.json │ └── 27-xml-no-special-tags.json ├── index.spec.ts ├── index.ts ├── node.spec.ts └── node.ts ├── tsconfig.eslint.json └── tsconfig.json /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["eslint:recommended", "prettier"], 3 | "env": { 4 | "node": true, 5 | "es6": true 6 | }, 7 | "rules": { 8 | "eqeqeq": [2, "smart"], 9 | "no-caller": 2, 10 | "dot-notation": 2, 11 | "no-var": 2, 12 | "prefer-const": 2, 13 | "prefer-arrow-callback": [2, { "allowNamedFunctions": true }], 14 | "arrow-body-style": [2, "as-needed"], 15 | "object-shorthand": 2, 16 | "prefer-template": 2, 17 | "one-var": [2, "never"], 18 | "prefer-destructuring": [2, { "object": true }], 19 | "capitalized-comments": 2, 20 | "multiline-comment-style": [2, "starred-block"], 21 | "spaced-comment": 2, 22 | "yoda": [2, "never"], 23 | "curly": [2, "multi-line"], 24 | "no-else-return": 2 25 | }, 26 | 
"overrides": [ 27 | { 28 | "files": "*.ts", 29 | "extends": [ 30 | "plugin:@typescript-eslint/eslint-recommended", 31 | "plugin:@typescript-eslint/recommended", 32 | "prettier" 33 | ], 34 | "parserOptions": { 35 | "sourceType": "module", 36 | "project": "./tsconfig.eslint.json" 37 | }, 38 | "rules": { 39 | "@typescript-eslint/prefer-for-of": 0, 40 | "@typescript-eslint/member-ordering": 0, 41 | "@typescript-eslint/explicit-function-return-type": 0, 42 | "@typescript-eslint/no-unused-vars": 0, 43 | "@typescript-eslint/no-non-null-assertion": 0, 44 | "@typescript-eslint/no-use-before-define": [ 45 | 2, 46 | { "functions": false } 47 | ], 48 | "@typescript-eslint/consistent-type-definitions": [ 49 | 2, 50 | "interface" 51 | ], 52 | "@typescript-eslint/prefer-function-type": 2, 53 | "@typescript-eslint/no-unnecessary-type-arguments": 2, 54 | "@typescript-eslint/prefer-string-starts-ends-with": 2, 55 | "@typescript-eslint/prefer-readonly": 2, 56 | "@typescript-eslint/prefer-includes": 2, 57 | "@typescript-eslint/no-unnecessary-condition": 2, 58 | "@typescript-eslint/switch-exhaustiveness-check": 2, 59 | "@typescript-eslint/prefer-nullish-coalescing": 2 60 | } 61 | } 62 | ] 63 | } 64 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [fb55] 2 | tidelift: npm/domhandler 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: npm 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | versioning-strategy: increase 9 | - package-ecosystem: "github-actions" 10 | directory: "/" 11 | schedule: 12 | interval: daily 13 | -------------------------------------------------------------------------------- 
/.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [master] 9 | schedule: 10 | - cron: "0 0 * * 0" 11 | 12 | jobs: 13 | analyze: 14 | name: Analyze 15 | runs-on: ubuntu-latest 16 | permissions: 17 | actions: read 18 | contents: read 19 | security-events: write 20 | 21 | steps: 22 | - name: Checkout repository 23 | uses: actions/checkout@v4 24 | 25 | - name: Initialize CodeQL 26 | uses: github/codeql-action/init@v3 27 | with: 28 | languages: "javascript" 29 | 30 | - name: Perform CodeQL Analysis 31 | uses: github/codeql-action/analyze@v3 32 | -------------------------------------------------------------------------------- /.github/workflows/dependabot-automerge.yml: -------------------------------------------------------------------------------- 1 | # Based on https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/automating-dependabot-with-github-actions#enable-auto-merge-on-a-pull-request 2 | name: Dependabot auto-merge 3 | on: pull_request_target 4 | 5 | permissions: 6 | pull-requests: write 7 | contents: write 8 | 9 | jobs: 10 | dependabot: 11 | runs-on: ubuntu-latest 12 | if: ${{ github.actor == 'dependabot[bot]' }} 13 | steps: 14 | - name: Dependabot metadata 15 | id: metadata 16 | uses: dependabot/fetch-metadata@v2.4.0 17 | with: 18 | github-token: "${{ secrets.GITHUB_TOKEN }}" 19 | - name: Enable auto-merge for Dependabot PRs 20 | # Automatically merge semver-patch and semver-minor PRs 21 | if: "${{ steps.metadata.outputs.update-type == 22 | 'version-update:semver-minor' || 23 | steps.metadata.outputs.update-type == 24 | 'version-update:semver-patch' }}" 25 | run: gh pr merge --auto --squash "$PR_URL" 26 | env: 27 | PR_URL: ${{github.event.pull_request.html_url}} 28 | 
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 29 | -------------------------------------------------------------------------------- /.github/workflows/nodejs-test.yml: -------------------------------------------------------------------------------- 1 | name: Node.js CI 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - "dependabot/**" 7 | pull_request: 8 | 9 | env: 10 | CI: true 11 | FORCE_COLOR: 2 12 | NODE_COV: "lts/*" # The Node.js version to run coveralls on 13 | 14 | permissions: 15 | contents: read 16 | 17 | jobs: 18 | lint: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v4 22 | - uses: actions/setup-node@v4 23 | with: 24 | node-version: lts/* 25 | cache: npm 26 | - run: npm ci 27 | - run: npm run lint 28 | 29 | test: 30 | permissions: 31 | checks: write # for coverallsapp/github-action to create new checks 32 | contents: read # for actions/checkout to fetch code 33 | name: Node ${{ matrix.node }} 34 | runs-on: ubuntu-latest 35 | 36 | strategy: 37 | fail-fast: false 38 | matrix: 39 | node: 40 | - 16 41 | - 18 42 | - 20 43 | - "lts/*" 44 | 45 | steps: 46 | - uses: actions/checkout@v4 47 | - name: Use Node.js ${{ matrix.node }} 48 | uses: actions/setup-node@v4 49 | with: 50 | node-version: ${{ matrix.node }} 51 | cache: npm 52 | - run: npm ci 53 | - run: npm run build --if-present 54 | 55 | - name: Run Jest 56 | run: npm run test:jest 57 | if: matrix.node != env.NODE_COV 58 | 59 | - name: Run Jest with coverage 60 | run: npm run test:jest -- --coverage 61 | if: matrix.node == env.NODE_COV 62 | 63 | - name: Run Coveralls 64 | uses: coverallsapp/github-action@v2.3.6 65 | if: matrix.node == env.NODE_COV 66 | continue-on-error: true 67 | with: 68 | github-token: "${{ secrets.GITHUB_TOKEN }}" 69 | -------------------------------------------------------------------------------- /.github/workflows/site.yml: -------------------------------------------------------------------------------- 1 | name: Deploy TypeDoc docs to GitHub Pages 2 | 3 | # Based on 
https://raw.githubusercontent.com/actions/starter-workflows 4 | 5 | on: 6 | # Runs on pushes targeting the master branch 7 | push: 8 | branches: [master] 9 | 10 | # Allows you to run this workflow manually from the Actions tab 11 | workflow_dispatch: 12 | 13 | env: 14 | CI: true 15 | FORCE_COLOR: 2 16 | 17 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 18 | permissions: 19 | contents: read 20 | pages: write 21 | id-token: write 22 | 23 | # Allow one concurrent deployment 24 | concurrency: 25 | group: "pages" 26 | cancel-in-progress: true 27 | 28 | jobs: 29 | # Build job 30 | build: 31 | runs-on: ubuntu-latest 32 | steps: 33 | - name: Checkout 34 | uses: actions/checkout@v4 35 | - name: Setup Node 36 | uses: actions/setup-node@v4 37 | with: 38 | node-version: lts/* 39 | cache: npm 40 | - name: Setup Pages 41 | id: pages 42 | uses: actions/configure-pages@v5 43 | - name: Install dependencies 44 | run: npm ci 45 | - name: Build docs 46 | run: npm run build:docs 47 | - name: Upload artifact 48 | uses: actions/upload-pages-artifact@v3 49 | with: 50 | path: ./docs 51 | 52 | # Deployment job 53 | deploy: 54 | environment: 55 | name: github-pages 56 | url: ${{ steps.deployment.outputs.page_url }} 57 | runs-on: ubuntu-latest 58 | needs: build 59 | steps: 60 | - name: Deploy to GitHub Pages 61 | id: deployment 62 | uses: actions/deploy-pages@v4 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | coverage/ 3 | lib/ 4 | docs/ 5 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | test 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 
Felix Böhm 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 11 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "domhandler", 3 | "version": "5.0.3", 4 | "description": "Handler for htmlparser2 that turns pages into a dom", 5 | "author": "Felix Boehm ", 6 | "funding": { 7 | "url": "https://github.com/fb55/domhandler?sponsor=1" 8 | }, 9 | "license": "BSD-2-Clause", 10 | "main": "lib/index.js", 11 | "types": "lib/index.d.ts", 12 | "module": "lib/esm/index.js", 13 | "exports": { 14 | "require": "./lib/index.js", 15 | "import": "./lib/esm/index.js" 16 | }, 17 | "sideEffects": false, 18 | "files": [ 19 | "lib" 20 | ], 21 | "scripts": { 22 | "test": "npm run test:jest && npm run lint", 23 | "test:jest": "jest", 24 | "lint": "npm run lint:es && npm run lint:prettier", 25 | "lint:es": "eslint --ignore-path .gitignore .", 26 | "lint:prettier": "npm run prettier -- --check", 27 | "format": "npm run format:es && npm run format:prettier", 28 | "format:es": "npm run lint:es -- --fix", 29 | "format:prettier": "npm run prettier -- --write", 30 | "prettier": "prettier \"**/*.{ts,md,json,yml}\" --ignore-path .gitignore", 31 | "build": "npm run build:cjs && npm run build:esm", 32 | "build:cjs": "tsc", 33 | "build:esm": "tsc --module esnext --target es2019 --outDir lib/esm && echo '{\"type\":\"module\"}' > lib/esm/package.json", 34 | "build:docs": "typedoc --hideGenerator --plugin typedoc-plugin-missing-exports src/index.ts", 35 | "prepare": "npm run build" 36 | }, 37 | "repository": { 38 | "type": "git", 39 | "url": "git://github.com/fb55/domhandler.git" 40 | }, 41 | "keywords": [ 42 | "dom", 43 | "htmlparser2" 44 | ], 45 | "engines": { 46 | "node": ">= 4" 47 | }, 48 | "dependencies": { 49 | "domelementtype": "^2.3.0" 50 | }, 51 | "devDependencies": { 52 | "@types/jest": "^29.5.14", 53 | "@types/node": "^22.15.27", 54 | "@typescript-eslint/eslint-plugin": "^8.33.0", 55 | 
"@typescript-eslint/parser": "^8.32.1", 56 | "eslint-config-prettier": "^10.1.5", 57 | "eslint": "^8.57.1", 58 | "htmlparser2": "^10.0.0", 59 | "jest": "^29.7.0", 60 | "prettier": "^3.5.3", 61 | "ts-jest": "^29.3.4", 62 | "typedoc-plugin-missing-exports": "^3.1.0", 63 | "typedoc": "^0.27.9", 64 | "typescript": "^5.8.3" 65 | }, 66 | "jest": { 67 | "preset": "ts-jest", 68 | "testEnvironment": "node", 69 | "moduleNameMapper": { 70 | "^(.*)\\.js$": [ 71 | "$1", 72 | "$1.js" 73 | ] 74 | } 75 | }, 76 | "prettier": { 77 | "tabWidth": 4 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # domhandler [![Node.js CI](https://github.com/fb55/domhandler/actions/workflows/nodejs-test.yml/badge.svg)](https://github.com/fb55/domhandler/actions/workflows/nodejs-test.yml) 2 | 3 | The DOM handler creates a tree containing all nodes of a page. 4 | The tree can be manipulated using the [domutils](https://github.com/fb55/domutils) 5 | or [cheerio](https://github.com/cheeriojs/cheerio) libraries and 6 | rendered using [dom-serializer](https://github.com/cheeriojs/dom-serializer). 7 | 8 | ## Usage 9 | 10 | ```javascript 11 | const handler = new DomHandler([ callback(err, dom), ] [ options ]); 12 | // const parser = new Parser(handler[, options]); 13 | ``` 14 | 15 | Available options are described below. 
16 | 17 | ## Example 18 | 19 | ```javascript 20 | const { Parser } = require("htmlparser2"); 21 | const { DomHandler } = require("domhandler"); 22 | const rawHtml = 23 | "Xyz "; 24 | const handler = new DomHandler((error, dom) => { 25 | if (error) { 26 | // Handle error 27 | } else { 28 | // Parsing completed, do something 29 | console.log(dom); 30 | } 31 | }); 32 | const parser = new Parser(handler); 33 | parser.write(rawHtml); 34 | parser.end(); 35 | ``` 36 | 37 | Output: 38 | 39 | ```javascript 40 | [ 41 | { 42 | data: "Xyz ", 43 | type: "text", 44 | }, 45 | { 46 | type: "script", 47 | name: "script", 48 | attribs: { 49 | language: "javascript", 50 | }, 51 | children: [ 52 | { 53 | data: "var foo = '';<", 54 | type: "text", 55 | }, 56 | ], 57 | }, 58 | { 59 | data: "", 5 | "expected": [ 6 | { 7 | "type": "tag", 8 | "name": "head", 9 | "attribs": {}, 10 | "children": [ 11 | { 12 | "data": " commented out tags Test", 13 | "type": "comment" 14 | } 15 | ] 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /src/__fixtures__/06-comment_in_script.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Script source in comment", 3 | "options": {}, 4 | "html": "", 5 | "expected": [ 6 | { 7 | "type": "script", 8 | "name": "script", 9 | "attribs": {}, 10 | "children": [ 11 | { 12 | "data": "", 13 | "type": "text" 14 | } 15 | ] 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /src/__fixtures__/07-unescaped_in_style.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Unescaped chars in style", 3 | "options": {}, 4 | "html": "", 5 | "expected": [ 6 | { 7 | "type": "style", 8 | "name": "style", 9 | "attribs": { 10 | "type": "text/css" 11 | }, 12 | "children": [ 13 | { 14 | "data": "\n body > p\n\t{ font-weight: bold; }", 15 | "type": "text" 16 | } 17 | ] 18 | } 
19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /src/__fixtures__/08-extra_spaces_in_tag.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Extra spaces in tag", 3 | "options": {}, 4 | "html": "the text", 5 | "expected": [ 6 | { 7 | "type": "tag", 8 | "name": "font", 9 | "attribs": { 10 | "size": "14" 11 | }, 12 | "children": [ 13 | { 14 | "data": "the text", 15 | "type": "text" 16 | } 17 | ] 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /src/__fixtures__/09-unquoted_attrib.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Unquoted attributes", 3 | "options": {}, 4 | "html": "the text", 5 | "expected": [ 6 | { 7 | "type": "tag", 8 | "name": "font", 9 | "attribs": { 10 | "size": "14" 11 | }, 12 | "children": [ 13 | { 14 | "data": "the text", 15 | "type": "text" 16 | } 17 | ] 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /src/__fixtures__/10-singular_attribute.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Singular attribute", 3 | "options": {}, 4 | "html": "