├── .editorconfig ├── .eslintrc ├── .github └── workflows │ ├── bump-build-release.yml │ ├── coverage-badge.yml │ └── default.yml ├── .gitignore ├── .nvmrc ├── .prettierignore ├── .prettierrc ├── LICENSE ├── README.md ├── badges ├── coverage-branches.svg ├── coverage-functions.svg ├── coverage-jest coverage.svg ├── coverage-lines.svg └── coverage-statements.svg ├── jest.config.js ├── package-lock.json ├── package.json ├── src ├── index.ts └── lib │ ├── Dom.ts │ ├── Node.ts │ └── NodeAttribute.ts ├── test ├── firstChild.test.ts ├── getAttribute.test.ts ├── getElementById.test.ts ├── getElementsByAttribute.test.ts ├── getElementsByClassName.test.ts ├── getElementsByName.test.ts ├── getElementsByTagName.test.ts ├── innerHTML.test.ts ├── invalidHTML.test.ts ├── jest-extend.ts ├── lastChild.test.ts ├── outerHTML.test.ts ├── parentNode.test.ts ├── textContent.test.ts └── types.d.ts └── tsconfig.json /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_style = space 6 | indent_size = 2 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "airbnb-base", 4 | "airbnb-typescript/base", 5 | "prettier", 6 | "plugin:sonarjs/recommended" 7 | ], 8 | "parserOptions": { 9 | "project": "./tsconfig.json" 10 | }, 11 | "rules": { 12 | "import/prefer-default-export": 0, 13 | "@typescript-eslint/no-use-before-define": 0, 14 | "complexity": [ 15 | "error", 16 | 6 17 | ], 18 | "sonarjs/cognitive-complexity": [ 19 | "error", 20 | 7 21 | ], 22 | "@typescript-eslint/naming-convention": [ 23 | "error", 24 | { 25 | "selector": "variable", 26 | "format": ["camelCase"], 27 | "leadingUnderscore": "forbid", 28 | "trailingUnderscore": "forbid" 29 | }, 30 | { 31 | "selector": "function", 32 | "format": ["camelCase", "PascalCase"], 33 | "leadingUnderscore": "forbid", 34 | "trailingUnderscore": "forbid" 35 | } 36 | ], 37 | "no-plusplus": ["error", { "allowForLoopAfterthoughts": true }], 38 | "no-restricted-syntax": [ 39 | 0, 40 | { 41 | "selector": "ForOfStatement" 42 | } 43 | ] 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /.github/workflows/bump-build-release.yml: -------------------------------------------------------------------------------- 1 | name: Build and release 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | bump-version: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout source code 14 | uses: actions/checkout@v3 15 | 16 | - name: Automated Version Bump 17 | uses: phips28/gh-action-bump-version@master 18 | with: 19 | tag-prefix: 'v' 20 | 21 | - name: Read nvmrc 22 | id: read-nvmrc 23 | run: echo "version=$(cat .nvmrc)" >> $GITHUB_OUTPUT 24 | shell: bash 25 | 26 | - name: Setup Node.js, Build, Publish 27 | uses: actions/setup-node@v3 28 | with: 29 | node-version: ${{ steps.read-nvmrc.outputs.version }} 30 | - run: echo "//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}" >> ~/.npmrc 31 | - run: npm ci 32 | - run: npm run build 33 | - run: npm publish 34 | env: 35 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 36 | -------------------------------------------------------------------------------- /.github/workflows/coverage-badge.yml: -------------------------------------------------------------------------------- 1 | name: Generate coverage badge 2 | 3 | on: 4 | workflow_run: 5 | workflows: [Build and release] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | coverage-badge: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout source code 15 | uses: actions/checkout@v3 16 | 17 | - name: Read nvmrc 18 | id: read-nvmrc 19 | run: echo "version=$(cat .nvmrc)" >> $GITHUB_OUTPUT 20 | shell: bash 21 | 22 | - name: Setup Node.js, Build, Publish 23 | uses: actions/setup-node@v3 24 | with: 25 | node-version: ${{ steps.read-nvmrc.outputs.version }} 26 | - run: npm ci 27 | - run: npm test 28 | 29 | - name: Generating coverage badges 30 | uses: jpb06/jest-badges-action@latest 31 | with: 32 | branches: main 33 | 34 | env: 35 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 36 | -------------------------------------------------------------------------------- /.github/workflows/default.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: pull_request 3 | jobs: 4 | static-checks: 5 | runs-on: ubuntu-latest 6 | name: Static checks 7 | 8 | steps: 9 | - name: Load current commit 10 | uses: actions/checkout@v3 11 | with: 12 | token: ${{ secrets.GITHUB_TOKEN }} 13 | 14 | - name: Read nvmrc 15 | id: read-nvmrc 16 | run: echo "version=$(cat .nvmrc)" >> $GITHUB_OUTPUT 17 | shell: bash 18 | 19 | - name: Setup Node.js 20 | uses: actions/setup-node@v3 21 | with: 22 | node-version: ${{ steps.read-nvmrc.outputs.version }} 23 | 24 | - name: Install dependencies 25 | run: npm ci 26 | 27 | - name: eslint 28 | run: npm run lint 29 | 30 | - name: prettier 31 | run: npm run prettier 32 | 33 | tests: 34 | runs-on: ubuntu-latest 35 | name: Tests 36 | 37 | steps: 38 | - name: Load current commit 39 | uses: actions/checkout@v3 40 | with: 41 | token: ${{ secrets.GITHUB_TOKEN }} 42 | 43 | - name: Read nvmrc 44 | id: read-nvmrc 45 | run: echo "version=$(cat .nvmrc)" >> $GITHUB_OUTPUT 46 | shell: bash 47 | 48 | - name: Setup Node.js 49 | uses: actions/setup-node@v3 50 | with: 51 | node-version: ${{ steps.read-nvmrc.outputs.version }} 52 | 53 | - name: Install dependencies 54 | run: npm ci 55 | 56 | - name: Run tests and check coverage 57 | run: npm test 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | node_modules 3 | dist 4 | coverage 5 | -------------------------------------------------------------------------------- /.nvmrc: -------------------------------------------------------------------------------- 1 | v18 2 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | .eslintrc 2 | coverage/* 3 | README.md 4 | tsconfig.json 5 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "all", 3 | "singleQuote": true, 4 | "printWidth": 100 5 | } 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015(s), Konstantin Ershov 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dom-parser 2 | 3 | Fast, tiny, zero-dependency DOM parser based on RegExps 4 | 5 | ![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/ershov-konst/dom-parser/bump-build-release.yml) 6 | ![Jest coverage](./badges/coverage-jest%20coverage.svg) 7 | ![npm](https://img.shields.io/npm/dw/dom-parser) 8 | ![GitHub](https://img.shields.io/github/license/ershov-konst/dom-parser) 9 | 10 | 11 | ## Installation 12 | 13 | npm install dom-parser 14 | 15 | ## Usage 16 | ```typescript 17 | import { parseFromString } from 'dom-parser'; 18 | 19 | const html = await fs.readFileAsync('htmlToParse.html'); 20 | 21 | // Getting DOM model 22 | const dom = parseFromString(html); 23 | 24 | // Searching Nodes 25 | const rootNode = dom.getElementById('rootNode'); 26 | const childNodes = rootNode.getElementsByClassName('childNodeClass'); 27 | 28 | ``` 29 | 30 | ## API 31 | 32 | ### Dom 33 | 34 | #### Implemented methods 35 | 36 | * getElementById 37 | * getElementsByClassName 38 | * getElementsByTagName 39 | * getElementsByName 40 | 41 | ### Node 42 | 43 | #### Implemented properties 44 | 45 | * nodeType 46 | * nodeName 47 | * childNodes 48 | * firstChild 49 | * lastChild 50 | * parentNode 51 | * attributes 52 | * innerHTML 53 | * outerHTML 54 | * textContent 55 | 56 | #### Implemented methods 57 | 58 | * getAttribute 59 | * getElementById 60 | * getElementsByClassName 61 | * getElementsByTagName 62 | * getElementsByName 63 | 64 | Usage - https://developer.mozilla.org/en-US/docs/Web/API/HTMLElement 65 | 66 | 67 | ## Contributing 68 | 69 | Issues and pull requests are welcome! 70 | -------------------------------------------------------------------------------- /badges/coverage-branches.svg: -------------------------------------------------------------------------------- 1 | branches: 95.87%branches95.87% -------------------------------------------------------------------------------- /badges/coverage-functions.svg: -------------------------------------------------------------------------------- 1 | functions: 100%functions100% -------------------------------------------------------------------------------- /badges/coverage-jest coverage.svg: -------------------------------------------------------------------------------- 1 | jest coverage: 98.97%jest coverage98.97% -------------------------------------------------------------------------------- /badges/coverage-lines.svg: -------------------------------------------------------------------------------- 1 | lines: 100%lines100% -------------------------------------------------------------------------------- /badges/coverage-statements.svg: -------------------------------------------------------------------------------- 1 | statements: 100%statements100% -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('ts-jest').JestConfigWithTsJest} */ 2 | module.exports = { 3 | preset: 'ts-jest', 4 | testEnvironment: 'node', 5 | setupFilesAfterEnv: ['/test/jest-extend.ts'], 6 | testPathIgnorePatterns: ['/node_modules/', '/dist'], 7 | coverageReporters: ['clover', 'json', 'lcov', 'text', 'json-summary'], 8 | coverageThreshold: { 9 | global: { 10 | statements: 100, 11 | branches: 95, 12 | functions: 100, 13 | lines: 100, 14 | }, 15 | }, 16 | }; 17 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dom-parser", 3 | "version": "1.1.5", 4 | "description": "Fast dom parser based on regexps", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/ershov-konst/dom-parser.git" 8 | }, 9 | "keywords": [ 10 | "domparser", 11 | "dom", 12 | "parser", 13 | "xml", 14 | "html", 15 | "xmlparser", 16 | "htmlparser", 17 | "scraping" 18 | ], 19 | "main": "dist/index.js", 20 | "devDependencies": { 21 | "@types/jest": "^29.5.6", 22 | "dom-compare": "https://github.com/ershov-konst/dom-compare.git", 23 | "eslint": "^8.52.0", 24 | "eslint-config-airbnb-base": "^15.0.0", 25 | "eslint-config-airbnb-typescript": "^17.1.0", 26 | "eslint-config-prettier": "^9.0.0", 27 | "eslint-plugin-sonarjs": "^0.21.0", 28 | "jest": "^29.7.0", 29 | "json-summary": "^1.3.0", 30 | "prettier": "^3.0.3", 31 | "ts-jest": "^29.1.1", 32 | "typescript": "^5.2.2" 33 | }, 34 | "scripts": { 35 | "lint": "eslint src", 36 | "prettier": "npx prettier --check .", 37 | "build": "tsc", 38 | "test": "jest --coverage" 39 | }, 40 | "author": "Konstantin Ershov", 41 | "license": "ISC", 42 | "files": [ 43 | "dist" 44 | ] 45 | } 46 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import { Dom } from './lib/Dom'; 2 | 3 | export function parseFromString(html: string) { 4 | return new Dom(html); 5 | } 6 | 7 | export * from './lib/Dom'; 8 | export * from './lib/Node'; 9 | -------------------------------------------------------------------------------- /src/lib/Dom.ts: -------------------------------------------------------------------------------- 1 | import { Node, NodeType } from './Node'; 2 | import { NodeAttribute } from './NodeAttribute'; 3 | 4 | const tagRegExp = 5 | /(<\/?(?:[a-z][a-z0-9]*:)?[a-z][a-z0-9-_.]*?[a-z0-9]*\s*(?:\s+[a-z0-9-_:]+(?:=(?:(?:'[\s\S]*?')|(?:"[\s\S]*?")))?)*\s*\/?>)|([^<]|<(?![a-z/]))*/gi; 6 | const attrRegExp = /\s[a-z0-9-_:]+\b(\s*=\s*('|")[\s\S]*?\2)?/gi; 7 | const splitAttrRegExp = /(\s[a-z0-9-_:]+\b\s*)(?:=(\s*('|")[\s\S]*?\3))?/gi; 8 | const startTagExp = /^<[a-z]/; 9 | const selfCloseTagExp = /\/>$/; 10 | const closeTagExp = /^<\//; 11 | const textNodeExp = /^[^<]/; 12 | const nodeNameExp = /<\/?((?:([a-z][a-z0-9]*):)?(?:[a-z](?:[a-z0-9-_.]*[a-z0-9])?))/i; 13 | const attributeQuotesExp = /^('|")|('|")$/g; 14 | const noClosingTagsExp = /^(?:area|base|br|col|command|embed|hr|img|input|link|meta|param|source)/i; 15 | 16 | export class Dom { 17 | rawHTML: string; 18 | 19 | constructor(rawHTML: string) { 20 | this.rawHTML = rawHTML; 21 | } 22 | 23 | private find(conditionFn: (node: Node) => boolean, findFirst: true): Node | null; 24 | private find(conditionFn: (node: Node) => boolean): Node[]; 25 | private find(conditionFn: (node: Node) => boolean, findFirst?: boolean) { 26 | const result = find(this.rawHTML, conditionFn, findFirst); 27 | return findFirst ? result[0] || null : result; 28 | } 29 | 30 | getElementsByClassName(className: string) { 31 | const expr = new RegExp(`^(.*?\\s)?${className}(\\s.*?)?$`); 32 | return this.find((node) => 33 | Boolean(node.attributes.length && expr.test(node.getAttribute('class') || '')), 34 | ); 35 | } 36 | 37 | getElementsByTagName(tagName: string) { 38 | return this.find((node) => node.nodeName.toUpperCase() === tagName.toUpperCase()); 39 | } 40 | 41 | getElementById(id: string): Node | null { 42 | return this.find((node) => node.getAttribute('id') === id, true); 43 | } 44 | 45 | getElementsByName(name: string) { 46 | return this.find((node) => node.getAttribute('name') === name); 47 | } 48 | 49 | getElementsByAttribute(attributeName: string, attributeValue: string) { 50 | return this.find((node) => node.getAttribute(attributeName) === attributeValue); 51 | } 52 | } 53 | 54 | // private 55 | 56 | function find(html: string, conditionFn: (node: Node) => boolean, onlyFirst: boolean = false) { 57 | const generator = domGenerator(html); 58 | const result: Node[] = []; 59 | 60 | for (const node of generator) { 61 | if (node && conditionFn(node)) { 62 | result.push(node); 63 | if (onlyFirst) { 64 | return result; 65 | } 66 | } 67 | } 68 | return result; 69 | } 70 | 71 | function* domGenerator(html: string) { 72 | const tags = getAllTags(html); 73 | let cursor: Node | null = null; 74 | 75 | for (let i = 0, l = tags.length; i < l; i++) { 76 | const tag = tags[i]; 77 | const node = createNode(tag, cursor); 78 | 79 | cursor = node || cursor; 80 | 81 | if (isElementComposed(cursor, tag)) { 82 | yield cursor; 83 | cursor = cursor.parentNode; 84 | } 85 | } 86 | 87 | while (cursor) { 88 | yield cursor; 89 | cursor = cursor.parentNode; 90 | } 91 | } 92 | 93 | function isElementComposed(element: Node | null, tag: string) { 94 | if (!tag) { 95 | return false; 96 | } 97 | const isCloseTag = closeTagExp.test(tag); 98 | const [, nodeName] = tag.match(nodeNameExp) || []; 99 | const isElementClosedByTag = isCloseTag && element.nodeName === nodeName; 100 | 101 | return isElementClosedByTag || element.isSelfCloseTag || element.nodeType === NodeType.text; 102 | } 103 | 104 | function getAllTags(html: string) { 105 | return html.match(tagRegExp) || []; 106 | } 107 | 108 | function createNode(tag: string, parentNode: Node | null): Node | null { 109 | const isTextNode = textNodeExp.test(tag); 110 | const isStartTag = startTagExp.test(tag); 111 | 112 | if (isTextNode) { 113 | return createTextNode(tag, parentNode); 114 | } 115 | 116 | if (isStartTag) { 117 | return createElementNode(tag, parentNode); 118 | } 119 | 120 | return null; 121 | } 122 | 123 | function createElementNode(tag: string, parentNode: Node | null) { 124 | const [, nodeName, namespace] = tag.match(nodeNameExp) || []; 125 | const selfCloseTag = selfCloseTagExp.test(tag) || noClosingTagsExp.test(nodeName); 126 | const attributes = parseAttributes(tag); 127 | 128 | const elementNode = new Node({ 129 | nodeType: NodeType.element, 130 | nodeName, 131 | namespace, 132 | attributes, 133 | childNodes: [], 134 | parentNode, 135 | selfCloseTag, 136 | }); 137 | 138 | parentNode?.childNodes?.push(elementNode); 139 | return elementNode; 140 | } 141 | 142 | function parseAttributes(tag: string) { 143 | return (tag.match(attrRegExp) || []).map((attributeString) => { 144 | splitAttrRegExp.lastIndex = 0; 145 | const exec = splitAttrRegExp.exec(attributeString) || []; 146 | const [, name = '', value = '']: string[] = exec; 147 | return new NodeAttribute({ 148 | name: name.trim(), 149 | value: value.trim().replace(attributeQuotesExp, ''), 150 | }); 151 | }); 152 | } 153 | 154 | function createTextNode(text: string, parentNode: Node | null) { 155 | const textNode = new Node({ 156 | nodeType: NodeType.text, 157 | nodeName: '#text', 158 | text, 159 | parentNode, 160 | }); 161 | parentNode?.childNodes?.push(textNode); 162 | return textNode; 163 | } 164 | -------------------------------------------------------------------------------- /src/lib/Node.ts: -------------------------------------------------------------------------------- 1 | // https://developer.mozilla.org/en-US/docs/Web/API/Node 2 | 3 | import { NodeAttribute } from './NodeAttribute'; 4 | 5 | export enum NodeType { 6 | element = 1, 7 | text = 3, 8 | } 9 | 10 | interface NodeProps { 11 | nodeType: NodeType; 12 | namespace?: string; 13 | selfCloseTag?: boolean; 14 | text?: string; 15 | nodeName: string; 16 | childNodes?: Node[]; 17 | parentNode: Node | null; 18 | attributes?: NodeAttribute[]; 19 | } 20 | 21 | export class Node { 22 | namespace: string | null; 23 | 24 | nodeType: NodeType; 25 | 26 | text: string | null; 27 | 28 | nodeName: string; 29 | 30 | childNodes: Node[]; 31 | 32 | parentNode: Node | null; 33 | 34 | attributes: NodeAttribute[]; 35 | 36 | readonly isSelfCloseTag: boolean; 37 | 38 | constructor({ 39 | nodeType, 40 | namespace, 41 | selfCloseTag, 42 | text, 43 | nodeName, 44 | childNodes, 45 | parentNode, 46 | attributes, 47 | }: NodeProps) { 48 | this.namespace = namespace || null; 49 | this.nodeType = nodeType; 50 | this.isSelfCloseTag = Boolean(selfCloseTag); 51 | this.text = text || null; 52 | this.nodeName = nodeType === NodeType.element ? nodeName : '#text'; 53 | this.childNodes = childNodes || []; 54 | this.parentNode = parentNode; 55 | this.attributes = attributes || []; 56 | } 57 | 58 | get firstChild() { 59 | return this.childNodes[0] || null; 60 | } 61 | 62 | get lastChild() { 63 | return this.childNodes[this.childNodes.length - 1] || null; 64 | } 65 | 66 | get innerHTML() { 67 | return this.childNodes.reduce( 68 | (html, node) => html + (node.nodeType === NodeType.text ? node.text : node.outerHTML), 69 | '', 70 | ); 71 | } 72 | 73 | get outerHTML() { 74 | if (this.nodeType === NodeType.text) { 75 | return this.textContent; 76 | } 77 | 78 | const attributesString = stringifyAttributes(this.attributes); 79 | const openTag = `<${this.nodeName}${attributesString.length ? ' ' : ''}${attributesString}${ 80 | this.isSelfCloseTag ? '/' : '' 81 | }>`; 82 | 83 | if (this.isSelfCloseTag) { 84 | return openTag; 85 | } 86 | 87 | const childs: string = this.childNodes.map((child) => child.outerHTML).join(''); 88 | const closeTag = ``; 89 | 90 | return [openTag, childs, closeTag].join(''); 91 | } 92 | 93 | get textContent(): string { 94 | if (this.nodeType === NodeType.text) { 95 | return this.text; 96 | } 97 | return this.childNodes 98 | .map((node) => node.textContent) 99 | .join('') 100 | .replace(/\x20+/g, ' '); 101 | } 102 | 103 | getAttribute(name: string) { 104 | const attribute = this.attributes.find((a) => a.name === name); 105 | return attribute ? attribute.value : null; 106 | } 107 | 108 | getElementsByTagName(tagName: string) { 109 | return searchElements(this, (elem) => elem.nodeName.toUpperCase() === tagName.toUpperCase()); 110 | } 111 | 112 | getElementsByClassName(className: string) { 113 | const expr = new RegExp(`^(.*?\\s)?${className}(\\s.*?)?$`); 114 | return searchElements(this, (node) => 115 | Boolean(node.attributes.length && expr.test(node.getAttribute('class') || '')), 116 | ); 117 | } 118 | 119 | getElementsByName(name: string) { 120 | return searchElements(this, (node) => 121 | Boolean(node.attributes.length && node.getAttribute('name') === name), 122 | ); 123 | } 124 | 125 | getElementById(id: string) { 126 | return searchElement(this, (node) => 127 | Boolean(node.attributes.length && node.getAttribute('id') === id), 128 | ); 129 | } 130 | 131 | static ELEMENT_NODE = NodeType.element; 132 | 133 | static TEXT_NODE = NodeType.text; 134 | } 135 | 136 | // private 137 | function searchElements(root: Node, conditionFn: (node: Node) => boolean): Node[] { 138 | if (root.nodeType === NodeType.text) { 139 | return []; 140 | } 141 | 142 | return root.childNodes.reduce((accumulator, childNode) => { 143 | if (childNode.nodeType !== NodeType.text && conditionFn(childNode)) { 144 | return [...accumulator, childNode, ...searchElements(childNode, conditionFn)]; 145 | } 146 | return [...accumulator, ...searchElements(childNode, conditionFn)]; 147 | }, []); 148 | } 149 | 150 | function searchElement(root: Node, conditionFn: (node: Node) => boolean): Node | null { 151 | for (let i = 0, l = root.childNodes.length; i < l; i++) { 152 | const childNode = root.childNodes[i]; 153 | if (conditionFn(childNode)) { 154 | return childNode; 155 | } 156 | 157 | const node = searchElement(childNode, conditionFn); 158 | if (node) { 159 | return node; 160 | } 161 | } 162 | 163 | return null; 164 | } 165 | 166 | function stringifyAttributes(attributes: NodeAttribute[]) { 167 | return attributes.map((elem) => elem.name + (elem.value ? `="${elem.value}"` : '')).join(' '); 168 | } 169 | -------------------------------------------------------------------------------- /src/lib/NodeAttribute.ts: -------------------------------------------------------------------------------- 1 | export interface NodeAttributeProps { 2 | name: string; 3 | value: string; 4 | } 5 | 6 | export class NodeAttribute { 7 | name: string; 8 | 9 | value: string; 10 | 11 | constructor({ name, value }: NodeAttributeProps) { 12 | this.name = name; 13 | this.value = value; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /test/firstChild.test.ts: -------------------------------------------------------------------------------- 1 | import { NodeType, parseFromString } from '../src'; 2 | 3 | describe('firstChild', () => { 4 | it('textNode as a child', () => { 5 | const html = ` 6 |
7 | 8 |
9 | 10 |
`; 11 | 12 | const dom = parseFromString(html); 13 | const { firstChild } = dom.getElementById('root'); 14 | 15 | expect(firstChild.nodeType).toEqual(NodeType.text); 16 | }); 17 | 18 | it('elementNode as a child', () => { 19 | const html = `
`; 20 | 21 | const dom = parseFromString(html); 22 | const { firstChild } = dom.getElementById('root'); 23 | 24 | expect(firstChild.getAttribute('id')).toEqual('first'); 25 | }); 26 | 27 | it('empty', () => { 28 | const html = `
`; 29 | 30 | const dom = parseFromString(html); 31 | const { firstChild } = dom.getElementById('root'); 32 | 33 | expect(firstChild).toBeNull(); 34 | }); 35 | }); 36 | -------------------------------------------------------------------------------- /test/getAttribute.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | test('Node.getAttribute', () => { 4 | const html = 5 | '
\n' + 6 | ' \n' + 7 | '
'; 8 | 9 | const dom = parseFromString(html); 10 | const outer = dom.getElementById('outer'); 11 | const inner = dom.getElementById('inner'); 12 | 13 | expect(outer.attributes).toHaveLength(3); 14 | expect(outer.getAttribute('id')).toEqual('outer'); 15 | expect(outer.getAttribute('data-a')).toEqual(''); 16 | expect(outer.getAttribute('ttt')).toEqual("asd'"); 17 | expect(outer.getAttribute('not-exists')).toEqual(null); 18 | expect(inner.getAttribute('href')).toEqual('/search?field=123'); 19 | }); 20 | -------------------------------------------------------------------------------- /test/getElementById.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | describe('getElementById', () => { 4 | describe('getting an element by id', () => { 5 | const html = ` 6 |
7 | text 8 |
9 | text 10 |
11 |
12 | text 13 |
14 | text 15 |
16 |
17 | text 18 |
19 | 20 |
21 |
`; 22 | 23 | it('Dom', () => { 24 | const dom = parseFromString(html); 25 | const element = dom.getElementById('example'); 26 | const notExistsElement = dom.getElementById('notExists'); 27 | const nestedElement = dom.getElementById('nestedExample'); 28 | 29 | expect(element).not.toBeNull(); 30 | expect(nestedElement).not.toBeNull(); 31 | expect(element.getAttribute('class')).toEqual('example with id'); 32 | expect(notExistsElement).toBeNull(); 33 | }); 34 | it('Node', () => { 35 | const dom = parseFromString(html); 36 | const root = dom.getElementById('root'); 37 | const element = root.getElementById('example'); 38 | const nestedElement = root.getElementById('nestedExample'); 39 | const notExistsElement = root.getElementById('notExists'); 40 | 41 | expect(element).not.toBeNull(); 42 | expect(nestedElement).not.toBeNull(); 43 | expect(element.getAttribute('class')).toEqual('example with id'); 44 | 45 | expect(notExistsElement).toBeNull(); 46 | }); 47 | }); 48 | 49 | describe('getting only first element', () => { 50 | const html = ` 51 |
52 | text 53 |
54 | text 55 |
56 |
57 | text 58 |
59 | text 60 |
61 |
62 | text 63 |
64 |
65 | text 66 |
67 | text 68 |
69 |
70 | text 71 |
72 | text 73 |
74 |
75 | text 76 |
77 | 78 |
79 |
80 |
81 |
`; 82 | 83 | it('Dom', () => { 84 | const dom = parseFromString(html); 85 | const element = dom.getElementById('example'); 86 | const nestedElement = dom.getElementById('nestedExample'); 87 | 88 | expect(element).not.toBeNull(); 89 | expect(nestedElement).not.toBeNull(); 90 | expect(element.getAttribute('class')).toEqual('first example'); 91 | }); 92 | 93 | it('Node', () => { 94 | const dom = parseFromString(html); 95 | const root = dom.getElementById('root'); 96 | const element = root.getElementById('example'); 97 | const nestedElement = root.getElementById('nestedExample'); 98 | 99 | expect(element).not.toBeNull(); 100 | expect(nestedElement).not.toBeNull(); 101 | expect(element.getAttribute('class')).toEqual('first example'); 102 | }); 103 | }); 104 | }); 105 | -------------------------------------------------------------------------------- /test/getElementsByAttribute.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | describe('getElementsByAttribute', () => { 4 | it('similar attribute names', () => { 5 | const html = ` 6 |
7 | text 8 | text 9 | text 10 |
11 | example 12 |
`; 13 | 14 | const dom = parseFromString(html); 15 | const elements = dom.getElementsByAttribute('name', 'example'); 16 | 17 | expect(elements).toHaveLength(1); 18 | }); 19 | 20 | it('spaces and case', () => { 21 | const html = ` 22 |
23 | text 24 |
25 | example 26 |
27 |
28 | 29 | 30 |
31 | 32 |
`; 33 | 34 | const dom = parseFromString(html); 35 | const elements = dom.getElementsByName('example'); 36 | 37 | expect(elements).toHaveLength(2); 38 | }); 39 | }); 40 | -------------------------------------------------------------------------------- /test/getElementsByClassName.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | describe('getElementsByClassName', () => { 4 | describe('spaces and case', () => { 5 | const html = ` 6 |
7 | text 8 |
9 | text 10 |
11 |
12 | text 13 |
14 | text 15 |
16 |
17 | text 18 |
19 |
20 |
`; 21 | 22 | it('Dom', () => { 23 | const dom = parseFromString(html); 24 | const elements = dom.getElementsByClassName('example'); 25 | expect(elements).toHaveLength(5); 26 | }); 27 | 28 | it('Node', () => { 29 | const dom = parseFromString(html); 30 | const root = dom.getElementById('root'); 31 | const elements = root.getElementsByClassName('example'); 32 | expect(elements).toHaveLength(5); 33 | }); 34 | }); 35 | 36 | describe('nested elements', () => { 37 | const html = ` 38 |
39 | text 40 |
41 | text 42 |
43 |
44 | text 45 |
46 | text 47 |
48 |
49 | text 50 |
51 |
52 | text 53 |
54 | text 55 |
56 |
57 | text 58 |
59 | text 60 |
61 |
62 | text 63 |
64 |
65 |
66 |
67 |
`; 68 | 69 | it('Dom', () => { 70 | const dom = parseFromString(html); 71 | const elements = dom.getElementsByClassName('example'); 72 | 73 | expect(elements).toHaveLength(12); 74 | }); 75 | 76 | it('Node', () => { 77 | const dom = parseFromString(html); 78 | const root = dom.getElementById('root'); 79 | const elements = root.getElementsByClassName('example'); 80 | 81 | expect(elements).toHaveLength(12); 82 | }); 83 | }); 84 | }); 85 | -------------------------------------------------------------------------------- /test/getElementsByName.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | describe('getElementsByName', () => { 4 | describe('spaces and case', () => { 5 | const html = ` 6 |
7 | text 8 | 9 |
10 | text 11 |
12 |
13 | 14 | 15 | 16 |
17 | 18 |
`; 19 | 20 | it('Dom', () => { 21 | const dom = parseFromString(html); 22 | const elements = dom.getElementsByName('example'); 23 | 24 | expect(elements).toHaveLength(2); 25 | }); 26 | 27 | it('Node', () => { 28 | const dom = parseFromString(html); 29 | const root = dom.getElementById('form'); 30 | const elements = root.getElementsByName('example'); 31 | 32 | expect(elements).toHaveLength(2); 33 | }); 34 | }); 35 | }); 36 | -------------------------------------------------------------------------------- /test/getElementsByTagName.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | describe('getElementsByTagName', () => { 4 | describe('divs and spans', () => { 5 | const html = ` 6 |
7 | text 8 |
9 | text 10 |
11 |
12 | text 13 |
14 | text 15 |
16 |
17 | text 18 |
19 |
20 |
`; 21 | 22 | it('Dom', () => { 23 | const dom = parseFromString(html); 24 | const divs = dom.getElementsByTagName('div'); 25 | const spans = dom.getElementsByTagName('span'); 26 | 27 | expect(divs).toHaveLength(8); 28 | expect(spans).toHaveLength(5); 29 | }); 30 | 31 | it('Node', () => { 32 | const dom = parseFromString(html); 33 | const root = dom.getElementById('root'); 34 | const divs = root.getElementsByTagName('div'); 35 | const spans = root.getElementsByTagName('span'); 36 | 37 | expect(divs).toHaveLength(7); 38 | expect(spans).toHaveLength(5); 39 | }); 40 | }); 41 | 42 | describe('custom tags', () => { 43 | const html = `
44 | 45 | foo 46 | foo 47 | 48 | 49 | bar 50 | bar 51 | 52 |
`; 53 | 54 | it('Dom', () => { 55 | const dom = parseFromString(html); 56 | const tipsColon = dom.getElementsByTagName('tip:link'); 57 | const tipsHyphen = dom.getElementsByTagName('tip-link'); 58 | const tipsUnderline = dom.getElementsByTagName('tip_link_head'); 59 | 60 | expect(tipsColon).toHaveLength(2); 61 | expect(tipsHyphen).toHaveLength(2); 62 | expect(tipsUnderline).toHaveLength(2); 63 | }); 64 | 65 | it('Node', () => { 66 | const dom = parseFromString(html); 67 | const root = dom.getElementById('root'); 68 | const tipsColon = root.getElementsByTagName('tip:link'); 69 | const tipsHyphen = root.getElementsByTagName('tip-link'); 70 | const tipsUnderline = root.getElementsByTagName('tip_link_head'); 71 | 72 | expect(tipsColon).toHaveLength(2); 73 | expect(tipsHyphen).toHaveLength(2); 74 | expect(tipsUnderline).toHaveLength(2); 75 | }); 76 | }); 77 | 78 | describe('tag names with similar nodeName', () => { 79 | const html = `
80 | 83 |
`; 84 | 85 | it('Dom', () => { 86 | const dom = parseFromString(html); 87 | const elements = dom.getElementsByTagName('a'); 88 | 89 | expect(elements).toHaveLength(1); 90 | expect(elements[0].nodeName).toEqual('a'); 91 | }); 92 | 93 | it('Node', () => { 94 | const dom = parseFromString(html); 95 | const root = dom.getElementById('root'); 96 | const elements = root.getElementsByTagName('a'); 97 | 98 | expect(elements).toHaveLength(1); 99 | expect(elements[0].nodeName).toEqual('a'); 100 | }); 101 | }); 102 | }); 103 | -------------------------------------------------------------------------------- /test/innerHTML.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | it('innerHTML', () => { 4 | const html = `
5 |
6 | 7 |
8 |
1
9 | with namespace 10 | custom one 11 |
12 |
13 |
14 |
15 |
`; 16 | const inner = `
17 | 18 |
19 |
1
20 | with namespace 21 | custom one 22 |
23 |
24 |
25 |
`; 26 | 27 | const dom = parseFromString(html); 28 | const ctn = dom.getElementById('root'); 29 | 30 | expect(ctn.innerHTML).toEqualDom(inner); 31 | }); 32 | -------------------------------------------------------------------------------- /test/invalidHTML.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | describe('invalid html', () => { 4 | it('unclosed tag', () => { 5 | const invalidHTML = `
6 |
7 | 8 |
9 |
1
10 | 11 |
12 |
`; 13 | const validHTML = `
14 |
15 | 16 |
17 |
1
18 |
19 |
20 |
21 |
`; 22 | 23 | const dom = parseFromString(invalidHTML); 24 | const ctn = dom.getElementById('root'); 25 | 26 | expect(ctn.outerHTML).toEqualDom(validHTML); 27 | }); 28 | 29 | it('excess closing tag', () => { 30 | const invalidHTML = `
31 | 32 |
33 |
1
34 |
35 |
36 | 37 | 38 |
39 |
1
40 |
41 |
42 | 43 | `; 44 | const validHTML = `
45 | 46 |
47 |
1
48 |
49 |
50 | 51 |
52 |
1
53 |
54 |
55 |
`; 56 | 57 | const dom = parseFromString(invalidHTML); 58 | const ctn = dom.getElementById('root'); 59 | 60 | expect(ctn.outerHTML).toEqualDom(validHTML); 61 | }); 62 | it('empty string', () => { 63 | const dom = parseFromString(''); 64 | const anyById = dom.getElementById('any'); 65 | const emptyList = dom.getElementsByClassName('any'); 66 | 67 | expect(anyById).toBeNull(); 68 | expect(emptyList).toHaveLength(0); 69 | }); 70 | }); 71 | -------------------------------------------------------------------------------- /test/jest-extend.ts: -------------------------------------------------------------------------------- 1 | import { compareStrings, GroupingReporter as compareReporter } from 'dom-compare'; 2 | 3 | expect.extend({ 4 | toEqualDom(received: string, expected: string) { 5 | const compareResult = compareStrings(received, expected); 6 | const pass = compareResult.getResult(); 7 | 8 | if (pass) { 9 | return { 10 | message: () => 'Dom matched', 11 | pass, 12 | }; 13 | } 14 | 15 | return { 16 | message: () => compareReporter.report(compareResult), 17 | pass, 18 | }; 19 | }, 20 | }); 21 | 22 | export default function globalSetup() { 23 | // DO NOTHING 24 | } 25 | -------------------------------------------------------------------------------- /test/lastChild.test.ts: -------------------------------------------------------------------------------- 1 | import { NodeType, parseFromString } from '../src'; 2 | 3 | describe('lastChild', () => { 4 | it('textNode as a child', () => { 5 | const html = ` 6 |
7 | 8 |
9 | 10 |
`; 11 | 12 | const dom = parseFromString(html); 13 | const { lastChild } = dom.getElementById('root'); 14 | 15 | expect(lastChild.nodeType).toEqual(NodeType.text); 16 | }); 17 | 18 | it('elementNode as a child', () => { 19 | const html = `
`; 20 | 21 | const dom = parseFromString(html); 22 | const { lastChild } = dom.getElementById('root'); 23 | 24 | expect(lastChild.getAttribute('id')).toEqual('last'); 25 | }); 26 | 27 | it('empty', () => { 28 | const html = `
`; 29 | 30 | const dom = parseFromString(html); 31 | const { lastChild } = dom.getElementById('root'); 32 | 33 | expect(lastChild).toBeNull(); 34 | }); 35 | }); 36 | -------------------------------------------------------------------------------- /test/outerHTML.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | describe('outerHTML', () => { 4 | it('exact html', () => { 5 | const html = `
6 |
7 | 8 |
9 |
1
10 |
11 | 12 | with namespace 13 | custom one 14 |
15 |
16 |
17 |
`; 18 | 19 | const dom = parseFromString(html); 20 | const ctn = dom.getElementById('root'); 21 | 22 | expect(ctn.outerHTML).toEqual(html); 23 | }); 24 | it('auto closing tags', () => { 25 | const initialHtml = `
26 |
27 | 28 |
29 |
`; 30 | const correctedHtml = `
31 |
32 | 33 |
34 |
`; 35 | 36 | const dom = parseFromString(initialHtml); 37 | const ctn = dom.getElementById('root'); 38 | 39 | expect(ctn.outerHTML).toEqual(correctedHtml); 40 | }); 41 | it('svg', () => { 42 | const svg = ` 43 | 44 | 45 | `; 46 | 47 | const dom = parseFromString(svg); 48 | const ctn = dom.getElementById('mask'); 49 | 50 | expect(ctn.outerHTML).toEqual(svg); 51 | }); 52 | }); 53 | -------------------------------------------------------------------------------- /test/parentNode.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | describe('parentNode', () => { 4 | it('entire document', () => { 5 | const html = ` 6 | 7 | 8 | 9 | 10 |
11 |

Example Domain

12 |

This domain is established to be used for illustrative examples in documents. You may use this 13 | domain in examples without prior coordination or asking for permission.

14 |

More information...

15 |
16 | 17 | `; 18 | 19 | const dom = parseFromString(html); 20 | const p = dom.getElementById('text'); 21 | 22 | expect(p.textContent).toEqual('More information...'); 23 | expect(p.parentNode.getAttribute('id')).toEqual('container'); 24 | }); 25 | 26 | it('fragment', () => { 27 | const html = ` 28 |
29 |

Example Domain

30 |

This domain is established to be used for illustrative examples in documents. You may use this 31 | domain in examples without prior coordination or asking for permission.

32 |

More information...

33 |
34 |
`; 35 | 36 | const dom = parseFromString(html); 37 | const text = dom.getElementById('text'); 38 | const div = dom.getElementById('div'); 39 | 40 | expect(text).not.toBeNull(); 41 | expect(text.textContent).toEqual('More information...'); 42 | expect(text.parentNode.getAttribute('id')).toEqual('container'); 43 | 44 | expect(div).not.toBeNull(); 45 | expect(div.parentNode.getAttribute('id')).toEqual('container'); 46 | }); 47 | }); 48 | -------------------------------------------------------------------------------- /test/textContent.test.ts: -------------------------------------------------------------------------------- 1 | import { parseFromString } from '../src'; 2 | 3 | describe('textContent', () => { 4 | it('composed', () => { 5 | const html = `
6 |
7 | some text 8 | 9 |
10 |
123
11 |
12 | some text 13 |
14 |
15 |
`; 16 | 17 | const dom = parseFromString(html); 18 | const ctn = dom.getElementById('root'); 19 | 20 | expect(ctn.textContent).toEqual('\n \n some text\n \n \n 123 \n \n some text\n \n \n '); 21 | }); 22 | 23 | it('xml', () => { 24 | const html = ` 25 | 26 | 27 | Example RSS 28 | 29 | 30 | Example RSS 2 31 | 32 | 33 | Example RSS 3 34 | 35 | `; 36 | 37 | const dom = parseFromString(html); 38 | const titles = dom.getElementsByTagName('title'); 39 | 40 | expect(titles).toHaveLength(3); 41 | expect(titles[0].textContent).toEqual('Example RSS'); 42 | expect(titles[1].textContent).toEqual('Example RSS 2'); 43 | expect(titles[2].textContent).toEqual('Example RSS 3'); 44 | }); 45 | }); 46 | -------------------------------------------------------------------------------- /test/types.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'dom-compare' { 2 | interface CompareResult { 3 | getResult: () => boolean; 4 | } 5 | 6 | class GroupingReporter { 7 | static report: (compareResult: CompareResult) => string; 8 | } 9 | 10 | function compareStrings(a: string, b: string): CompareResult; 11 | } 12 | 13 | declare namespace jest { 14 | interface Matchers { 15 | toEqualDom(expected: string): R; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "ES6", 5 | "outDir": "./dist", 6 | "declaration": true 7 | }, 8 | "include": [ 9 | "src", 10 | "test/types.d.ts" 11 | ] 12 | } 13 | --------------------------------------------------------------------------------