├── .npmignore ├── .gitignore ├── src ├── index.js ├── stringify.js └── parse.js ├── jest.config.js ├── rollup.config.js ├── types └── index.d.ts ├── package.json ├── LICENSE ├── README.md └── test └── parse.spec.js /.npmignore: -------------------------------------------------------------------------------- 1 | src/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | lib/ 3 | dist/ -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | export { parse } from './parse.js'; 2 | export { stringify } from './stringify.js'; -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('jest').Config} */ 2 | export default { 3 | testEnvironment: "jsdom" 4 | } 5 | -------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | input: 'src/index.js', 3 | output: [{ 4 | file: 'lib/index.js', 5 | format: 'cjs' 6 | }, { 7 | file: 'dist/index.js', 8 | format: 'es' 9 | }] 10 | } -------------------------------------------------------------------------------- /types/index.d.ts: -------------------------------------------------------------------------------- 1 | export interface IDom { 2 | type: string; 3 | content ? 
: string; 4 | voidElement: boolean; 5 | name: string; 6 | attrs: { type: 'attr' | 'directive', name: string, value: string}[]; 7 | children: IDom[]; 8 | } 9 | 10 | export declare function parse(html: string): IDom[]; 11 | export declare function stringify(doc: IDom[]): string; -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "html-parse-string", 3 | "version": "0.0.9", 4 | "description": "Utils for parsing and stringify html strings", 5 | "main": "lib/index.js", 6 | "module": "dist/index.js", 7 | "types": "types/index.d.ts", 8 | "type": "module", 9 | "scripts": { 10 | "build": "rollup -c", 11 | "prepublishOnly": "npm run build", 12 | "test": "NODE_OPTIONS='--experimental-vm-modules $NODE_OPTIONS' jest", 13 | "prepare": "npm run build" 14 | }, 15 | "repository": { 16 | "type": "git", 17 | "url": "git+https://github.com/ryansolid/html-string-parser.git" 18 | }, 19 | "author": "Ryan Carniato ", 20 | "license": "MIT", 21 | "bugs": { 22 | "url": "https://github.com/ryansolid/html-string-parser/issues" 23 | }, 24 | "homepage": "https://github.com/ryansolid/html-string-parser#readme", 25 | "devDependencies": { 26 | "rollup": "3.10.0", 27 | "jest": "29.5.0", 28 | "jest-environment-jsdom": "29.5.0" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ryan Carniato 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software 
is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # html-parse-string 2 | HTML parse and stringify utils 3 | 4 | ## Installation 5 | 6 | ``` 7 | npm install html-parse-string 8 | ``` 9 | 10 | ## IDom 11 | basic data structure 12 | ``` 13 | export interface IDom { 14 | type: string; 15 | content ? : string; 16 | voidElement: boolean; 17 | name: string; 18 | attrs: { type: 'attr' | 'directive', name: string, value: string}[]; 19 | children: IDom[]; 20 | } 21 | 22 | ``` 23 | 24 | ## parse 25 | parse html to idom array 26 | ``` 27 | const { parse, stringify } = require('html-parse-string'); 28 | const t = `
<div>this is div</div>
`; 29 | console.log(parse(t)); 30 | ``` 31 | get idom array 32 | ``` 33 | [ 34 | { 35 | type: "tag", 36 | name: "div", 37 | voidElement: false, 38 | attrs: [], 39 | children: [ 40 | { 41 | type: "text", 42 | content: "this is div", 43 | }, 44 | ], 45 | }, 46 | ]; 47 | ``` 48 | ## stringify 49 | stringify idom array to html 50 | ``` 51 | const { parse, stringify } = require('html-parse-string'); 52 | const t = `
<div>this is div</div>
`; 53 | const ast = parse(t); 54 | console.log(stringify(ast)); 55 | ``` 56 | get html string 57 | ``` 58 |
<div>this is div</div>
59 | ``` -------------------------------------------------------------------------------- /src/stringify.js: -------------------------------------------------------------------------------- 1 | // Based on package html-parse-stringify2 2 | // Expanded to handle webcomponents 3 | 4 | /** 5 | * @param {import('../types/index').IDom['attrs']} attrs 6 | * @returns {string} 7 | */ 8 | function attrString(attrs) { 9 | const buff = []; 10 | for (const attr of attrs) { 11 | buff.push(attr.name + '="' + attr.value.replace(/"/g, '"') + '"'); 12 | } 13 | if (!buff.length) { 14 | return ''; 15 | } 16 | return ' ' + buff.join(' '); 17 | }; 18 | 19 | /** 20 | * @param {string} buff 21 | * @param {import('../types/index').IDom} doc 22 | * @returns {string} 23 | */ 24 | function stringifier(buff, doc) { 25 | switch (doc.type) { 26 | case 'text': 27 | return buff + doc.content; 28 | case 'tag': 29 | buff += '<' + doc.name + (doc.attrs ? attrString(doc.attrs) : '') + (doc.voidElement ? '/>' : '>'); 30 | if (doc.voidElement) { 31 | return buff; 32 | } 33 | return buff + doc.children.reduce(stringifier, '') + ''; 34 | case 'comment': 35 | return buff += ''; 36 | } 37 | }; 38 | 39 | /** 40 | * @param {import('../types/index').IDom[]} doc 41 | * @returns {string} 42 | */ 43 | export function stringify(doc) { 44 | return doc.reduce(function (token, rootEl) { 45 | return token + stringifier('', rootEl); 46 | }, ''); 47 | }; -------------------------------------------------------------------------------- /test/parse.spec.js: -------------------------------------------------------------------------------- 1 | import { parse } from "../src"; 2 | 3 | describe("parse", () => { 4 | test("Simple div", () => { 5 | const html = "
"; 6 | const result = parse(html); 7 | expect(result).toEqual([ 8 | { 9 | type: "tag", 10 | name: "div", 11 | attrs: [ 12 | { name: "asd", value: "23", type: 'attr' }, 13 | { name: "qwe", value: "1243423", type: 'attr' } 14 | ], 15 | children: [], 16 | voidElement: false 17 | } 18 | ]); 19 | }); 20 | 21 | test("With dynamic content", () => { 22 | const html = "
###
"; 23 | const result = parse(html); 24 | expect(result).toEqual([ 25 | { 26 | type: "tag", 27 | name: "div", 28 | attrs: [ 29 | { name: "qwe", value: "#23#", type: 'attr' } 30 | ], 31 | children: [ 32 | { type: "text", content: "###" } 33 | ], 34 | voidElement: false 35 | } 36 | ]); 37 | }); 38 | 39 | test("With use effect", () => { 40 | const html = "
#3#
"; 41 | const result = parse(html); 42 | expect(result).toEqual([ 43 | { 44 | type: "tag", 45 | name: "div", 46 | attrs: [ 47 | { name: "use:#1#", value: "#2#", type: 'directive' } 48 | ], 49 | children: [ 50 | { type: "text", content: "#3#" } 51 | ], 52 | voidElement: false 53 | } 54 | ]); 55 | }); 56 | }); -------------------------------------------------------------------------------- /src/parse.js: -------------------------------------------------------------------------------- 1 | // Based on package html-parse-stringify2 2 | // Expanded to handle webcomponents 3 | 4 | const tagRE = /(?:|<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>)/g; 5 | 6 | // See https://regexr.com/6p8p0 7 | const attrRE = /(?:\s(?[^/\s><=]+?)(?=[\s/>]))|(?:(?\S+?)(?:\s*=\s*(?:(['"])(?[\s\S]*?)\3|(?[^\s>]+))))/g 8 | // ^ capture group 1: boolean attribute name (attributes without values) 9 | // ^ capture group 2: non-boolean attribute name 10 | // ^ capture group 4: non-boolean attribute value with quotes 11 | // ^ capture group 5: non-boolean attribute value without quotes 12 | // TODO 13 | // - "/" values in the middle of the HTML tag (they don't self-close the element, but skipped) 14 | // - What other cases? 
15 | 16 | 17 | const lookup = { 18 | area: true, 19 | base: true, 20 | br: true, 21 | col: true, 22 | embed: true, 23 | hr: true, 24 | img: true, 25 | input: true, 26 | keygen: true, 27 | link: true, 28 | menuitem: true, 29 | meta: true, 30 | param: true, 31 | source: true, 32 | track: true, 33 | wbr: true 34 | }; 35 | 36 | function parseTag(/**@type {string}*/tag) { 37 | let i = 0; 38 | const res = { 39 | type: 'tag', 40 | name: '', 41 | voidElement: false, 42 | attrs: [], 43 | children: [] 44 | }; 45 | const tagMatch = tag.match(/<\/?([^\s]+?)[/\s>]/) 46 | if (tagMatch) { 47 | res.name = tagMatch[1] 48 | if ( 49 | lookup[tagMatch[1].toLowerCase()] || 50 | tag.charAt(tag.length - 2) === '/' 51 | ) { 52 | res.voidElement = true 53 | } 54 | 55 | // handle comment tag 56 | if (res.name.startsWith('!--')) { 57 | const endIndex = tag.indexOf('-->') 58 | return { 59 | type: 'comment', 60 | comment: endIndex !== -1 ? tag.slice(4, endIndex) : '', 61 | } 62 | } 63 | } 64 | 65 | const reg = new RegExp(attrRE) 66 | 67 | for (const match of tag.matchAll(reg)) { 68 | // TODO named groups method not working yet, groups is undefined in tests (maybe not out in Node.js yet) 69 | // const groups = match.groups 70 | // res.attrs[groups.boolean || groups.name] = groups.value1 || groups.value2 || "" 71 | if ((match[1] || match[2]).startsWith('use:')) { 72 | res.attrs.push({ type: 'directive', name: match[1] || match[2], value: match[4] || match[5] || '' }); 73 | } else { 74 | res.attrs.push({ type: 'attr', name: match[1] || match[2], value: match[4] || match[5] || '' }); 75 | } 76 | } 77 | 78 | return res 79 | }; 80 | 81 | // common logic for pushing a child node onto a list 82 | function pushTextNode(list, html, start) { 83 | // calculate correct end of the content slice in case there's 84 | // no tag after the text node. 85 | const end = html.indexOf('<', start); 86 | const content = html.slice(start, end === -1 ? 
void 0 : end); 87 | if (!/^\s*$/.test(content)) { 88 | list.push({ 89 | type: 'text', 90 | content: content 91 | }); 92 | } 93 | }; 94 | 95 | function pushCommentNode(list, tag) { 96 | // calculate correct end of the content slice in case there's 97 | // no tag after the text node. 98 | const content = tag.replace('', ''); 99 | if (!/^\s*$/.test(content)) { 100 | list.push({ 101 | type: 'comment', 102 | content: content 103 | }); 104 | } 105 | }; 106 | 107 | 108 | export function parse(html) { 109 | const result = []; 110 | let current = void 0; 111 | let level = -1; 112 | const arr = []; 113 | const byTag = {}; 114 | html.replace(tagRE, (tag, index) => { 115 | const isOpen = tag.charAt(1) !== '/'; 116 | const isComment = tag.slice(0, 4) === '