├── .github └── workflows │ └── node.js.yml ├── .gitignore ├── LICENSE ├── README.md ├── index.js ├── jsconfig.json ├── package-lock.json ├── package.json └── test.js /.github/workflows/node.js.yml: -------------------------------------------------------------------------------- 1 | # This workflow will do a clean installation of node dependencies, cache/restore them, build the source code and run tests across different versions of node 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-nodejs 3 | 4 | name: Node.js CI 5 | 6 | on: 7 | push: 8 | branches: [master] 9 | pull_request: 10 | branches: [master] 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | 16 | strategy: 17 | matrix: 18 | node-version: [18, 20, 22] 19 | # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Use Node.js ${{ matrix.node-version }} 24 | uses: actions/setup-node@v4 25 | with: 26 | node-version: ${{ matrix.node-version }} 27 | cache: npm 28 | - run: npm ci 29 | - run: npm run build --if-present 30 | - run: npm test 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | *.d.ts 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Mohamed Akram 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons 
to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # html-format 2 | 3 | Format HTML strings by indenting, wrapping, and removing unnecessary 4 | whitespace while preserving newlines. 5 | 6 | ## Install 7 | 8 | npm install html-format 9 | 10 | ## Usage 11 | 12 | ```javascript 13 | import format from "html-format"; 14 | 15 | const html = `\ 16 | 17 |
18 | 19 | `; 20 | 21 | // indent = 2 spaces (default), width = 80 characters (default) 22 | format(html) == 23 | `\ 24 | 25 |
26 | 27 | `; 28 | 29 | // indent = 4 spaces, width = 80 characters (default) 30 | format(html, " ".repeat(4)) == 31 | `\ 32 | 33 |
34 | 35 | `; 36 | 37 | // indent = 4 spaces, width = 20 characters 38 | format(html, " ".repeat(4), 20) == 39 | `\ 40 | 41 |
43 |
/* Surrounding dump text that shares these physical lines (kept verbatim):
44 | 45 | `; 46 | ``` 47 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
*/

// Tag names start with an ASCII letter and run until "/", whitespace or ">".
const tagName = String.raw`[A-Za-z][^/\s>]*`;

// Preserve strings in templates and such
// Avoid apostrophes and unintentional captures
const doubleQuotedString = String.raw`\B"(?:\\[^<>\n]|[^\\"<>\n])*"\B`;
const singleQuotedString = String.raw`\B'(?:\\[^<>\n]|[^\\'<>\n])*'\B`;
const quotedString = String.raw`${doubleQuotedString}|${singleQuotedString}`;

// The group names below are read back through `groups.*` inside `format`.
const quotedAttrValue = String.raw`"(?<quotedAttrValue>[^"]*)"`;
const singleQuotedAttrValue = String.raw`'(?<singleQuotedAttrValue>[^']*)'`;
// https://mothereff.in/unquoted-attributes
const unquotedAttrValue = String.raw`(?<unquotedAttrValue>[^\s"'\`=<>]+)`;

const attrName = String.raw`[^=\s>/"']+(?=[=>\s]|$)`;
const attrValue = String.raw`${quotedAttrValue}|${singleQuotedAttrValue}|${unquotedAttrValue}`;
const attrNameValue = String.raw`(?<attrName>${attrName})(?:\s*=\s*(?:${attrValue}))?`;

// Make sure not to swallow the closing slash if one exists
const attrText = String.raw`${quotedString}|[^\s>]*[^\s>/]|[^\s>]*/(?!\s*>)`;

// One attribute: optional leading whitespace, then either a name[=value]
// pair or arbitrary non-attribute text found inside the tag.
const attr = String.raw`(?<attrSpace>\s*)(?:${attrNameValue}|(?<attrText>${attrText}))`;

// Order matters: the alternatives are tried left to right, and `wildcard`
// is the catch-all that keeps the lexer moving on otherwise unmatched input.
const tokens = {
  comment: String.raw`<!--.*?-->`,
  dtd: String.raw`<![^>]+>`,
  startTag: String.raw`<(?<startTagName>${tagName})(?<attrs>(?:${attr})*)\s*(?<closingSlash>/?)\s*>`,
  endTag: String.raw`</(?<endTagName>${tagName})\s*>`,
  space: String.raw`\s+`,
  text: String.raw`[^<\s"']+|${quotedString}|['"]`,
  wildcard: String.raw`.`,
};

// Wrap every token pattern in a named group so a match reveals its kind.
const grammar = Object.entries(tokens)
  .map(([k, v]) => `(?<${k}>${v})`)
  .join("|");

/**
 * Yields successive matches of `lexer` against `s`.
 *
 * @param {RegExp} lexer Regular expression that `exec` advances via `lastIndex`.
 * @param {string} s String to tokenize.
 * @throws {Error} If matching stops before the end of `s`.
 */
function* getTokens(lexer, s) {
  let res;
  // Track the position ourselves: a failed `exec` resets `lexer.lastIndex`
  // to 0, which would hide how far the lexer actually got.
  let { lastIndex } = lexer;
  while ((res = lexer.exec(s))) {
    yield /** @type {RegExpExecArray & { groups: Record<string, string> }} */ (
      res
    );
    ({ lastIndex } = lexer);
  }
  if (lastIndex !== s.length) throw new Error("Failed to parse string");
}

// Elements that never have a closing tag, so they must not add a level.
const voidTags = new Set([
  "area",
  "base",
  "basefont",
  "bgsound",
  "br",
  "col",
  "command",
  "embed",
  "frame",
  "hr",
  "image",
  "img",
  "input",
  "keygen",
  "link",
  "meta",
  "param",
  "source",
  "track",
  "wbr",
]);

/**
 * Formats an HTML string by indenting, wrapping and collapsing whitespace
 * while preserving newlines (at most two consecutive ones are kept).
 *
 * The content of `pre`, `textarea`, `script` and `style` elements is copied
 * through untouched.
 *
 * @param {string} html Markup to format.
 * @param {string} [indent] String repeated once per nesting level.
 * @param {number} [width] Column after which a pending space becomes a newline.
 * @returns {string} The formatted markup.
 * @throws {Error} If the input or a tag's attribute list cannot be fully
 *   tokenized, or (only when `format.__strict` is set, for testing) when a
 *   wildcard or attribute-text token is encountered.
 */
function format(/** @type {string} */ html, indent = "  ", width = 80) {
  const lexer = new RegExp(grammar, "gys");
  const attrLexer = new RegExp(attr, "gy");

  /** @type {string[]} */
  const output = [];

  /** @type {string | null} */
  let specialElement = null;
  let level = 0;

  // `span` is the run of non-space text collected since the last whitespace,
  // `lastSpace` the whitespace that precedes it, and `spanLevel` the nesting
  // level at which the span started.
  let lineLength = 0;
  let span = "";
  let spanLevel = 0;
  let lastSpace = "";

  const flushOutput = () => {
    // Turn the pending space into a newline if the span would overflow.
    if (lastSpace && lastSpace !== "\n") {
      const newline = span.indexOf("\n");
      const len = newline === -1 ? span.length : newline;
      if (lineLength + lastSpace.length + len > width) lastSpace = "\n";
    }

    // A stray closing tag can push `level` (and thus `spanLevel`) below zero,
    // and `String.prototype.repeat` throws a RangeError on negative counts.
    const ind =
      lastSpace === "\n" && span ? indent.repeat(Math.max(spanLevel, 0)) : "";
    const out = `${lastSpace}${ind}${span}`;

    if (out) {
      const pos = out.lastIndexOf("\n");
      if (pos === -1) lineLength += out.length;
      else lineLength = out.length - pos - 1;
      output.push(out);
    }

    span = lastSpace = "";
  };

  const addOutput = (/** @type {string[]} */ ...args) => {
    for (const s of args) {
      if (!specialElement && /^\s+$/.test(s)) {
        // Whitespace ends the current span and becomes the pending separator.
        flushOutput();
        lastSpace = s;
      } else {
        if (!span) spanLevel = level;
        span += s;
      }
    }
  };

  for (const token of getTokens(lexer, html)) {
    // For testing
    if (/** @type {any} */ (format).__strict && token.groups.wildcard)
      throw new Error("Unexpected wildcard");

    if (token.groups.endTag) {
      const tagName = token.groups.endTagName.toLowerCase();
      if (tagName === specialElement) specialElement = null;
      if (!specialElement) {
        --level;
        addOutput(`</${tagName}>`);
      }
    }

    if (!specialElement) {
      if (token.groups.space) {
        // Keep up to two newlines; any other whitespace becomes one space.
        addOutput(...(token[0].match(/\n/g)?.slice(0, 2) ?? [" "]));
      } else if (
        token.groups.comment ||
        token.groups.dtd ||
        token.groups.text ||
        token.groups.wildcard
      ) {
        addOutput(token[0]);
      } else if (token.groups.startTag) {
        const tagName = token.groups.startTagName.toLowerCase();

        addOutput(`<${tagName}`);

        // Attributes wrap one level deeper than the tag itself.
        ++level;

        if (token.groups.attrs) {
          let { lastIndex } = attrLexer;
          let attrToken;
          let lastToken;
          while (
            (attrToken =
              /** @type {RegExpExecArray & { groups: Record<string, string> }} */ (
                attrLexer.exec(token.groups.attrs)
              ))
          ) {
            ({ lastIndex } = attrLexer);

            // For testing
            if (
              /** @type {any} */ (format).__strict &&
              attrToken.groups.attrText
            )
              throw new Error("Unexpected attr text");

            if (attrToken.groups.attrText) {
              if (attrToken.groups.attrSpace)
                addOutput(/\n/.test(attrToken.groups.attrSpace) ? "\n" : " ");
              addOutput(attrToken.groups.attrText);
            } else {
              if (attrToken.groups.attrSpace || !lastToken?.groups.attrText)
                addOutput(/\n/.test(attrToken.groups.attrSpace) ? "\n" : " ");
              addOutput(
                `${attrToken.groups.attrName}${
                  attrToken.groups.quotedAttrValue
                    ? `="${attrToken.groups.quotedAttrValue}"`
                    : attrToken.groups.singleQuotedAttrValue
                    ? `='${attrToken.groups.singleQuotedAttrValue}'`
                    : attrToken.groups.unquotedAttrValue
                    ? `=${attrToken.groups.unquotedAttrValue}`
                    : ""
                }`
              );
            }

            lastToken = attrToken;
          }
          if (lastIndex !== token.groups.attrs.length)
            throw new Error("Failed to parse attributes");
        }

        const hasClosingSlash = Boolean(token.groups.closingSlash);

        addOutput(hasClosingSlash ? " />" : ">");

        if (hasClosingSlash || voidTags.has(tagName)) --level;
        else if (["pre", "textarea", "script", "style"].includes(tagName))
          specialElement = tagName;
      }
    } else addOutput(token[0]);
  }

  // Flush remaining output
  flushOutput();

  // Collapse trailing whitespace into at most a single newline.
  let newline = false;
  while (/^\s+$/.test(output[output.length - 1])) {
    const last = /** @type {string} */ (output.pop());
    if (/\n/.test(last)) newline = true;
  }

  if (newline) output.push("\n");

  return output.join("");
}

format.default = format;
module.exports = format;

/* Surrounding dump text that shares these physical lines (kept verbatim):
--------------------------------------------------------------------------------
/jsconfig.json:
--------------------------------------------------------------------------------
1 | { 2 | "files": ["index.js"], 3 | "compilerOptions": { 4 | "checkJs": true, 5 | "declaration": true, 6 | "noEmit": false, 7 | "emitDeclarationOnly": true, 8 | "outDir": ".", 9 | "target": "ES2022", 10 | "module": "CommonJS", 11 | "strict": true, 12 | "noImplicitReturns": true, 13 | "noUnusedLocals": true, 14 | "noUnusedParameters": true 15 | } 16 | } 17 |
--------------------------------------------------------------------------------
/package-lock.json:
--------------------------------------------------------------------------------
1 | { 2 | "name": "html-format", 3 | "version": "1.1.7", 4 | "lockfileVersion": 3, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "html-format", 9 | "version": "1.1.7", 10 | "license": "MIT", 11 | "devDependencies": { 12 | "typescript": "^5.3.3" 13 | } 14 | }, 15 | "node_modules/typescript": { 16 | "version": "5.3.3", 17 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.3.3.tgz", 18 | "integrity": "sha512-pXWcraxM0uxAS+tN0AG/BF2TyqmHO014Z070UsJ+pFvYuRSq8KH8DmWpnbXe0pEPDHXZV3FcAbJkijJ5oNEnWw==", 19 | "dev": true, 20 | "bin": { 21 | "tsc": "bin/tsc", 22 | "tsserver": "bin/tsserver" 23 |
*/
}, 24 | "engines": { 25 | "node": ">=14.17" 26 | } 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "html-format", 3 | "version": "1.1.7", 4 | "description": "Format HTML strings.", 5 | "main": "index.js", 6 | "files": [ 7 | "index.d.ts", 8 | "index.js" 9 | ], 10 | "devDependencies": { 11 | "typescript": "^5.3.3" 12 | }, 13 | "scripts": { 14 | "build": "tsc -p jsconfig.json", 15 | "test": "node --test", 16 | "prepare": "npm run build", 17 | "version": "npm test", 18 | "postversion": "git push && git push --tags" 19 | }, 20 | "repository": { 21 | "type": "git", 22 | "url": "git+https://github.com/mohd-akram/html-format.git" 23 | }, 24 | "keywords": [ 25 | "html", 26 | "format" 27 | ], 28 | "author": "Mohamed Akram", 29 | "license": "MIT", 30 | "bugs": { 31 | "url": "https://github.com/mohd-akram/html-format/issues" 32 | }, 33 | "homepage": "https://github.com/mohd-akram/html-format#readme" 34 | } 35 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | const nodeTest = require("node:test"); 3 | 4 | const assert = require("assert/strict"); 5 | 6 | const format = require("./index.js"); 7 | 8 | /** 9 | * 10 | * @param {string} name 11 | * @param {string} input 12 | * @param {string} expected 13 | * @param {boolean} [strict] 14 | * @param {string} [indent] 15 | * @param {number} [width] 16 | */ 17 | function test(name, input, expected, strict = true, indent, width) { 18 | nodeTest.test(name, () => { 19 | if (!strict) { 20 | /** @type {any} */ (format).__strict = true; 21 | assert.throws(() => format(input, indent, width)); 22 | } 23 | /** @type {any} */ (format).__strict = strict; 24 | const actual = format(input, indent, width); 25 | assert.equal(actual, 
expected); 26 | if (width == undefined) width = 80; 27 | for (const line of actual.split("\n")) { 28 | assert.ok( 29 | line.length <= width, 30 | `line.length = ${line.length} > ${width}` 31 | ); 32 | // Ensure no trailing whitespace 33 | assert.ok(!/\s+$/.test(line)); 34 | } 35 | }); 36 | } 37 | 38 | test("No-op", "", ""); 39 | 40 | test("Remove extra space 1", " ", " "); 41 | test( 42 | "Remove extra space 2", 43 | " pasted from the internet ", 44 | " pasted from the internet " 45 | ); 46 | 47 | test("Preserve newlines", "\n\n", "\n\n"); 48 | 49 | test( 50 | "Preserve nested newlines", 51 | "\n \n\n ", 52 | "\n \n\n " 53 | ); 54 | 55 | test("Preserve trailing newline", "\n", "\n"); 56 | 57 | test("Trim trailing newlines", "\n\n\n", "\n"); 58 | 59 | test( 60 | "Indent once", 61 | "\n
", 62 | "\n
" 63 | ); 64 | 65 | test( 66 | "Indent twice", 67 | "\n
\n
", 68 | "\n
\n
" 69 | ); 70 | 71 | test( 72 | "Fix wrong indents", 73 | "\n
\n
\n
\n
\n ", 74 | "\n
\n
\n
\n
\n" 75 | ); 76 | 77 | test( 78 | "Space attributes", 79 | '
', 80 | '
' 81 | ); 82 | 83 | test( 84 | "Align attributes", 85 | '
', 86 | '
' 87 | ); 88 | 89 | test( 90 | "Remove extra attribute space 1", 91 | '
', 92 | '
' 93 | ); 94 | test( 95 | "Remove extra attribute space 2", 96 | '
', 97 | '
' 98 | ); 99 | test( 100 | "Remove extra attribute space 3", 101 | '
', 102 | '
' 103 | ); 104 | test( 105 | "Remove extra attribute space 4", 106 | '
', 107 | '
' 108 | ); 109 | test( 110 | "Remove extra attribute space 5", 111 | '
', 112 | '
' 113 | ); 114 | 115 | test( 116 | "Wrap many attributes", 117 | '', 118 | '' 119 | ); 120 | 121 | test( 122 | "Do not wrap after long attribute with newline", 123 | '
no wrap needed', 124 | '
no wrap needed' 125 | ); 126 | 127 | test( 128 | "Wrap long line", 129 | "

\nLorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

", 130 | "

\n Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor\n incididunt ut labore et dolore magna aliqua.

" 131 | ); 132 | 133 | test( 134 | "Wrap long tag", 135 | 'Go', 136 | 'Go' 137 | ); 138 | 139 | test( 140 | "Wrap after special element", 141 | "
this is an incredibly long sentence that never seems to end
this should actually wrap", 142 | "
this is an incredibly long sentence that never seems to end
this\nshould actually wrap" 143 | ); 144 | 145 | test( 146 | "Do not wrap after special element", 147 | "
this is an incredibly long sentence that never seems to end\n starting a new paragraph
this should not actually wrap", 148 | "
this is an incredibly long sentence that never seems to end\n starting a new paragraph
this should not actually wrap" 149 | ); 150 | 151 | test( 152 | "Do not format pre element content", 153 | '
\ns  p  a  c  e  
s', 154 | '
\ns  p  a  c  e  
s' 155 | ); 156 | 157 | test( 158 | "Do not format textarea element content", 159 | ' s', 160 | ' s' 161 | ); 162 | 163 | test( 164 | "Do not format style element content", 165 | "", 166 | "" 167 | ); 168 | 169 | test( 170 | "Do not format script element content", 171 | '', 172 | '' 173 | ); 174 | 175 | test( 176 | "Format special tag attributes", 177 | '', 178 | '' 179 | ); 180 | 181 | test( 182 | "Wrap special tag", 183 | '', 184 | '' 185 | ); 186 | 187 | test( 188 | "Do not indent after doctype", 189 | "\n", 190 | "\n" 191 | ); 192 | 193 | test( 194 | "Do not indent after comment", 195 | "\n
", 196 | "\n
" 197 | ); 198 | 199 | test( 200 | "Do not parse tags inside quotes", 201 | '
', 202 | '
' 203 | ); 204 | 205 | test( 206 | "Do not parse tags inside comments", 207 | "\n
", 208 | "\n
" 209 | ); 210 | 211 | test( 212 | "Format element after empty comment", 213 | "\n
", 214 | "\n
" 215 | ); 216 | 217 | test( 218 | "Do not remove space before double/triple equals 1", 219 | ``, 220 | `` 221 | ); 222 | 223 | test( 224 | "Do not remove space before double/triple equals 2", 225 | ``, 226 | `` 227 | ); 228 | 229 | test( 230 | "Handle attribute-like text inside tag", 231 | "
", 232 | "
", 233 | false 234 | ); 235 | 236 | test( 237 | "Different indent", 238 | '\n
\n', 239 | '\n
\n', 240 | true, 241 | " ".repeat(4) 242 | ); 243 | 244 | test( 245 | "Different indent and width", 246 | '\n
\n', 247 | '\n
\n
\n', 248 | true, 249 | " ".repeat(4), 250 | 21 251 | ); 252 | 253 | test( 254 | "Handle space around non-attribute quotes correctly", 255 | 'Users', 256 | 'Users', 257 | false 258 | ); 259 | 260 | test( 261 | "Handle space inside non-attribute quotes correctly", 262 | '', 263 | '', 264 | false 265 | ); 266 | 267 | test( 268 | "Handle consecutive quotes", 269 | '', 270 | '', 271 | false 272 | ); 273 | 274 | test( 275 | "Preserve quoted strings", 276 | ":\" leave m\\\"e alone\" ' and m\\'e too':", 277 | ":\" leave m\\\"e alone\" ' and m\\'e too':" 278 | ); 279 | 280 | test( 281 | "Ignore quoted strings with newlines", 282 | "Collector's \n
\nstuff\n
'", 283 | "Collector's\n
\n stuff\n
'" 284 | ); 285 | 286 | test( 287 | "Preserve incomplete HTML", 288 | "
do not remove me",
289 |   "
do not remove me"
290 | );
291 | 
292 | test(
293 |   "Handle invalid HTML",
294 |   "< this is a very long sentence to test the regex",
295 |   "< this is a very long sentence to test the regex",
296 |   false
297 | );
298 | 
299 | test("Void tags work correctly", "
\n
", "
\n
"); 300 | 301 | test( 302 | "Handle self-closing tag", 303 | "\n", 304 | "\n" 305 | ); 306 | 307 | test("Handle extraneous slashes 1", "", "", false); 308 | test("Handle extraneous slashes 2", "", "", false); 309 | 310 | test( 311 | "Handle arbitrary text in tag", 312 | '
\nstuff\n
', 313 | '
\n stuff\n
', 314 | false 315 | ); 316 | --------------------------------------------------------------------------------