├── .gitignore ├── LICENSE ├── bin └── html.js ├── img ├── after.png ├── before.png └── copyashtml.png ├── lib └── html.js ├── package.json ├── readme.md └── src └── html.js /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014-2015 Max Ogden and contributors 4 | Copyright (c) 2007-2013 Einar Lielmanis and contributors. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /bin/html.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var html = require("../lib/html") 4 | var fs = require('fs') 5 | var concat = require('concat-stream') 6 | 7 | var args = process.argv.slice(0) 8 | // shift off node and script name 9 | args.shift() 10 | args.shift() 11 | 12 | if (args.length > 0) processFiles(args) 13 | else readStdin() 14 | 15 | function readStdin() { 16 | var stdin = process.openStdin() 17 | stdin.pipe(concat(function concatted (buff) { 18 | process.stdout.write(html.prettyPrint(buff.toString(), {indent_size: 2})) 19 | })) 20 | } 21 | 22 | function processFiles(files) { 23 | if (files.length > 1) { 24 | files.map(function(filename) { 25 | prettifyFile(filename) 26 | }) 27 | return 28 | } 29 | var str = fs.readFileSync(files[0]).toString() 30 | process.stdout.write(prettify(str)) 31 | } 32 | 33 | function prettify(str) { 34 | return html.prettyPrint(str, {indent_size: 2}) 35 | } 36 | 37 | function prettifyFile(filename) { 38 | fs.writeFileSync(filename, prettify(fs.readFileSync(filename).toString())) 39 | } 40 | -------------------------------------------------------------------------------- /img/after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/max-mapper/commonjs-html-prettyprinter/0717eb216ebcc67399ca3c0406e5c03a77f7e761/img/after.png -------------------------------------------------------------------------------- /img/before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/max-mapper/commonjs-html-prettyprinter/0717eb216ebcc67399ca3c0406e5c03a77f7e761/img/before.png -------------------------------------------------------------------------------- /img/copyashtml.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/max-mapper/commonjs-html-prettyprinter/0717eb216ebcc67399ca3c0406e5c03a77f7e761/img/copyashtml.png -------------------------------------------------------------------------------- /lib/html.js: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Style HTML 4 | --------------- 5 | 6 | Written by Nochum Sossonko, (nsossonko@hotmail.com) 7 | 8 | Based on code initially developed by: Einar Lielmanis, 9 | http://jsbeautifier.org/ 10 | 11 | 12 | You are free to use this in any way you want, in case you find this useful or working for you. 13 | 14 | Usage: 15 | style_html(html_source); 16 | 17 | style_html(html_source, options); 18 | 19 | The options are: 20 | indent_size (default 4) — indentation size, 21 | indent_char (default space) — character to indent with, 22 | max_char (default 70) - maximum amount of characters per line, 23 | brace_style (default "collapse") - "collapse" | "expand" | "end-expand" 24 | put braces on the same line as control statements (default), or put braces on own line (Allman / ANSI style), or just put end braces on own line. 25 | unformatted (defaults to inline tags) - list of tags, that shouldn't be reformatted 26 | indent_scripts (default normal) - "keep"|"separate"|"normal" 27 | 28 | e.g. 29 | 30 | style_html(html_source, { 31 | 'indent_size': 2, 32 | 'indent_char': ' ', 33 | 'max_char': 78, 34 | 'brace_style': 'expand', 35 | 'unformatted': ['a', 'sub', 'sup', 'b', 'i', 'u'] 36 | }); 37 | */ 38 | 39 | function style_html(html_source, options) { 40 | //Wrapper function to invoke all the necessary constructors and deal with the output. 
41 | 42 | var multi_parser, 43 | indent_size, 44 | indent_character, 45 | max_char, 46 | brace_style, 47 | unformatted; 48 | 49 | options = options || {}; 50 | indent_size = options.indent_size || 4; 51 | indent_character = options.indent_char || ' '; 52 | brace_style = options.brace_style || 'collapse'; 53 | max_char = options.max_char == 0 ? Infinity : options.max_char || 70; 54 | unformatted = options.unformatted || ['a', 'span', 'bdo', 'em', 'strong', 'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'q', 'sub', 'sup', 'tt', 'i', 'b', 'big', 'small', 'u', 's', 'strike', 'font', 'ins', 'del', 'pre', 'address', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']; 55 | 56 | function Parser() { 57 | 58 | this.pos = 0; //Parser position 59 | this.token = ''; 60 | this.current_mode = 'CONTENT'; //reflects the current Parser mode: TAG/CONTENT 61 | this.tags = { //An object to hold tags, their position, and their parent-tags, initiated with default values 62 | parent: 'parent1', 63 | parentcount: 1, 64 | parent1: '' 65 | }; 66 | this.tag_type = ''; 67 | this.token_text = this.last_token = this.last_text = this.token_type = ''; 68 | 69 | this.Utils = { //Uilities made available to the various functions 70 | whitespace: "\n\r\t ".split(''), 71 | single_token: 'br,input,link,meta,!doctype,basefont,base,area,hr,wbr,param,img,isindex,?xml,embed,?php,?,?='.split(','), //all the single tags for HTML 72 | extra_liners: 'head,body,/html'.split(','), //for tags that need a line of whitespace before them 73 | in_array: function (what, arr) { 74 | for (var i=0; i= this.input.length) { 91 | return content.length?content.join(''):['', 'TK_EOF']; 92 | } 93 | 94 | input_char = this.input.charAt(this.pos); 95 | this.pos++; 96 | this.line_char_count++; 97 | 98 | if (this.Utils.in_array(input_char, this.Utils.whitespace)) { 99 | if (content.length) { 100 | space = true; 101 | } 102 | this.line_char_count--; 103 | continue; //don't want to insert unnecessary space 104 | } 105 | else 
if (space) { 106 | if (this.line_char_count >= this.max_char) { //insert a line when the max_char is reached 107 | content.push('\n'); 108 | for (var i=0; i', 'igm'); 131 | reg_match.lastIndex = this.pos; 132 | var reg_array = reg_match.exec(this.input); 133 | var end_script = reg_array?reg_array.index:this.input.length; //absolute end of script 134 | if(this.pos < end_script) { //get everything in between the script tags 135 | content = this.input.substring(this.pos, end_script); 136 | this.pos = end_script; 137 | } 138 | return content; 139 | } 140 | 141 | this.record_tag = function (tag){ //function to record a tag and its parent in this.tags Object 142 | if (this.tags[tag + 'count']) { //check for the existence of this tag type 143 | this.tags[tag + 'count']++; 144 | this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level 145 | } 146 | else { //otherwise initialize this tag type 147 | this.tags[tag + 'count'] = 1; 148 | this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level 149 | } 150 | this.tags[tag + this.tags[tag + 'count'] + 'parent'] = this.tags.parent; //set the parent (i.e. in the case of a div this.tags.div1parent) 151 | this.tags.parent = tag + this.tags[tag + 'count']; //and make this the current parent (i.e. in the case of a div 'div1') 152 | } 153 | 154 | this.retrieve_tag = function (tag) { //function to retrieve the opening tag to the corresponding closer 155 | if (this.tags[tag + 'count']) { //if the openener is not in the Object we ignore it 156 | var temp_parent = this.tags.parent; //check to see if it's a closable tag. 
157 | while (temp_parent) { //till we reach '' (the initial value); 158 | if (tag + this.tags[tag + 'count'] === temp_parent) { //if this is it use it 159 | break; 160 | } 161 | temp_parent = this.tags[temp_parent + 'parent']; //otherwise keep on climbing up the DOM Tree 162 | } 163 | if (temp_parent) { //if we caught something 164 | this.indent_level = this.tags[tag + this.tags[tag + 'count']]; //set the indent_level accordingly 165 | this.tags.parent = this.tags[temp_parent + 'parent']; //and set the current parent 166 | } 167 | delete this.tags[tag + this.tags[tag + 'count'] + 'parent']; //delete the closed tags parent reference... 168 | delete this.tags[tag + this.tags[tag + 'count']]; //...and the tag itself 169 | if (this.tags[tag + 'count'] == 1) { 170 | delete this.tags[tag + 'count']; 171 | } 172 | else { 173 | this.tags[tag + 'count']--; 174 | } 175 | } 176 | } 177 | 178 | this.get_tag = function () { //function to get a full tag and parse its type 179 | var input_char = '', 180 | content = [], 181 | space = false, 182 | tag_start, tag_end; 183 | 184 | do { 185 | if (this.pos >= this.input.length) { 186 | return content.length?content.join(''):['', 'TK_EOF']; 187 | } 188 | 189 | input_char = this.input.charAt(this.pos); 190 | this.pos++; 191 | this.line_char_count++; 192 | 193 | if (this.Utils.in_array(input_char, this.Utils.whitespace)) { //don't want to insert unnecessary space 194 | space = true; 195 | this.line_char_count--; 196 | continue; 197 | } 198 | 199 | if (input_char === "'" || input_char === '"') { 200 | if (!content[1] || content[1] !== '!') { //if we're in a comment strings don't get treated specially 201 | input_char += this.get_unformatted(input_char); 202 | space = true; 203 | } 204 | } 205 | 206 | if (input_char === '=') { //no space before = 207 | space = false; 208 | } 209 | 210 | if (content.length && content[content.length-1] !== '=' && input_char !== '>' 211 | && space) { //no space after = or before > 212 | if 
(this.line_char_count >= this.max_char) { 213 | this.print_newline(false, content); 214 | this.line_char_count = 0; 215 | } 216 | else { 217 | content.push(' '); 218 | this.line_char_count++; 219 | } 220 | space = false; 221 | } 222 | if (input_char === '<') { 223 | tag_start = this.pos - 1; 224 | } 225 | content.push(input_char); //inserts character at-a-time (or string) 226 | } while (input_char !== '>'); 227 | 228 | var tag_complete = content.join(''); 229 | var tag_index; 230 | if (tag_complete.indexOf(' ') != -1) { //if there's whitespace, thats where the tag name ends 231 | tag_index = tag_complete.indexOf(' '); 232 | } 233 | else { //otherwise go with the tag ending 234 | tag_index = tag_complete.indexOf('>'); 235 | } 236 | var tag_check = tag_complete.substring(1, tag_index).toLowerCase(); 237 | if (tag_complete.charAt(tag_complete.length-2) === '/' || 238 | this.Utils.in_array(tag_check, this.Utils.single_token)) { //if this tag name is a single tag type (either in the list or has a closing /) 239 | this.tag_type = 'SINGLE'; 240 | } 241 | else if (tag_check === 'script') { //for later script handling 242 | this.record_tag(tag_check); 243 | this.tag_type = 'SCRIPT'; 244 | } 245 | else if (tag_check === 'style') { //for future style handling (for now it justs uses get_content) 246 | this.record_tag(tag_check); 247 | this.tag_type = 'STYLE'; 248 | } 249 | else if (this.Utils.in_array(tag_check, unformatted)) { // do not reformat the "unformatted" tags 250 | var comment = this.get_unformatted('', tag_complete); //...delegate to get_unformatted function 251 | content.push(comment); 252 | // Preserve collapsed whitespace either before or after this tag. 
253 | if (tag_start > 0 && this.Utils.in_array(this.input.charAt(tag_start - 1), this.Utils.whitespace)){ 254 | content.splice(0, 0, this.input.charAt(tag_start - 1)); 255 | } 256 | tag_end = this.pos - 1; 257 | if (this.Utils.in_array(this.input.charAt(tag_end + 1), this.Utils.whitespace)){ 258 | content.push(this.input.charAt(tag_end + 1)); 259 | } 260 | this.tag_type = 'SINGLE'; 261 | } 262 | else if (tag_check.charAt(0) === '!') { //peek for so... 265 | var comment = this.get_unformatted('-->', tag_complete); //...delegate to get_unformatted 266 | content.push(comment); 267 | } 268 | this.tag_type = 'START'; 269 | } 270 | else if (tag_check.indexOf('[endif') != -1) {//peek for ', tag_complete); 281 | content.push(comment); 282 | this.tag_type = 'SINGLE'; 283 | } 284 | } 285 | else { 286 | if (tag_check.charAt(0) === '/') { //this tag is a double tag so check for tag-ending 287 | this.retrieve_tag(tag_check.substring(1)); //remove it and all ancestors 288 | this.tag_type = 'END'; 289 | } 290 | else { //otherwise it's a start-tag 291 | this.record_tag(tag_check); //push it on the tag stack 292 | this.tag_type = 'START'; 293 | } 294 | if (this.Utils.in_array(tag_check, this.Utils.extra_liners)) { //check if this double needs an extra line 295 | this.print_newline(true, this.output); 296 | } 297 | } 298 | return content.join(''); //returns fully formatted tag 299 | } 300 | 301 | this.get_unformatted = function (delimiter, orig_tag) { //function to return unformatted content in its entirety 302 | 303 | if (orig_tag && orig_tag.toLowerCase().indexOf(delimiter) != -1) { 304 | return ''; 305 | } 306 | var input_char = ''; 307 | var content = ''; 308 | var space = true; 309 | do { 310 | 311 | if (this.pos >= this.input.length) { 312 | return content; 313 | } 314 | 315 | input_char = this.input.charAt(this.pos); 316 | this.pos++ 317 | 318 | if (this.Utils.in_array(input_char, this.Utils.whitespace)) { 319 | if (!space) { 320 | this.line_char_count--; 321 | continue; 322 
| } 323 | if (input_char === '\n' || input_char === '\r') { 324 | content += '\n'; 325 | /* Don't change tab indention for unformatted blocks. If using code for html editing, this will greatly affect
 tags if they are specified in the 'unformatted array'
326 |             for (var i=0; i 0) {
428 |           this.indent_level--;
429 |         }
430 |       }
431 |     }
432 |     return this;
433 |   }
434 | 
435 |   /*_____________________--------------------_____________________*/
436 | 
437 |   multi_parser = new Parser(); //one Parser instance is used for both reading tokens and printing output
438 |   multi_parser.printer(html_source, indent_character, indent_size, max_char, brace_style); //initialize starting values
439 |   //Main loop: pull one token at a time and print it according to its type until TK_EOF.
440 |   while (true) {
441 |       var t = multi_parser.get_token(); //t is read below as [token text, token type]
442 |       multi_parser.token_text = t[0];
443 |       multi_parser.token_type = t[1];
444 |       //TK_EOF ends the loop; nothing is printed for it.
445 |     if (multi_parser.token_type === 'TK_EOF') {
446 |       break;
447 |     }
448 |     //Each case prints the token and then sets the mode used for reading the next token.
449 |     switch (multi_parser.token_type) {
450 |       case 'TK_TAG_START':
451 |         multi_parser.print_newline(false, multi_parser.output);
452 |         multi_parser.print_token(multi_parser.token_text);
453 |         multi_parser.indent(); //whatever follows a start tag is printed one level deeper
454 |         multi_parser.current_mode = 'CONTENT';
455 |         break;
456 |       case 'TK_TAG_STYLE':
457 |       case 'TK_TAG_SCRIPT':
458 |         multi_parser.print_newline(false, multi_parser.output); //same as TK_TAG_START, but without the indent() call
459 |         multi_parser.print_token(multi_parser.token_text);
460 |         multi_parser.current_mode = 'CONTENT';
461 |         break;
462 |       case 'TK_TAG_END':
463 |         //Only consider a newline when the previous token was empty content, i.e. nothing was printed between the last tag and this closing tag
464 |         if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
465 |             var tag_name = multi_parser.token_text.match(/\w+/)[0]; //first word of the closing tag, i.e. its name
466 |             var tag_extracted_from_last_output = multi_parser.output[multi_parser.output.length -1].match(/<\s*(\w+)/); //NOTE(review): if output is still empty here (e.g. input starting with a closing tag) this indexes undefined - confirm whether that is reachable
467 |             if (tag_extracted_from_last_output === null || tag_extracted_from_last_output[1] !== tag_name) //skip the newline when the last printed piece is the matching opening tag, so an empty element stays on one line
468 |                 multi_parser.print_newline(true, multi_parser.output);
469 |         }
470 |         multi_parser.print_token(multi_parser.token_text);
471 |         multi_parser.current_mode = 'CONTENT';
472 |         break;
473 |       case 'TK_TAG_SINGLE':
474 |         // Don't add a newline before elements that should remain unformatted.
475 |         var tag_check = multi_parser.token_text.match(/^\s*<([a-z]+)/i); //tag name, allowing leading whitespace in the token text
476 |         if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)){
477 |             multi_parser.print_newline(false, multi_parser.output);
478 |         }
479 |         multi_parser.print_token(multi_parser.token_text);
480 |         multi_parser.current_mode = 'CONTENT';
481 |         break;
482 |       case 'TK_CONTENT':
483 |         if (multi_parser.token_text !== '') { //empty content is not printed, but is still recorded as last_token/last_text below
484 |           multi_parser.print_token(multi_parser.token_text);
485 |         }
486 |         multi_parser.current_mode = 'TAG';
487 |         break;
488 |       case 'TK_STYLE':
489 |       case 'TK_SCRIPT':
490 |         if (multi_parser.token_text !== '') {
491 |           multi_parser.output.push('\n'); //pushed straight onto output rather than through print_newline
492 |           var text = multi_parser.token_text;
493 |           if (multi_parser.token_type == 'TK_SCRIPT') {
494 |             var _beautifier = typeof js_beautify == 'function' && js_beautify; //false unless a js_beautify function is in scope
495 |           } else if (multi_parser.token_type == 'TK_STYLE') {
496 |             var _beautifier = typeof css_beautify == 'function' && css_beautify; //false unless a css_beautify function is in scope
497 |           }
498 |           //indent_scripts option: "keep" -> 0, "separate" -> minus the current indent level, anything else -> 1
499 |           if (options.indent_scripts == "keep") {
500 |             var script_indent_level = 0;
501 |           } else if (options.indent_scripts == "separate") {
502 |             var script_indent_level = -multi_parser.indent_level;
503 |           } else {
504 |             var script_indent_level = 1;
505 |           }
506 |           //presumably get_full_indent applies this level relative to the current indent - its body is not visible here, TODO confirm
507 |           var indentation = multi_parser.get_full_indent(script_indent_level);
508 |           if (_beautifier) {
509 |             // call the Beautifier if available
510 |             text = _beautifier(text.replace(/^\s*/, indentation), options); //leading whitespace is replaced by the computed indentation first
511 |           } else {
512 |             // simply indent the string otherwise
513 |             var white = text.match(/^\s*/)[0]; //all leading whitespace of the block
514 |             var _level = white.match(/[^\n\r]*$/)[0].split(multi_parser.indent_string).length - 1; //number of indent_string units on the last line of that leading whitespace
515 |             var reindent = multi_parser.get_full_indent(script_indent_level -_level); //indent inserted after every line break, offset by the level the block already had
516 |             text = text.replace(/^\s*/, indentation)
517 |                    .replace(/\r\n|\r|\n/g, '\n' + reindent)
518 |                    .replace(/\s*$/, ''); //drop trailing whitespace
519 |           }
520 |           if (text) {
521 |             multi_parser.print_token(text);
522 |             multi_parser.print_newline(true, multi_parser.output);
523 |           }
524 |         }
525 |         multi_parser.current_mode = 'TAG';
526 |         break;
527 |     }
528 |     multi_parser.last_token = multi_parser.token_type; //remembered for the TK_TAG_END check on the next iteration
529 |     multi_parser.last_text = multi_parser.token_text;
530 |   }
531 |   return multi_parser.output.join(''); //output is an array of string pieces; the result is their concatenation
532 | }
533 | 
534 | module.exports = { // CommonJS export: the module's only entry point
535 |   prettyPrint: style_html // prettyPrint(html_source, options) returns the formatted HTML as a string
536 | };


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "html",
 3 |   "version": "1.0.0",
 4 |   "description": "HTML pretty printer CLI utility (based on jsbeautifier)",
 5 |   "author": "Max Ogden  (http://maxogden.com)",
 6 |   "contributors": [
 7 |     "Nochum Sossonko ",
 8 |     "Einar Lielmanis "
 9 |   ],
10 |   "keywords": [
11 |     "html",
12 |     "tabifier",
13 |     "beautifier",
14 |     "prettyprinter",
15 |     "prettifier",
16 |     "pretty",
17 |     "command",
18 |     "shell"
19 |   ],
20 |   "repository": {
21 |     "type": "git",
22 |     "url": "https://github.com/maxogden/commonjs-html-prettyprinter.git"
23 |   },
24 |   "bin": {
25 |     "html": "./bin/html.js"
26 |   },
27 |   "main": "lib/html.js",
28 |   "bugs": {
29 |     "url": "https://github.com/maxogden/commonjs-html-prettyprinter/issues"
30 |   },
31 |   "homepage": "https://github.com/maxogden/commonjs-html-prettyprinter",
32 |   "dependencies": {
33 |     "concat-stream": "^1.4.7"
34 |   },
35 |   "devDependencies": {},
36 |   "scripts": {
37 |     "test": "echo \"Error: no test specified\" && exit 1"
38 |   },
39 |   "license": "BSD"
40 | }
41 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # html prettyprinter
 2 | 
 3 | A node port of beautify-html.js by Nochum Sossonko which is based on jsbeautifier by Einar Lielmanis
 4 | 
 5 | ## Installation
 6 | 
 7 | ### from npm (node package manager)
 8 | ``` bash
 9 |   npm install html
10 | ```
11 | 
12 | ## Usage (command line)
13 | 
14 | ```
15 |   echo "

AwesomeComis awesome

" | html 16 | ``` 17 | 18 | returns: 19 | 20 | ``` html 21 |

22 | 23 | AwesomeCom 24 | 25 | 26 | is awesome 27 | 28 |

29 | ```` 30 | 31 | `html foo.html` will write the prettified version to `stdout`. 32 | 33 | `html *.html` will *update in place* all matching html files with their prettified versions. 34 | 35 | ## Advanced usage 36 | 37 | I find myself constantly using the 'Copy as HTML' feature of the Chrome Inspector: 38 | 39 | ![Copy as HTML](https://github.com/maxogden/commonjs-html-prettyprinter/raw/master/img/copyashtml.png) 40 | 41 | The downside is that the HTML that gets copied is usually pretty ugly: 42 | 43 | ![Before pretty printing](https://github.com/maxogden/commonjs-html-prettyprinter/raw/master/img/before.png) 44 | 45 | On OS X you can use `pbpaste` and `pbcopy` to stream your clipboard in and out of unix pipes. With the ugly HTML still in your clipboard run this command: 46 | 47 | `pbpaste | html | pbcopy` 48 | 49 | Now when you paste your clipboard into an editor you will get nice, pretty printed HTML: 50 | 51 | ![After pretty printing](https://github.com/maxogden/commonjs-html-prettyprinter/raw/master/img/after.png) 52 | 53 | ## Upgrading 54 | 55 | Grab the newest `beautify-html.js` from [js-beautifier](https://github.com/einars/js-beautify) and drop it into `lib/` as `html.js`. Then add the following code to the bottom of `html.js`: 56 | 57 | ```javascript 58 | module.exports = { prettyPrint: style_html } 59 | ``` 60 | 61 | BSD LICENSE -------------------------------------------------------------------------------- /src/html.js: -------------------------------------------------------------------------------- 1 | //= github://einars/js-beautify/[beautify, beautify-html] 2 | 3 | module.exports = { 4 | prettyPrint: style_html 5 | }; --------------------------------------------------------------------------------