', 'igm');
131 | reg_match.lastIndex = this.pos;
132 | var reg_array = reg_match.exec(this.input);
133 | var end_script = reg_array?reg_array.index:this.input.length; //absolute end of script
134 | if(this.pos < end_script) { //get everything in between the script tags
135 | content = this.input.substring(this.pos, end_script);
136 | this.pos = end_script;
137 | }
138 | return content;
139 | }
140 |
141 | this.record_tag = function (tag){ //record an opened tag in this.tags: bump its per-type counter, store the current indent level, and link it to the current parent
142 | if (this.tags[tag + 'count']) { //this tag type already has a counter entry (key: tag name + 'count')
143 | this.tags[tag + 'count']++; //one more open instance of this tag type
144 | this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //record the present indent level under tag name + counter (e.g. 'div2')
145 | }
146 | else { //otherwise initialize the counter for this tag type
147 | this.tags[tag + 'count'] = 1;
148 | this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //record the present indent level under tag name + counter (e.g. 'div1')
149 | }
150 | this.tags[tag + this.tags[tag + 'count'] + 'parent'] = this.tags.parent; //remember the previous parent (e.g. for a div this is this.tags.div1parent)
151 | this.tags.parent = tag + this.tags[tag + 'count']; //and make this tag the current parent (e.g. for a div this is 'div1')
152 | }
153 |
154 | this.retrieve_tag = function (tag) { //handle a closing tag: find its most recent opener in this.tags, restore that indent level and parent, then drop the opener's entries
155 | if (this.tags[tag + 'count']) { //if the opener is not in the Object we ignore it
156 | var temp_parent = this.tags.parent; //start from the current parent and check whether the opener is one of its ancestors
157 | while (temp_parent) { //stop on a falsy parent (presumably '' is the initial value of this.tags.parent; it is set outside this function - confirm)
158 | if (tag + this.tags[tag + 'count'] === temp_parent) { //found the most recent opener of this tag type
159 | break;
160 | }
161 | temp_parent = this.tags[temp_parent + 'parent']; //otherwise keep climbing up the recorded parent chain
162 | }
163 | if (temp_parent) { //the opener was found in the parent chain
164 | this.indent_level = this.tags[tag + this.tags[tag + 'count']]; //restore the indent level recorded when the opener was seen
165 | this.tags.parent = this.tags[temp_parent + 'parent']; //and make the opener's parent the current parent
166 | }
167 | delete this.tags[tag + this.tags[tag + 'count'] + 'parent']; //delete the closed tag's parent reference...
168 | delete this.tags[tag + this.tags[tag + 'count']]; //...and the tag itself (both deletes run even when the opener was not in the parent chain)
169 | if (this.tags[tag + 'count'] == 1) { //last open instance of this tag type: remove the counter entirely
170 | delete this.tags[tag + 'count'];
171 | }
172 | else {
173 | this.tags[tag + 'count']--; //otherwise just decrement the counter
174 | }
175 | }
176 | }
177 |
178 | this.get_tag = function () { //read characters from this.input up to and including the next '>', set this.tag_type from the tag name, and return the collected tag text
179 | var input_char = '',
180 | content = [],
181 | space = false,
182 | tag_start, tag_end;
183 |
184 | do {
185 | if (this.pos >= this.input.length) {
186 | return content.length?content.join(''):['', 'TK_EOF']; //input exhausted before '>': return what was collected, or an EOF pair if nothing was
187 | }
188 |
189 | input_char = this.input.charAt(this.pos);
190 | this.pos++;
191 | this.line_char_count++;
192 |
193 | if (this.Utils.in_array(input_char, this.Utils.whitespace)) { //whitespace is not copied; it is only remembered so at most one space is emitted later
194 | space = true;
195 | this.line_char_count--;
196 | continue;
197 | }
198 |
199 | if (input_char === "'" || input_char === '"') {
200 | if (!content[1] || content[1] !== '!') { //skip string handling when the second collected character is '!' (e.g. inside a comment): quotes there are plain text
201 | input_char += this.get_unformatted(input_char); //append everything get_unformatted returns for this quote character
202 | space = true;
203 | }
204 | }
205 |
206 | if (input_char === '=') { //no space before =
207 | space = false;
208 | }
209 |
210 | if (content.length && content[content.length-1] !== '=' && input_char !== '>'
211 | && space) { //no space after = or before >
212 | if (this.line_char_count >= this.max_char) { //line is full: break the line instead of inserting a space
213 | this.print_newline(false, content);
214 | this.line_char_count = 0;
215 | }
216 | else {
217 | content.push(' ');
218 | this.line_char_count++;
219 | }
220 | space = false;
221 | }
222 | if (input_char === '<') {
223 | tag_start = this.pos - 1; //index of the '<' in this.input (this.pos was already advanced)
224 | }
225 | content.push(input_char); //inserts character at-a-time (or string)
226 | } while (input_char !== '>');
227 |
228 | var tag_complete = content.join('');
229 | var tag_index;
230 | if (tag_complete.indexOf(' ') != -1) { //if there's whitespace, that's where the tag name ends
231 | tag_index = tag_complete.indexOf(' ');
232 | }
233 | else { //otherwise go with the tag ending
234 | tag_index = tag_complete.indexOf('>');
235 | }
236 | var tag_check = tag_complete.substring(1, tag_index).toLowerCase(); //lower-cased text between the first character and the end of the name (keeps a leading '/' or '!')
237 | if (tag_complete.charAt(tag_complete.length-2) === '/' ||
238 | this.Utils.in_array(tag_check, this.Utils.single_token)) { //if this tag name is a single tag type (either in the list or has a closing /)
239 | this.tag_type = 'SINGLE';
240 | }
241 | else if (tag_check === 'script') { //for later script handling
242 | this.record_tag(tag_check);
243 | this.tag_type = 'SCRIPT';
244 | }
245 | else if (tag_check === 'style') { //for future style handling (for now it just uses get_content)
246 | this.record_tag(tag_check);
247 | this.tag_type = 'STYLE';
248 | }
249 | else if (this.Utils.in_array(tag_check, unformatted)) { // do not reformat the "unformatted" tags
250 | var comment = this.get_unformatted(''+tag_check+'>', tag_complete); //...delegate to get_unformatted function
251 | content.push(comment);
252 | // Preserve collapsed whitespace either before or after this tag.
253 | if (tag_start > 0 && this.Utils.in_array(this.input.charAt(tag_start - 1), this.Utils.whitespace)){
254 | content.splice(0, 0, this.input.charAt(tag_start - 1));
255 | }
256 | tag_end = this.pos - 1;
257 | if (this.Utils.in_array(this.input.charAt(tag_end + 1), this.Utils.whitespace)){
258 | content.push(this.input.charAt(tag_end + 1));
259 | }
260 | this.tag_type = 'SINGLE';
261 | }
262 | else if (tag_check.charAt(0) === '!') { //tag name starts with '!'. NOTE(review): the original comment and the lines after it look truncated in this excerpt (numbering jumps from 262 to 265) - restore from upstream before editing this branch
265 | var comment = this.get_unformatted('-->', tag_complete); //...delegate to get_unformatted
266 | content.push(comment);
267 | }
268 | this.tag_type = 'START';
269 | }
270 | else if (tag_check.indexOf('[endif') != -1) {//peek for ', tag_complete);
281 | content.push(comment);
282 | this.tag_type = 'SINGLE';
283 | }
284 | }
285 | else {
286 | if (tag_check.charAt(0) === '/') { //this tag is a double tag so check for tag-ending
287 | this.retrieve_tag(tag_check.substring(1)); //remove it and all ancestors
288 | this.tag_type = 'END';
289 | }
290 | else { //otherwise it's a start-tag
291 | this.record_tag(tag_check); //push it on the tag stack
292 | this.tag_type = 'START';
293 | }
294 | if (this.Utils.in_array(tag_check, this.Utils.extra_liners)) { //check if this double needs an extra line
295 | this.print_newline(true, this.output);
296 | }
297 | }
298 | return content.join(''); //returns fully formatted tag
299 | }
300 |
301 | this.get_unformatted = function (delimiter, orig_tag) { //function to return unformatted content in its entirety
302 |
303 | if (orig_tag && orig_tag.toLowerCase().indexOf(delimiter) != -1) {
304 | return '';
305 | }
306 | var input_char = '';
307 | var content = '';
308 | var space = true;
309 | do {
310 |
311 | if (this.pos >= this.input.length) {
312 | return content;
313 | }
314 |
315 | input_char = this.input.charAt(this.pos);
316 | this.pos++
317 |
318 | if (this.Utils.in_array(input_char, this.Utils.whitespace)) {
319 | if (!space) {
320 | this.line_char_count--;
321 | continue;
322 | }
323 | if (input_char === '\n' || input_char === '\r') {
324 | content += '\n';
325 | /* Don't change tab indention for unformatted blocks. If using code for html editing, this will greatly affect tags if they are specified in the 'unformatted array'
326 | for (var i=0; i 0) {
428 | this.indent_level--;
429 | }
430 | }
431 | }
432 | return this;
433 | }
434 |
435 | /*_____________________--------------------_____________________*/
436 |
437 | multi_parser = new Parser(); //wrapping functions Parser
438 | multi_parser.printer(html_source, indent_character, indent_size, max_char, brace_style); //initialize starting values
439 |
440 | while (true) { //main loop: pull one token at a time and print it according to its type, until TK_EOF
441 | var t = multi_parser.get_token(); //t is used as a [text, type] pair
442 | multi_parser.token_text = t[0];
443 | multi_parser.token_type = t[1];
444 |
445 | if (multi_parser.token_type === 'TK_EOF') {
446 | break;
447 | }
448 |
449 | switch (multi_parser.token_type) {
450 | case 'TK_TAG_START': //opening tag: print it on its own line, then indent
451 | multi_parser.print_newline(false, multi_parser.output);
452 | multi_parser.print_token(multi_parser.token_text);
453 | multi_parser.indent();
454 | multi_parser.current_mode = 'CONTENT';
455 | break;
456 | case 'TK_TAG_STYLE':
457 | case 'TK_TAG_SCRIPT': //style/script opening tags are printed like start tags but without calling indent()
458 | multi_parser.print_newline(false, multi_parser.output);
459 | multi_parser.print_token(multi_parser.token_text);
460 | multi_parser.current_mode = 'CONTENT';
461 | break;
462 | case 'TK_TAG_END':
463 | //Print a new line only if the previous token was empty content and the last output entry is not the opening tag of the same name
464 | if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
465 | var tag_name = multi_parser.token_text.match(/\w+/)[0];
466 | var tag_extracted_from_last_output = multi_parser.output[multi_parser.output.length -1].match(/<\s*(\w+)/); //NOTE(review): output[output.length - 1] is undefined when output is empty, so .match would throw - confirm output cannot be empty here
467 | if (tag_extracted_from_last_output === null || tag_extracted_from_last_output[1] !== tag_name)
468 | multi_parser.print_newline(true, multi_parser.output);
469 | }
470 | multi_parser.print_token(multi_parser.token_text);
471 | multi_parser.current_mode = 'CONTENT';
472 | break;
473 | case 'TK_TAG_SINGLE':
474 | // Don't add a newline before elements that should remain unformatted.
475 | var tag_check = multi_parser.token_text.match(/^\s*<([a-z]+)/i);
476 | if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)){
477 | multi_parser.print_newline(false, multi_parser.output);
478 | }
479 | multi_parser.print_token(multi_parser.token_text);
480 | multi_parser.current_mode = 'CONTENT';
481 | break;
482 | case 'TK_CONTENT': //text content: printed only when non-empty; the next token is read in TAG mode
483 | if (multi_parser.token_text !== '') {
484 | multi_parser.print_token(multi_parser.token_text);
485 | }
486 | multi_parser.current_mode = 'TAG';
487 | break;
488 | case 'TK_STYLE':
489 | case 'TK_SCRIPT': //body of a style/script element: hand it to css_beautify/js_beautify when defined, otherwise just re-indent it
490 | if (multi_parser.token_text !== '') {
491 | multi_parser.output.push('\n');
492 | var text = multi_parser.token_text;
493 | if (multi_parser.token_type == 'TK_SCRIPT') {
494 | var _beautifier = typeof js_beautify == 'function' && js_beautify; //false when js_beautify is not a function
495 | } else if (multi_parser.token_type == 'TK_STYLE') {
496 | var _beautifier = typeof css_beautify == 'function' && css_beautify; //false when css_beautify is not a function
497 | }
498 |
499 | if (options.indent_scripts == "keep") { //script_indent_level is the argument passed to get_full_indent below
500 | var script_indent_level = 0;
501 | } else if (options.indent_scripts == "separate") {
502 | var script_indent_level = -multi_parser.indent_level;
503 | } else {
504 | var script_indent_level = 1;
505 | }
506 |
507 | var indentation = multi_parser.get_full_indent(script_indent_level);
508 | if (_beautifier) {
509 | // call the Beautifier if available
510 | text = _beautifier(text.replace(/^\s*/, indentation), options);
511 | } else {
512 | // simply indent the string otherwise
513 | var white = text.match(/^\s*/)[0]; //leading whitespace of the block
514 | var _level = white.match(/[^\n\r]*$/)[0].split(multi_parser.indent_string).length - 1; //number of indent_string occurrences on the last line of that leading whitespace
515 | var reindent = multi_parser.get_full_indent(script_indent_level -_level);
516 | text = text.replace(/^\s*/, indentation)
517 | .replace(/\r\n|\r|\n/g, '\n' + reindent)
518 | .replace(/\s*$/, '');
519 | }
520 | if (text) {
521 | multi_parser.print_token(text);
522 | multi_parser.print_newline(true, multi_parser.output);
523 | }
524 | }
525 | multi_parser.current_mode = 'TAG';
526 | break;
527 | }
528 | multi_parser.last_token = multi_parser.token_type; //remembered for the TK_TAG_END check on the next iteration
529 | multi_parser.last_text = multi_parser.token_text;
530 | }
531 | return multi_parser.output.join('');
532 | }
533 |
534 | module.exports = { //CommonJS export of this module
535 | prettyPrint: style_html //style_html is exposed under the name prettyPrint
536 | };
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "html",
3 | "version": "1.0.0",
4 | "description": "HTML pretty printer CLI utility (based on jsbeautifier)",
5 | "author": "Max Ogden (http://maxogden.com)",
6 | "contributors": [
7 | "Nochum Sossonko ",
8 | "Einar Lielmanis "
9 | ],
10 | "keywords": [
11 | "html",
12 | "tabifier",
13 | "beautifier",
14 | "prettyprinter",
15 | "prettifier",
16 | "pretty",
17 | "command",
18 | "shell"
19 | ],
20 | "repository": {
21 | "type": "git",
22 | "url": "https://github.com/maxogden/commonjs-html-prettyprinter.git"
23 | },
24 | "bin": {
25 | "html": "./bin/html.js"
26 | },
27 | "main": "lib/html.js",
28 | "bugs": {
29 | "url": "https://github.com/maxogden/commonjs-html-prettyprinter/issues"
30 | },
31 | "homepage": "https://github.com/maxogden/commonjs-html-prettyprinter",
32 | "dependencies": {
33 | "concat-stream": "^1.4.7"
34 | },
35 | "devDependencies": {},
36 | "scripts": {
37 | "test": "echo \"Error: no test specified\" && exit 1"
38 | },
39 | "license": "BSD"
40 | }
41 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # html prettyprinter
2 |
3 | A Node.js port of beautify-html.js by Nochum Sossonko, which is based on jsbeautifier by Einar Lielmanis.
4 |
5 | ## Installation
6 |
7 | ### from npm (node package manager)
8 | ``` bash
9 | npm install html
10 | ```
11 |
12 | ## Usage (command line)
13 |
14 | ```
15 | echo "" | html
16 | ```
17 |
18 | returns:
19 |
20 | ``` html
21 |
22 |
23 | AwesomeCom
24 |
25 |
26 | is awesome
27 |
28 |
29 | ```
30 |
31 | `html foo.html` will write the prettified version to `stdout`.
32 |
33 | `html *.html` will *update in place* all matching html files with their prettified versions.
34 |
35 | ## Advanced usage
36 |
37 | I find myself constantly using the 'Copy as HTML' feature of the Chrome Inspector:
38 |
39 | 
40 |
41 | The downside is that the HTML that gets copied is usually pretty ugly:
42 |
43 | 
44 |
45 | On OS X you can use `pbpaste` and `pbcopy` to stream your clipboard in and out of unix pipes. With the ugly HTML still in your clipboard run this command:
46 |
47 | `pbpaste | html | pbcopy`
48 |
49 | Now when you paste your clipboard into an editor you will get nice, pretty printed HTML:
50 |
51 | 
52 |
53 | ## Upgrading
54 |
55 | Grab the newest `beautify-html.js` from [js-beautifier](https://github.com/einars/js-beautify) and drop it into `lib/` as `html.js`. Then add the following code to the bottom of `html.js`:
56 |
57 | ```javascript
58 | module.exports = { prettyPrint: style_html }
59 | ```
60 |
61 | BSD LICENSE
--------------------------------------------------------------------------------
/src/html.js:
--------------------------------------------------------------------------------
1 | //= github://einars/js-beautify/[beautify, beautify-html]
2 |
3 | module.exports = { //CommonJS export of this module
4 | prettyPrint: style_html //style_html is exposed under the name prettyPrint
5 | };
--------------------------------------------------------------------------------