├── .eslintrc.json ├── .gitattributes ├── .github └── workflows │ ├── ci.yml │ └── codeql.yml ├── .gitignore ├── Gruntfile.js ├── LICENSE ├── README.md ├── _config.yml ├── assets ├── .eslintrc.json ├── master.css ├── master.js └── worker.js ├── backtest.js ├── benchmark.js ├── benchmarks ├── generated │ └── .gitkeep └── index.json ├── cli.js ├── dist ├── htmlminifier.js └── htmlminifier.min.js ├── index.html ├── package-lock.json ├── package.json ├── sample-cli-config-file.conf ├── src ├── .eslintrc.json ├── htmlminifier.js ├── htmlparser.js ├── tokenchain.js └── utils.js ├── test.js └── tests ├── .eslintrc.json ├── index.html └── minifier.js /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "node": true 4 | }, 5 | "extends": "eslint:recommended", 6 | "rules": { 7 | "array-bracket-spacing": "error", 8 | "array-callback-return": "error", 9 | "block-scoped-var": "error", 10 | "block-spacing": "error", 11 | "brace-style": [ 12 | "error", 13 | "stroustrup", 14 | { 15 | "allowSingleLine": true 16 | } 17 | ], 18 | "comma-spacing": "error", 19 | "comma-style": [ 20 | "error", 21 | "last" 22 | ], 23 | "computed-property-spacing": "error", 24 | "curly": "error", 25 | "dot-location": [ 26 | "error", 27 | "property" 28 | ], 29 | "dot-notation": "error", 30 | "eol-last": "error", 31 | "eqeqeq": "error", 32 | "func-style": [ 33 | "error", 34 | "declaration" 35 | ], 36 | "indent": [ 37 | "error", 38 | 2, 39 | { 40 | "SwitchCase": 1, 41 | "VariableDeclarator": 2 42 | } 43 | ], 44 | "key-spacing": [ 45 | "error", 46 | { 47 | "beforeColon": false, 48 | "afterColon": true, 49 | "mode": "minimum" 50 | } 51 | ], 52 | "keyword-spacing": "error", 53 | "linebreak-style": "error", 54 | "new-parens": "error", 55 | "no-array-constructor": "error", 56 | "no-caller": "error", 57 | "no-console": "off", 58 | "no-else-return": "error", 59 | "no-eq-null": "error", 60 | "no-eval": "error", 61 | "no-extend-native": "error", 62 | 
"no-extra-bind": "error", 63 | "no-extra-label": "error", 64 | "no-extra-parens": "error", 65 | "no-floating-decimal": "error", 66 | "no-implied-eval": "error", 67 | "no-iterator": "error", 68 | "no-lone-blocks": "error", 69 | "no-lonely-if": "error", 70 | "no-multiple-empty-lines": "error", 71 | "no-multi-spaces": "error", 72 | "no-multi-str": "error", 73 | "no-native-reassign": "error", 74 | "no-negated-condition": "error", 75 | "no-new-wrappers": "error", 76 | "no-new-object": "error", 77 | "no-octal-escape": "error", 78 | "no-path-concat": "error", 79 | "no-process-env": "error", 80 | "no-proto": "error", 81 | "no-return-assign": "error", 82 | "no-script-url": "error", 83 | "no-self-compare": "error", 84 | "no-sequences": "error", 85 | "no-shadow-restricted-names": "error", 86 | "no-spaced-func": "error", 87 | "no-throw-literal": "error", 88 | "no-trailing-spaces": "error", 89 | "no-undef-init": "error", 90 | "no-undefined": "error", 91 | "no-unmodified-loop-condition": "error", 92 | "no-unneeded-ternary": "error", 93 | "no-unused-expressions": "error", 94 | "no-use-before-define": [ 95 | "error", 96 | "nofunc" 97 | ], 98 | "no-useless-call": "error", 99 | "no-useless-concat": "error", 100 | "no-useless-escape": "error", 101 | "no-void": "error", 102 | "no-whitespace-before-property": "error", 103 | "no-with": "error", 104 | "object-curly-spacing": [ 105 | "error", 106 | "always" 107 | ], 108 | "operator-assignment": [ 109 | "error", 110 | "always" 111 | ], 112 | "operator-linebreak": [ 113 | "error", 114 | "after" 115 | ], 116 | "quote-props": [ 117 | "error", 118 | "as-needed" 119 | ], 120 | "quotes": [ 121 | "error", 122 | "single" 123 | ], 124 | "semi": "error", 125 | "semi-spacing": "error", 126 | "space-before-blocks": "error", 127 | "space-before-function-paren": [ 128 | "error", 129 | "never" 130 | ], 131 | "space-in-parens": "error", 132 | "space-infix-ops": "error", 133 | "space-unary-ops": "error", 134 | "spaced-comment": [ 135 | "error", 136 | 
"always", 137 | { 138 | "markers": [ 139 | "!" 140 | ] 141 | } 142 | ], 143 | "strict": "error", 144 | "wrap-iife": [ 145 | "error", 146 | "inside" 147 | ], 148 | "yoda": "error" 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Enforce Unix newlines 2 | * text=auto eol=lf 3 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | FORCE_COLOR: 2 7 | 8 | jobs: 9 | test: 10 | name: Node ${{ matrix.node }} 11 | runs-on: ${{ matrix.os }} 12 | 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | os: [ubuntu-latest] 17 | node: [10, 12, 14, 16] 18 | 19 | steps: 20 | - name: Clone repository 21 | uses: actions/checkout@v2 22 | 23 | - name: Set up Node.js 24 | uses: actions/setup-node@v2 25 | with: 26 | node-version: ${{ matrix.node }} 27 | cache: npm 28 | 29 | - name: Install npm dependencies 30 | run: npm ci 31 | 32 | - name: Run tests 33 | run: npm test 34 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: 6 | - gh-pages 7 | - "!dependabot/**" 8 | pull_request: 9 | # The branches below must be a subset of the branches above 10 | branches: 11 | - gh-pages 12 | schedule: 13 | - cron: "0 0 * * 0" 14 | 15 | jobs: 16 | analyze: 17 | name: Analyze 18 | runs-on: ubuntu-latest 19 | permissions: 20 | actions: read 21 | contents: read 22 | security-events: write 23 | 24 | steps: 25 | - name: Checkout repository 26 | uses: actions/checkout@v2 27 | 28 | - name: Initialize CodeQL 29 | uses: github/codeql-action/init@v1 30 | with: 31 | 
languages: "javascript" 32 | 33 | - name: Perform CodeQL Analysis 34 | uses: github/codeql-action/analyze@v1 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.sublime-project 2 | *.sublime-workspace 3 | .DS_Store 4 | /.jekyll-metadata 5 | /_site/ 6 | /benchmarks/*.html 7 | /benchmarks/generated 8 | /node_modules/ 9 | /npm-debug.log 10 | -------------------------------------------------------------------------------- /Gruntfile.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | function qunitVersion() { 4 | var prepareStackTrace = Error.prepareStackTrace; 5 | Error.prepareStackTrace = function() { 6 | return ''; 7 | }; 8 | try { 9 | return require('qunit').version; 10 | } 11 | finally { 12 | Error.prepareStackTrace = prepareStackTrace; 13 | } 14 | } 15 | 16 | module.exports = function(grunt) { 17 | // Force use of Unix newlines 18 | grunt.util.linefeed = '\n'; 19 | 20 | grunt.initConfig({ 21 | pkg: grunt.file.readJSON('package.json'), 22 | qunit_ver: qunitVersion(), 23 | banner: '/*!\n' + 24 | ' * HTMLMinifier v<%= pkg.version %> (<%= pkg.homepage %>)\n' + 25 | ' * Copyright 2010-<%= grunt.template.today("yyyy") %> <%= pkg.author %>\n' + 26 | ' * Licensed under the <%= pkg.license %> license\n' + 27 | ' */\n', 28 | 29 | browserify: { 30 | src: { 31 | options: { 32 | banner: '<%= banner %>', 33 | preBundleCB: function() { 34 | var fs = require('fs'); 35 | var UglifyJS = require('uglify-js'); 36 | var files = {}; 37 | UglifyJS.FILES.forEach(function(file) { 38 | files[file] = fs.readFileSync(file, 'utf8'); 39 | }); 40 | fs.writeFileSync('./dist/uglify.js', UglifyJS.minify(files, { 41 | compress: false, 42 | mangle: false, 43 | wrap: 'exports' 44 | }).code); 45 | }, 46 | postBundleCB: function(err, src, next) { 47 | require('fs').unlinkSync('./dist/uglify.js'); 48 | next(err, 
src); 49 | }, 50 | require: [ 51 | './dist/uglify.js:uglify-js', 52 | './src/htmlminifier.js:html-minifier' 53 | ] 54 | }, 55 | src: 'src/htmlminifier.js', 56 | dest: 'dist/htmlminifier.js' 57 | } 58 | }, 59 | 60 | eslint: { 61 | grunt: { 62 | src: 'Gruntfile.js' 63 | }, 64 | src: { 65 | src: ['cli.js', 'src/**/*.js'] 66 | }, 67 | tests: { 68 | src: ['tests/*.js', 'test.js'] 69 | }, 70 | web: { 71 | src: ['assets/master.js', 'assets/worker.js'] 72 | }, 73 | other: { 74 | src: ['backtest.js', 'benchmark.js'] 75 | } 76 | }, 77 | 78 | qunit: { 79 | htmlminifier: ['./tests/minifier', 'tests/index.html'] 80 | }, 81 | 82 | replace: { 83 | './index.html': [ 84 | /(<h1>.*?<span>).*?(<\/span><\/h1>)/, 85 | '$1(v<%= pkg.version %>)$2' 86 | ], 87 | './tests/index.html': [ 88 | /("[^"]+\/qunit-)[0-9.]+?(\.(?:css|js)")/g, 89 | '$1<%= qunit_ver %>$2' 90 | ] 91 | }, 92 | 93 | uglify: { 94 | options: { 95 | banner: '<%= banner %>', 96 | compress: true, 97 | mangle: true, 98 | preserveComments: false, 99 | report: 'min' 100 | }, 101 | minify: { 102 | files: { 103 | 'dist/htmlminifier.min.js': '<%= browserify.src.dest %>' 104 | } 105 | } 106 | } 107 | }); 108 | 109 | grunt.loadNpmTasks('grunt-browserify'); 110 | grunt.loadNpmTasks('grunt-contrib-uglify'); 111 | grunt.loadNpmTasks('grunt-eslint'); 112 | 113 | function report(type, details) { 114 | grunt.log.writeln(type + ' completed in ' + details.runtime + 'ms'); 115 | details.failures.forEach(function(details) { 116 | grunt.log.error(); 117 | grunt.log.error(details.name + (details.message ? ' [' + details.message + ']' : '')); 118 | grunt.log.error(details.source); 119 | grunt.log.error('Actual:'); 120 | grunt.log.error(details.actual); 121 | grunt.log.error('Expected:'); 122 | grunt.log.error(details.expected); 123 | }); 124 | grunt.log[details.failed ? 
'error' : 'ok'](details.passed + ' of ' + details.total + ' passed, ' + details.failed + ' failed'); 125 | return details.failed; 126 | } 127 | 128 | var phantomjs = require('phantomjs-prebuilt').path; 129 | grunt.registerMultiTask('qunit', function() { 130 | var done = this.async(); 131 | var errors = []; 132 | 133 | function run(testType, binPath, testPath) { 134 | grunt.util.spawn({ 135 | cmd: binPath, 136 | args: ['test.js', testPath] 137 | }, function(error, result) { 138 | if (error) { 139 | grunt.log.error(result.stderr); 140 | grunt.log.error(testType + ' test failed to load'); 141 | errors.push(-1); 142 | } 143 | else { 144 | var output = result.stdout; 145 | var index = output.lastIndexOf('\n'); 146 | if (index !== -1) { 147 | // There's something before the report JSON 148 | // Log it to the console -- it's probably some debug output: 149 | console.log(output.slice(0, index)); 150 | output = output.slice(index); 151 | } 152 | errors.push(report(testType, JSON.parse(output))); 153 | } 154 | if (errors.length === 2) { 155 | done(!errors[0] && !errors[1]); 156 | } 157 | }); 158 | } 159 | 160 | run('node', process.argv[0], this.data[0]); 161 | run('web', phantomjs, this.data[1]); 162 | }); 163 | 164 | grunt.registerMultiTask('replace', function() { 165 | var pattern = this.data[0]; 166 | var path = this.target; 167 | var html = grunt.file.read(path); 168 | html = html.replace(pattern, this.data[1]); 169 | grunt.file.write(path, html); 170 | }); 171 | 172 | grunt.registerTask('dist', [ 173 | 'replace', 174 | 'browserify', 175 | 'uglify' 176 | ]); 177 | 178 | grunt.registerTask('test', [ 179 | 'eslint', 180 | 'dist', 181 | 'qunit' 182 | ]); 183 | 184 | grunt.registerTask('default', 'test'); 185 | }; 186 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2018 Juriy "kangax" Zaytsev 2 | 3 | Permission is hereby 
granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HTMLMinifier 2 | 3 | [NPM version](https://www.npmjs.com/package/html-minifier) 4 | [Build status](https://github.com/kangax/html-minifier/actions?query=workflow%3ACI+branch%3Agh-pages) 5 | [Dependency status](https://david-dm.org/kangax/html-minifier) 6 | 7 | [HTMLMinifier](https://kangax.github.io/html-minifier/) is a highly **configurable**, **well-tested**, JavaScript-based HTML minifier.
8 | 9 | See [corresponding blog post](http://perfectionkills.com/experimenting-with-html-minifier/) for all the gory details of [how it works](http://perfectionkills.com/experimenting-with-html-minifier/#how_it_works), [description of each option](http://perfectionkills.com/experimenting-with-html-minifier/#options), [testing results](http://perfectionkills.com/experimenting-with-html-minifier/#field_testing) and [conclusions](http://perfectionkills.com/experimenting-with-html-minifier/#cost_and_benefits). 10 | 11 | [Test suite is available online](https://kangax.github.io/html-minifier/tests/). 12 | 13 | Also see corresponding [Ruby wrapper](https://github.com/stereobooster/html_minifier), and for Node.js, [Grunt plugin](https://github.com/gruntjs/grunt-contrib-htmlmin), [Gulp module](https://github.com/jonschlinkert/gulp-htmlmin), [Koa middleware wrapper](https://github.com/koajs/html-minifier) and [Express middleware wrapper](https://github.com/melonmanchan/express-minify-html). 14 | 15 | For lint-like capabilities take a look at [HTMLLint](https://github.com/kangax/html-lint). 16 | 17 | ## Minification comparison 18 | 19 | How does HTMLMinifier compare to other solutions — [HTML Minifier from Will Peavy](http://www.willpeavy.com/minifier/) (1st result in [Google search for "html minifier"](https://www.google.com/#q=html+minifier)) as well as [htmlcompressor.com](http://htmlcompressor.com) and [minimize](https://github.com/Swaagie/minimize)? 
20 | 21 | | Site | Original size *(KB)* | HTMLMinifier | minimize | Will Peavy | htmlcompressor.com | 22 | | ---------------------------------------------------------------------------- |:--------------------:| ------------:| --------:| ----------:| ------------------:| 23 | | [Google](https://www.google.com/) | 46 | **42** | 46 | 48 | 46 | 24 | | [HTMLMinifier](https://github.com/kangax/html-minifier) | 125 | **98** | 111 | 117 | 111 | 25 | | [Twitter](https://twitter.com/) | 207 | **165** | 200 | 224 | 200 | 26 | | [Stack Overflow](https://stackoverflow.com/) | 253 | **195** | 207 | 215 | 204 | 27 | | [Bootstrap CSS](https://getbootstrap.com/docs/3.3/css/) | 271 | **260** | 269 | 228 | 269 | 28 | | [BBC](https://www.bbc.co.uk/) | 298 | **239** | 290 | 291 | 280 | 29 | | [Amazon](https://www.amazon.co.uk/) | 422 | **316** | 412 | 425 | n/a | 30 | | [NBC](https://www.nbc.com/) | 553 | **530** | 552 | 553 | 534 | 31 | | [Wikipedia](https://en.wikipedia.org/wiki/President_of_the_United_States) | 565 | **461** | 548 | 569 | 548 | 32 | | [New York Times](https://www.nytimes.com/) | 678 | **606** | 675 | 670 | n/a | 33 | | [Eloquent Javascript](https://eloquentjavascript.net/1st_edition/print.html) | 870 | **815** | 840 | 864 | n/a | 34 | | [ES6 table](https://kangax.github.io/compat-table/es6/) | 5911 | **5051** | 5595 | n/a | n/a | 35 | | [ES draft](https://tc39.github.io/ecma262/) | 6126 | **5495** | 5664 | n/a | n/a | 36 | 37 | ## Options Quick Reference 38 | 39 | Most of the options are disabled by default. 
40 | 41 | | Option | Description | Default | 42 | |--------------------------------|-----------------|---------| 43 | | `caseSensitive` | Treat attributes in case sensitive manner (useful for custom HTML tags) | `false` | 44 | | `collapseBooleanAttributes` | [Omit attribute values from boolean attributes](http://perfectionkills.com/experimenting-with-html-minifier/#collapse_boolean_attributes) | `false` | 45 | | `collapseInlineTagWhitespace` | Don't leave any spaces between `display:inline;` elements when collapsing. Must be used in conjunction with `collapseWhitespace=true` | `false` | 46 | | `collapseWhitespace` | [Collapse white space that contributes to text nodes in a document tree](http://perfectionkills.com/experimenting-with-html-minifier/#collapse_whitespace) | `false` | 47 | | `conservativeCollapse` | Always collapse to 1 space (never remove it entirely). Must be used in conjunction with `collapseWhitespace=true` | `false` | 48 | | `continueOnParseError` | [Handle parse errors](https://html.spec.whatwg.org/multipage/parsing.html#parse-errors) instead of aborting. | `false` | 49 | | `customAttrAssign` | Arrays of regex'es that allow to support custom attribute assign expressions (e.g. `'<div flex?="{{mode != cover}}"></div>'`) | `[ ]` | 50 | | `customAttrCollapse` | Regex that specifies custom attribute to strip newlines from (e.g. `/ng-class/`) | | 51 | | `customAttrSurround` | Arrays of regex'es that allow to support custom attribute surround expressions (e.g. `<input {{#if value}}checked="checked"{{/if}}>`) | `[ ]` | 52 | | `customEventAttributes` | Arrays of regex'es that allow to support custom event attributes for `minifyJS` (e.g. 
`ng-click`) | `[ /^on[a-z]{3,}$/ ]` | 53 | | `decodeEntities` | Use direct Unicode characters whenever possible | `false` | 54 | | `html5` | Parse input according to HTML5 specifications | `true` | 55 | | `ignoreCustomComments` | Array of regex'es that allow to ignore certain comments, when matched | `[ /^!/ ]` | 56 | | `ignoreCustomFragments` | Array of regex'es that allow to ignore certain fragments, when matched (e.g. `<?php ... ?>`, `{{ ... }}`, etc.) | `[ /<%[\s\S]*?%>/, /<\?[\s\S]*?\?>/ ]` | 57 | | `includeAutoGeneratedTags` | Insert tags generated by HTML parser | `true` | 58 | | `keepClosingSlash` | Keep the trailing slash on singleton elements | `false` | 59 | | `maxLineLength` | Specify a maximum line length. Compressed output will be split by newlines at valid HTML split-points | 60 | | `minifyCSS` | Minify CSS in style elements and style attributes (uses [clean-css](https://github.com/jakubpawlowicz/clean-css)) | `false` (could be `true`, `Object`, `Function(text, type)`) | 61 | | `minifyJS` | Minify JavaScript in script elements and event attributes (uses [UglifyJS](https://github.com/mishoo/UglifyJS2)) | `false` (could be `true`, `Object`, `Function(text, inline)`) | 62 | | `minifyURLs` | Minify URLs in various attributes (uses [relateurl](https://github.com/stevenvachon/relateurl)) | `false` (could be `String`, `Object`, `Function(text)`) | 63 | | `preserveLineBreaks` | Always collapse to 1 line break (never remove it entirely) when whitespace between tags include a line break. Must be used in conjunction with `collapseWhitespace=true` | `false` | 64 | | `preventAttributesEscaping` | Prevents the escaping of the values of attributes | `false` | 65 | | `processConditionalComments` | Process contents of conditional comments through minifier | `false` | 66 | | `processScripts` | Array of strings corresponding to types of script elements to process through minifier (e.g. `text/ng-template`, `text/x-handlebars-template`, etc.) 
| `[ ]` | 67 | | `quoteCharacter` | Type of quote to use for attribute values (' or ") | | 68 | | `removeAttributeQuotes` | [Remove quotes around attributes when possible](http://perfectionkills.com/experimenting-with-html-minifier/#remove_attribute_quotes) | `false` | 69 | | `removeComments` | [Strip HTML comments](http://perfectionkills.com/experimenting-with-html-minifier/#remove_comments) | `false` | 70 | | `removeEmptyAttributes` | [Remove all attributes with whitespace-only values](http://perfectionkills.com/experimenting-with-html-minifier/#remove_empty_or_blank_attributes) | `false` (could be `true`, `Function(attrName, tag)`) | 71 | | `removeEmptyElements` | [Remove all elements with empty contents](http://perfectionkills.com/experimenting-with-html-minifier/#remove_empty_elements) | `false` | 72 | | `removeOptionalTags` | [Remove optional tags](http://perfectionkills.com/experimenting-with-html-minifier/#remove_optional_tags) | `false` | 73 | | `removeRedundantAttributes` | [Remove attributes when value matches default.](http://perfectionkills.com/experimenting-with-html-minifier/#remove_redundant_attributes) | `false` | 74 | | `removeScriptTypeAttributes` | Remove `type="text/javascript"` from `script` tags. Other `type` attribute values are left intact | `false` | 75 | | `removeStyleLinkTypeAttributes`| Remove `type="text/css"` from `style` and `link` tags. Other `type` attribute values are left intact | `false` | 76 | | `removeTagWhitespace` | Remove space between attributes whenever possible. **Note that this will result in invalid HTML!** | `false` | 77 | | `sortAttributes` | [Sort attributes by frequency](#sorting-attributes--style-classes) | `false` | 78 | | `sortClassName` | [Sort style classes by frequency](#sorting-attributes--style-classes) | `false` | 79 | | `trimCustomFragments` | Trim white space around `ignoreCustomFragments`. 
| `false` | 80 | | `useShortDoctype` | [Replaces the `doctype` with the short (HTML5) doctype](http://perfectionkills.com/experimenting-with-html-minifier/#use_short_doctype) | `false` | 81 | 82 | ### Sorting attributes / style classes 83 | 84 | Minifier options like `sortAttributes` and `sortClassName` won't impact the plain-text size of the output. However, they form long repetitive chains of characters that should improve compression ratio of gzip used in HTTP compression. 85 | 86 | ## Special cases 87 | 88 | ### Ignoring chunks of markup 89 | 90 | If you have chunks of markup you would like preserved, you can wrap them `<!-- htmlmin:ignore -->`. 91 | 92 | ### Preserving SVG tags 93 | 94 | SVG tags are automatically recognized, and when they are minified, both case-sensitivity and closing-slashes are preserved, regardless of the minification settings used for the rest of the file. 95 | 96 | ### Working with invalid markup 97 | 98 | HTMLMinifier **can't work with invalid or partial chunks of markup**. This is because it parses markup into a tree structure, then modifies it (removing anything that was specified for removal, ignoring anything that was specified to be ignored, etc.), then it creates a markup out of that tree and returns it. 99 | 100 | Input markup (e.g. `<p id="">foo`) 101 | 102 | ↓ 103 | 104 | Internal representation of markup in a form of tree (e.g. `{ tag: "p", attr: "id", children: ["foo"] }`) 105 | 106 | ↓ 107 | 108 | Transformation of internal representation (e.g. removal of `id` attribute) 109 | 110 | ↓ 111 | 112 | Output of resulting markup (e.g. `<p>foo</p>`) 113 | 114 | HTMLMinifier can't know that original markup was only half of the tree; it does its best to try to parse it as a full tree and it loses information about tree being malformed or partial in the beginning. As a result, it can't create a partial/malformed tree at the time of the output. 
115 | 116 | ## Installation Instructions 117 | 118 | From NPM for use as a command line app: 119 | 120 | ```shell 121 | npm install html-minifier -g 122 | ``` 123 | 124 | From NPM for programmatic use: 125 | 126 | ```shell 127 | npm install html-minifier 128 | ``` 129 | 130 | From Git: 131 | 132 | ```shell 133 | git clone https://github.com/kangax/html-minifier.git 134 | cd html-minifier 135 | npm link . 136 | ``` 137 | 138 | ## Usage 139 | 140 | Note that almost all options are disabled by default. For command line usage please see `html-minifier --help` for a list of available options. Experiment and find what works best for you and your project. 141 | 142 | * **Sample command line:** ``html-minifier --collapse-whitespace --remove-comments --remove-optional-tags --remove-redundant-attributes --remove-script-type-attributes --remove-tag-whitespace --use-short-doctype --minify-css true --minify-js true`` 143 | 144 | ### Node.js 145 | 146 | ```js 147 | var minify = require('html-minifier').minify; 148 | var result = minify('<p title="blah" id="moo">foo</p>', { 149 | removeAttributeQuotes: true 150 | }); 151 | result; // '<p title=blah id=moo>foo</p>' 152 | ``` 153 | ### Gulp 154 | 155 | ```js 156 | const { src, dest, series } = require('gulp'); 157 | const htmlMinify = require('html-minifier'); 158 | 159 | const options = { 160 | includeAutoGeneratedTags: true, 161 | removeAttributeQuotes: true, 162 | removeComments: true, 163 | removeRedundantAttributes: true, 164 | removeScriptTypeAttributes: true, 165 | removeStyleLinkTypeAttributes: true, 166 | sortClassName: true, 167 | useShortDoctype: true, 168 | collapseWhitespace: true 169 | }; 170 | 171 | function html() { 172 | return src('app/**/*.html') 173 | .on('data', function(file) { 174 | const bufferFile = Buffer.from(htmlMinify.minify(file.contents.toString(), options)) 175 | return file.contents = bufferFile 176 | }) 177 | .pipe(dest('build')) 178 | } 179 | 180 | exports.html = series(html) 181 | ``` 182 | 183 | ## 
Running benchmarks 184 | 185 | Benchmarks for minified HTML: 186 | 187 | ```shell 188 | node benchmark.js 189 | ``` 190 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | exclude: 2 | - "*.conf" 3 | - "benchmarks" 4 | - "node_modules" 5 | - "src" 6 | - "backtest.js" 7 | - "benchmark.js" 8 | - "cli.js" 9 | - "Gruntfile.js" 10 | - "package.json" 11 | - "package-lock.json" 12 | - "README.md" 13 | - "LICENSE" 14 | - "test.js" 15 | -------------------------------------------------------------------------------- /assets/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "browser": true, 4 | "worker": true 5 | }, 6 | "rules": { 7 | "strict": [ 8 | "error", 9 | "function" 10 | ] 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /assets/master.css: -------------------------------------------------------------------------------- 1 | body { font-family: "Cambria", Georgia, Times, "Times New Roman", serif; margin-top: 0; padding-top: 0; } 2 | textarea { height: 30em; } 3 | h1 { margin-top: 0.5em; font-size: 1.25em; } 4 | h1 span { font-size: 0.6em; } 5 | button { font-weight: bold; width: 100px; } 6 | 7 | .minify-button { margin: 16px 0; } 8 | #outer-wrapper { overflow: hidden; } 9 | #wrapper { width: 65%; float: left; } 10 | #input { width: 99%; height: 18em; } 11 | #output { width: 99%; height: 18em; margin-bottom: 2em; } 12 | #options { float: right; width: 33%; padding-left: 1em; margin-top: 3em; } 13 | #options ul { list-style: none; padding: 0.5em; overflow: hidden; background: #ffe; margin-top: 0; } 14 | #options ul li { float: left; clear: both; padding-bottom: 0.5em; } 15 | #options ul li div { margin-left: 1.75em; } 16 | #options label, #options input { float: left; } 17 | #options label.sub-option{ margin-left: 22px; 
/**
 * Shorthand for `document.getElementById`.
 *
 * @param {string} id - element id to look up
 * @returns {HTMLElement|null} whatever `document.getElementById` returns
 */
function byId(id) {
  return document.getElementById(id);
}

/**
 * Escapes `&`, `<` and `>` so that a value can be placed inside markup
 * that is assigned to `innerHTML` without being parsed as HTML.
 *
 * @param {*} str - value to escape; it is converted to a string first
 * @returns {string} the escaped text
 */
function escapeHTML(str) {
  // `&` has to be handled first, otherwise the ampersands of the entities
  // produced by the two later replacements would be escaped a second time.
  return (str + '').replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

/**
 * Calls `fn` for every `<input>` element found inside the element whose
 * id is `options`.
 *
 * @param {function} fn - callback invoked as by `Array.prototype.forEach`
 */
function forEachOption(fn) {
  [].forEach.call(byId('options').getElementsByTagName('input'), fn);
}

/**
 * Builds the minifier options object from the option inputs.
 *
 * Checkboxes contribute their `checked` state. Any other input contributes
 * its value with surrounding whitespace removed; inputs that are blank
 * after trimming are left out of the result. `maxLineLength` is converted
 * to a base-10 integer and `processScripts` is split on commas.
 *
 * @returns {Object} option values keyed by the id of each input
 */
function getOptions() {
  var options = {};
  forEachOption(function(element) {
    var key = element.id;
    var value;
    if (element.type === 'checkbox') {
      value = element.checked;
    }
    else {
      // The `g` flag is required so that both ends are trimmed
      value = element.value.replace(/^\s+|\s+$/g, '');
      if (!value) {
        return;
      }
    }
    switch (key) {
      case 'maxLineLength':
        // Explicit radix: the field is a decimal character count
        value = parseInt(value, 10);
        break;
      case 'processScripts':
        value = value.split(/\s*,\s*/);
    }
    options[key] = value;
  });
  return options;
}

/**
 * Formats an integer with `,` as the thousands separator.
 *
 * @param {number|string} str - integer to format; may be negative
 * @returns {string} e.g. `1234567` becomes `'1,234,567'`
 */
function commify(str) {
  // Insert a comma at every position that is followed by a whole number of
  // three-digit groups. `\B` never matches between a leading `-` and the
  // first digit, so negative values do not get a comma after the sign.
  return String(str).replace(/\B(?=(\d{3})+(?!\d))/g, ',');
}
// Snapshot of the option values at the time this script first runs; used by
// the "select-defaults" control to put every option back to that state.
var defaultOptions = getOptions();

/**
 * Click handler that restores every option input to its initial state.
 *
 * getOptions() leaves blank text inputs out of its result, so a text input
 * that is missing from the snapshot was empty initially and is cleared
 * here. Iterating over the inputs rather than over the snapshot keys is
 * what allows those fields to be reset at all.
 *
 * @returns {boolean} always `false`
 */
byId('select-defaults').onclick = function() {
  forEachOption(function(element) {
    var key = element.id;
    var hasDefault = Object.prototype.hasOwnProperty.call(defaultOptions, key);
    if (element.type === 'checkbox') {
      if (hasDefault) {
        element.checked = defaultOptions[key];
      }
    }
    else {
      element.value = hasDefault ? defaultOptions[key] : '';
    }
  });
  return false;
};
/**
 * Runs `git` and hands its exit code and captured stdout to a callback.
 *
 * Every argument except the last is a git argument, or an array of git
 * arguments (arrays are flattened one level). The last argument is the
 * callback, invoked as callback(code, output).
 *
 * stdin and stderr of the child are ignored; only stdout is collected.
 */
function git() {
  var args = [].concat.apply([], [].slice.call(arguments, 0, -1));
  var callback = arguments[arguments.length - 1];
  var task = child_process.spawn('git', args, { stdio: ['ignore', 'pipe', 'ignore'] });
  var output = '';
  task.stdout.setEncoding('utf8');
  task.stdout.on('data', function(data) {
    output += data;
  });
  // Report on 'close' rather than 'exit': 'exit' can fire while stdout still
  // has buffered data, which would hand a truncated `output` to the callback.
  task.on('close', function(code) {
    callback(code, output);
  });
}
/**
 * Child-process worker: minifies every benchmark file with the currently
 * checked-out sources and reports each result to the parent process.
 *
 * Sends 'ready' once the minifier module has loaded, then one
 * { name, size } message per successfully minified file, and disconnects
 * from the parent after the last file has been handled.
 *
 * @param {string} hash    commit being tested (not used by this function)
 * @param {Object} options minifier options shared by all files
 */
function minify(hash, options) {
  // Named differently from this function so the worker does not shadow itself.
  var minifyHTML = loadModule();
  process.send('ready');
  var remaining = fileNames.length;
  if (!remaining) {
    // Nothing to process: disconnect right away instead of leaving the
    // channel open with no callback left to close it.
    process.disconnect();
    return;
  }
  fileNames.forEach(function(fileName) {
    readText(path.join('benchmarks/', fileName + '.html'), function(err, data) {
      if (err) {
        throw err;
      }
      try {
        var minified = minifyHTML(data, getOptions(fileName, options));
        if (minified) {
          process.send({ name: fileName, size: minified.length });
        }
        else {
          throw new Error('unexpected result: ' + minified);
        }
      }
      catch (e) {
        // A failure on one file is logged to stderr; the other files still run.
        console.error('[' + fileName + ']', e.stack || e);
      }
      finally {
        if (!--remaining) {
          process.disconnect();
        }
      }
    });
  });
}
if (count) { 113 | git('log', '--date=iso', '--pretty=format:%h %cd', '-' + count, function(code, data) { 114 | var table = {}; 115 | var commits = data.split(/\s*?\n/).map(function(line) { 116 | var index = line.indexOf(' '); 117 | var hash = line.substr(0, index); 118 | table[hash] = { 119 | date: line.substr(index + 1).replace('+', '').replace(/ 0000$/, '') 120 | }; 121 | return hash; 122 | }); 123 | var nThreads = os.cpus().length; 124 | var running = 0; 125 | var progress = new Progress('[:bar] :etas', { 126 | width: 50, 127 | total: commits.length * 2 128 | }); 129 | 130 | function fork() { 131 | if (commits.length && running < nThreads) { 132 | var hash = commits.shift(); 133 | var task = child_process.fork('./backtest', { silent: true }); 134 | var error = ''; 135 | var id = setTimeout(function() { 136 | if (task.connected) { 137 | error += 'task timed out\n'; 138 | task.kill(); 139 | } 140 | }, 60000); 141 | task.on('message', function(data) { 142 | if (data === 'ready') { 143 | progress.tick(1); 144 | fork(); 145 | } 146 | else { 147 | table[hash][data.name] = data.size; 148 | } 149 | }).on('exit', function() { 150 | progress.tick(1); 151 | clearTimeout(id); 152 | if (error) { 153 | table[hash].error = error; 154 | } 155 | if (!--running && !commits.length) { 156 | print(table); 157 | } 158 | else { 159 | fork(); 160 | } 161 | }); 162 | task.stderr.setEncoding('utf8'); 163 | task.stderr.on('data', function(data) { 164 | error += data; 165 | }); 166 | task.stdout.resume(); 167 | task.send(hash); 168 | running++; 169 | } 170 | } 171 | 172 | fork(); 173 | }); 174 | } 175 | else { 176 | console.error('Invalid input:', process.argv[2]); 177 | } 178 | } 179 | else { 180 | process.on('message', function(hash) { 181 | var paths = ['src', 'benchmark.conf', 'sample-cli-config-file.conf']; 182 | git('reset', 'HEAD', '--', paths, function() { 183 | var conf = 'sample-cli-config-file.conf'; 184 | 185 | function checkout() { 186 | var path = paths.shift(); 187 | 
git('checkout', hash, '--', path, function(code) { 188 | if (code === 0 && path === 'benchmark.conf') { 189 | conf = path; 190 | } 191 | if (paths.length) { 192 | checkout(); 193 | } 194 | else { 195 | readText(conf, function(err, data) { 196 | if (err) { 197 | throw err; 198 | } 199 | else { 200 | minify(hash, JSON.parse(data)); 201 | } 202 | }); 203 | } 204 | }); 205 | } 206 | 207 | checkout(); 208 | }); 209 | }); 210 | } 211 | -------------------------------------------------------------------------------- /benchmark.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | 'use strict'; 4 | 5 | var packages = require('./package.json').benchmarkDependencies; 6 | packages = Object.keys(packages).map(function(name) { 7 | return name + '@' + packages[name]; 8 | }); 9 | packages.unshift('install', '--no-save', '--no-optional'); 10 | var installed = require('child_process').spawnSync('npm', packages, { 11 | encoding: 'utf-8', 12 | shell: true 13 | }); 14 | if (installed.error) { 15 | throw installed.error; 16 | } 17 | else if (installed.status) { 18 | console.log(installed.stdout); 19 | console.error(installed.stderr); 20 | process.exit(installed.status); 21 | } 22 | 23 | var brotli = require('brotli'), 24 | chalk = require('chalk'), 25 | fork = require('child_process').fork, 26 | fs = require('fs'), 27 | https = require('https'), 28 | lzma = require('lzma'), 29 | Minimize = require('minimize'), 30 | path = require('path'), 31 | Progress = require('progress'), 32 | querystring = require('querystring'), 33 | Table = require('cli-table'), 34 | url = require('url'), 35 | zlib = require('zlib'); 36 | 37 | var urls = require('./benchmarks'); 38 | var fileNames = Object.keys(urls); 39 | 40 | var minimize = new Minimize(); 41 | 42 | var progress = new Progress('[:bar] :etas :fileName', { 43 | width: 50, 44 | total: fileNames.length 45 | }); 46 | 47 | var table = new Table({ 48 | head: ['File', 'Before', 'After', 
/**
 * Reads a file as raw bytes and passes the data to `callback`.
 * Throws (from the I/O callback) if the file cannot be read; the message
 * includes the underlying error so the cause is not lost.
 */
function readBuffer(filePath, callback) {
  fs.readFile(filePath, function(err, data) {
    if (err) {
      throw new Error('There was an error reading ' + filePath + ': ' + err.message);
    }
    callback(data);
  });
}

/**
 * Reads a file as UTF-8 text and passes the string to `callback`.
 * Throws (from the I/O callback) if the file cannot be read.
 */
function readText(filePath, callback) {
  fs.readFile(filePath, { encoding: 'utf8' }, function(err, data) {
    if (err) {
      throw new Error('There was an error reading ' + filePath + ': ' + err.message);
    }
    callback(data);
  });
}

/**
 * Writes raw data to a file, then calls `callback` with no arguments.
 * Throws (from the I/O callback) if the file cannot be written.
 */
function writeBuffer(filePath, data, callback) {
  fs.writeFile(filePath, data, function(err) {
    if (err) {
      throw new Error('There was an error writing ' + filePath + ': ' + err.message);
    }
    callback();
  });
}

/**
 * Writes UTF-8 text to a file, then calls `callback` if one was given.
 * Throws (from the I/O callback) if the file cannot be written.
 */
function writeText(filePath, data, callback) {
  fs.writeFile(filePath, data, { encoding: 'utf8' }, function(err) {
    if (err) {
      throw new Error('There was an error writing ' + filePath + ': ' + err.message);
    }
    if (callback) {
      callback();
    }
  });
}
/**
 * Runs callback-style tasks strictly one after another.
 *
 * Each task receives a single "continue" function and must call it when it
 * has finished; only then is the following task started. Once every task has
 * continued, `done` is called with no arguments.
 *
 * @param {Function[]} tasks functions of the form function(next) { ... }
 * @param {Function}   done  called after the last task has continued
 */
function run(tasks, done) {
  var position = 0;

  function next() {
    if (position >= tasks.length) {
      done();
      return;
    }
    var current = position;
    position += 1;
    tasks[current](next);
  }

  next();
}
1 : 0; 188 | }).forEach(function(fileName) { 189 | output(rows[fileName].report); 190 | }); 191 | return content; 192 | } 193 | 194 | function displayTable() { 195 | fileNames.forEach(function(fileName) { 196 | table.push(rows[fileName].display); 197 | }); 198 | console.log(); 199 | console.log(table.toString()); 200 | } 201 | 202 | run(fileNames.map(function(fileName) { 203 | var filePath = path.join('benchmarks/', fileName + '.html'); 204 | 205 | function processFile(site, done) { 206 | var original = { 207 | filePath: filePath, 208 | gzFilePath: path.join('benchmarks/generated/', fileName + '.html.gz'), 209 | lzFilePath: path.join('benchmarks/generated/', fileName + '.html.lz'), 210 | brFilePath: path.join('benchmarks/generated/', fileName + '.html.br') 211 | }; 212 | var infos = {}; 213 | ['minifier', 'minimize', 'willpeavy', 'compressor'].forEach(function(name) { 214 | infos[name] = { 215 | filePath: path.join('benchmarks/generated/', fileName + '.' + name + '.html'), 216 | gzFilePath: path.join('benchmarks/generated/', fileName + '.' + name + '.html.gz'), 217 | lzFilePath: path.join('benchmarks/generated/', fileName + '.' + name + '.html.lz'), 218 | brFilePath: path.join('benchmarks/generated/', fileName + '.' 
/**
 * Compresses the file at info.filePath with gzip, LZMA and Brotli and records
 * the resulting sizes on `info`, then calls `done`.
 *
 * Fields written: endTime (set on entry), gzTime/gzSize, lzTime/lzSize,
 * brTime/brSize and finally size (the uncompressed file). The steps run
 * sequentially in that order.
 *
 * @param {Object}   info descriptor with filePath, gzFilePath, lzFilePath, brFilePath
 * @param {Function} done called once all sizes have been recorded
 */
function readSizes(info, done) {
  info.endTime = Date.now();
  run([
    // Apply Gzip on minified output
    function(done) {
      gzip(info.filePath, info.gzFilePath, function() {
        info.gzTime = Date.now();
        // Open and read the size of the minified+gzip output
        readSize(info.gzFilePath, function(size) {
          info.gzSize = size;
          done();
        });
      });
    },
    // Apply LZMA on minified output
    function(done) {
      readBuffer(info.filePath, function(data) {
        lzma.compress(data, 1, function(result, error) {
          if (error) {
            throw error;
          }
          // Buffer.from() replaces the deprecated `new Buffer()` constructor.
          writeBuffer(info.lzFilePath, Buffer.from(result), function() {
            info.lzTime = Date.now();
            // Open and read the size of the minified+lzma output
            readSize(info.lzFilePath, function(size) {
              info.lzSize = size;
              done();
            });
          });
        });
      });
    },
    // Apply Brotli on minified output
    function(done) {
      readBuffer(info.filePath, function(data) {
        var compressed = brotli.compress(data, true);
        // Wrap exactly the bytes of the returned view: using the backing
        // ArrayBuffer alone would include unrelated bytes whenever the view
        // does not span the whole buffer.
        var output = Buffer.from(compressed.buffer, compressed.byteOffset, compressed.byteLength);
        writeBuffer(info.brFilePath, output, function() {
          info.brTime = Date.now();
          // Open and read the size of the minified+brotli output
          readSize(info.brFilePath, function(size) {
            info.brSize = size;
            done();
          });
        });
      });
    },
    // Open and read the size of the minified output
    function(done) {
      readSize(info.filePath, function(size) {
        info.size = size;
        done();
      });
    }
  ], done);
}
/**
 * Minifies the benchmark file through the willpeavy.com online minifier and
 * records the resulting sizes on infos.willpeavy.
 *
 * The file is POSTed as form data; on an HTTP 200 response the minified
 * markup is extracted from the <textarea> in the returned page, written to
 * info.filePath and measured with readSizes(). If the site answers with any
 * other status, or the request itself fails, all sizes are recorded as 0 so
 * the benchmark can carry on (same policy as testHTMLCompressor).
 *
 * @param {Function} done called once the sizes have been recorded
 */
function testWillPeavy(done) {
  readText(filePath, function(data) {
    var options = url.parse('https://www.willpeavy.com/minifier/');
    options.method = 'POST';
    options.headers = {
      'Content-Type': 'application/x-www-form-urlencoded'
    };
    var info = infos.willpeavy;

    function failed() {
      // Site refused to process content, or the request failed.
      // `info` is cleared so that `done` cannot be called twice.
      if (info) {
        info.size = 0;
        info.gzSize = 0;
        info.lzSize = 0;
        info.brSize = 0;
        info = null;
        done();
      }
    }

    https.request(options, function(res) {
      res.setEncoding('utf8');
      var response = '';
      res.on('data', function(chunk) {
        response += chunk;
      }).on('end', function() {
        if (info && res.statusCode === 200) {
          // Extract result from <textarea/>
          var start = response.indexOf('>', response.indexOf('<textarea'));
          var end = response.lastIndexOf('</textarea>');
          // The page returns closing tags as "<\/"; turn them back into "</"
          var result = response.slice(start + 1, end).replace(/<\\\//g, '</');
          writeText(info.filePath, result, function() {
            readSizes(info, done);
          });
        }
        else {
          failed();
        }
      });
    }).on('error', failed).end(querystring.stringify({
      html: data
    }));
  });
}
https.request(options, function(res) { 359 | if (res.headers['content-encoding'] === 'gzip') { 360 | res = res.pipe(zlib.createGunzip()); 361 | } 362 | res.setEncoding('utf8'); 363 | var response = ''; 364 | res.on('data', function(chunk) { 365 | response += chunk; 366 | }).on('end', function() { 367 | try { 368 | response = JSON.parse(response); 369 | } 370 | catch (e) { 371 | response = {}; 372 | } 373 | if (info && response.success) { 374 | writeText(info.filePath, response.result, function() { 375 | readSizes(info, done); 376 | }); 377 | } 378 | // Site refused to process content 379 | else { 380 | failed(); 381 | } 382 | }); 383 | }).on('error', failed).end(querystring.stringify({ 384 | code_type: 'html', 385 | html_level: 3, 386 | html_strip_quotes: 1, 387 | minimize_style: 1, 388 | minimize_events: 1, 389 | minimize_js_href: 1, 390 | minimize_css: 1, 391 | minimize_js: 1, 392 | html_optional_cdata: 1, 393 | js_engine: 'yui', 394 | js_fallback: 1, 395 | code: data 396 | })); 397 | }); 398 | } 399 | 400 | run([ 401 | function(done) { 402 | readSizes(original, done); 403 | }, 404 | testHTMLMinifier, 405 | testMinimize, 406 | testWillPeavy, 407 | testHTMLCompressor 408 | ], function() { 409 | var display = [ 410 | [fileName, '+ gzip', '+ lzma', '+ brotli'].join('\n'), 411 | [redSize(original.size), redSize(original.gzSize), redSize(original.lzSize), redSize(original.brSize)].join('\n') 412 | ]; 413 | var report = [ 414 | '[' + fileName + '](' + urls[fileName] + ')', 415 | toKb(original.size) 416 | ]; 417 | for (var name in infos) { 418 | var info = infos[name]; 419 | display.push([greenSize(info.size), greenSize(info.gzSize), greenSize(info.lzSize), greenSize(info.brSize)].join('\n')); 420 | report.push(info.size ? 
/**
 * Downloads `site` over HTTPS into `filePath`, following redirects.
 *
 * A 200 response is streamed to disk (gunzipped first when the server marks
 * it as gzip-encoded) and `callback` is called with the URL that was finally
 * fetched. A 3xx response carrying a Location header is followed. Any other
 * status throws.
 *
 * @param {string}   site     URL to fetch
 * @param {Function} callback called as callback(finalUrl) once the file is written
 */
function get(site, callback) {
  var options = url.parse(site);
  https.get(options, function(res) {
    var status = res.statusCode;
    if (status === 200) {
      if (res.headers['content-encoding'] === 'gzip') {
        res = res.pipe(zlib.createGunzip());
      }
      res.pipe(fs.createWriteStream(filePath)).on('finish', function() {
        callback(site);
      });
    }
    else if (status >= 300 && status < 400 && res.headers.location) {
      // The redirect body is of no interest, but it has to be consumed;
      // otherwise the response keeps holding its socket and memory.
      res.resume();
      // Location may be relative, so resolve it against the current URL.
      get(url.resolve(site, res.headers.location), callback);
    }
    else {
      throw new Error('HTTP error ' + status + '\n' + site);
    }
  });
}
data); 483 | }); 484 | }); 485 | -------------------------------------------------------------------------------- /benchmarks/generated/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kangax/html-minifier/2f2db2eee9b972d4a4e275ae723a1f99a9e9da58/benchmarks/generated/.gitkeep -------------------------------------------------------------------------------- /benchmarks/index.json: -------------------------------------------------------------------------------- 1 | { 2 | "Amazon": "https://www.amazon.co.uk/", 3 | "BBC": "https://www.bbc.co.uk/", 4 | "Bootstrap CSS": "https://getbootstrap.com/docs/3.3/css/", 5 | "Eloquent Javascript": "https://eloquentjavascript.net/1st_edition/print.html", 6 | "ES draft": "https://tc39.github.io/ecma262/", 7 | "ES6 table": "https://kangax.github.io/compat-table/es6/", 8 | "Google": "https://www.google.com/", 9 | "HTMLMinifier": "https://github.com/kangax/html-minifier", 10 | "NBC": "https://www.nbc.com/", 11 | "New York Times": "https://www.nytimes.com/", 12 | "Stack Overflow": "https://stackoverflow.com/", 13 | "Twitter": "https://twitter.com/", 14 | "Wikipedia": "https://en.wikipedia.org/wiki/President_of_the_United_States" 15 | } 16 | -------------------------------------------------------------------------------- /cli.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /** 3 | * html-minifier CLI tool 4 | * 5 | * The MIT License (MIT) 6 | * 7 | * Copyright (c) 2014-2016 Zoltan Frombach 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 10 | * this software and associated documentation files (the "Software"), to deal in 11 | * the Software without restriction, including without limitation the rights to 12 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 13 | * the Software, and to permit persons to whom the Software is 
furnished to do so, 14 | * subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included in all 17 | * copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 21 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 22 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 23 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 | * 26 | */ 27 | 28 | 'use strict'; 29 | 30 | var camelCase = require('camel-case'); 31 | var fs = require('fs'); 32 | var info = require('./package.json'); 33 | var minify = require('./' + info.main).minify; 34 | var paramCase = require('param-case'); 35 | var path = require('path'); 36 | var program = require('commander'); 37 | 38 | program._name = info.name; 39 | program.version(info.version); 40 | 41 | function fatal(message) { 42 | console.error(message); 43 | process.exit(1); 44 | } 45 | 46 | /** 47 | * JSON does not support regexes, so, e.g., JSON.parse() will not create 48 | * a RegExp from the JSON value `[ "/matchString/" ]`, which is 49 | * technically just an array containing a string that begins and ends with 50 | * a forward slash. To get a RegExp from a JSON string, it must be 51 | * constructed explicitly in JavaScript. 52 | * 53 | * The likelihood of actually wanting to match text that is enclosed in 54 | * forward slashes is probably quite rare, so if forward slashes were 55 | * included in an argument that requires a regex, the user most likely 56 | * thought they were part of the syntax for specifying a regex. 
/**
 * Builds a RegExp from a command-line or config string.
 *
 * JSON has no regex literal, so a value such as "/matchString/" arrives as a
 * plain string that merely starts and ends with a forward slash. Users most
 * likely typed those slashes as regex delimiters, so one enclosing pair is
 * stripped before the pattern is compiled.
 *
 * To match text that really is wrapped in slashes, enclose the expression in
 * a second pair:
 *
 *    --customAttrSurround "[\"//matchString//\"]"
 *
 * An empty or missing value yields undefined.
 */
function parseRegExp(value) {
  var compiled;
  if (value) {
    var pattern = value.replace(/^\/(.*)\/$/, '$1');
    compiled = new RegExp(pattern);
  }
  return compiled;
}
<input {{#if value}}checked="checked"{{/if}}>)', parseJSONRegExpArray], 110 | customEventAttributes: ['Arrays of regex\'es that allow to support custom event attributes for minifyJS (e.g. ng-click)', parseJSONRegExpArray], 111 | decodeEntities: 'Use direct Unicode characters whenever possible', 112 | html5: 'Parse input according to HTML5 specifications', 113 | ignoreCustomComments: ['Array of regex\'es that allow to ignore certain comments, when matched', parseJSONRegExpArray], 114 | ignoreCustomFragments: ['Array of regex\'es that allow to ignore certain fragments, when matched (e.g. <?php ... ?>, {{ ... }})', parseJSONRegExpArray], 115 | includeAutoGeneratedTags: 'Insert tags generated by HTML parser', 116 | keepClosingSlash: 'Keep the trailing slash on singleton elements', 117 | maxLineLength: ['Max line length', parseInt], 118 | minifyCSS: ['Minify CSS in style elements and style attributes (uses clean-css)', parseJSON], 119 | minifyJS: ['Minify Javascript in script elements and on* attributes (uses uglify-js)', parseJSON], 120 | minifyURLs: ['Minify URLs in various attributes (uses relateurl)', parseJSON], 121 | preserveLineBreaks: 'Always collapse to 1 line break (never remove it entirely) when whitespace between tags include a line break.', 122 | preventAttributesEscaping: 'Prevents the escaping of the values of attributes.', 123 | processConditionalComments: 'Process contents of conditional comments through minifier', 124 | processScripts: ['Array of strings corresponding to types of script elements to process through minifier (e.g. 
"text/ng-template", "text/x-handlebars-template", etc.)', parseJSONArray], 125 | quoteCharacter: ['Type of quote to use for attribute values (\' or ")', parseString], 126 | removeAttributeQuotes: 'Remove quotes around attributes when possible.', 127 | removeComments: 'Strip HTML comments', 128 | removeEmptyAttributes: 'Remove all attributes with whitespace-only values', 129 | removeEmptyElements: 'Remove all elements with empty contents', 130 | removeOptionalTags: 'Remove unrequired tags', 131 | removeRedundantAttributes: 'Remove attributes when value matches default.', 132 | removeScriptTypeAttributes: 'Remove type="text/javascript" from script tags. Other type attribute values are left intact.', 133 | removeStyleLinkTypeAttributes: 'Remove type="text/css" from style and link tags. Other type attribute values are left intact.', 134 | removeTagWhitespace: 'Remove space between attributes whenever possible', 135 | sortAttributes: 'Sort attributes by frequency', 136 | sortClassName: 'Sort style classes by frequency', 137 | trimCustomFragments: 'Trim white space around ignoreCustomFragments.', 138 | useShortDoctype: 'Replaces the doctype with the short (HTML5) doctype' 139 | }; 140 | var mainOptionKeys = Object.keys(mainOptions); 141 | mainOptionKeys.forEach(function(key) { 142 | var option = mainOptions[key]; 143 | if (Array.isArray(option)) { 144 | key = key === 'minifyURLs' ? '--minify-urls' : '--' + paramCase(key); 145 | key += option[1] === parseJSON ? 
' [value]' : ' <value>'; 146 | program.option(key, option[0], option[1]); 147 | } 148 | else if (~['html5', 'includeAutoGeneratedTags'].indexOf(key)) { 149 | program.option('--no-' + paramCase(key), option); 150 | } 151 | else { 152 | program.option('--' + paramCase(key), option); 153 | } 154 | }); 155 | program.option('-o --output <file>', 'Specify output file (if not specified STDOUT will be used for output)'); 156 | 157 | function readFile(file) { 158 | try { 159 | return fs.readFileSync(file, { encoding: 'utf8' }); 160 | } 161 | catch (e) { 162 | fatal('Cannot read ' + file + '\n' + e.message); 163 | } 164 | } 165 | 166 | var config = {}; 167 | program.option('-c --config-file <file>', 'Use config file', function(configPath) { 168 | var data = readFile(configPath); 169 | try { 170 | config = JSON.parse(data); 171 | } 172 | catch (je) { 173 | try { 174 | config = require(path.resolve(configPath)); 175 | } 176 | catch (ne) { 177 | fatal('Cannot read the specified config file.\nAs JSON: ' + je.message + '\nAs module: ' + ne.message); 178 | } 179 | } 180 | mainOptionKeys.forEach(function(key) { 181 | if (key in config) { 182 | var option = mainOptions[key]; 183 | if (Array.isArray(option)) { 184 | var value = config[key]; 185 | config[key] = option[1](typeof value === 'string' ? value : JSON.stringify(value)); 186 | } 187 | } 188 | }); 189 | }); 190 | program.option('--input-dir <dir>', 'Specify an input directory'); 191 | program.option('--output-dir <dir>', 'Specify an output directory'); 192 | program.option('--file-ext <ext>', 'Specify an extension to be read, ex: html'); 193 | var content; 194 | program.arguments('[files...]').action(function(files) { 195 | content = files.map(readFile).join(''); 196 | }).parse(process.argv); 197 | 198 | function createOptions() { 199 | var options = {}; 200 | mainOptionKeys.forEach(function(key) { 201 | var param = program[key === 'minifyURLs' ? 
/**
 * Creates `outputDir`, creating missing parent directories first, then calls
 * `callback` with no arguments.
 *
 * A directory that already exists counts as success. A missing parent
 * (ENOENT) is created recursively before the original directory is retried.
 * Any other error is reported through fatal().
 *
 * @param {string}   outputDir directory to create
 * @param {Function} callback  called once the directory is available
 */
function mkdir(outputDir, callback) {
  fs.mkdir(outputDir, function(err) {
    var code = err && err.code;
    if (code === 'ENOENT') {
      // Parent is missing: create it, then try this directory again.
      return mkdir(path.join(outputDir, '..'), function() {
        mkdir(outputDir, callback);
      });
    }
    if (err && code !== 'EEXIST') {
      fatal('Cannot create directory ' + outputDir + '\n' + err.message);
    }
    callback();
  });
}
+ fileExt) { 266 | mkdir(outputDir, function() { 267 | processFile(inputFile, outputFile); 268 | }); 269 | } 270 | }); 271 | }); 272 | }); 273 | } 274 | 275 | function writeMinify() { 276 | var minified; 277 | try { 278 | minified = minify(content, createOptions()); 279 | } 280 | catch (e) { 281 | fatal('Minification error:\n' + e.message); 282 | } 283 | (program.output ? fs.createWriteStream(program.output).on('error', function(e) { 284 | fatal('Cannot write ' + program.output + '\n' + e.message); 285 | }) : process.stdout).write(minified); 286 | } 287 | 288 | var inputDir = program.inputDir; 289 | var outputDir = program.outputDir; 290 | var fileExt = program.fileExt; 291 | if (inputDir || outputDir) { 292 | if (!inputDir) { 293 | fatal('The option output-dir needs to be used with the option input-dir. If you are working with a single file, use -o.'); 294 | } 295 | else if (!outputDir) { 296 | fatal('You need to specify where to write the output files with the option --output-dir'); 297 | } 298 | processDirectory(inputDir, outputDir, fileExt); 299 | } 300 | // Minifying one or more files specified on the CMD line 301 | else if (content) { 302 | writeMinify(); 303 | } 304 | // Minifying input coming from STDIN 305 | else { 306 | content = ''; 307 | process.stdin.setEncoding('utf8'); 308 | process.stdin.on('data', function(data) { 309 | content += data; 310 | }).on('end', writeMinify); 311 | } 312 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="utf-8"> 5 | <meta http-equiv="X-UA-Compatible" content="IE=edge"> 6 | <meta name="google-site-verification" content="3tgjKRYCVRWTziuUgYny5vY4eU7Vddvat0pLnoCuPBs"> 7 | <title>HTML minifier</title> 8 | <link rel="stylesheet" href="assets/master.css"> 9 | </head> 10 | <body> 11 | <div id="outer-wrapper"> 12 | <div id="wrapper"> 13 
| <h1>HTML Minifier <span>(v4.0.0)</span></h1> 14 | <textarea rows="8" cols="40" id="input"></textarea> 15 | <div class="minify-button"> 16 | <button type="button" id="minify-btn">Minify</button> 17 | </div> 18 | <textarea rows="8" cols="40" id="output" readonly></textarea> 19 | 20 | <p id="stats"></p> 21 | </div> 22 | <div id="options"> 23 | <ul> 24 | <li> 25 | <input type="checkbox" id="caseSensitive"> 26 | <label for="caseSensitive"> 27 | Case-sensitive 28 | </label> 29 | <span class="quiet short"> 30 | Treat attributes in case sensitive manner (useful for custom HTML tags) 31 | </span> 32 | </li> 33 | <li> 34 | <input type="checkbox" id="collapseBooleanAttributes" checked> 35 | <label for="collapseBooleanAttributes"> 36 | Collapse boolean attributes 37 | </label> 38 | <span class="quiet short"> 39 | Omit attribute values from boolean attributes 40 | </span> 41 | </li> 42 | <li> 43 | <input type="checkbox" id="collapseInlineTagWhitespace"> 44 | <label for="collapseInlineTagWhitespace" class="unsafe"> 45 | Collapse inline tag whitespace 46 | </label> 47 | <span class="quiet short"> 48 | Don't leave any spaces between <code>display:inline;</code> elements when collapsing. 49 | Must be used in conjunction with <code>collapseWhitespace=true</code> 50 | </span> 51 | </li> 52 | <li> 53 | <input type="checkbox" id="collapseWhitespace" checked> 54 | <label for="collapseWhitespace"> 55 | Collapse whitespace 56 | </label> 57 | <span class="quiet short"> 58 | Collapse white space that contributes to text nodes in a document tree 59 | </span> 60 | </li> 61 | <li> 62 | <input type="checkbox" id="conservativeCollapse"> 63 | <label for="conservativeCollapse"> 64 | Conservative collapse 65 | </label> 66 | <span class="quiet short"> 67 | Always collapse to 1 space (never remove it entirely). 
68 | Must be used in conjunction with <code>collapseWhitespace=true</code> 69 | </span> 70 | </li> 71 | <li> 72 | <input type="checkbox" id="decodeEntities" checked> 73 | <label for="decodeEntities"> 74 | Decode Entity Characters 75 | </label> 76 | <span class="quiet short"> 77 | Use direct Unicode characters whenever possible 78 | </span> 79 | </li> 80 | <li> 81 | <input type="checkbox" id="html5" checked> 82 | <label for="html5"> 83 | HTML5 84 | </label> 85 | <span class="quiet short"> 86 | Parse input according to HTML5 specifications 87 | </span> 88 | </li> 89 | <li> 90 | <input type="checkbox" id="includeAutoGeneratedTags"> 91 | <label for="includeAutoGeneratedTags"> 92 | Include auto-generated tags 93 | </label> 94 | <span class="quiet short"> 95 | Insert tags generated by HTML parser 96 | </span> 97 | </li> 98 | <li> 99 | <input type="checkbox" id="keepClosingSlash"> 100 | <label for="keepClosingSlash"> 101 | Keep closing slash 102 | </label> 103 | <span class="quiet short"> 104 | Keep the trailing slash on singleton elements 105 | </span> 106 | </li> 107 | <li> 108 | <label for="maxLineLength"> 109 | Max. line length 110 | </label> 111 | <input type="text" id="maxLineLength"> 112 | <span class="quiet short"> 113 | Specify a maximum line length. 
Compressed output will be split by newlines at valid HTML split-points 114 | </span> 115 | </li> 116 | <li> 117 | <input type="checkbox" id="minifyCSS" checked> 118 | <label for="minifyCSS"> 119 | Minify CSS 120 | </label> 121 | <span class="quiet short"> 122 | Minify CSS in style elements and style attributes (uses <code>clean-css</code>) 123 | </span> 124 | </li> 125 | <li> 126 | <input type="checkbox" id="minifyJS" checked> 127 | <label for="minifyJS"> 128 | Minify JavaScript 129 | </label> 130 | <span class="quiet short"> 131 | Minify JavaScript in script elements and event attributes (uses <code>UglifyJS</code>) 132 | </span> 133 | </li> 134 | <li> 135 | <label for="minifyURLs"> 136 | Minify URLs 137 | </label> 138 | <input type="text" id="minifyURLs"> 139 | <span class="quiet short"> 140 | Minify URLs in various attributes (uses <code>relateurl</code>) 141 | </span> 142 | </li> 143 | <li> 144 | <input type="checkbox" id="preserveLineBreaks"> 145 | <label for="preserveLineBreaks"> 146 | Preserve line-breaks 147 | </label> 148 | <span class="quiet short"> 149 | Always collapse to 1 line break (never remove it entirely) when whitespace between tags include a line break. 
150 | Must be used in conjunction with <code>collapseWhitespace=true</code> 151 | </span> 152 | </li> 153 | <li> 154 | <input type="checkbox" id="preventAttributesEscaping"> 155 | <label for="preventAttributesEscaping" class="unsafe"> 156 | Prevent attributes escaping 157 | </label> 158 | <span class="quiet short"> 159 | Prevents the escaping of the values of attributes 160 | </span> 161 | </li> 162 | <li> 163 | <input type="checkbox" id="processConditionalComments" checked> 164 | <label for="processConditionalComments"> 165 | Process conditional comments 166 | </label> 167 | <span class="quiet short"> 168 | Process contents of conditional comments through minifier 169 | </span> 170 | </li> 171 | <li> 172 | <label for="processScripts"> 173 | Process scripts 174 | </label> 175 | <input type="text" id="processScripts" value="text/html"> 176 | <span class="quiet short"> 177 | Comma-delimited string corresponding to types of script elements to process through minifier (e.g. <code>text/ng-template, text/x-handlebars-template</code>) 178 | </span> 179 | </li> 180 | <li> 181 | <label for="quoteCharacter"> 182 | Quote character 183 | </label> 184 | <input type="text" id="quoteCharacter"> 185 | <span class="quiet short"> 186 | Type of quote to use for attribute values (<code>'</code> or <code>"</code>) 187 | </span> 188 | </li> 189 | <li> 190 | <input type="checkbox" id="removeAttributeQuotes" checked> 191 | <label for="removeAttributeQuotes"> 192 | Remove attribute quotes 193 | </label> 194 | <span class="quiet short"> 195 | Remove quotes around attributes when possible 196 | </span> 197 | </li> 198 | <li> 199 | <input type="checkbox" id="removeComments" checked> 200 | <label for="removeComments"> 201 | Remove comments 202 | </label> 203 | <span class="quiet short"> 204 | Strip HTML comments 205 | </span> 206 | </li> 207 | <li> 208 | <input type="checkbox" id="removeEmptyAttributes" checked> 209 | <label for="removeEmptyAttributes"> 210 | Remove empty attributes 211 | 
</label> 212 | <span class="quiet short"> 213 | Remove all attributes with whitespace-only values 214 | </span> 215 | </li> 216 | <li> 217 | <input type="checkbox" id="removeEmptyElements"> 218 | <label for="removeEmptyElements" class="unsafe"> 219 | Remove empty elements 220 | </label> 221 | <span class="quiet short"> 222 | Remove all elements with empty contents 223 | </span> 224 | </li> 225 | <li> 226 | <input type="checkbox" id="removeOptionalTags" checked> 227 | <label for="removeOptionalTags"> 228 | Remove optional tags 229 | </label> 230 | </li> 231 | <li> 232 | <input type="checkbox" id="removeRedundantAttributes" checked> 233 | <label for="removeRedundantAttributes"> 234 | Remove redundant attributes 235 | </label> 236 | <span class="quiet short"> 237 | Remove attributes when value matches default. 238 | </span> 239 | </li> 240 | <li> 241 | <input type="checkbox" id="removeScriptTypeAttributes" checked> 242 | <label for="removeScriptTypeAttributes"> 243 | Remove script type attributes 244 | </label> 245 | <span class="quiet short"> 246 | Remove <code>type="text/javascript"</code> from <code>script</code> tags. 247 | Other <code>type</code> attribute values are left intact 248 | </span> 249 | </li> 250 | <li> 251 | <input type="checkbox" id="removeStyleLinkTypeAttributes" checked> 252 | <label for="removeStyleLinkTypeAttributes"> 253 | Remove style link type attributes 254 | </label> 255 | <span class="quiet short"> 256 | Remove <code>type="text/css"</code> from <code>style</code> and <code>link</code> tags. 257 | Other <code>type</code> attribute values are left intact 258 | </span> 259 | </li> 260 | <li> 261 | <input type="checkbox" id="removeTagWhitespace" checked> 262 | <label for="removeTagWhitespace" class="unsafe"> 263 | Remove tag whitespace 264 | </label> 265 | <span class="quiet short"> 266 | Remove space between attributes whenever possible. 
267 | <i>Note that this will result in invalid HTML!</i> 268 | </span> 269 | </li> 270 | <li> 271 | <input type="checkbox" id="sortAttributes" checked> 272 | <label for="sortAttributes" class="unsafe"> 273 | Sort attributes 274 | </label> 275 | <span class="quiet short"> 276 | Sort attributes by frequency 277 | </span> 278 | </li> 279 | <li> 280 | <input type="checkbox" id="sortClassName" checked> 281 | <label for="sortClassName" class="unsafe"> 282 | Sort class name 283 | </label> 284 | <span class="quiet short"> 285 | Sort style classes by frequency 286 | </span> 287 | </li> 288 | <li> 289 | <input type="checkbox" id="trimCustomFragments" checked> 290 | <label for="trimCustomFragments"> 291 | Trim white space around custom fragments 292 | </label> 293 | <span class="quiet short"> 294 | Trim white space around <code>ignoreCustomFragments</code>. 295 | </span> 296 | </li> 297 | <li> 298 | <input type="checkbox" id="useShortDoctype" checked> 299 | <label for="useShortDoctype"> 300 | Use short <code>doctype</code> 301 | </label> 302 | <span class="quiet short"> 303 | Replaces the <code>doctype</code> with the short (HTML5) <code>doctype</code> 304 | </span> 305 | </li> 306 | </ul> 307 | <div class="controls"> 308 | <span>Select:</span> 309 | <a href="#" id="select-all">All</a>, 310 | <a href="#" id="select-none">None</a>, 311 | <a href="#" id="select-defaults">Reset</a> 312 | </div> 313 | </div> 314 | </div> 315 | 316 | <div class="footer"> 317 | <p class="quiet"> 318 | HTMLMinifier is made by <a href="http://perfectionkills.com/">kangax</a>, 319 | using tweaked version of HTML parser by <a href="http://ejohn.org/">John Resig</a> 320 | (which, in its turn, is based on work of <a href="http://erik.eae.net/">Erik Arvidsson</a>). 321 | Source and bugtracker are <a href="https://github.com/kangax/html-minifier">hosted on GitHub</a>. 
322 | </p> 323 | </div> 324 | 325 | <script src="dist/htmlminifier.min.js"></script> 326 | <script src="assets/master.js"></script> 327 | </body> 328 | </html> 329 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "html-minifier", 3 | "description": "Highly configurable, well-tested, JavaScript-based HTML minifier.", 4 | "version": "4.0.0", 5 | "keywords": [ 6 | "cli", 7 | "compress", 8 | "compressor", 9 | "css", 10 | "html", 11 | "htmlmin", 12 | "javascript", 13 | "min", 14 | "minification", 15 | "minifier", 16 | "minify", 17 | "optimize", 18 | "optimizer", 19 | "pack", 20 | "packer", 21 | "parse", 22 | "parser", 23 | "uglifier", 24 | "uglify" 25 | ], 26 | "homepage": "https://kangax.github.io/html-minifier/", 27 | "author": "Juriy \"kangax\" Zaytsev", 28 | "maintainers": [ 29 | "Alex Lam <alexlamsl@gmail.com>", 30 | "Juriy Zaytsev <kangax@gmail.com> (http://perfectionkills.com/)" 31 | ], 32 | "contributors": [ 33 | "Gilmore Davidson (https://github.com/gilmoreorless)", 34 | "Hugo Wetterberg <hugo@wetterberg.nu>", 35 | "Zoltan Frombach <tssajo@gmail.com>" 36 | ], 37 | "license": "MIT", 38 | "bin": { 39 | "html-minifier": "./cli.js" 40 | }, 41 | "main": "src/htmlminifier.js", 42 | "repository": { 43 | "type": "git", 44 | "url": "git+https://github.com/kangax/html-minifier.git" 45 | }, 46 | "bugs": { 47 | "url": "https://github.com/kangax/html-minifier/issues" 48 | }, 49 | "engines": { 50 | "node": ">=10" 51 | }, 52 | "scripts": { 53 | "dist": "grunt dist", 54 | "test": "grunt test" 55 | }, 56 | "dependencies": { 57 | "camel-case": "^3.0.0", 58 | "clean-css": "^5.2.1", 59 | "commander": "^2.20.3", 60 | "he": "^1.2.0", 61 | "param-case": "^2.1.1", 62 | "relateurl": "^0.2.7", 63 | "uglify-js": "^3.14.2" 64 | }, 65 | "devDependencies": { 66 | "grunt": "^1.4.1", 67 | "grunt-browserify": "^6.0.0", 68 | "grunt-contrib-uglify": 
"^5.0.1", 69 | "grunt-eslint": "^23.0.0", 70 | "phantomjs-prebuilt": "^2.1.16", 71 | "qunit": "^2.17.2" 72 | }, 73 | "benchmarkDependencies": { 74 | "brotli": "^1.3.2", 75 | "chalk": "^4.1.2", 76 | "cli-table": "^0.3.6", 77 | "lzma": "^2.3.2", 78 | "minimize": "^2.2.0", 79 | "progress": "^2.0.3" 80 | }, 81 | "files": [ 82 | "src/*.js", 83 | "cli.js", 84 | "sample-cli-config-file.conf" 85 | ] 86 | } 87 | -------------------------------------------------------------------------------- /sample-cli-config-file.conf: -------------------------------------------------------------------------------- 1 | { 2 | "caseSensitive": false, 3 | "collapseBooleanAttributes": true, 4 | "collapseInlineTagWhitespace": false, 5 | "collapseWhitespace": true, 6 | "conservativeCollapse": false, 7 | "continueOnParseError": true, 8 | "customAttrCollapse": ".*", 9 | "decodeEntities": true, 10 | "html5": true, 11 | "ignoreCustomFragments": [ 12 | "<#[\\s\\S]*?#>", 13 | "<%[\\s\\S]*?%>", 14 | "<\\?[\\s\\S]*?\\?>" 15 | ], 16 | "includeAutoGeneratedTags": false, 17 | "keepClosingSlash": false, 18 | "maxLineLength": 0, 19 | "minifyCSS": true, 20 | "minifyJS": true, 21 | "preserveLineBreaks": false, 22 | "preventAttributesEscaping": false, 23 | "processConditionalComments": true, 24 | "processScripts": [ 25 | "text/html" 26 | ], 27 | "removeAttributeQuotes": true, 28 | "removeComments": true, 29 | "removeEmptyAttributes": true, 30 | "removeEmptyElements": true, 31 | "removeOptionalTags": true, 32 | "removeRedundantAttributes": true, 33 | "removeScriptTypeAttributes": true, 34 | "removeStyleLinkTypeAttributes": true, 35 | "removeTagWhitespace": true, 36 | "sortAttributes": true, 37 | "sortClassName": true, 38 | "trimCustomFragments": true, 39 | "useShortDoctype": true 40 | } 41 | -------------------------------------------------------------------------------- /src/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "browser": true 4 
/**
 * Strip leading and trailing HTML whitespace (space, LF, CR, tab, FF).
 * Falsy input is returned unchanged.
 */
function trimWhitespace(str) {
  if (!str) {
    return str;
  }
  var withoutLeading = str.replace(/^[ \n\r\t\f]+/, '');
  return withoutLeading.replace(/[ \n\r\t\f]+$/, '');
}

/**
 * Collapse every run of whitespace to a single space. A run that is exactly
 * one tab is kept as-is, and non-breaking spaces inside a run are preserved.
 * Falsy input is returned unchanged.
 */
function collapseWhitespaceAll(str) {
  if (!str) {
    return str;
  }
  return str.replace(/[ \n\r\t\f\xA0]+/g, function(run) {
    if (run === '\t') {
      return '\t';
    }
    // Keep each group of non-breaking spaces; squeeze what follows it to one space.
    return run.replace(/(^|\xA0+)[^\xA0]+/g, '$1 ');
  });
}

/**
 * Collapse whitespace at the edges (and optionally inside) of a text node.
 *
 * @param {string} str - text to process
 * @param {Object} options - reads `preserveLineBreaks` and `conservativeCollapse`
 * @param {*} trimLeft - truthy to collapse leading whitespace
 * @param {*} trimRight - truthy to collapse trailing whitespace
 * @param {*} collapseAll - truthy to also collapse inner whitespace runs
 * @returns {string} the collapsed text
 */
function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
  var leadingBreak = '';
  var trailingBreak = '';

  if (options.preserveLineBreaks) {
    // Edge whitespace containing a line break is replaced by one '\n'.
    str = str.replace(/^[ \n\r\t\f]*?[\n\r][ \n\r\t\f]*/, function() {
      leadingBreak = '\n';
      return '';
    });
    str = str.replace(/[ \n\r\t\f]*?[\n\r][ \n\r\t\f]*$/, function() {
      trailingBreak = '\n';
      return '';
    });
  }

  if (trimLeft) {
    // Non-breaking space is specifically handled inside the replacer function here:
    str = str.replace(/^[ \n\r\t\f\xA0]+/, function(run) {
      var keepOne = !leadingBreak && options.conservativeCollapse;
      if (keepOne && run === '\t') {
        return '\t';
      }
      var kept = run.replace(/^[^\xA0]+/, '').replace(/(\xA0+)[^\xA0]+/g, '$1 ');
      if (kept) {
        return kept;
      }
      return keepOne ? ' ' : '';
    });
  }

  if (trimRight) {
    // Non-breaking space is specifically handled inside the replacer function here:
    str = str.replace(/[ \n\r\t\f\xA0]+$/, function(run) {
      var keepOne = !trailingBreak && options.conservativeCollapse;
      if (keepOne && run === '\t') {
        return '\t';
      }
      var kept = run.replace(/[^\xA0]+(\xA0+)/g, ' $1').replace(/[^\xA0]+$/, '');
      if (kept) {
        return kept;
      }
      return keepOne ? ' ' : '';
    });
  }

  if (collapseAll) {
    // strip non space whitespace then compress spaces to one
    str = collapseWhitespaceAll(str);
  }

  return leadingBreak + str + trailingBreak;
}
/**
 * Tell whether comment text starts with an IE `[if ...]` marker or ends with `[endif]`.
 */
function isConditionalComment(text) {
  var conditionalMarker = /^\[if\s[^\]]+]|\[endif]$/;
  return conditionalMarker.test(text);
}

/**
 * Tell whether comment text matches any pattern in `options.ignoreCustomComments`.
 */
function isIgnoredComment(text, options) {
  var patterns = options.ignoreCustomComments;
  var count = patterns.length;
  var index = 0;
  while (index < count) {
    if (patterns[index].test(text)) {
      return true;
    }
    index += 1;
  }
  return false;
}

/**
 * Tell whether `attrName` is an event-handler attribute. When
 * `options.customEventAttributes` is set, only those patterns decide;
 * otherwise the name must be `on` followed by at least three lowercase letters.
 */
function isEventAttribute(attrName, options) {
  var patterns = options.customEventAttributes;
  if (!patterns) {
    return /^on[a-z]{3,}$/.test(attrName);
  }
  // Patterns are tried from last to first.
  var index = patterns.length;
  while (index--) {
    if (patterns[index].test(attrName)) {
      return true;
    }
  }
  return false;
}

/**
 * Tell whether an attribute value may be written without quotes.
 */
function canRemoveAttributeQuotes(value) {
  // https://mathiasbynens.be/notes/unquoted-attribute-values
  var unquotedSafe = /^[^ \t\n\f\r"'`=<>]+$/;
  return unquotedSafe.test(value);
}

/**
 * Tell whether `attributes` contains an entry whose lowercased name equals `attribute`.
 */
function attributesInclude(attributes, attribute) {
  var index = attributes.length;
  while (index--) {
    if (attributes[index].name.toLowerCase() === attribute) {
      return true;
    }
  }
  return false;
}
// https://mathiasbynens.be/demo/javascript-mime-type
// https://developer.mozilla.org/en/docs/Web/HTML/Element/script#attr-type
var executableScriptsMimetypes = utils.createMap([
  'text/javascript',
  'text/ecmascript',
  'text/jscript',
  'application/javascript',
  'application/x-javascript',
  'application/ecmascript'
]);

/**
 * Tell whether a script `type` value denotes executable JavaScript: either
 * empty, or (ignoring parameters after `;` and letter case) one of the MIME
 * types listed above.
 */
function isScriptTypeAttribute(attrValue) {
  var mimeType = trimWhitespace(attrValue.split(/;/, 2)[0]).toLowerCase();
  if (mimeType === '') {
    return true;
  }
  return executableScriptsMimetypes(mimeType);
}

/**
 * Tell whether `tag`/`attrs` describe a script element that runs as
 * JavaScript. The first `type` attribute decides; with no `type` attribute
 * the script counts as executable.
 */
function isExecutableScript(tag, attrs) {
  if (tag !== 'script') {
    return false;
  }
  var count = attrs.length;
  var index = 0;
  while (index < count) {
    var attr = attrs[index];
    if (attr.name.toLowerCase() === 'type') {
      return isScriptTypeAttribute(attr.value);
    }
    index += 1;
  }
  return true;
}

/**
 * Tell whether a style/link `type` value is empty or `text/css`
 * (ignoring surrounding whitespace and letter case).
 */
function isStyleLinkTypeAttribute(attrValue) {
  var mimeType = trimWhitespace(attrValue).toLowerCase();
  if (mimeType === '') {
    return true;
  }
  return mimeType === 'text/css';
}
// Attributes whose mere presence means "true"
var isSimpleBoolean = createMapFromString('allowfullscreen,async,autofocus,autoplay,checked,compact,controls,declare,default,defaultchecked,defaultmuted,defaultselected,defer,disabled,enabled,formnovalidate,hidden,indeterminate,inert,ismap,itemscope,loop,multiple,muted,nohref,noresize,noshade,novalidate,nowrap,open,pauseonexit,readonly,required,reversed,scoped,seamless,selected,sortable,truespeed,typemustmatch,visible');
var isBooleanValue = createMapFromString('true,false');

/**
 * Tell whether an attribute is treated as boolean: any name in the list
 * above, or `draggable` when its value is neither `true` nor `false`.
 */
function isBooleanAttribute(attrName, attrValue) {
  return isSimpleBoolean(attrName) || attrName === 'draggable' && !isBooleanValue(attrValue);
}

/**
 * Tell whether `attrName` on element `tag` holds a URI.
 */
function isUriTypeAttribute(attrName, tag) {
  switch (tag) {
    case 'a':
    case 'area':
    case 'link':
    case 'base':
      return attrName === 'href';
    case 'img':
      return /^(?:src|longdesc|usemap)$/.test(attrName);
    case 'object':
      return /^(?:classid|codebase|data|usemap)$/.test(attrName);
    case 'q':
    case 'blockquote':
    case 'ins':
    case 'del':
      return attrName === 'cite';
    case 'form':
      return attrName === 'action';
    case 'input':
      return attrName === 'src' || attrName === 'usemap';
    case 'head':
      return attrName === 'profile';
    case 'script':
      return attrName === 'src' || attrName === 'for';
  }
  return false;
}

/**
 * Tell whether `attrName` on element `tag` holds a number.
 */
function isNumberTypeAttribute(attrName, tag) {
  switch (tag) {
    case 'a':
    case 'area':
    case 'object':
    case 'button':
      return attrName === 'tabindex';
    case 'input':
      return attrName === 'maxlength' || attrName === 'tabindex';
    case 'select':
      return attrName === 'size' || attrName === 'tabindex';
    case 'textarea':
      return /^(?:rows|cols|tabindex)$/.test(attrName);
    case 'colgroup':
    case 'col':
      return attrName === 'span';
    case 'th':
    case 'td':
      return attrName === 'rowspan' || attrName === 'colspan';
  }
  return false;
}

/**
 * Tell whether `tag` is a `link` element carrying `rel="<value>"`.
 * Always returns a boolean: a `link` without a matching `rel` yields `false`
 * (previously the function fell off the end and returned `undefined`).
 */
function isLinkType(tag, attrs, value) {
  if (tag !== 'link') {
    return false;
  }
  for (var i = 0, len = attrs.length; i < len; i++) {
    if (attrs[i].name === 'rel' && attrs[i].value === value) {
      return true;
    }
  }
  return false;
}

/**
 * Tell whether `attrName` is the `media` attribute of a stylesheet link or of
 * a style element, i.e. a value that holds a media query.
 */
function isMediaQuery(tag, attrs, attrName) {
  return attrName === 'media' && (isLinkType(tag, attrs, 'stylesheet') || isStyleSheet(tag, attrs));
}

// Elements on which `srcset` is processed
var srcsetTags = createMapFromString('img,source');

/**
 * Tell whether `attrName` is `srcset` on an element listed in `srcsetTags`.
 */
function isSrcset(attrName, tag) {
  return attrName === 'srcset' && srcsetTags(tag);
}
attrValue : options.minifyURLs(attrValue); 275 | } 276 | else if (isNumberTypeAttribute(attrName, tag)) { 277 | return trimWhitespace(attrValue); 278 | } 279 | else if (attrName === 'style') { 280 | attrValue = trimWhitespace(attrValue); 281 | if (attrValue) { 282 | if (/;$/.test(attrValue) && !/&#?[0-9a-zA-Z]+;$/.test(attrValue)) { 283 | attrValue = attrValue.replace(/\s*;$/, ';'); 284 | } 285 | attrValue = options.minifyCSS(attrValue, 'inline'); 286 | } 287 | return attrValue; 288 | } 289 | else if (isSrcset(attrName, tag)) { 290 | // https://html.spec.whatwg.org/multipage/embedded-content.html#attr-img-srcset 291 | attrValue = trimWhitespace(attrValue).split(/\s+,\s*|\s*,\s+/).map(function(candidate) { 292 | var url = candidate; 293 | var descriptor = ''; 294 | var match = candidate.match(/\s+([1-9][0-9]*w|[0-9]+(?:\.[0-9]+)?x)$/); 295 | if (match) { 296 | url = url.slice(0, -match[0].length); 297 | var num = +match[1].slice(0, -1); 298 | var suffix = match[1].slice(-1); 299 | if (num !== 1 || suffix !== 'x') { 300 | descriptor = ' ' + num + suffix; 301 | } 302 | } 303 | return options.minifyURLs(url) + descriptor; 304 | }).join(', '); 305 | } 306 | else if (isMetaViewport(tag, attrs) && attrName === 'content') { 307 | attrValue = attrValue.replace(/\s+/g, '').replace(/[0-9]+\.[0-9]+/g, function(numString) { 308 | // "0.90000" -> "0.9" 309 | // "1.0" -> "1" 310 | // "1.0001" -> "1.0001" (unchanged) 311 | return (+numString).toString(); 312 | }); 313 | } 314 | else if (isContentSecurityPolicy(tag, attrs) && attrName.toLowerCase() === 'content') { 315 | return collapseWhitespaceAll(attrValue); 316 | } 317 | else if (options.customAttrCollapse && options.customAttrCollapse.test(attrName)) { 318 | attrValue = attrValue.replace(/\n+|\r+|\s{2,}/g, ''); 319 | } 320 | else if (tag === 'script' && attrName === 'type') { 321 | attrValue = trimWhitespace(attrValue.replace(/\s*;\s*/g, ';')); 322 | } 323 | else if (isMediaQuery(tag, attrs, attrName)) { 324 | attrValue = 
/**
 * Tell whether `tag`/`attrs` describe `<meta name="viewport">`.
 * Always returns a boolean: a `meta` without that attribute yields `false`
 * (previously the function fell off the end and returned `undefined`).
 */
function isMetaViewport(tag, attrs) {
  if (tag !== 'meta') {
    return false;
  }
  for (var i = 0, len = attrs.length; i < len; i++) {
    if (attrs[i].name === 'name' && attrs[i].value === 'viewport') {
      return true;
    }
  }
  return false;
}

/**
 * Tell whether `tag`/`attrs` describe `<meta http-equiv="Content-Security-Policy">`
 * (attribute name and value are compared case-insensitively).
 * Always returns a boolean (previously `undefined` when nothing matched).
 */
function isContentSecurityPolicy(tag, attrs) {
  if (tag !== 'meta') {
    return false;
  }
  for (var i = 0, len = attrs.length; i < len; i++) {
    if (attrs[i].name.toLowerCase() === 'http-equiv' && attrs[i].value.toLowerCase() === 'content-security-policy') {
      return true;
    }
  }
  return false;
}

/**
 * Wrap `id` in clean-css ignore markers.
 */
function ignoreCSS(id) {
  return '/* clean-css ignore:start */' + id + '/* clean-css ignore:end */';
}

// Wrap CSS declarations for CleanCSS > 3.x
// See https://github.com/jakubpawlowicz/clean-css/issues/418
/**
 * Turn a CSS fragment into a complete stylesheet.
 * 'inline' wraps declarations in a `*{...}` rule, 'media' wraps a media query
 * around a dummy rule; any other `type` returns `text` unchanged.
 */
function wrapCSS(text, type) {
  switch (type) {
    case 'inline':
      return '*{' + text + '}';
    case 'media':
      return '@media ' + text + '{a{top:0}}';
    default:
      return text;
  }
}

/**
 * Inverse of wrapCSS: extract the fragment from the wrapped form.
 * Returns `text` unchanged when `type` is not 'inline'/'media' or when the
 * text does not have the expected wrapper.
 */
function unwrapCSS(text, type) {
  var matches;
  switch (type) {
    case 'inline':
      matches = text.match(/^\*\{([\s\S]*)\}$/);
      break;
    case 'media':
      matches = text.match(/^@media ([\s\S]*?)\s*{[\s\S]*}$/);
      break;
  }
  return matches ? matches[1] : text;
}
/**
 * Minify the content of a script element whose `type` attribute is listed in
 * `options.processScripts`; any other script text is returned unchanged.
 */
function processScript(text, options, currentAttrs) {
  for (var i = 0, len = currentAttrs.length; i < len; i++) {
    if (currentAttrs[i].name.toLowerCase() === 'type' &&
        options.processScripts.indexOf(currentAttrs[i].value) > -1) {
      return minify(text, options);
    }
  }
  return text;
}

// Tag omission rules from https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
// with the following deviations:
// - retain <body> if followed by <noscript>
// - </rb>, </rt>, </rtc>, </rp> & </tfoot> follow https://www.w3.org/TR/html5/syntax.html#optional-tags
// - retain all tags which are adjacent to non-standard HTML tags
var optionalStartTags = createMapFromString('html,head,body,colgroup,tbody');
var optionalEndTags = createMapFromString('html,head,body,li,dt,dd,p,rb,rt,rtc,rp,optgroup,option,colgroup,caption,thead,tbody,tfoot,tr,td,th');
var headerTags = createMapFromString('meta,link,script,style,template,noscript');
var descriptionTags = createMapFromString('dt,dd');
var pBlockTags = createMapFromString('address,article,aside,blockquote,details,div,dl,fieldset,figcaption,figure,footer,form,h1,h2,h3,h4,h5,h6,header,hgroup,hr,main,menu,nav,ol,p,pre,section,table,ul');
var pInlineTags = createMapFromString('a,audio,del,ins,map,noscript,video');
var rubyTags = createMapFromString('rb,rt,rtc,rp');
var rtcTag = createMapFromString('rb,rtc,rp');
var optionTag = createMapFromString('option,optgroup');
var tableContentTags = createMapFromString('tbody,tfoot');
var tableSectionTags = createMapFromString('thead,tbody,tfoot');
var cellTags = createMapFromString('td,th');
var topLevelTags = createMapFromString('html,head,body');
var compactTags = createMapFromString('html,body');
var looseTags = createMapFromString('head,colgroup,caption');
var trailingTags = createMapFromString('dt,thead');
var htmlTags = createMapFromString('a,abbr,acronym,address,applet,area,article,aside,audio,b,base,basefont,bdi,bdo,bgsound,big,blink,blockquote,body,br,button,canvas,caption,center,cite,code,col,colgroup,command,content,data,datalist,dd,del,details,dfn,dialog,dir,div,dl,dt,element,em,embed,fieldset,figcaption,figure,font,footer,form,frame,frameset,h1,h2,h3,h4,h5,h6,head,header,hgroup,hr,html,i,iframe,image,img,input,ins,isindex,kbd,keygen,label,legend,li,link,listing,main,map,mark,marquee,menu,menuitem,meta,meter,multicol,nav,nobr,noembed,noframes,noscript,object,ol,optgroup,option,output,p,param,picture,plaintext,pre,progress,q,rb,rp,rt,rtc,ruby,s,samp,script,section,select,shadow,small,source,spacer,span,strike,strong,style,sub,summary,sup,table,tbody,td,template,textarea,tfoot,th,thead,time,title,tr,track,tt,u,ul,var,video,wbr,xmp');

/**
 * Tell whether the optional start tag `optionalStartTag` may be dropped when
 * the next start tag is `tag`.
 */
function canRemoveParentTag(optionalStartTag, tag) {
  switch (optionalStartTag) {
    case 'html':
    case 'head':
      return true;
    case 'body':
      return !headerTags(tag);
    case 'colgroup':
      return tag === 'col';
    case 'tbody':
      return tag === 'tr';
  }
  return false;
}

/**
 * Tell whether the start tag `tag` has to stay because the end tag
 * `optionalEndTag` right before it was omitted.
 */
function isStartTagMandatory(optionalEndTag, tag) {
  switch (tag) {
    case 'colgroup':
      return optionalEndTag === 'colgroup';
    case 'tbody':
      return tableSectionTags(optionalEndTag);
  }
  return false;
}

/**
 * Tell whether the optional end tag `optionalEndTag` may be dropped when it is
 * immediately followed by `tag`.
 */
function canRemovePrecedingTag(optionalEndTag, tag) {
  switch (optionalEndTag) {
    case 'html':
    case 'head':
    case 'body':
    case 'colgroup':
    case 'caption':
      return true;
    case 'li':
    case 'optgroup':
    case 'tr':
      return tag === optionalEndTag;
    case 'dt':
    case 'dd':
      return descriptionTags(tag);
    case 'p':
      return pBlockTags(tag);
    case 'rb':
    case 'rt':
    case 'rp':
      return rubyTags(tag);
    case 'rtc':
      return rtcTag(tag);
    case 'option':
      return optionTag(tag);
    case 'thead':
    case 'tbody':
      return tableContentTags(tag);
    case 'tfoot':
      return tag === 'tbody';
    case 'td':
    case 'th':
      return cellTags(tag);
  }
  return false;
}

// Attribute names that may be dropped when their value is empty.
// The pattern is anchored at both ends; the trailing `$'` had been corrupted
// to `#39;`, which left the string literal unterminated.
var reEmptyAttribute = new RegExp(
  '^(?:class|id|style|title|lang|dir|on(?:focus|blur|change|click|dblclick|mouse(' +
    '?:down|up|over|move|out)|key(?:press|down|up)))$');

/**
 * Tell whether an attribute with an empty (or whitespace-only) value can be
 * removed. A function passed as `options.removeEmptyAttributes` decides by
 * itself; otherwise `value` on `input` and the names matched by
 * `reEmptyAttribute` are removable.
 */
function canDeleteEmptyAttribute(tag, attrName, attrValue, options) {
  var isValueEmpty = !attrValue || /^\s*$/.test(attrValue);
  if (!isValueEmpty) {
    return false;
  }
  if (typeof options.removeEmptyAttributes === 'function') {
    return options.removeEmptyAttributes(attrName, tag);
  }
  return tag === 'input' && attrName === 'value' || reEmptyAttribute.test(attrName);
}

/**
 * Tell whether `attrs` contains an attribute named exactly `name`.
 */
function hasAttrName(name, attrs) {
  for (var i = attrs.length - 1; i >= 0; i--) {
    if (attrs[i].name === name) {
      return true;
    }
  }
  return false;
}

/**
 * Tell whether an element with empty content may be removed. `textarea` is
 * always kept, and elements that load external content through an attribute
 * (`src`, `srcdoc`, `data`, `code`) are kept when that attribute is present.
 */
function canRemoveElement(tag, attrs) {
  switch (tag) {
    case 'textarea':
      return false;
    case 'audio':
    case 'script':
    case 'video':
      if (hasAttrName('src', attrs)) {
        return false;
      }
      break;
    case 'iframe':
      if (hasAttrName('src', attrs) || hasAttrName('srcdoc', attrs)) {
        return false;
      }
      break;
    case 'object':
      if (hasAttrName('data', attrs)) {
        return false;
      }
      break;
    case 'applet':
      if (hasAttrName('code', attrs)) {
        return false;
      }
      break;
  }
  return true;
}
!/^(?:script|style|pre|textarea)$/.test(tag); 539 | } 540 | 541 | function canTrimWhitespace(tag) { 542 | return !/^(?:pre|textarea)$/.test(tag); 543 | } 544 | 545 | function normalizeAttr(attr, attrs, tag, options) { 546 | var attrName = options.name(attr.name), 547 | attrValue = attr.value; 548 | 549 | if (options.decodeEntities && attrValue) { 550 | attrValue = decode(attrValue, { isAttributeValue: true }); 551 | } 552 | 553 | if (options.removeRedundantAttributes && 554 | isAttributeRedundant(tag, attrName, attrValue, attrs) || 555 | options.removeScriptTypeAttributes && tag === 'script' && 556 | attrName === 'type' && isScriptTypeAttribute(attrValue) || 557 | options.removeStyleLinkTypeAttributes && (tag === 'style' || tag === 'link') && 558 | attrName === 'type' && isStyleLinkTypeAttribute(attrValue)) { 559 | return; 560 | } 561 | 562 | if (attrValue) { 563 | attrValue = cleanAttributeValue(tag, attrName, attrValue, options, attrs); 564 | } 565 | 566 | if (options.removeEmptyAttributes && 567 | canDeleteEmptyAttribute(tag, attrName, attrValue, options)) { 568 | return; 569 | } 570 | 571 | if (options.decodeEntities && attrValue) { 572 | attrValue = attrValue.replace(/&(#?[0-9a-zA-Z]+;)/g, '&$1'); 573 | } 574 | 575 | return { 576 | attr: attr, 577 | name: attrName, 578 | value: attrValue 579 | }; 580 | } 581 | 582 | function buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr) { 583 | var attrName = normalized.name, 584 | attrValue = normalized.value, 585 | attr = normalized.attr, 586 | attrQuote = attr.quote, 587 | attrFragment, 588 | emittedAttrValue; 589 | 590 | if (typeof attrValue !== 'undefined' && (!options.removeAttributeQuotes || 591 | ~attrValue.indexOf(uidAttr) || !canRemoveAttributeQuotes(attrValue))) { 592 | if (!options.preventAttributesEscaping) { 593 | if (typeof options.quoteCharacter === 'undefined') { 594 | var apos = (attrValue.match(/'/g) || []).length; 595 | var quot = (attrValue.match(/"/g) || []).length; 596 | attrQuote = apos 
< quot ? '\'' : '"'; 597 | } 598 | else { 599 | attrQuote = options.quoteCharacter === '\'' ? '\'' : '"'; 600 | } 601 | if (attrQuote === '"') { 602 | attrValue = attrValue.replace(/"/g, '"'); 603 | } 604 | else { 605 | attrValue = attrValue.replace(/'/g, '''); 606 | } 607 | } 608 | emittedAttrValue = attrQuote + attrValue + attrQuote; 609 | if (!isLast && !options.removeTagWhitespace) { 610 | emittedAttrValue += ' '; 611 | } 612 | } 613 | // make sure trailing slash is not interpreted as HTML self-closing tag 614 | else if (isLast && !hasUnarySlash && !/\/$/.test(attrValue)) { 615 | emittedAttrValue = attrValue; 616 | } 617 | else { 618 | emittedAttrValue = attrValue + ' '; 619 | } 620 | 621 | if (typeof attrValue === 'undefined' || options.collapseBooleanAttributes && 622 | isBooleanAttribute(attrName.toLowerCase(), attrValue.toLowerCase())) { 623 | attrFragment = attrName; 624 | if (!isLast) { 625 | attrFragment += ' '; 626 | } 627 | } 628 | else { 629 | attrFragment = attrName + attr.customAssign + emittedAttrValue; 630 | } 631 | 632 | return attr.customOpen + attrFragment + attr.customClose; 633 | } 634 | 635 | function identity(value) { 636 | return value; 637 | } 638 | 639 | function processOptions(values) { 640 | var options = { 641 | name: function(name) { 642 | return name.toLowerCase(); 643 | }, 644 | canCollapseWhitespace: canCollapseWhitespace, 645 | canTrimWhitespace: canTrimWhitespace, 646 | html5: true, 647 | ignoreCustomComments: [/^!/], 648 | ignoreCustomFragments: [ 649 | /<%[\s\S]*?%>/, 650 | /<\?[\s\S]*?\?>/ 651 | ], 652 | includeAutoGeneratedTags: true, 653 | log: identity, 654 | minifyCSS: identity, 655 | minifyJS: identity, 656 | minifyURLs: identity 657 | }; 658 | Object.keys(values).forEach(function(key) { 659 | var value = values[key]; 660 | if (key === 'caseSensitive') { 661 | if (value) { 662 | options.name = identity; 663 | } 664 | } 665 | else if (key === 'log') { 666 | if (typeof value === 'function') { 667 | options.log = value; 668 
| } 669 | } 670 | else if (key === 'minifyCSS' && typeof value !== 'function') { 671 | if (!value) { 672 | return; 673 | } 674 | if (typeof value !== 'object') { 675 | value = {}; 676 | } 677 | options.minifyCSS = function(text, type) { 678 | text = text.replace(/(url\s*\(\s*)("|'|)(.*?)\2(\s*\))/ig, function(match, prefix, quote, url, suffix) { 679 | return prefix + quote + options.minifyURLs(url) + quote + suffix; 680 | }); 681 | var cleanCssOutput = new CleanCSS(value).minify(wrapCSS(text, type)); 682 | if (cleanCssOutput.errors.length > 0) { 683 | cleanCssOutput.errors.forEach(options.log); 684 | return text; 685 | } 686 | return unwrapCSS(cleanCssOutput.styles, type); 687 | }; 688 | } 689 | else if (key === 'minifyJS' && typeof value !== 'function') { 690 | if (!value) { 691 | return; 692 | } 693 | if (typeof value !== 'object') { 694 | value = {}; 695 | } 696 | (value.parse || (value.parse = {})).bare_returns = false; 697 | options.minifyJS = function(text, inline) { 698 | var start = text.match(/^\s*<!--.*/); 699 | var code = start ? 
text.slice(start[0].length).replace(/\n\s*-->\s*$/, '') : text; 700 | value.parse.bare_returns = inline; 701 | var result = UglifyJS.minify(code, value); 702 | if (result.error) { 703 | options.log(result.error); 704 | return text; 705 | } 706 | return result.code.replace(/;$/, ''); 707 | }; 708 | } 709 | else if (key === 'minifyURLs' && typeof value !== 'function') { 710 | if (!value) { 711 | return; 712 | } 713 | if (typeof value === 'string') { 714 | value = { site: value }; 715 | } 716 | else if (typeof value !== 'object') { 717 | value = {}; 718 | } 719 | options.minifyURLs = function(text) { 720 | try { 721 | return RelateUrl.relate(text, value); 722 | } 723 | catch (err) { 724 | options.log(err); 725 | return text; 726 | } 727 | }; 728 | } 729 | else { 730 | options[key] = value; 731 | } 732 | }); 733 | return options; 734 | } 735 | 736 | function uniqueId(value) { 737 | var id; 738 | do { 739 | id = Math.random().toString(36).replace(/^0\.[0-9]*/, ''); 740 | } while (~value.indexOf(id)); 741 | return id; 742 | } 743 | 744 | var specialContentTags = createMapFromString('script,style'); 745 | 746 | function createSortFns(value, options, uidIgnore, uidAttr) { 747 | var attrChains = options.sortAttributes && Object.create(null); 748 | var classChain = options.sortClassName && new TokenChain(); 749 | 750 | function attrNames(attrs) { 751 | return attrs.map(function(attr) { 752 | return options.name(attr.name); 753 | }); 754 | } 755 | 756 | function shouldSkipUID(token, uid) { 757 | return !uid || token.indexOf(uid) === -1; 758 | } 759 | 760 | function shouldSkipUIDs(token) { 761 | return shouldSkipUID(token, uidIgnore) && shouldSkipUID(token, uidAttr); 762 | } 763 | 764 | function scan(input) { 765 | var currentTag, currentType; 766 | new HTMLParser(input, { 767 | start: function(tag, attrs) { 768 | if (attrChains) { 769 | if (!attrChains[tag]) { 770 | attrChains[tag] = new TokenChain(); 771 | } 772 | attrChains[tag].add(attrNames(attrs).filter(shouldSkipUIDs)); 
773 | } 774 | for (var i = 0, len = attrs.length; i < len; i++) { 775 | var attr = attrs[i]; 776 | if (classChain && attr.value && options.name(attr.name) === 'class') { 777 | classChain.add(trimWhitespace(attr.value).split(/[ \t\n\f\r]+/).filter(shouldSkipUIDs)); 778 | } 779 | else if (options.processScripts && attr.name.toLowerCase() === 'type') { 780 | currentTag = tag; 781 | currentType = attr.value; 782 | } 783 | } 784 | }, 785 | end: function() { 786 | currentTag = ''; 787 | }, 788 | chars: function(text) { 789 | if (options.processScripts && specialContentTags(currentTag) && 790 | options.processScripts.indexOf(currentType) > -1) { 791 | scan(text); 792 | } 793 | } 794 | }); 795 | } 796 | 797 | var log = options.log; 798 | options.log = identity; 799 | options.sortAttributes = false; 800 | options.sortClassName = false; 801 | scan(minify(value, options)); 802 | options.log = log; 803 | if (attrChains) { 804 | var attrSorters = Object.create(null); 805 | for (var tag in attrChains) { 806 | attrSorters[tag] = attrChains[tag].createSorter(); 807 | } 808 | options.sortAttributes = function(tag, attrs) { 809 | var sorter = attrSorters[tag]; 810 | if (sorter) { 811 | var attrMap = Object.create(null); 812 | var names = attrNames(attrs); 813 | names.forEach(function(name, index) { 814 | (attrMap[name] || (attrMap[name] = [])).push(attrs[index]); 815 | }); 816 | sorter.sort(names).forEach(function(name, index) { 817 | attrs[index] = attrMap[name].shift(); 818 | }); 819 | } 820 | }; 821 | } 822 | if (classChain) { 823 | var sorter = classChain.createSorter(); 824 | options.sortClassName = function(value) { 825 | return sorter.sort(value.split(/[ \n\f\r]+/)).join(' '); 826 | }; 827 | } 828 | } 829 | 830 | function minify(value, options, partialMarkup) { 831 | if (options.collapseWhitespace) { 832 | value = collapseWhitespace(value, options, true, true); 833 | } 834 | 835 | var buffer = [], 836 | charsPrevTag, 837 | currentChars = '', 838 | hasChars, 839 | currentTag = 
'', 840 | currentAttrs = [], 841 | stackNoTrimWhitespace = [], 842 | stackNoCollapseWhitespace = [], 843 | optionalStartTag = '', 844 | optionalEndTag = '', 845 | ignoredMarkupChunks = [], 846 | ignoredCustomMarkupChunks = [], 847 | uidIgnore, 848 | uidAttr, 849 | uidPattern; 850 | 851 | // temporarily replace ignored chunks with comments, 852 | // so that we don't have to worry what's there. 853 | // for all we care there might be 854 | // completely-horribly-broken-alien-non-html-emoj-cthulhu-filled content 855 | value = value.replace(/<!-- htmlmin:ignore -->([\s\S]*?)<!-- htmlmin:ignore -->/g, function(match, group1) { 856 | if (!uidIgnore) { 857 | uidIgnore = uniqueId(value); 858 | var pattern = new RegExp('^' + uidIgnore + '([0-9]+)#39;); 859 | if (options.ignoreCustomComments) { 860 | options.ignoreCustomComments = options.ignoreCustomComments.slice(); 861 | } 862 | else { 863 | options.ignoreCustomComments = []; 864 | } 865 | options.ignoreCustomComments.push(pattern); 866 | } 867 | var token = '<!--' + uidIgnore + ignoredMarkupChunks.length + '-->'; 868 | ignoredMarkupChunks.push(group1); 869 | return token; 870 | }); 871 | 872 | var customFragments = options.ignoreCustomFragments.map(function(re) { 873 | return re.source; 874 | }); 875 | if (customFragments.length) { 876 | var reCustomIgnore = new RegExp('\\s*(?:' + customFragments.join('|') + ')+\\s*', 'g'); 877 | // temporarily replace custom ignored fragments with unique attributes 878 | value = value.replace(reCustomIgnore, function(match) { 879 | if (!uidAttr) { 880 | uidAttr = uniqueId(value); 881 | uidPattern = new RegExp('(\\s*)' + uidAttr + '([0-9]+)' + uidAttr + '(\\s*)', 'g'); 882 | if (options.minifyCSS) { 883 | options.minifyCSS = (function(fn) { 884 | return function(text, type) { 885 | text = text.replace(uidPattern, function(match, prefix, index) { 886 | var chunks = ignoredCustomMarkupChunks[+index]; 887 | return chunks[1] + uidAttr + index + uidAttr + chunks[2]; 888 | }); 889 | var ids = 
[]; 890 | new CleanCSS().minify(wrapCSS(text, type)).warnings.forEach(function(warning) { 891 | var match = uidPattern.exec(warning); 892 | if (match) { 893 | var id = uidAttr + match[2] + uidAttr; 894 | text = text.replace(id, ignoreCSS(id)); 895 | ids.push(id); 896 | } 897 | }); 898 | text = fn(text, type); 899 | ids.forEach(function(id) { 900 | text = text.replace(ignoreCSS(id), id); 901 | }); 902 | return text; 903 | }; 904 | })(options.minifyCSS); 905 | } 906 | if (options.minifyJS) { 907 | options.minifyJS = (function(fn) { 908 | return function(text, type) { 909 | return fn(text.replace(uidPattern, function(match, prefix, index) { 910 | var chunks = ignoredCustomMarkupChunks[+index]; 911 | return chunks[1] + uidAttr + index + uidAttr + chunks[2]; 912 | }), type); 913 | }; 914 | })(options.minifyJS); 915 | } 916 | } 917 | var token = uidAttr + ignoredCustomMarkupChunks.length + uidAttr; 918 | ignoredCustomMarkupChunks.push(/^(\s*)[\s\S]*?(\s*)$/.exec(match)); 919 | return '\t' + token + '\t'; 920 | }); 921 | } 922 | 923 | if (options.sortAttributes && typeof options.sortAttributes !== 'function' || 924 | options.sortClassName && typeof options.sortClassName !== 'function') { 925 | createSortFns(value, options, uidIgnore, uidAttr); 926 | } 927 | 928 | function _canCollapseWhitespace(tag, attrs) { 929 | return options.canCollapseWhitespace(tag, attrs, canCollapseWhitespace); 930 | } 931 | 932 | function _canTrimWhitespace(tag, attrs) { 933 | return options.canTrimWhitespace(tag, attrs, canTrimWhitespace); 934 | } 935 | 936 | function removeStartTag() { 937 | var index = buffer.length - 1; 938 | while (index > 0 && !/^<[^/!]/.test(buffer[index])) { 939 | index--; 940 | } 941 | buffer.length = Math.max(0, index); 942 | } 943 | 944 | function removeEndTag() { 945 | var index = buffer.length - 1; 946 | while (index > 0 && !/^<\//.test(buffer[index])) { 947 | index--; 948 | } 949 | buffer.length = Math.max(0, index); 950 | } 951 | 952 | // look for trailing 
whitespaces, bypass any inline tags 953 | function trimTrailingWhitespace(index, nextTag) { 954 | for (var endTag = null; index >= 0 && _canTrimWhitespace(endTag); index--) { 955 | var str = buffer[index]; 956 | var match = str.match(/^<\/([\w:-]+)>$/); 957 | if (match) { 958 | endTag = match[1]; 959 | } 960 | else if (/>$/.test(str) || (buffer[index] = collapseWhitespaceSmart(str, null, nextTag, options))) { 961 | break; 962 | } 963 | } 964 | } 965 | 966 | // look for trailing whitespaces from previously processed text 967 | // which may not be trimmed due to a following comment or an empty 968 | // element which has now been removed 969 | function squashTrailingWhitespace(nextTag) { 970 | var charsIndex = buffer.length - 1; 971 | if (buffer.length > 1) { 972 | var item = buffer[buffer.length - 1]; 973 | if (/^(?:<!|$)/.test(item) && item.indexOf(uidIgnore) === -1) { 974 | charsIndex--; 975 | } 976 | } 977 | trimTrailingWhitespace(charsIndex, nextTag); 978 | } 979 | 980 | new HTMLParser(value, { 981 | partialMarkup: partialMarkup, 982 | continueOnParseError: options.continueOnParseError, 983 | customAttrAssign: options.customAttrAssign, 984 | customAttrSurround: options.customAttrSurround, 985 | html5: options.html5, 986 | 987 | start: function(tag, attrs, unary, unarySlash, autoGenerated) { 988 | if (tag.toLowerCase() === 'svg') { 989 | options = Object.create(options); 990 | options.caseSensitive = true; 991 | options.keepClosingSlash = true; 992 | options.name = identity; 993 | } 994 | tag = options.name(tag); 995 | currentTag = tag; 996 | charsPrevTag = tag; 997 | if (!inlineTextTags(tag)) { 998 | currentChars = ''; 999 | } 1000 | hasChars = false; 1001 | currentAttrs = attrs; 1002 | 1003 | var optional = options.removeOptionalTags; 1004 | if (optional) { 1005 | var htmlTag = htmlTags(tag); 1006 | // <html> may be omitted if first thing inside is not comment 1007 | // <head> may be omitted if first thing inside is an element 1008 | // <body> may be omitted if 
first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template> 1009 | // <colgroup> may be omitted if first thing inside is <col> 1010 | // <tbody> may be omitted if first thing inside is <tr> 1011 | if (htmlTag && canRemoveParentTag(optionalStartTag, tag)) { 1012 | removeStartTag(); 1013 | } 1014 | optionalStartTag = ''; 1015 | // end-tag-followed-by-start-tag omission rules 1016 | if (htmlTag && canRemovePrecedingTag(optionalEndTag, tag)) { 1017 | removeEndTag(); 1018 | // <colgroup> cannot be omitted if preceding </colgroup> is omitted 1019 | // <tbody> cannot be omitted if preceding </tbody>, </thead> or </tfoot> is omitted 1020 | optional = !isStartTagMandatory(optionalEndTag, tag); 1021 | } 1022 | optionalEndTag = ''; 1023 | } 1024 | 1025 | // set whitespace flags for nested tags (eg. <code> within a <pre>) 1026 | if (options.collapseWhitespace) { 1027 | if (!stackNoTrimWhitespace.length) { 1028 | squashTrailingWhitespace(tag); 1029 | } 1030 | if (!unary) { 1031 | if (!_canTrimWhitespace(tag, attrs) || stackNoTrimWhitespace.length) { 1032 | stackNoTrimWhitespace.push(tag); 1033 | } 1034 | if (!_canCollapseWhitespace(tag, attrs) || stackNoCollapseWhitespace.length) { 1035 | stackNoCollapseWhitespace.push(tag); 1036 | } 1037 | } 1038 | } 1039 | 1040 | var openTag = '<' + tag; 1041 | var hasUnarySlash = unarySlash && options.keepClosingSlash; 1042 | 1043 | buffer.push(openTag); 1044 | 1045 | if (options.sortAttributes) { 1046 | options.sortAttributes(tag, attrs); 1047 | } 1048 | 1049 | var parts = []; 1050 | for (var i = attrs.length, isLast = true; --i >= 0;) { 1051 | var normalized = normalizeAttr(attrs[i], attrs, tag, options); 1052 | if (normalized) { 1053 | parts.unshift(buildAttr(normalized, hasUnarySlash, options, isLast, uidAttr)); 1054 | isLast = false; 1055 | } 1056 | } 1057 | if (parts.length > 0) { 1058 | buffer.push(' '); 1059 | buffer.push.apply(buffer, parts); 1060 | } 1061 | // start tag must never be omitted if it has 
any attributes 1062 | else if (optional && optionalStartTags(tag)) { 1063 | optionalStartTag = tag; 1064 | } 1065 | 1066 | buffer.push(buffer.pop() + (hasUnarySlash ? '/' : '') + '>'); 1067 | 1068 | if (autoGenerated && !options.includeAutoGeneratedTags) { 1069 | removeStartTag(); 1070 | optionalStartTag = ''; 1071 | } 1072 | }, 1073 | end: function(tag, attrs, autoGenerated) { 1074 | if (tag.toLowerCase() === 'svg') { 1075 | options = Object.getPrototypeOf(options); 1076 | } 1077 | tag = options.name(tag); 1078 | 1079 | // check if current tag is in a whitespace stack 1080 | if (options.collapseWhitespace) { 1081 | if (stackNoTrimWhitespace.length) { 1082 | if (tag === stackNoTrimWhitespace[stackNoTrimWhitespace.length - 1]) { 1083 | stackNoTrimWhitespace.pop(); 1084 | } 1085 | } 1086 | else { 1087 | squashTrailingWhitespace('/' + tag); 1088 | } 1089 | if (stackNoCollapseWhitespace.length && 1090 | tag === stackNoCollapseWhitespace[stackNoCollapseWhitespace.length - 1]) { 1091 | stackNoCollapseWhitespace.pop(); 1092 | } 1093 | } 1094 | 1095 | var isElementEmpty = false; 1096 | if (tag === currentTag) { 1097 | currentTag = ''; 1098 | isElementEmpty = !hasChars; 1099 | } 1100 | 1101 | if (options.removeOptionalTags) { 1102 | // <html>, <head> or <body> may be omitted if the element is empty 1103 | if (isElementEmpty && topLevelTags(optionalStartTag)) { 1104 | removeStartTag(); 1105 | } 1106 | optionalStartTag = ''; 1107 | // </html> or </body> may be omitted if not followed by comment 1108 | // </head> may be omitted if not followed by space or comment 1109 | // </p> may be omitted if no more content in non-</a> parent 1110 | // except for </dt> or </thead>, end tags may be omitted if no more content in parent element 1111 | if (htmlTags(tag) && optionalEndTag && !trailingTags(optionalEndTag) && (optionalEndTag !== 'p' || !pInlineTags(tag))) { 1112 | removeEndTag(); 1113 | } 1114 | optionalEndTag = optionalEndTags(tag) ? 
tag : ''; 1115 | } 1116 | 1117 | if (options.removeEmptyElements && isElementEmpty && canRemoveElement(tag, attrs)) { 1118 | // remove last "element" from buffer 1119 | removeStartTag(); 1120 | optionalStartTag = ''; 1121 | optionalEndTag = ''; 1122 | } 1123 | else { 1124 | if (autoGenerated && !options.includeAutoGeneratedTags) { 1125 | optionalEndTag = ''; 1126 | } 1127 | else { 1128 | buffer.push('</' + tag + '>'); 1129 | } 1130 | charsPrevTag = '/' + tag; 1131 | if (!inlineTags(tag)) { 1132 | currentChars = ''; 1133 | } 1134 | else if (isElementEmpty) { 1135 | currentChars += '|'; 1136 | } 1137 | } 1138 | }, 1139 | chars: function(text, prevTag, nextTag) { 1140 | prevTag = prevTag === '' ? 'comment' : prevTag; 1141 | nextTag = nextTag === '' ? 'comment' : nextTag; 1142 | if (options.decodeEntities && text && !specialContentTags(currentTag)) { 1143 | text = decode(text); 1144 | } 1145 | if (options.collapseWhitespace) { 1146 | if (!stackNoTrimWhitespace.length) { 1147 | if (prevTag === 'comment') { 1148 | var prevComment = buffer[buffer.length - 1]; 1149 | if (prevComment.indexOf(uidIgnore) === -1) { 1150 | if (!prevComment) { 1151 | prevTag = charsPrevTag; 1152 | } 1153 | if (buffer.length > 1 && (!prevComment || !options.conservativeCollapse && / $/.test(currentChars))) { 1154 | var charsIndex = buffer.length - 2; 1155 | buffer[charsIndex] = buffer[charsIndex].replace(/\s+$/, function(trailingSpaces) { 1156 | text = trailingSpaces + text; 1157 | return ''; 1158 | }); 1159 | } 1160 | } 1161 | } 1162 | if (prevTag) { 1163 | if (prevTag === '/nobr' || prevTag === 'wbr') { 1164 | if (/^\s/.test(text)) { 1165 | var tagIndex = buffer.length - 1; 1166 | while (tagIndex > 0 && buffer[tagIndex].lastIndexOf('<' + prevTag) !== 0) { 1167 | tagIndex--; 1168 | } 1169 | trimTrailingWhitespace(tagIndex - 1, 'br'); 1170 | } 1171 | } 1172 | else if (inlineTextTags(prevTag.charAt(0) === '/' ? 
prevTag.slice(1) : prevTag)) { 1173 | text = collapseWhitespace(text, options, /(?:^|\s)$/.test(currentChars)); 1174 | } 1175 | } 1176 | if (prevTag || nextTag) { 1177 | text = collapseWhitespaceSmart(text, prevTag, nextTag, options); 1178 | } 1179 | else { 1180 | text = collapseWhitespace(text, options, true, true); 1181 | } 1182 | if (!text && /\s$/.test(currentChars) && prevTag && prevTag.charAt(0) === '/') { 1183 | trimTrailingWhitespace(buffer.length - 1, nextTag); 1184 | } 1185 | } 1186 | if (!stackNoCollapseWhitespace.length && nextTag !== 'html' && !(prevTag && nextTag)) { 1187 | text = collapseWhitespace(text, options, false, false, true); 1188 | } 1189 | } 1190 | if (options.processScripts && specialContentTags(currentTag)) { 1191 | text = processScript(text, options, currentAttrs); 1192 | } 1193 | if (isExecutableScript(currentTag, currentAttrs)) { 1194 | text = options.minifyJS(text); 1195 | } 1196 | if (isStyleSheet(currentTag, currentAttrs)) { 1197 | text = options.minifyCSS(text); 1198 | } 1199 | if (options.removeOptionalTags && text) { 1200 | // <html> may be omitted if first thing inside is not comment 1201 | // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template> 1202 | if (optionalStartTag === 'html' || optionalStartTag === 'body' && !/^\s/.test(text)) { 1203 | removeStartTag(); 1204 | } 1205 | optionalStartTag = ''; 1206 | // </html> or </body> may be omitted if not followed by comment 1207 | // </head>, </colgroup> or </caption> may be omitted if not followed by space or comment 1208 | if (compactTags(optionalEndTag) || looseTags(optionalEndTag) && !/^\s/.test(text)) { 1209 | removeEndTag(); 1210 | } 1211 | optionalEndTag = ''; 1212 | } 1213 | charsPrevTag = /^\s*$/.test(text) ? prevTag : 'comment'; 1214 | if (options.decodeEntities && text && !specialContentTags(currentTag)) { 1215 | // Escape any `&` symbols that start either: 1216 | // 1) a legacy named character reference (i.e. 
one that doesn't end with `;`) 1217 | // 2) or any other character reference (i.e. one that does end with `;`) 1218 | // Note that `&` can be escaped as `&`, without the semi-colon. 1219 | // https://mathiasbynens.be/notes/ambiguous-ampersands 1220 | text = text.replace(/&((?:Iacute|aacute|uacute|plusmn|Otilde|otilde|agrave|Agrave|Yacute|yacute|Oslash|oslash|atilde|Atilde|brvbar|ccedil|Ccedil|Ograve|curren|divide|eacute|Eacute|ograve|Oacute|egrave|Egrave|Ugrave|frac12|frac14|frac34|ugrave|oacute|iacute|Ntilde|ntilde|Uacute|middot|igrave|Igrave|iquest|Aacute|cedil|laquo|micro|iexcl|Icirc|icirc|acirc|Ucirc|Ecirc|ocirc|Ocirc|ecirc|ucirc|Aring|aring|AElig|aelig|acute|pound|raquo|Acirc|times|THORN|szlig|thorn|COPY|auml|ordf|ordm|Uuml|macr|uuml|Auml|ouml|Ouml|para|nbsp|euml|quot|QUOT|Euml|yuml|cent|sect|copy|sup1|sup2|sup3|iuml|Iuml|ETH|shy|reg|not|yen|amp|AMP|REG|uml|eth|deg|gt|GT|LT|lt)(?!;)|(?:#?[0-9a-zA-Z]+;))/g, '&$1').replace(/</g, '<'); 1221 | } 1222 | if (uidPattern && options.collapseWhitespace && stackNoTrimWhitespace.length) { 1223 | text = text.replace(uidPattern, function(match, prefix, index) { 1224 | return ignoredCustomMarkupChunks[+index][0]; 1225 | }); 1226 | } 1227 | currentChars += text; 1228 | if (text) { 1229 | hasChars = true; 1230 | } 1231 | buffer.push(text); 1232 | }, 1233 | comment: function(text, nonStandard) { 1234 | var prefix = nonStandard ? '<!' : '<!--'; 1235 | var suffix = nonStandard ? 
'>' : '-->'; 1236 | if (isConditionalComment(text)) { 1237 | text = prefix + cleanConditionalComment(text, options) + suffix; 1238 | } 1239 | else if (options.removeComments) { 1240 | if (isIgnoredComment(text, options)) { 1241 | text = '<!--' + text + '-->'; 1242 | } 1243 | else { 1244 | text = ''; 1245 | } 1246 | } 1247 | else { 1248 | text = prefix + text + suffix; 1249 | } 1250 | if (options.removeOptionalTags && text) { 1251 | // preceding comments suppress tag omissions 1252 | optionalStartTag = ''; 1253 | optionalEndTag = ''; 1254 | } 1255 | buffer.push(text); 1256 | }, 1257 | doctype: function(doctype) { 1258 | buffer.push(options.useShortDoctype ? '<!doctype' + 1259 | (options.removeTagWhitespace ? '' : ' ') + 'html>' : 1260 | collapseWhitespaceAll(doctype)); 1261 | } 1262 | }); 1263 | 1264 | if (options.removeOptionalTags) { 1265 | // <html> may be omitted if first thing inside is not comment 1266 | // <head> or <body> may be omitted if empty 1267 | if (topLevelTags(optionalStartTag)) { 1268 | removeStartTag(); 1269 | } 1270 | // except for </dt> or </thead>, end tags may be omitted if no more content in parent element 1271 | if (optionalEndTag && !trailingTags(optionalEndTag)) { 1272 | removeEndTag(); 1273 | } 1274 | } 1275 | if (options.collapseWhitespace) { 1276 | squashTrailingWhitespace('br'); 1277 | } 1278 | 1279 | return joinResultSegments(buffer, options, uidPattern ? 
function(str) { 1280 | return str.replace(uidPattern, function(match, prefix, index, suffix) { 1281 | var chunk = ignoredCustomMarkupChunks[+index][0]; 1282 | if (options.collapseWhitespace) { 1283 | if (prefix !== '\t') { 1284 | chunk = prefix + chunk; 1285 | } 1286 | if (suffix !== '\t') { 1287 | chunk += suffix; 1288 | } 1289 | return collapseWhitespace(chunk, { 1290 | preserveLineBreaks: options.preserveLineBreaks, 1291 | conservativeCollapse: !options.trimCustomFragments 1292 | }, /^[ \n\r\t\f]/.test(chunk), /[ \n\r\t\f]$/.test(chunk)); 1293 | } 1294 | return chunk; 1295 | }); 1296 | } : identity, uidIgnore ? function(str) { 1297 | return str.replace(new RegExp('<!--' + uidIgnore + '([0-9]+)-->', 'g'), function(match, index) { 1298 | return ignoredMarkupChunks[+index]; 1299 | }); 1300 | } : identity); 1301 | } 1302 | 1303 | function joinResultSegments(results, options, restoreCustom, restoreIgnore) { 1304 | var str; 1305 | var maxLineLength = options.maxLineLength; 1306 | if (maxLineLength) { 1307 | var line = '', lines = []; 1308 | while (results.length) { 1309 | var len = line.length; 1310 | var end = results[0].indexOf('\n'); 1311 | if (end < 0) { 1312 | line += restoreIgnore(restoreCustom(results.shift())); 1313 | } 1314 | else { 1315 | line += restoreIgnore(restoreCustom(results[0].slice(0, end))); 1316 | results[0] = results[0].slice(end + 1); 1317 | } 1318 | if (len > 0 && line.length > maxLineLength) { 1319 | lines.push(line.slice(0, len)); 1320 | line = line.slice(len); 1321 | } 1322 | else if (end >= 0) { 1323 | lines.push(line); 1324 | line = ''; 1325 | } 1326 | } 1327 | if (line) { 1328 | lines.push(line); 1329 | } 1330 | str = lines.join('\n'); 1331 | } 1332 | else { 1333 | str = restoreIgnore(restoreCustom(results.join(''))); 1334 | } 1335 | return options.collapseWhitespace ? 
collapseWhitespace(str, options, true, true) : str; 1336 | } 1337 | 1338 | exports.minify = function(value, options) { 1339 | var start = Date.now(); 1340 | options = processOptions(options || {}); 1341 | var result = minify(value, options); 1342 | options.log('minified in: ' + (Date.now() - start) + 'ms'); 1343 | return result; 1344 | }; 1345 | -------------------------------------------------------------------------------- /src/htmlparser.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * HTML Parser By John Resig (ejohn.org) 3 | * Modified by Juriy "kangax" Zaytsev 4 | * Original code by Erik Arvidsson, Mozilla Public License 5 | * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js 6 | */ 7 | 8 | /* 9 | * // Use like so: 10 | * HTMLParser(htmlString, { 11 | * start: function(tag, attrs, unary) {}, 12 | * end: function(tag) {}, 13 | * chars: function(text) {}, 14 | * comment: function(text) {} 15 | * }); 16 | * 17 | * // or to get an XML string: 18 | * HTMLtoXML(htmlString); 19 | * 20 | * // or to get an XML DOM Document 21 | * HTMLtoDOM(htmlString); 22 | * 23 | * // or to inject into an existing document/DOM node 24 | * HTMLtoDOM(htmlString, document); 25 | * HTMLtoDOM(htmlString, document.body); 26 | * 27 | */ 28 | 29 | /* global ActiveXObject, DOMDocument */ 30 | 31 | 'use strict'; 32 | 33 | var createMapFromString = require('./utils').createMapFromString; 34 | 35 | function makeMap(values) { 36 | return createMapFromString(values, true); 37 | } 38 | 39 | // Regular Expressions for parsing tags and attributes 40 | var singleAttrIdentifier = /([^\s"'<>/=]+)/, 41 | singleAttrAssigns = [/=/], 42 | singleAttrValues = [ 43 | // attr value double quotes 44 | /"([^"]*)"+/.source, 45 | // attr value, single quotes 46 | /'([^']*)'+/.source, 47 | // attr value, no quotes 48 | /([^ \t\n\f\r"'`=<>]+)/.source 49 | ], 50 | // https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName 51 | qnameCapture = (function() { 52 
| // based on https://www.npmjs.com/package/ncname 53 | var combiningChar = '\\u0300-\\u0345\\u0360\\u0361\\u0483-\\u0486\\u0591-\\u05A1\\u05A3-\\u05B9\\u05BB-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u064B-\\u0652\\u0670\\u06D6-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0901-\\u0903\\u093C\\u093E-\\u094D\\u0951-\\u0954\\u0962\\u0963\\u0981-\\u0983\\u09BC\\u09BE-\\u09C4\\u09C7\\u09C8\\u09CB-\\u09CD\\u09D7\\u09E2\\u09E3\\u0A02\\u0A3C\\u0A3E-\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A70\\u0A71\\u0A81-\\u0A83\\u0ABC\\u0ABE-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD\\u0B01-\\u0B03\\u0B3C\\u0B3E-\\u0B43\\u0B47\\u0B48\\u0B4B-\\u0B4D\\u0B56\\u0B57\\u0B82\\u0B83\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0BD7\\u0C01-\\u0C03\\u0C3E-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C82\\u0C83\\u0CBE-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0CD5\\u0CD6\\u0D02\\u0D03\\u0D3E-\\u0D43\\u0D46-\\u0D48\\u0D4A-\\u0D4D\\u0D57\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB\\u0EBC\\u0EC8-\\u0ECD\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F3E\\u0F3F\\u0F71-\\u0F84\\u0F86-\\u0F8B\\u0F90-\\u0F95\\u0F97\\u0F99-\\u0FAD\\u0FB1-\\u0FB7\\u0FB9\\u20D0-\\u20DC\\u20E1\\u302A-\\u302F\\u3099\\u309A'; 54 | var digit = '0-9\\u0660-\\u0669\\u06F0-\\u06F9\\u0966-\\u096F\\u09E6-\\u09EF\\u0A66-\\u0A6F\\u0AE6-\\u0AEF\\u0B66-\\u0B6F\\u0BE7-\\u0BEF\\u0C66-\\u0C6F\\u0CE6-\\u0CEF\\u0D66-\\u0D6F\\u0E50-\\u0E59\\u0ED0-\\u0ED9\\u0F20-\\u0F29'; 55 | var extender = '\\xB7\\u02D0\\u02D1\\u0387\\u0640\\u0E46\\u0EC6\\u3005\\u3031-\\u3035\\u309D\\u309E\\u30FC-\\u30FE'; 56 | var letter = 
'A-Za-z\\xC0-\\xD6\\xD8-\\xF6\\xF8-\\u0131\\u0134-\\u013E\\u0141-\\u0148\\u014A-\\u017E\\u0180-\\u01C3\\u01CD-\\u01F0\\u01F4\\u01F5\\u01FA-\\u0217\\u0250-\\u02A8\\u02BB-\\u02C1\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03CE\\u03D0-\\u03D6\\u03DA\\u03DC\\u03DE\\u03E0\\u03E2-\\u03F3\\u0401-\\u040C\\u040E-\\u044F\\u0451-\\u045C\\u045E-\\u0481\\u0490-\\u04C4\\u04C7\\u04C8\\u04CB\\u04CC\\u04D0-\\u04EB\\u04EE-\\u04F5\\u04F8\\u04F9\\u0531-\\u0556\\u0559\\u0561-\\u0586\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0621-\\u063A\\u0641-\\u064A\\u0671-\\u06B7\\u06BA-\\u06BE\\u06C0-\\u06CE\\u06D0-\\u06D3\\u06D5\\u06E5\\u06E6\\u0905-\\u0939\\u093D\\u0958-\\u0961\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09DC\\u09DD\\u09DF-\\u09E1\\u09F0\\u09F1\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A33\\u0A35\\u0A36\\u0A38\\u0A39\\u0A59-\\u0A5C\\u0A5E\\u0A72-\\u0A74\\u0A85-\\u0A8B\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABD\\u0AE0\\u0B05-\\u0B0C\\u0B0F\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B36-\\u0B39\\u0B3D\\u0B5C\\u0B5D\\u0B5F-\\u0B61\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB5\\u0BB7-\\u0BB9\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C60\\u0C61\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CDE\\u0CE0\\u0CE1\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D28\\u0D2A-\\u0D39\\u0D60\\u0D61\\u0E01-\\u0E2E\\u0E30\\u0E32\\u0E33\\u0E40-\\u0E45\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD\\u0EAE\\u0EB0\\u0EB2\\u0EB3\\u0EBD\\u0EC0-\\u0EC4\\u0F40-\\u0F47\\u0F49-\\u0F69\\u10A0-\\u10C5\\u10D0-\\u10F6\\u1100\\u1102\\u1103\\u1105-\\u1107\\u1109\\u110B\\u110C\\u110E-\\u1112\\u113C\\u113E\\u1140\\u114C\\u114E\\u1150\\u1154\\u1155\\u1159\\u115F-\\u1161\\u1163\\u1
165\\u1167\\u1169\\u116D\\u116E\\u1172\\u1173\\u1175\\u119E\\u11A8\\u11AB\\u11AE\\u11AF\\u11B7\\u11B8\\u11BA\\u11BC-\\u11C2\\u11EB\\u11F0\\u11F9\\u1E00-\\u1E9B\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC2-\\u1FC4\\u1FC6-\\u1FCC\\u1FD0-\\u1FD3\\u1FD6-\\u1FDB\\u1FE0-\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FFC\\u2126\\u212A\\u212B\\u212E\\u2180-\\u2182\\u3007\\u3021-\\u3029\\u3041-\\u3094\\u30A1-\\u30FA\\u3105-\\u312C\\u4E00-\\u9FA5\\uAC00-\\uD7A3'; 57 | var ncname = '[' + letter + '_][' + letter + digit + '\\.\\-_' + combiningChar + extender + ']*'; 58 | return '((?:' + ncname + '\\:)?' + ncname + ')'; 59 | })(), 60 | startTagOpen = new RegExp('^<' + qnameCapture), 61 | startTagClose = /^\s*(\/?)>/, 62 | endTag = new RegExp('^<\\/' + qnameCapture + '[^>]*>'), 63 | doctype = /^<!DOCTYPE\s?[^>]+>/i; 64 | 65 | var IS_REGEX_CAPTURING_BROKEN = false; 66 | 'x'.replace(/x(.)?/g, function(m, g) { 67 | IS_REGEX_CAPTURING_BROKEN = g === ''; 68 | }); 69 | 70 | // Empty Elements 71 | var empty = makeMap('area,base,basefont,br,col,embed,frame,hr,img,input,isindex,keygen,link,meta,param,source,track,wbr'); 72 | 73 | // Inline Elements 74 | var inline = makeMap('a,abbr,acronym,applet,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,iframe,img,input,ins,kbd,label,map,noscript,object,q,s,samp,script,select,small,span,strike,strong,sub,sup,svg,textarea,tt,u,var'); 75 | 76 | // Elements that you can, intentionally, leave open 77 | // (and which close themselves) 78 | var closeSelf = makeMap('colgroup,dd,dt,li,option,p,td,tfoot,th,thead,tr,source'); 79 | 80 | // Attributes that have their values filled in disabled='disabled' 81 | var fillAttrs = makeMap('checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected'); 82 | 83 | // Special Elements (can contain anything) 84 | var special = makeMap('script,style'); 85 | 86 
/**
 * Compile the regular expression that reads one attribute off the front of
 * the remaining start-tag text.
 *
 * The basic attribute pattern captures, in order: the attribute name, the
 * assignment token, and then exactly one of the double-quoted, single-quoted
 * or unquoted value (the other two stay undefined).
 *
 * When `handler.customAttrSurround` is set, every [open, close] pair adds an
 * alternative that wraps the basic pattern in two extra captures (open before,
 * close after), i.e. seven captures per pair. The unwrapped basic pattern is
 * always appended as the last alternative.
 *
 * @param {Object} handler - parser handler/options object
 * @returns {RegExp} expression anchored at the start of input, allowing
 *   leading whitespace
 */
function attrForHandler(handler) {
  var assignAlternatives = joinSingleAttrAssigns(handler);
  var valueAlternatives = singleAttrValues.join('|');
  var plainAttr = singleAttrIdentifier.source +
    '(?:\\s*(' + assignAlternatives + ')' +
    '[ \\t\\n\\f\\r]*(?:' + valueAlternatives + '))?';
  var surrounds = handler.customAttrSurround;

  // No custom wrappers configured: the basic pattern is the whole expression.
  if (!surrounds) {
    return new RegExp('^\\s*' + plainAttr);
  }

  var alternatives = [];
  for (var k = 0; k < surrounds.length; k++) {
    alternatives.push(
      '(?:(' + surrounds[k][0].source + ')\\s*' +
      plainAttr +
      '\\s*(' + surrounds[k][1].source + '))'
    );
  }
  // Fallback alternative: an attribute without any custom wrapper.
  alternatives.push('(?:' + plainAttr + ')');

  return new RegExp('^\\s*(?:' + alternatives.join('|') + ')');
}
handler.comment(html.substring(4, commentEnd)); 136 | } 137 | html = html.substring(commentEnd + 3); 138 | prevTag = ''; 139 | continue; 140 | } 141 | } 142 | 143 | // https://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment 144 | if (/^<!\[/.test(html)) { 145 | var conditionalEnd = html.indexOf(']>'); 146 | 147 | if (conditionalEnd >= 0) { 148 | if (handler.comment) { 149 | handler.comment(html.substring(2, conditionalEnd + 1), true /* non-standard */); 150 | } 151 | html = html.substring(conditionalEnd + 2); 152 | prevTag = ''; 153 | continue; 154 | } 155 | } 156 | 157 | // Doctype: 158 | var doctypeMatch = html.match(doctype); 159 | if (doctypeMatch) { 160 | if (handler.doctype) { 161 | handler.doctype(doctypeMatch[0]); 162 | } 163 | html = html.substring(doctypeMatch[0].length); 164 | prevTag = ''; 165 | continue; 166 | } 167 | 168 | // End tag: 169 | var endTagMatch = html.match(endTag); 170 | if (endTagMatch) { 171 | html = html.substring(endTagMatch[0].length); 172 | endTagMatch[0].replace(endTag, parseEndTag); 173 | prevTag = '/' + endTagMatch[1].toLowerCase(); 174 | continue; 175 | } 176 | 177 | // Start tag: 178 | var startTagMatch = parseStartTag(html); 179 | if (startTagMatch) { 180 | html = startTagMatch.rest; 181 | handleStartTag(startTagMatch); 182 | prevTag = startTagMatch.tagName.toLowerCase(); 183 | continue; 184 | } 185 | 186 | // Treat `<` as text 187 | if (handler.continueOnParseError) { 188 | textEnd = html.indexOf('<', 1); 189 | } 190 | } 191 | 192 | var text; 193 | if (textEnd >= 0) { 194 | text = html.substring(0, textEnd); 195 | html = html.substring(textEnd); 196 | } 197 | else { 198 | text = html; 199 | html = ''; 200 | } 201 | 202 | // next tag 203 | var nextTagMatch = parseStartTag(html); 204 | if (nextTagMatch) { 205 | nextTag = nextTagMatch.tagName; 206 | } 207 | else { 208 | nextTagMatch = html.match(endTag); 209 | if (nextTagMatch) { 210 | nextTag = '/' + nextTagMatch[1]; 211 | } 212 | else { 213 | 
nextTag = ''; 214 | } 215 | } 216 | 217 | if (handler.chars) { 218 | handler.chars(text, prevTag, nextTag); 219 | } 220 | prevTag = ''; 221 | } 222 | else { 223 | var stackedTag = lastTag.toLowerCase(); 224 | var reStackedTag = reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)</' + stackedTag + '[^>]*>', 'i')); 225 | 226 | html = html.replace(reStackedTag, function(all, text) { 227 | if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') { 228 | text = text 229 | .replace(/<!--([\s\S]*?)-->/g, '$1') 230 | .replace(/<!\[CDATA\[([\s\S]*?)]]>/g, '$1'); 231 | } 232 | 233 | if (handler.chars) { 234 | handler.chars(text); 235 | } 236 | 237 | return ''; 238 | }); 239 | 240 | parseEndTag('</' + stackedTag + '>', stackedTag); 241 | } 242 | 243 | if (html === last) { 244 | throw new Error('Parse Error: ' + html); 245 | } 246 | } 247 | 248 | if (!handler.partialMarkup) { 249 | // Clean up any remaining tags 250 | parseEndTag(); 251 | } 252 | 253 | function parseStartTag(input) { 254 | var start = input.match(startTagOpen); 255 | if (start) { 256 | var match = { 257 | tagName: start[1], 258 | attrs: [] 259 | }; 260 | input = input.slice(start[0].length); 261 | var end, attr; 262 | while (!(end = input.match(startTagClose)) && (attr = input.match(attribute))) { 263 | input = input.slice(attr[0].length); 264 | match.attrs.push(attr); 265 | } 266 | if (end) { 267 | match.unarySlash = end[1]; 268 | match.rest = input.slice(end[0].length); 269 | return match; 270 | } 271 | } 272 | } 273 | 274 | function closeIfFound(tagName) { 275 | if (findTag(tagName) >= 0) { 276 | parseEndTag('', tagName); 277 | return true; 278 | } 279 | } 280 | 281 | function handleStartTag(match) { 282 | var tagName = match.tagName; 283 | var unarySlash = match.unarySlash; 284 | 285 | if (handler.html5) { 286 | if (lastTag === 'p' && nonPhrasing(tagName)) { 287 | parseEndTag('', lastTag); 288 | } 289 | else if (tagName === 'tbody') { 290 | 
closeIfFound('thead'); 291 | } 292 | else if (tagName === 'tfoot') { 293 | if (!closeIfFound('tbody')) { 294 | closeIfFound('thead'); 295 | } 296 | } 297 | if (tagName === 'col' && findTag('colgroup') < 0) { 298 | lastTag = 'colgroup'; 299 | stack.push({ tag: lastTag, attrs: [] }); 300 | if (handler.start) { 301 | handler.start(lastTag, [], false, ''); 302 | } 303 | } 304 | } 305 | 306 | if (!handler.html5 && !inline(tagName)) { 307 | while (lastTag && inline(lastTag)) { 308 | parseEndTag('', lastTag); 309 | } 310 | } 311 | 312 | if (closeSelf(tagName) && lastTag === tagName) { 313 | parseEndTag('', tagName); 314 | } 315 | 316 | var unary = empty(tagName) || tagName === 'html' && lastTag === 'head' || !!unarySlash; 317 | 318 | var attrs = match.attrs.map(function(args) { 319 | var name, value, customOpen, customClose, customAssign, quote; 320 | var ncp = 7; // number of captured parts, scalar 321 | 322 | // hackish work around FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778 323 | if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) { 324 | if (args[3] === '') { delete args[3]; } 325 | if (args[4] === '') { delete args[4]; } 326 | if (args[5] === '') { delete args[5]; } 327 | } 328 | 329 | function populate(index) { 330 | customAssign = args[index]; 331 | value = args[index + 1]; 332 | if (typeof value !== 'undefined') { 333 | return '"'; 334 | } 335 | value = args[index + 2]; 336 | if (typeof value !== 'undefined') { 337 | return '\''; 338 | } 339 | value = args[index + 3]; 340 | if (typeof value === 'undefined' && fillAttrs(name)) { 341 | value = name; 342 | } 343 | return ''; 344 | } 345 | 346 | var j = 1; 347 | if (handler.customAttrSurround) { 348 | for (var i = 0, l = handler.customAttrSurround.length; i < l; i++, j += ncp) { 349 | name = args[j + 1]; 350 | if (name) { 351 | quote = populate(j + 2); 352 | customOpen = args[j]; 353 | customClose = args[j + 6]; 354 | break; 355 | } 356 | } 357 | } 358 | 359 | if (!name && (name = args[j])) { 
360 | quote = populate(j + 1); 361 | } 362 | 363 | return { 364 | name: name, 365 | value: value, 366 | customAssign: customAssign || '=', 367 | customOpen: customOpen || '', 368 | customClose: customClose || '', 369 | quote: quote || '' 370 | }; 371 | }); 372 | 373 | if (!unary) { 374 | stack.push({ tag: tagName, attrs: attrs }); 375 | lastTag = tagName; 376 | unarySlash = ''; 377 | } 378 | 379 | if (handler.start) { 380 | handler.start(tagName, attrs, unary, unarySlash); 381 | } 382 | } 383 | 384 | function findTag(tagName) { 385 | var pos; 386 | var needle = tagName.toLowerCase(); 387 | for (pos = stack.length - 1; pos >= 0; pos--) { 388 | if (stack[pos].tag.toLowerCase() === needle) { 389 | break; 390 | } 391 | } 392 | return pos; 393 | } 394 | 395 | function parseEndTag(tag, tagName) { 396 | var pos; 397 | 398 | // Find the closest opened tag of the same type 399 | if (tagName) { 400 | pos = findTag(tagName); 401 | } 402 | // If no tag name is provided, clean shop 403 | else { 404 | pos = 0; 405 | } 406 | 407 | if (pos >= 0) { 408 | // Close all the open elements, up the stack 409 | for (var i = stack.length - 1; i >= pos; i--) { 410 | if (handler.end) { 411 | handler.end(stack[i].tag, stack[i].attrs, i > pos || !tag); 412 | } 413 | } 414 | 415 | // Remove the open elements from the stack 416 | stack.length = pos; 417 | lastTag = pos && stack[pos - 1].tag; 418 | } 419 | else if (tagName.toLowerCase() === 'br') { 420 | if (handler.start) { 421 | handler.start(tagName, [], true, ''); 422 | } 423 | } 424 | else if (tagName.toLowerCase() === 'p') { 425 | if (handler.start) { 426 | handler.start(tagName, [], false, '', true); 427 | } 428 | if (handler.end) { 429 | handler.end(tagName, []); 430 | } 431 | } 432 | } 433 | } 434 | 435 | exports.HTMLParser = HTMLParser; 436 | exports.HTMLtoXML = function(html) { 437 | var results = ''; 438 | 439 | new HTMLParser(html, { 440 | start: function(tag, attrs, unary) { 441 | results += '<' + tag; 442 | 443 | for (var i = 0, 
len = attrs.length; i < len; i++) { 444 | results += ' ' + attrs[i].name + '="' + (attrs[i].value || '').replace(/"/g, '"') + '"'; 445 | } 446 | 447 | results += (unary ? '/' : '') + '>'; 448 | }, 449 | end: function(tag) { 450 | results += '</' + tag + '>'; 451 | }, 452 | chars: function(text) { 453 | results += text; 454 | }, 455 | comment: function(text) { 456 | results += '<!--' + text + '-->'; 457 | }, 458 | ignore: function(text) { 459 | results += text; 460 | } 461 | }); 462 | 463 | return results; 464 | }; 465 | 466 | exports.HTMLtoDOM = function(html, doc) { 467 | // There can be only one of these elements 468 | var one = { 469 | html: true, 470 | head: true, 471 | body: true, 472 | title: true 473 | }; 474 | 475 | // Enforce a structure for the document 476 | var structure = { 477 | link: 'head', 478 | base: 'head' 479 | }; 480 | 481 | if (doc) { 482 | doc = doc.ownerDocument || doc.getOwnerDocument && doc.getOwnerDocument() || doc; 483 | } 484 | else if (typeof DOMDocument !== 'undefined') { 485 | doc = new DOMDocument(); 486 | } 487 | else if (typeof document !== 'undefined' && document.implementation && document.implementation.createDocument) { 488 | doc = document.implementation.createDocument('', '', null); 489 | } 490 | else if (typeof ActiveX !== 'undefined') { 491 | doc = new ActiveXObject('Msxml.DOMDocument'); 492 | } 493 | 494 | var elems = [], 495 | documentElement = doc.documentElement || 496 | doc.getDocumentElement && doc.getDocumentElement(); 497 | 498 | // If we're dealing with an empty document then we 499 | // need to pre-populate it with the HTML document structure 500 | if (!documentElement && doc.createElement) { 501 | (function() { 502 | var html = doc.createElement('html'); 503 | var head = doc.createElement('head'); 504 | head.appendChild(doc.createElement('title')); 505 | html.appendChild(head); 506 | html.appendChild(doc.createElement('body')); 507 | doc.appendChild(html); 508 | })(); 509 | } 510 | 511 | // Find all the unique 
elements 512 | if (doc.getElementsByTagName) { 513 | for (var i in one) { 514 | one[i] = doc.getElementsByTagName(i)[0]; 515 | } 516 | } 517 | 518 | // If we're working with a document, inject contents into 519 | // the body element 520 | var curParentNode = one.body; 521 | 522 | new HTMLParser(html, { 523 | start: function(tagName, attrs, unary) { 524 | // If it's a pre-built element, then we can ignore 525 | // its construction 526 | if (one[tagName]) { 527 | curParentNode = one[tagName]; 528 | return; 529 | } 530 | 531 | var elem = doc.createElement(tagName); 532 | 533 | for (var attr in attrs) { 534 | elem.setAttribute(attrs[attr].name, attrs[attr].value); 535 | } 536 | 537 | if (structure[tagName] && typeof one[structure[tagName]] !== 'boolean') { 538 | one[structure[tagName]].appendChild(elem); 539 | } 540 | else if (curParentNode && curParentNode.appendChild) { 541 | curParentNode.appendChild(elem); 542 | } 543 | 544 | if (!unary) { 545 | elems.push(elem); 546 | curParentNode = elem; 547 | } 548 | }, 549 | end: function(/* tag */) { 550 | elems.length -= 1; 551 | 552 | // Init the new parentNode 553 | curParentNode = elems[elems.length - 1]; 554 | }, 555 | chars: function(text) { 556 | curParentNode.appendChild(doc.createTextNode(text)); 557 | }, 558 | comment: function(/* text */) { 559 | // create comment node 560 | }, 561 | ignore: function(/* text */) { 562 | // What to do here? 
/**
 * One node in the tree of ordering rules produced by
 * TokenChain#createSorter.
 *
 * After construction the creator assigns:
 *   - `keys`: array of '$'-prefixed token names, in priority order;
 *   - one property per key holding the child Sorter that orders whatever
 *     follows that token.
 */
function Sorter() {
}

/**
 * Reorder `tokens` in place according to this sorter's rules.
 *
 * The first key whose token occurs at or after `fromIndex` wins: every
 * occurrence of that token is moved to the front of the unsorted region, and
 * the remainder is handed to that key's child sorter. If no key matches, the
 * array is returned as it is.
 *
 * @param {string[]} tokens - array to reorder (mutated)
 * @param {number} [fromIndex=0] - start of the still-unsorted region
 * @returns {string[]} the same `tokens` array
 */
Sorter.prototype.sort = function(tokens, fromIndex) {
  fromIndex = fromIndex || 0;
  for (var i = 0, len = this.keys.length; i < len; i++) {
    var key = this.keys[i];
    // Keys carry a one-character prefix; strip it to get the token itself.
    var token = key.slice(1);
    var index = tokens.indexOf(token, fromIndex);
    if (index !== -1) {
      do {
        if (index !== fromIndex) {
          tokens.splice(index, 1);
          tokens.splice(fromIndex, 0, token);
        }
        fromIndex++;
      } while ((index = tokens.indexOf(token, fromIndex)) !== -1);
      return this[key].sort(tokens, fromIndex);
    }
  }
  return tokens;
};

/**
 * Collects token lists and derives a Sorter from them.
 *
 * Each distinct token is stored under the own property '$' + token. The
 * one-character prefix keeps token names apart from the `add` and
 * `createSorter` methods, and is removed again with `key.slice(1)`.
 */
function TokenChain() {
}

TokenChain.prototype = {
  /**
   * Register one list of tokens. The same array object is recorded under
   * every token it contains; createSorter later mutates these arrays.
   *
   * @param {string[]} tokens - token list to record
   */
  add: function(tokens) {
    var self = this;
    tokens.forEach(function(token) {
      var key = '$' + token;
      if (!self[key]) {
        self[key] = [];
        self[key].processed = 0;
      }
      self[key].push(tokens);
    });
  },

  /**
   * Build the Sorter for everything added so far.
   *
   * Keys are ordered by how many lists contain the token (most first), ties
   * broken by key name. A key is kept only if some of its lists have not
   * already been accounted for by an earlier key; for each kept key a child
   * sorter is built from its lists with that token removed.
   *
   * @returns {Sorter} root of the sorter tree
   */
  createSorter: function() {
    var self = this;
    var sorter = new Sorter();
    sorter.keys = Object.keys(self).sort(function(j, k) {
      var m = self[j].length;
      var n = self[k].length;
      return m < n ? 1 : m > n ? -1 : j < k ? -1 : j > k ? 1 : 0;
    }).filter(function(key) {
      if (self[key].processed < self[key].length) {
        var token = key.slice(1);
        var chain = new TokenChain();
        self[key].forEach(function(tokens) {
          var index;
          while ((index = tokens.indexOf(token)) !== -1) {
            tokens.splice(index, 1);
          }
          // The remaining tokens of this list are now covered by this branch.
          tokens.forEach(function(other) {
            self['$' + other].processed++;
          });
          chain.add(tokens.slice(0));
        });
        sorter[key] = chain.createSorter();
        return true;
      }
      return false;
    });
    return sorter;
  }
};
| } 28 | 29 | if (typeof phantom === 'undefined') { 30 | load('./src/htmlminifier'); 31 | require(process.argv[2]); 32 | hook(); 33 | } 34 | else { 35 | var system = require('system'); 36 | setTimeout(function() { 37 | system.stderr.write('timed out'); 38 | phantom.exit(1); 39 | }, 15000); 40 | var page = require('webpage').create(); 41 | page.onAlert = function(details) { 42 | console.log(details); 43 | phantom.exit(); 44 | }; 45 | page.open(system.args[1], function(status) { 46 | if (status !== 'success') { 47 | phantom.exit(1); 48 | } 49 | page.evaluate(hook); 50 | }); 51 | } 52 | -------------------------------------------------------------------------------- /tests/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "browser": true, 4 | "qunit": true 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /tests/index.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="utf-8"> 5 | <meta name="google-site-verification" content="3tgjKRYCVRWTziuUgYny5vY4eU7Vddvat0pLnoCuPBs"> 6 | <title>HTML Minifier Tests</title> 7 | <link rel="stylesheet" href="https://code.jquery.com/qunit/qunit-2.17.2.css"> 8 | </head> 9 | <body> 10 | <div id="qunit"> 11 | <button onclick="QUnit.start()">Start</button> 12 | </div> 13 | <div id="qunit-fixture"></div> 14 | <script src="https://code.jquery.com/qunit/qunit-2.17.2.js"></script> 15 | <script src="../dist/htmlminifier.min.js"></script> 16 | <script src="minifier.js"></script> 17 | </body> 18 | </html> 19 | --------------------------------------------------------------------------------