├── lib ├── html_press │ ├── version.rb │ ├── html_entities.rb │ └── html.rb └── html_press.rb ├── .gitignore ├── .travis.yml ├── Gemfile ├── Rakefile ├── profile └── profile.rb ├── html_press.gemspec ├── LICENSE.txt ├── Readme.md └── spec └── html_press_spec.rb /lib/html_press/version.rb: -------------------------------------------------------------------------------- 1 | module HtmlPress 2 | VERSION = "0.8.2" 3 | end 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | .bundle 3 | Gemfile.lock 4 | pkg/* 5 | profile/reports/* 6 | /.project 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 1.8.7 4 | - 1.9.2 5 | - 1.9.3 6 | - jruby-18mode 7 | - rbx-18mode 8 | # - jruby-19mode 9 | # - rbx-19mode -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "http://rubygems.org" 2 | 3 | # Specify your gem's dependencies in html_press.gemspec 4 | gemspec 5 | 6 | gem "ruby-prof", :platforms => :mri 7 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | 3 | [:build, :install, :release].each do |task_name| 4 | Rake::Task[task_name].prerequisites << :spec 5 | end 6 | 7 | require "rspec/core/rake_task" 8 | RSpec::Core::RakeTask.new 9 | 10 | task :default => :spec 11 | -------------------------------------------------------------------------------- /lib/html_press.rb: -------------------------------------------------------------------------------- 1 | require "html_press/version" 2 | require 
"html_press/html_entities" 3 | require "html_press/html" 4 | 5 | require 'multi_css' 6 | require 'multi_js' 7 | 8 | module HtmlPress 9 | def self.press(text, options = {}) 10 | HtmlPress::Html.new(options).press text 11 | end 12 | 13 | # for backward compatibility 14 | def self.compress(text, options = {}) 15 | HtmlPress::Html.new(options).press text 16 | end 17 | 18 | def self.js_compressor (text, options = nil) 19 | options ||= {} 20 | options[:output] ||= {} 21 | options[:output][:inline_script] = true 22 | MultiJs.compile(text, options).gsub(/;$/,'') 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /lib/html_press/html_entities.rb: -------------------------------------------------------------------------------- 1 | require 'htmlentities' 2 | 3 | module HtmlPress 4 | class Entities 5 | def initialize 6 | @replacement_hash = 'MINIFYENTITY' + Time.now.to_i.to_s 7 | @placeholders = [] 8 | end 9 | 10 | def reserve(content) 11 | @placeholders.push content 12 | '%' + @replacement_hash + '%' + (@placeholders.size - 1).to_s + '%' 13 | end 14 | 15 | def minify text 16 | out = text.dup 17 | 18 | out.gsub! /<|<|>|>|&|&/ do |m| 19 | reserve m 20 | end 21 | 22 | out = HTMLEntities.new.decode(out) 23 | 24 | re = Regexp.new('%' + @replacement_hash + '%(\d+)%') 25 | out.gsub! 
# Profiles HtmlPress.press against the sample page stored next to this script
# (index.html), prints how many bytes were saved, and writes ruby-prof reports
# into the sibling "reports" directory.
require "bundler"
Bundler.setup

require 'fileutils' # FileUtils is used below; do not rely on another library loading it
require 'ruby-prof'
require 'html_press'

file_path = File.expand_path("../index.html", __FILE__)
# Block form closes the file handle as soon as the content has been read.
html = File.open(file_path, "r:UTF-8") { |f| f.read }

# require 'open-uri'
# html = open('http://www.amazon.com/') {|f| f.read }

before = html.bytesize
html.force_encoding "UTF-8" if html.respond_to?(:force_encoding)

RubyProf.start
html = HtmlPress.press html
result = RubyProf.stop

after = html.bytesize
saved = (before - after).to_f
puts "Economy: #{(saved / 1024).round(2)}kb (#{(100 * saved / before).round(2)}%)"

# Start every run from an empty reports directory.
report_path = File.expand_path("../reports", __FILE__)
FileUtils.rm_rf(report_path)
FileUtils.mkdir_p(report_path)
printer = RubyProf::MultiPrinter.new(result)
printer.print(:path => report_path, :profile => "profile")
ls-files`.split("\n") 16 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 17 | s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } 18 | s.require_paths = ["lib"] 19 | 20 | s.add_development_dependency "rspec" 21 | s.add_development_dependency "rake" 22 | 23 | s.add_dependency "multi_css", ">= 0.1.0" 24 | s.add_dependency "multi_js", ">= 0.1.0" 25 | s.add_dependency "htmlentities" 26 | end 27 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 sterebooster 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # HtmlPress  [](https://travis-ci.org/stereobooster/html_press) [](https://gemnasium.com/stereobooster/html_press) [](https://codeclimate.com/github/stereobooster/html_press) 2 | 3 | ## How it works 4 | 5 | Remove all whitespace junk. Leave only HTML 6 | 7 | ``` 8 | 1. ┌――――――――――――――――――╖ 2. ┌――――――――――――――――――╖ 9 | ●――――――――――――――├―╢ ws junk ║ ●――――――――├―――――――――╢ ║ 10 | └――――――――――――――――――╜ └――――――――――――――――――╜ 11 | ``` 12 | 13 | ## Usage 14 | 15 | ### Ruby 16 | ```ruby 17 | require 'html_press' 18 | compressed_html = HtmlPress.press html 19 | ``` 20 | 21 | ### Jekyll 22 | see [jekyll_press](https://github.com/stereobooster/jekyll_press) 23 | 24 | ### Rails 25 | TODO :exclamation: 26 | 27 | ### Sinatra 28 | TODO :exclamation: 29 | 30 | ### Command line 31 | TODO :exclamation: 32 | 33 | ## TODO 34 | - use parser ([鋸](https://github.com/tenderlove/nokogiri)) instead of regexps 35 | - add option to convert relative urls to absolute urls (for SEO) 36 | - [ambiguous ampersands](http://mathiasbynens.be/notes/ambiguous-ampersands) for compression? 
37 | - Support other js/css minifiers (Closure, YUI compressor) 38 | - htmlTidy 39 | - Rack plugin 40 | - add script to benchmark real projects like amazon or stackoverflow 41 | - support html5 tags 42 | - add more options 43 | - Optimization: make substring replace based on substring length and its position in initial string 44 | 45 | ## Alternatives 46 | - [html-minifier](https://github.com/kangax/html-minifier) (js), [test suite](https://github.com/kangax/html-minifier/blob/gh-pages/tests/index.html), ruby wrapper - [html_minifier](https://github.com/stereobooster/html_minifier) 47 | - [htmlcompressor](http://code.google.com/p/htmlcompressor/) (java), [test suite](http://code.google.com/p/htmlcompressor/source/browse/#svn%2Ftrunk%2Fsrc%2Ftest%2Fresources%2Fhtml%253Fstate%253Dclosed) 48 | - PHPTal compress (php), [test suite](https://svn.motion-twin.com/phptal/trunk/tests/CompressTest.php) 49 | - [W3 total cache](http://wordpress.org/extend/plugins/w3-total-cache/) - WP plugin from smashingmagazine contains html minifier (php) 50 | 51 | ## Additional tools 52 | - [jeanny](https://github.com/gfranco/jeanny) - rename css classes and ids in css and html files 53 | - make shorter paths for images in css 54 | - [deadweight](https://github.com/aanand/deadweight) - remove unused css rules from css files 55 | - [csscss](http://zmoazeni.github.com/csscss/) will parse any CSS files you give it and let you know which rulesets have duplicated declarations. 
# Builds a compressor whose settings are +options+ layered over DEFAULTS
# (:logger, :unquoted_attributes, :drop_empty_values, :strip_crlf,
# :js_minifier_options).
#
# The deprecated :dump_empty_values key is still accepted: its value is moved
# to :drop_empty_values and a deprecation warning is emitted.
#
# @param options [Hash] overrides for DEFAULTS
# @raise [ArgumentError] if :logger is set but does not respond to #error
def initialize(options = {})
  @options = DEFAULTS.merge(options)

  # Hash#key? asks the hash directly instead of building a throwaway key array.
  if @options.key?(:dump_empty_values)
    @options[:drop_empty_values] = @options.delete(:dump_empty_values)
    warn "dump_empty_values deprecated use drop_empty_values"
  end

  # A logger is optional, but when one is given it must be able to report errors.
  if @options[:logger] && !@options[:logger].respond_to?(:error)
    raise ArgumentError, 'Logger has no error method'
  end
end
html.read : html.dup 25 | 26 | @replacement_hash = 'MINIFYHTML' + Time.now.to_i.to_s 27 | @placeholders = [] 28 | 29 | out = process_ie_conditional_comments out 30 | out = process_scripts out 31 | out = process_styles out 32 | out = process_html_comments out 33 | out = process_pres out 34 | 35 | out = HtmlPress.entities_compressor out 36 | 37 | out = trim_lines out 38 | out = process_block_elements out 39 | out = process_textareas out 40 | 41 | # use newlines before 1st attribute in open tags (to limit line lengths) 42 | # out.gsub!(/(<[a-z\-:]+)\s+([^>]+>)/i, "\\1\n\\2") 43 | 44 | out = process_attributes out 45 | out = process_whitespaces out 46 | out = fill_placeholders out 47 | 48 | out 49 | end 50 | 51 | # for backward compatibility 52 | alias :compile :press 53 | 54 | protected 55 | 56 | # IE conditional comments 57 | def process_ie_conditional_comments (out) 58 | out.gsub /()\s*/ do 59 | m = $1 60 | comment = $2 61 | comment_compressed = Html.new.press(comment) 62 | m.gsub!(comment, comment_compressed) 63 | reserve m 64 | end 65 | end 66 | 67 | # replace SCRIPTs (and minify) with placeholders 68 | def process_scripts (out) 69 | out.gsub /(" 46 | script = " " 47 | HtmlPress.press(script).should eql pressed_script 48 | 49 | script = %q{} 50 | HtmlPress.press(script).should eql script 51 | end 52 | 53 | it "should compress css in style tags" do 54 | style = " div { margin: 0px 0px; \n} " 55 | pressed_style = "" 56 | style = " " 57 | HtmlPress.press(style).should eql pressed_style 58 | end 59 | 60 | it "should remove html comments" do 61 | HtmlPress.press("
").should eql "" 62 | end 63 | 64 | it "should leave IE conditional comments" do 65 | text = "" 66 | HtmlPress.press(text).should eql text 67 | end 68 | 69 | it "should work with special utf-8 symbols" do 70 | HtmlPress.press("✪ ").should eql "✪" 71 | end 72 | 73 | it "should work with tags in upper case" do 74 | HtmlPress.press("").should eql "" 75 | end 76 | 77 | it "should remove whitespaces between IE conditional comments" do 78 | text = " " 79 | text2 = " " 80 | # TODO ↑ remove this whitespace 81 | HtmlPress.press(text).should eql text2 82 | end 83 | 84 | it "should remove whitespaces between script tags" do 85 | text = " \t " 86 | text2 = " " 87 | HtmlPress.press(text).should eql text2 88 | end 89 | 90 | it "should concatenate adjacent script tags" do 91 | pending "Not implemented yet" do 92 | text = " \t " 93 | text2 = " " 94 | HtmlPress.press(text).should eql text2 95 | end 96 | end 97 | 98 | it "should treat text inside IE conditional comments as it was without comments" do 99 | text = "