├── lib └── html │ ├── pipeline │ ├── version.rb │ ├── plain_text_input_filter.rb │ ├── https_filter.rb │ ├── text_filter.rb │ ├── autolink_filter.rb │ ├── syntax_highlight_filter.rb │ ├── textile_filter.rb │ ├── markdown_filter.rb │ ├── toc_filter.rb │ ├── image_max_width_filter.rb │ ├── body_content.rb │ ├── emoji_filter.rb │ ├── camo_filter.rb │ ├── email_reply_filter.rb │ ├── @mention_filter.rb │ ├── sanitization_filter.rb │ └── filter.rb │ └── pipeline.rb ├── Gemfile ├── .travis.yml ├── .gitignore ├── Rakefile ├── CHANGELOG.md ├── test ├── html │ └── pipeline │ │ ├── emoji_filter_test.rb │ │ ├── plain_text_input_filter_test.rb │ │ ├── autolink_filter_test.rb │ │ ├── camo_filter_test.rb │ │ ├── toc_filter_test.rb │ │ ├── sanitization_filter_test.rb │ │ ├── image_max_width_filter_test.rb │ │ ├── markdown_filter_test.rb │ │ └── mention_filter_test.rb └── test_helper.rb ├── LICENSE ├── html-pipeline.gemspec └── README.md /lib/html/pipeline/version.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | VERSION = "0.0.6" 4 | end 5 | end 6 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in html-pipeline.gemspec 4 | gemspec 5 | 6 | group :development do 7 | gem 'bundler' 8 | gem 'rake' 9 | end 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | 3 | before_install: 4 | - sudo apt-get update -qq 5 | - sudo apt-get install -qq libicu-dev 6 | 7 | script: "bundle exec rake" 8 | 9 | rvm: 10 | - 1.8.7 11 | - 1.9.2 12 | - 1.9.3 13 | - ree -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | bin/* 19 | vendor/gems -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rake 2 | require "bundler/gem_tasks" 3 | require 'rake/testtask' 4 | 5 | Rake::TestTask.new do |t| 6 | t.libs << "test" 7 | t.test_files = FileList['test/**/*_test.rb'] 8 | t.verbose = true 9 | end 10 | 11 | task :default => :test -------------------------------------------------------------------------------- /lib/html/pipeline/plain_text_input_filter.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | # Simple filter for plain text input. HTML escapes the text input and wraps it 4 | # in a div. 5 | class PlainTextInputFilter < TextFilter 6 | def call 7 | "
#{EscapeUtils.escape_html(@text, false)}
" 8 | end 9 | end 10 | end 11 | end -------------------------------------------------------------------------------- /lib/html/pipeline/https_filter.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | # HTML Filter for replacing http github urls with https versions. 4 | class HttpsFilter < Filter 5 | def call 6 | doc.css('a[href^="http://github.com"]').each do |element| 7 | element['href'] = element['href'].sub(/^http:/,'https:') 8 | end 9 | doc 10 | end 11 | end 12 | end 13 | end -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## 0.0.6 4 | 5 | * don't mutate markdown strings: jakedouglas #32 6 | 7 | ## 0.0.5 8 | 9 | * fix li xss vulnerability in sanitization filter: vmg #31 10 | * gemspec cleanup: nbibler #23, jbarnette #24 11 | * doc updates: jch #16, pborreli #17, wickedshimmy #18, benubois #19, blackerby #21 12 | * loosen gemoji dependency: josh #15 13 | 14 | ## 0.0.4 15 | 16 | * initial public release -------------------------------------------------------------------------------- /lib/html/pipeline/text_filter.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | class TextFilter < Filter 4 | attr_reader :text 5 | 6 | def initialize(text, context = nil, result = nil) 7 | raise TypeError, "text cannot be HTML" if text.is_a?(DocumentFragment) 8 | # Ensure that this is always a string 9 | @text = text.respond_to?(:to_str) ? 
text.to_str : text.to_s 10 | super nil, context, result 11 | end 12 | end 13 | end 14 | end -------------------------------------------------------------------------------- /test/html/pipeline/emoji_filter_test.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper' 2 | 3 | class HTML::Pipeline::EmojiFilterTest < Test::Unit::TestCase 4 | EmojiFilter = HTML::Pipeline::EmojiFilter 5 | 6 | def test_emojify 7 | filter = EmojiFilter.new("

:shipit:

", {:asset_root => 'https://foo.com'}) 8 | doc = filter.call 9 | assert_match "https://foo.com/emoji/shipit.png", doc.search('img').attr('src').value 10 | end 11 | 12 | def test_required_context_validation 13 | exception = assert_raise(ArgumentError) { 14 | EmojiFilter.call("", {}) 15 | } 16 | assert_match /:asset_root/, exception.message 17 | end 18 | end -------------------------------------------------------------------------------- /lib/html/pipeline/autolink_filter.rb: -------------------------------------------------------------------------------- 1 | require 'rinku' 2 | 3 | module HTML 4 | class Pipeline 5 | # HTML Filter for auto_linking urls in HTML. 6 | # 7 | # Context options: 8 | # :autolink - boolean whether to autolink urls 9 | # :flags - additional Rinku flags. See https://github.com/vmg/rinku 10 | # 11 | # This filter does not write additional information to the context. 12 | class AutolinkFilter < Filter 13 | def call 14 | return html if context[:autolink] == false 15 | flags = 0 16 | flags |= context[:flags] if context[:flags] 17 | 18 | Rinku.auto_link(html, :urls, nil, %w[a script kbd pre code], flags) 19 | end 20 | end 21 | end 22 | end -------------------------------------------------------------------------------- /lib/html/pipeline/syntax_highlight_filter.rb: -------------------------------------------------------------------------------- 1 | require 'linguist' 2 | 3 | module HTML 4 | class Pipeline 5 | # HTML Filter that syntax highlights code blocks wrapped 6 | # in
<pre lang="...">.
 7 |     class SyntaxHighlightFilter < Filter
 8 |       def call
 9 |         doc.search('pre').each do |node|
10 |           next unless lang = node['lang']
11 |           next unless lexer = Pygments::Lexer[lang]
12 |           text = node.inner_text
13 | 
14 |           html = highlight_with_timeout_handling(lexer, text)
15 |           next if html.nil?
16 | 
17 |           node.replace(html)
18 |         end
19 |         doc
20 |       end
21 | 
22 |       def highlight_with_timeout_handling(lexer, text)
23 |         lexer.highlight(text)
24 |       rescue Timeout::Error => boom
25 |         nil
26 |       end
27 |     end
28 |   end
29 | end


--------------------------------------------------------------------------------
/lib/html/pipeline/textile_filter.rb:
--------------------------------------------------------------------------------
 1 | module HTML
 2 |   class Pipeline
 3 |     # Text filter that renders Textile markup to HTML using RedCloth. Unlike
 4 |     # most filters it takes non-HTML text as input, so it must be used as the
 5 |     # first filter in a pipeline.
 6 |     #
 7 |     # Context options:
 8 |     #   :autolink => false    Disable autolinking URLs (not read in this class;
 9 |     #                         presumably honored by AutolinkFilter - TODO confirm)
10 |     #
11 |     # This filter does not write any additional information to the context hash.
12 |     # NOTE This filter is provided for really old comments only. It probably
13 |     # shouldn't be used for anything new.
14 |     class TextileFilter < TextFilter
15 |       # Render @text as Textile and return RedCloth's to_html output. NOTE(review): RedCloth is not required in this file; confirm it is loaded elsewhere.
16 |       def call
17 |         RedCloth.new(@text).to_html
18 |       end
19 |     end
20 |   end
21 | end


--------------------------------------------------------------------------------
/test/html/pipeline/plain_text_input_filter_test.rb:
--------------------------------------------------------------------------------
 1 | require "test_helper"
 2 | 
 3 | class HTML::Pipeline::PlainTextInputFilterTest < Test::Unit::TestCase
 4 |   PlainTextInputFilter = HTML::Pipeline::PlainTextInputFilter
 5 | 
 6 |   def test_fails_when_given_a_documentfragment
 7 |     body = "

heyo

" 8 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 9 | assert_raise(TypeError) { PlainTextInputFilter.call(doc, {}) } 10 | end 11 | 12 | def test_wraps_input_in_a_div_element 13 | doc = PlainTextInputFilter.call("howdy pahtner", {}) 14 | assert_equal "
howdy pahtner
", doc.to_s 15 | end 16 | 17 | def test_html_escapes_plain_text_input 18 | doc = PlainTextInputFilter.call("See: ", {}) 19 | assert_equal "
See: <http://example.org>
", 20 | doc.to_s 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /test/html/pipeline/autolink_filter_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | 3 | AutolinkFilter = HTML::Pipeline::AutolinkFilter 4 | 5 | class HTML::Pipeline::AutolinkFilterTest < Test::Unit::TestCase 6 | def test_uses_rinku_for_autolinking 7 | # just try to parse a complicated piece of HTML 8 | # that Rails auto_link cannot handle 9 | assert_equal '

"http://www.github.com"

', 10 | AutolinkFilter.to_html('

"http://www.github.com"

') 11 | end 12 | 13 | def test_autolink_option 14 | assert_equal '

"http://www.github.com"

', 15 | AutolinkFilter.to_html('

"http://www.github.com"

', :autolink => false) 16 | end 17 | 18 | def test_autolink_flags 19 | assert_equal '

"http://github"

', 20 | AutolinkFilter.to_html('

"http://github"

', :flags => Rinku::AUTOLINK_SHORT_DOMAINS) 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /lib/html/pipeline/markdown_filter.rb: -------------------------------------------------------------------------------- 1 | require 'github/markdown' 2 | 3 | module HTML 4 | class Pipeline 5 | # HTML Filter that converts Markdown text into HTML and converts into a 6 | # DocumentFragment. This is different from most filters in that it can take a 7 | # non-HTML as input. It must be used as the first filter in a pipeline. 8 | # 9 | # Context options: 10 | # :gfm => false Disable GFM line-end processing 11 | # 12 | # This filter does not write any additional information to the context hash. 13 | class MarkdownFilter < TextFilter 14 | def initialize(text, context = nil, result = nil) 15 | super text, context, result 16 | @text = @text.gsub "\r", '' 17 | end 18 | 19 | # Convert Markdown to HTML using the best available implementation 20 | # and convert into a DocumentFragment. 21 | def call 22 | mode = (context[:gfm] != false) ? :gfm : :markdown 23 | html = GitHub::Markdown.to_html(@text, mode) 24 | html.rstrip! 25 | html 26 | end 27 | end 28 | end 29 | end -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 GitHub Inc. 
and Jerry Cheung 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /html-pipeline.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | require File.expand_path("../lib/html/pipeline/version", __FILE__) 3 | 4 | Gem::Specification.new do |gem| 5 | gem.name = "html-pipeline" 6 | gem.version = HTML::Pipeline::VERSION 7 | gem.license = "MIT" 8 | gem.authors = ["Ryan Tomayko", "Jerry Cheung"] 9 | gem.email = ["ryan@github.com", "jerry@github.com"] 10 | gem.description = %q{GitHub HTML processing filters and utilities} 11 | gem.summary = %q{Helpers for processing content through a chain of filters} 12 | gem.homepage = "https://github.com/jch/html-pipeline" 13 | 14 | gem.files = `git ls-files`.split $/ 15 | gem.test_files = gem.files.grep(%r{^test}) 16 | gem.require_paths = ["lib"] 17 | 18 | gem.add_dependency "gemoji", "~> 1.0" 19 | gem.add_dependency "nokogiri", "~> 1.4" 20 | gem.add_dependency "github-markdown", "~> 0.5" 21 | gem.add_dependency "sanitize", "~> 2.0" 22 | gem.add_dependency "github-linguist", "~> 2.1" 23 | gem.add_dependency "rinku", "~> 1.7" 24 | gem.add_dependency "escape_utils", "~> 0.2" 25 | gem.add_dependency "activesupport", ">= 2" 26 | end 27 | -------------------------------------------------------------------------------- /lib/html/pipeline/toc_filter.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | # HTML filter that adds a 'name' attribute to all headers 4 | # in a document, so they can be accessed from a table of contents 5 | # 6 | # TODO: besides adding the name attribute, we should get around to 7 | # eventually generating the Table of Contents itself, with links 8 | # to each header 9 | class TableOfContentsFilter < Filter 10 | def call 11 | headers = Hash.new(0) 12 | doc.css('h1, h2, h3, h4, h5, h6').each do |node| 13 | name = node.text.downcase 14 | name.gsub!(/[^\w\- ]/, '') # 
remove punctuation 15 | name.gsub!(' ', '-') # replace spaces with dash 16 | name = EscapeUtils.escape_uri(name) # escape extended UTF-8 chars 17 | 18 | uniq = (headers[name] > 0) ? "-#{headers[name]}" : '' 19 | headers[name] += 1 20 | if header_content = node.children.first 21 | header_content.add_previous_sibling(%Q{}) 22 | end 23 | end 24 | doc 25 | end 26 | end 27 | end 28 | end -------------------------------------------------------------------------------- /lib/html/pipeline/image_max_width_filter.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | # This filter rewrites image tags with a max-width inline style and also wraps 4 | # the image in an tag that causes the full size image to be opened in a 5 | # new tab. 6 | # 7 | # The max-width inline styles are especially useful in HTML email which 8 | # don't use a global stylesheets. 9 | class ImageMaxWidthFilter < Filter 10 | def call 11 | doc.search('img').each do |element| 12 | # Skip if there's already a style attribute. Not sure how this 13 | # would happen but we can reconsider it in the future. 14 | next if element['style'] 15 | 16 | # Bail out if src doesn't look like a valid http url. trying to avoid weird 17 | # js injection via javascript: urls. 
18 | next if element['src'].to_s.strip =~ /\Ajavascript/i 19 | 20 | element['style'] = "max-width:100%;" 21 | 22 | if !has_ancestor?(element, %w(a)) 23 | link_image element 24 | end 25 | end 26 | 27 | doc 28 | end 29 | 30 | def link_image(element) 31 | link = doc.document.create_element('a', :href => element['src'], :target => '_blank') 32 | link.add_child(element.dup) 33 | element.replace(link) 34 | end 35 | end 36 | end 37 | end -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require 'bundler/setup' 2 | require 'html/pipeline' 3 | require 'test/unit' 4 | 5 | require 'active_support/core_ext/object/try' 6 | 7 | module TestHelpers 8 | # Asserts that `needle` is not a member of `haystack`, where 9 | # `haystack` is any object that responds to `include?`. 10 | def assert_doesnt_include(needle, haystack, message = nil) 11 | error = ' included in ' 12 | message = build_message(message, error, needle.to_s, Array(haystack).map(&:to_s)) 13 | 14 | assert_block message do 15 | !haystack.include?(needle) 16 | end 17 | end 18 | 19 | # Asserts that `needle` is a member of `haystack`, where 20 | # `haystack` is any object that responds to `include?`. 21 | def assert_includes(needle, haystack, message = nil) 22 | error = ' not included in ' 23 | message = build_message(message, error, needle.to_s, Array(haystack).map(&:to_s)) 24 | 25 | assert_block message do 26 | haystack.include?(needle) 27 | end 28 | end 29 | 30 | # Asserts that two html fragments are equivalent. Attribute order 31 | # will be ignored. 
32 | def assert_equal_html(expected, actual) 33 | assert_equal Nokogiri::HTML::DocumentFragment.parse(expected).to_hash, 34 | Nokogiri::HTML::DocumentFragment.parse(actual).to_hash 35 | end 36 | end 37 | 38 | Test::Unit::TestCase.send(:include, TestHelpers) -------------------------------------------------------------------------------- /lib/html/pipeline/body_content.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | # Public: Runs a String of content through an HTML processing pipeline, 4 | # providing easy access to a generated DocumentFragment. 5 | class BodyContent 6 | attr_reader :result 7 | 8 | # Public: Initialize a BodyContent. 9 | # 10 | # body - A String body. 11 | # context - A Hash of context options for the filters. 12 | # pipeline - A HTML::Pipeline object with one or more Filters. 13 | def initialize(body, context, pipeline) 14 | @body = body 15 | @context = context 16 | @pipeline = pipeline 17 | end 18 | 19 | # Public: Gets the memoized result of the body content as it passed through 20 | # the Pipeline. 21 | # 22 | # Returns a Hash, or something similar as defined by @pipeline.result_class. 23 | def result 24 | @result ||= @pipeline.call @body, @context 25 | end 26 | 27 | # Public: Gets the updated body from the Pipeline result. 28 | # 29 | # Returns a String or DocumentFragment. 30 | def output 31 | @output ||= result[:output] 32 | end 33 | 34 | # Public: Parses the output into a DocumentFragment. 35 | # 36 | # Returns a DocumentFragment. 
37 | def document 38 | @document ||= HTML::Pipeline.parse output 39 | end 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /test/html/pipeline/camo_filter_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | 3 | class HTML::Pipeline::CamoFilterTest < Test::Unit::TestCase 4 | CamoFilter = HTML::Pipeline::CamoFilter 5 | 6 | def setup 7 | @asset_proxy_url = 'https//assets.example.org' 8 | @asset_proxy_secret_key = 'ssssh-secret' 9 | @options = { 10 | :asset_proxy => @asset_proxy_url, 11 | :asset_proxy_secret_key => @asset_proxy_secret_key 12 | } 13 | end 14 | 15 | def test_camouflaging_http_image_urls 16 | orig = %(

) 17 | assert_includes 'img src="' + @asset_proxy_url, 18 | CamoFilter.call(orig, @options).to_s 19 | end 20 | 21 | def test_rewrites_dotcom_image_urls 22 | orig = %(

) 23 | assert_equal "

", 24 | CamoFilter.call(orig, @options).to_s 25 | end 26 | 27 | def test_not_camouflaging_https_image_urls 28 | orig = %(

) 29 | assert_doesnt_include 'img src="' + @asset_proxy_url, 30 | CamoFilter.call(orig, @options).to_s 31 | end 32 | 33 | def test_handling_images_with_no_src_attribute 34 | orig = %(

) 35 | assert_nothing_raised do 36 | CamoFilter.call(orig, @options).to_s 37 | end 38 | end 39 | 40 | def test_required_context_validation 41 | exception = assert_raise(ArgumentError) { 42 | CamoFilter.call("", {}) 43 | } 44 | assert_match /:asset_proxy[^_]/, exception.message 45 | assert_match /:asset_proxy_secret_key/, exception.message 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /test/html/pipeline/toc_filter_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | 3 | class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase 4 | TocFilter = HTML::Pipeline::TableOfContentsFilter 5 | 6 | def test_anchors_are_added_properly 7 | orig = %(

Ice cube

Will swarm on any motherfucker in a blue uniform

) 8 | assert_includes '
Straight Outta Compton 23 |

Dopeman

24 |

Express Yourself

25 |

Dopeman

) 26 | 27 | result = TocFilter.call(orig).to_s 28 | 29 | assert_includes '"dopeman"', result 30 | assert_includes '"dopeman-1"', result 31 | end 32 | 33 | def test_all_header_tags_are_found_when_adding_anchors 34 | orig = %(

"Funky President" by James Brown

35 |

"It's My Thing" by Marva Whitney

36 |

"Boogie Back" by Roy Ayers

37 |

"Feel Good" by Fancy

38 |
"Funky Drummer" by James Brown
39 |
"Ruthless Villain" by Eazy-E
40 | "Be Thankful for What You Got" by William DeVaughn) 41 | 42 | doc = TocFilter.call(orig) 43 | assert_equal 6, doc.search('a').size 44 | end 45 | end 46 | 47 | 48 | -------------------------------------------------------------------------------- /test/html/pipeline/sanitization_filter_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | 3 | class HTML::Pipeline::SanitizationFilterTest < Test::Unit::TestCase 4 | SanitizationFilter = HTML::Pipeline::SanitizationFilter 5 | 6 | def test_removing_script_tags 7 | orig = %(

) 8 | html = SanitizationFilter.call(orig).to_s 9 | assert_no_match /script/, html 10 | end 11 | 12 | def test_removing_style_tags 13 | orig = %(

) 14 | html = SanitizationFilter.call(orig).to_s 15 | assert_no_match /style/, html 16 | end 17 | 18 | def test_removing_style_attributes 19 | orig = %(

YO DAWG

) 20 | html = SanitizationFilter.call(orig).to_s 21 | assert_no_match /font-size/, html 22 | assert_no_match /style/, html 23 | end 24 | 25 | def test_removing_script_event_handler_attributes 26 | orig = %(
YO DAWG) 27 | html = SanitizationFilter.call(orig).to_s 28 | assert_no_match /javscript/, html 29 | assert_no_match /onclick/, html 30 | end 31 | 32 | def test_sanitizes_li_elements_not_contained_in_ul_or_ol 33 | stuff = "a\n
  • b
  • \nc" 34 | html = SanitizationFilter.call(stuff).to_s 35 | assert_equal "a\nb\nc", html 36 | end 37 | 38 | def test_does_not_sanitize_li_elements_contained_in_ul_or_ol 39 | stuff = "a\n\nc" 40 | assert_equal stuff, SanitizationFilter.call(stuff).to_s 41 | end 42 | 43 | def test_github_specific_protocols_are_not_removed 44 | stuff = 'Spill this yo and so on' 45 | assert_equal stuff, SanitizationFilter.call(stuff).to_s 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/html/pipeline/emoji_filter.rb: -------------------------------------------------------------------------------- 1 | require 'emoji' 2 | 3 | module HTML 4 | class Pipeline 5 | # HTML filter that replaces :emoji: with images. 6 | # 7 | # Context: 8 | # :asset_root (required) - base url to link to emoji sprite 9 | class EmojiFilter < Filter 10 | # Build a regexp that matches all valid :emoji: names. 11 | EmojiPattern = /:(#{Emoji.names.map { |name| Regexp.escape(name) }.join('|')}):/ 12 | 13 | def call 14 | doc.search('text()').each do |node| 15 | content = node.to_html 16 | next if !content.include?(':') 17 | next if has_ancestor?(node, %w(pre code)) 18 | html = emoji_image_filter(content) 19 | next if html == content 20 | node.replace(html) 21 | end 22 | doc 23 | end 24 | 25 | # Implementation of validate hook. 26 | # Errors should raise exceptions or use an existing validator. 27 | def validate 28 | needs :asset_root 29 | end 30 | 31 | # Replace :emoji: with corresponding images. 32 | # 33 | # text - String text to replace :emoji: in. 34 | # 35 | # Returns a String with :emoji: replaced with images. 36 | def emoji_image_filter(text) 37 | return text unless text.include?(':') 38 | 39 | text.gsub EmojiPattern do |match| 40 | name = $1 41 | ":#{name}:" 42 | end 43 | end 44 | 45 | # The base url to link emoji sprites 46 | # 47 | # Raises ArgumentError if context option has not been provided. 48 | # Returns the context's asset_root. 
49 | def asset_root 50 | context[:asset_root] 51 | end 52 | end 53 | end 54 | end -------------------------------------------------------------------------------- /test/html/pipeline/image_max_width_filter_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | 3 | class HTML::Pipeline::ImageMaxWidthFilterTest < Test::Unit::TestCase 4 | def filter(html) 5 | HTML::Pipeline::ImageMaxWidthFilter.call(html) 6 | end 7 | 8 | def test_rewrites_image_style_tags 9 | body = "

    Screenshot:

    " 10 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 11 | 12 | res = filter(doc) 13 | assert_equal_html %q(

    Screenshot:

    ), 14 | res.to_html 15 | end 16 | 17 | def test_leaves_existing_image_style_tags_alone 18 | body = "

    " 19 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 20 | 21 | res = filter(doc) 22 | assert_equal_html '

    ', 23 | res.to_html 24 | end 25 | 26 | def test_links_to_image 27 | body = "

    Screenshot:

    " 28 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 29 | 30 | res = filter(doc) 31 | assert_equal_html '

    Screenshot:

    ', 32 | res.to_html 33 | end 34 | 35 | def test_doesnt_link_to_image_when_already_linked 36 | body = "

    Screenshot:

    " 37 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 38 | 39 | res = filter(doc) 40 | assert_equal_html %q(

    Screenshot:

    ), 41 | res.to_html 42 | end 43 | 44 | def test_doesnt_screw_up_inlined_images 45 | body = "

    Screenshot , yes, this is a screenshot indeed.

    " 46 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 47 | 48 | assert_equal_html %q(

    Screenshot , yes, this is a screenshot indeed.

    ), filter(doc).to_html 49 | end 50 | end 51 | -------------------------------------------------------------------------------- /lib/html/pipeline/camo_filter.rb: -------------------------------------------------------------------------------- 1 | require 'openssl' 2 | 3 | module HTML 4 | class Pipeline 5 | # HTML Filter for replacing http image URLs with camo versions. See: 6 | # 7 | # https://github.com/atmos/camo 8 | # 9 | # All images provided in user content should be run through this 10 | # filter so that http image sources do not cause mixed-content warnings 11 | # in browser clients. 12 | # 13 | # Context options: 14 | # :asset_proxy (required) - Base URL for constructed asset proxy URLs. 15 | # :asset_proxy_secret_key (required) - The shared secret used to encode URLs. 16 | # 17 | # This filter does not write additional information to the context. 18 | class CamoFilter < Filter 19 | # Hijacks images in the markup provided, replacing them with URLs that 20 | # go through the github asset proxy. 21 | def call 22 | doc.search("img").each do |element| 23 | next if element['src'].nil? 24 | src = element['src'].strip 25 | src = src.sub(%r!^http://github.com!, 'https://github.com') 26 | next if context[:disable_asset_proxy] 27 | 28 | if src =~ /^http:/ || src =~ /^https:\/\/img.skitch.com\// 29 | element['src'] = asset_proxy_url(src) 30 | else 31 | element['src'] = src 32 | end 33 | end 34 | doc 35 | end 36 | 37 | # Implementation of validate hook. 38 | # Errors should raise exceptions or use an existing validator. 39 | def validate 40 | needs :asset_proxy, :asset_proxy_secret_key 41 | end 42 | 43 | # The camouflaged URL for a given image URL. 44 | def asset_proxy_url(url) 45 | "#{asset_proxy_host}/#{asset_url_hash(url)}/#{hexencode(url)}" 46 | end 47 | 48 | # Private: calculate the HMAC digest for a image source URL. 
49 | def asset_url_hash(url) 50 | digest = OpenSSL::Digest::Digest.new('sha1') 51 | OpenSSL::HMAC.hexdigest(digest, asset_proxy_secret_key, url) 52 | end 53 | 54 | # Private: the hostname to use for generated asset proxied URLs. 55 | def asset_proxy_host 56 | context[:asset_proxy] 57 | end 58 | 59 | def asset_proxy_secret_key 60 | context[:asset_proxy_secret_key] 61 | end 62 | 63 | # Private: helper to hexencode a string. Each byte ends up encoded into 64 | # two characters, zero padded value in the range [0-9a-f]. 65 | def hexencode(str) 66 | str.to_enum(:each_byte).map { |byte| "%02x" % byte }.join 67 | end 68 | end 69 | end 70 | end -------------------------------------------------------------------------------- /lib/html/pipeline/email_reply_filter.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | # HTML Filter that converts email reply text into an HTML DocumentFragment. 4 | # It must be used as the first filter in a pipeline. 5 | # 6 | # Context options: 7 | # None 8 | # 9 | # This filter does not write any additional information to the context hash. 10 | class EmailReplyFilter < TextFilter 11 | include EscapeUtils 12 | 13 | EMAIL_HIDDEN_HEADER = %(