├── lib
    └── html
    │   ├── pipeline
    │       ├── version.rb
    │       ├── text_filter.rb
    │       ├── plain_text_input_filter.rb
    │       ├── image_filter.rb
    │       ├── https_filter.rb
    │       ├── textile_filter.rb
    │       ├── autolink_filter.rb
    │       ├── markdown_filter.rb
    │       ├── image_max_width_filter.rb
    │       ├── syntax_highlight_filter.rb
    │       ├── body_content.rb
    │       ├── absolute_source_filter.rb
    │       ├── toc_filter.rb
    │       ├── email_reply_filter.rb
    │       ├── camo_filter.rb
    │       ├── emoji_filter.rb
    │       ├── @mention_filter.rb
    │       ├── filter.rb
    │       └── sanitization_filter.rb
    │   └── pipeline.rb
├── script
    ├── package
    ├── release
    └── changelog
├── .gitignore
├── Rakefile
├── .travis.yml
├── test
    ├── helpers
    │   └── mocked_instrumentation_service.rb
    ├── test_helper.rb
    └── html
    │   ├── pipeline
    │       ├── syntax_highlight_filter_test.rb
    │       ├── plain_text_input_filter_test.rb
    │       ├── image_filter_test.rb
    │       ├── autolink_filter_test.rb
    │       ├── https_filter_test.rb
    │       ├── absolute_source_filter_test.rb
    │       ├── image_max_width_filter_test.rb
    │       ├── emoji_filter_test.rb
    │       ├── camo_filter_test.rb
    │       ├── markdown_filter_test.rb
    │       ├── toc_filter_test.rb
    │       ├── sanitization_filter_test.rb
    │       └── mention_filter_test.rb
    │   └── pipeline_test.rb
├── Gemfile
├── LICENSE
├── html-pipeline.gemspec
├── CONTRIBUTING.md
├── bin
    └── html-pipeline
├── CHANGELOG.md
└── README.md


/lib/html/pipeline/version.rb:
--------------------------------------------------------------------------------
1 | module HTML
2 |   class Pipeline
3 |     VERSION = "2.2.2" # Gem version string; html-pipeline.gemspec assigns it to gem.version.
4 |   end
5 | end
6 | 


--------------------------------------------------------------------------------
/script/package:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Usage: script/package
3 | # Builds a gem from the gemspec and moves the resulting .gem file into the pkg directory.
4 | 
5 | mkdir -p pkg
6 | gem build *.gemspec
7 | mv *.gem pkg
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | .bundle
 4 | .config
 5 | .yardoc
 6 | Gemfile.lock
 7 | InstalledFiles
 8 | _yardoc
 9 | coverage
10 | doc/
11 | lib/bundler/man
12 | pkg
13 | rdoc
14 | spec/reports
15 | test/tmp
16 | test/version_tmp
17 | tmp
18 | exec/*
19 | vendor/gems


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env rake
 2 | require "bundler/gem_tasks"
 3 | require 'rake/testtask'
 4 | 
 5 | Rake::TestTask.new do |t|
 6 |   t.libs << "test"
 7 |   t.test_files = FileList['test/**/*_test.rb']
 8 |   t.verbose = true
 9 | end
10 | 
11 | task :default => :test


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: ruby
 2 | sudo: false
 3 | 
 4 | addons:
 5 |   apt:
 6 |     packages: # libicu-dev is an APT package, not a package source
 7 |       - libicu-dev
 8 | 
 9 | script: "bundle exec rake"
10 | 
11 | rvm:
12 |   - 2.0
13 |   - 2.1
14 |   - 2.2
15 |   - ruby-head
16 | 
17 | matrix:
18 |   fast_finish: true
19 |   allow_failures:
20 |     - rvm: ruby-head
21 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/text_filter.rb:
--------------------------------------------------------------------------------
 1 | module HTML
 2 |   class Pipeline
 3 |     class TextFilter < Filter
 4 |       attr_reader :text
 5 | 
 6 |       def initialize(text, context = nil, result = nil)
 7 |         raise TypeError, "text cannot be HTML" if text.is_a?(DocumentFragment)
 8 |         # Ensure that this is always a string
 9 |         @text = text.respond_to?(:to_str) ? text.to_str : text.to_s
10 |         super nil, context, result
11 |       end
12 |     end
13 |   end
14 | end


--------------------------------------------------------------------------------
/script/release:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Usage: script/release
 3 | # Build the package, tag a commit, push it to origin, and then release the
 4 | # package publicly.
 5 | 
 6 | set -e
 7 | 
 8 | version="$(script/package | grep Version: | awk '{print $2}')" # version as reported on the "Version:" line of the build output
 9 | [ -n "$version" ] || exit 1 # stop when no version could be extracted
10 | 
11 | echo "$version"
12 | git commit --allow-empty -a -m "Release $version"
13 | git tag "v$version"
14 | git push origin
15 | git push origin "v$version"
16 | gem push pkg/*-"${version}".gem # quoted so the version is never word-split or globbed
17 | 


--------------------------------------------------------------------------------
/test/helpers/mocked_instrumentation_service.rb:
--------------------------------------------------------------------------------
 1 | class MockedInstrumentationService
 2 |   attr_reader :events
 3 |   def initialize(event = nil, events = [])
 4 |     @events = events
 5 |     subscribe event
 6 |   end
 7 |   def instrument(event, payload = nil)
 8 |     payload ||= {}
 9 |     res = yield payload
10 |     events << [event, payload, res] if @subscribe == event
11 |     res
12 |   end
13 |   def subscribe(event)
14 |     @subscribe = event
15 |     @events
16 |   end
17 | end
18 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/plain_text_input_filter.rb:
--------------------------------------------------------------------------------
 1 | begin
 2 |   require "escape_utils"
 3 | rescue LoadError => _
 4 |   abort "Missing dependency 'escape_utils' for PlainTextInputFilter. See README.md for details."
 5 | end
 6 | 
 7 | module HTML
 8 |   class Pipeline
 9 |     # Simple filter for plain text input. HTML escapes the text input and wraps it
10 |     # in a div.
11 |     class PlainTextInputFilter < TextFilter
12 |       def call
13 |         "<div>#{EscapeUtils.escape_html(@text, false)}</div>"
14 |       end
15 |     end
16 |   end
17 | end
18 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/image_filter.rb:
--------------------------------------------------------------------------------
 1 | module HTML
 2 |   class Pipeline
 3 |     # HTML Filter that converts image's url into <img> tag.
 4 |     # For example, it will convert
 5 |     #   http://example.com/test.jpg
 6 |     # into
 7 |     #   <img src="http://example.com/test.jpg" alt=""/>.
 8 | 
 9 |     class ImageFilter < TextFilter
10 |       def call
11 |         @text.gsub(/(https|http)?:\/\/.+\.(jpg|jpeg|bmp|gif|png)(\?\S+)?/i) do |match|
12 |         %|<img src="#{match}" alt=""/>|
13 |         end
14 |       end
15 |     end
16 |   end
17 | end
18 | 


--------------------------------------------------------------------------------
/test/test_helper.rb:
--------------------------------------------------------------------------------
 1 | require 'bundler/setup'
 2 | require 'html/pipeline'
 3 | require 'minitest/autorun'
 4 | 
 5 | require 'active_support/core_ext/string'
 6 | 
 7 | module TestHelpers
 8 |   # Asserts that two html fragments are equivalent. Attribute order
 9 |   # will be ignored.
10 |   def assert_equal_html(expected, actual)
11 |     assert_equal Nokogiri::HTML::DocumentFragment.parse(expected).to_hash,
12 |                  Nokogiri::HTML::DocumentFragment.parse(actual).to_hash
13 |   end
14 | end
15 | 
16 | Minitest::Test.send(:include, TestHelpers)
17 | 


--------------------------------------------------------------------------------
/test/html/pipeline/syntax_highlight_filter_test.rb:
--------------------------------------------------------------------------------
 1 | require "test_helper"
 2 | 
 3 | SyntaxHighlightFilter = HTML::Pipeline::SyntaxHighlightFilter
 4 | 
 5 | class HTML::Pipeline::SyntaxHighlightFilterTest < Minitest::Test
 6 |   def test_highlight_default
 7 |     filter = SyntaxHighlightFilter.new \
 8 |       "<pre>hello</pre>", :highlight => "coffeescript"
 9 | 
10 |     doc = filter.call
11 |     assert !doc.css(".highlight-coffeescript").empty?
12 |   end
13 | 
14 |   def test_highlight_default_will_not_override
15 |     filter = SyntaxHighlightFilter.new \
16 |       "<pre lang='c'>hello</pre>", :highlight => "coffeescript"
17 | 
18 |     doc = filter.call
19 |     assert doc.css(".highlight-coffeescript").empty?
20 |     assert !doc.css(".highlight-c").empty?
21 |   end
22 | end
23 | 


--------------------------------------------------------------------------------
/test/html/pipeline/plain_text_input_filter_test.rb:
--------------------------------------------------------------------------------
 1 | require "test_helper"
 2 | 
 3 | class HTML::Pipeline::PlainTextInputFilterTest < Minitest::Test
 4 |   PlainTextInputFilter = HTML::Pipeline::PlainTextInputFilter
 5 | 
 6 |   def test_fails_when_given_a_documentfragment
 7 |     body = "<p>heyo</p>"
 8 |     doc  = Nokogiri::HTML::DocumentFragment.parse(body)
 9 |     assert_raises(TypeError) { PlainTextInputFilter.call(doc, {}) }
10 |   end
11 | 
12 |   def test_wraps_input_in_a_div_element
13 |     doc = PlainTextInputFilter.call("howdy pahtner", {})
14 |     assert_equal "<div>howdy pahtner</div>", doc.to_s
15 |   end
16 | 
17 |   def test_html_escapes_plain_text_input
18 |     doc = PlainTextInputFilter.call("See: <http://example.org>", {})
19 |     assert_equal "<div>See: &lt;http://example.org&gt;</div>",
20 |       doc.to_s
21 |   end
22 | end
23 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/https_filter.rb:
--------------------------------------------------------------------------------
 1 | module HTML
 2 |   class Pipeline
 3 |     # HTML Filter for replacing http references to :http_url with https versions.
 4 |     # Subdomain references are not rewritten.
 5 |     #
 6 |     # Context options:
 7 |     #   :http_url - The HTTP url to force HTTPS. Falls back to :base_url
 8 |     class HttpsFilter < Filter
 9 |       def call
10 |         doc.css(%Q(a[href^="#{http_url}"])).each do |element|
11 |           element['href'] = element['href'].sub(/\Ahttp:/, 'https:') # \A: only the leading scheme, never a later line
12 |         end
13 |         doc
14 |       end
15 | 
16 |       # HTTP url to replace. Falls back to :base_url
17 |       def http_url
18 |         context[:http_url] || context[:base_url]
19 |       end
20 | 
21 |       # Raise error if :http_url undefined
22 |       def validate
23 |         needs :http_url unless http_url
24 |       end
25 |     end
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/textile_filter.rb:
--------------------------------------------------------------------------------
 1 | begin
 2 |   require "redcloth"
 3 | rescue LoadError => _
 4 |   abort "Missing dependency 'RedCloth' for TextileFilter. See README.md for details."
 5 | end
 6 | 
 7 | module HTML
 8 |   class Pipeline
 9 |     # HTML Filter that converts Textile text into HTML. This is different from
10 |     # most filters in that it can take a non-HTML as input. It must be used as
11 |     # the first filter in a pipeline.
12 |     #
13 |     # Context options:
14 |     #   (none read here) NOTE(review): :autolink was documented, but #call never reads the context - confirm.
15 |     #
16 |     # This filter does not write any additional information to the context hash.
17 |     #
18 |     # NOTE This filter is provided for really old comments only. It probably
19 |     # shouldn't be used for anything new.
20 |     class TextileFilter < TextFilter
21 |       # Convert Textile to HTML; returns whatever RedCloth#to_html produces for @text.
22 |       def call
23 |         RedCloth.new(@text).to_html
24 |       end
25 |     end
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | source "https://rubygems.org"
 2 | 
 3 | # Specify your gem's dependencies in html-pipeline.gemspec
 4 | gemspec
 5 | 
 6 | group :development do
 7 |   gem "bundler"
 8 |   gem "rake"
 9 | end
10 | 
11 | group :test do
12 |   gem "minitest",           "~> 5.3"
13 |   gem "rinku",              "~> 1.7",   :require => false
14 |   gem "gemoji",             "~> 2.0",   :require => false
15 |   gem "RedCloth",           "~> 4.2.9", :require => false
16 |   gem "github-markdown",    "~> 0.5",   :require => false
17 |   gem "email_reply_parser", "~> 0.5",   :require => false
18 |   gem "sanitize",           "~> 2.0",   :require => false
19 | 
20 |   if RUBY_VERSION < "2.1.0"
21 |     gem "escape_utils",     "~> 0.3",   :require => false
22 |     gem "github-linguist",  "~> 2.6.2", :require => false
23 |   else
24 |     gem "escape_utils",     "~> 1.0",   :require => false
25 |     gem "github-linguist",  "~> 2.10",  :require => false
26 |   end
27 | 
28 |   if RUBY_VERSION < "1.9.3"
29 |     gem "activesupport", ">= 2", "< 4"
30 |   end
31 | end
32 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/autolink_filter.rb:
--------------------------------------------------------------------------------
 1 | begin
 2 |   require "rinku"
 3 | rescue LoadError => _
 4 |   abort "Missing dependency 'rinku' for AutolinkFilter. See README.md for details."
 5 | end
 6 | 
 7 | module HTML
 8 |   class Pipeline
 9 |     # HTML Filter for auto_linking urls in HTML.
10 |     #
11 |     # Context options:
12 |     #   :autolink  - boolean whether to autolink urls
13 |     #   :link_attr - HTML attributes for the link that will be generated
14 |     #   :skip_tags - HTML tags inside which autolinking will be skipped.
15 |     #                See Rinku.skip_tags
16 |     #   :flags     - additional Rinku flags. See https://github.com/vmg/rinku
17 |     #
18 |     # This filter does not write additional information to the context.
19 |     class AutolinkFilter < Filter
20 |       def call
21 |         return html if context[:autolink] == false
22 | 
23 |         skip_tags = context[:skip_tags]
24 |         flags = 0
25 |         flags |= context[:flags] if context[:flags]
26 | 
27 |         Rinku.auto_link(html, :urls, context[:link_attr], skip_tags, flags)
28 |       end
29 |     end
30 |   end
31 | end
32 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2012 GitHub Inc. and Jerry Cheung
 2 | 
 3 | MIT License
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/test/html/pipeline/image_filter_test.rb:
--------------------------------------------------------------------------------
 1 | require "test_helper"
 2 | 
 3 | ImageFilter = HTML::Pipeline::ImageFilter
 4 | 
 5 | class HTML::Pipeline::ImageFilterTest < Minitest::Test
 6 |   def filter(html)
 7 |     ImageFilter.to_html(html)
 8 |   end
 9 | 
10 |   def test_jpg
11 |     assert_equal %(<img src="http://example.com/test.jpg" alt=""/>),
12 |     filter(%(http://example.com/test.jpg))
13 |   end
14 | 
15 |   def test_jpeg
16 |     assert_equal %(<img src="http://example.com/test.jpeg" alt=""/>),
17 |     filter(%(http://example.com/test.jpeg))
18 |   end
19 | 
20 |   def test_bmp
21 |     assert_equal %(<img src="http://example.com/test.bmp" alt=""/>),
22 |     filter(%(http://example.com/test.bmp))
23 |   end
24 | 
25 |   def test_gif
26 |     assert_equal %(<img src="http://example.com/test.gif" alt=""/>),
27 |     filter(%(http://example.com/test.gif))
28 |   end
29 | 
30 |   def test_png
31 |     assert_equal %(<img src="http://example.com/test.png" alt=""/>),
32 |     filter(%(http://example.com/test.png))
33 |   end
34 | 
35 |   def test_https_url
36 |     assert_equal %(<img src="https://example.com/test.png" alt=""/>),
37 |     filter(%(https://example.com/test.png))
38 |   end
39 | end
40 | 


--------------------------------------------------------------------------------
/html-pipeline.gemspec:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | require File.expand_path("../lib/html/pipeline/version", __FILE__)
 3 | 
 4 | Gem::Specification.new do |gem|
 5 |   gem.name          = "html-pipeline"
 6 |   gem.version       = HTML::Pipeline::VERSION
 7 |   gem.license       = "MIT"
 8 |   gem.authors       = ["Ryan Tomayko", "Jerry Cheung"]
 9 |   gem.email         = ["ryan@github.com", "jerry@github.com"]
10 |   gem.description   = %q{GitHub HTML processing filters and utilities}
11 |   gem.summary       = %q{Helpers for processing content through a chain of filters}
12 |   gem.homepage      = "https://github.com/jch/html-pipeline"
13 | 
14 |   gem.files         = `git ls-files`.split $/
15 |   gem.test_files    = gem.files.grep(%r{^test})
16 |   gem.require_paths = ["lib"]
17 | 
18 |   gem.add_dependency "nokogiri", ">= 1.4"
19 |   gem.add_dependency "activesupport", [">= 2", "< 5"]
20 | 
21 |   gem.post_install_message = <<msg
22 | -------------------------------------------------
23 | Thank you for installing html-pipeline!
24 | You must bundle Filter gem dependencies.
25 | See html-pipeline README.md for more details.
26 | https://github.com/jch/html-pipeline#dependencies
27 | -------------------------------------------------
28 | msg
29 | end
30 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/markdown_filter.rb:
--------------------------------------------------------------------------------
 1 | begin
 2 |   require "github/markdown"
 3 | rescue LoadError => _
 4 |   abort "Missing dependency 'github-markdown' for MarkdownFilter. See README.md for details."
 5 | end
 6 | 
 7 | module HTML
 8 |   class Pipeline
 9 |     # HTML Filter that converts Markdown text into HTML and converts into a
10 |     # DocumentFragment. This is different from most filters in that it can take a
11 |     # non-HTML as input. It must be used as the first filter in a pipeline.
12 |     #
13 |     # Context options:
14 |     #   :gfm      => false    Disable GFM line-end processing
15 |     #
16 |     # This filter does not write any additional information to the context hash.
17 |     class MarkdownFilter < TextFilter
18 |       def initialize(text, context = nil, result = nil)
19 |         super text, context, result
20 |         @text = @text.gsub "\r", ''
21 |       end
22 | 
23 |       # Convert Markdown to HTML using the best available implementation
24 |       # and convert into a DocumentFragment.
25 |       def call
26 |         mode = (context[:gfm] != false) ? :gfm : :markdown
27 |         html = GitHub::Markdown.to_html(@text, mode)
28 |         html.rstrip!
29 |         html
30 |       end
31 |     end
32 |   end
33 | end
34 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/image_max_width_filter.rb:
--------------------------------------------------------------------------------
 1 | module HTML
 2 |   class Pipeline
 3 |     # This filter rewrites image tags with a max-width inline style and also wraps
 4 |     # the image in an <a> tag that causes the full size image to be opened in a
 5 |     # new tab.
 6 |     #
 7 |     # The max-width inline styles are especially useful in HTML email which
 8 |     # don't use a global stylesheets.
 9 |     class ImageMaxWidthFilter < Filter
10 |       def call
11 |         doc.search('img').each do |element|
12 |           # Skip if there's already a style attribute. Not sure how this
13 |           # would happen but we can reconsider it in the future.
14 |           next if element['style']
15 | 
16 |           # Bail out if src doesn't look like a valid http url. trying to avoid weird
17 |           # js injection via javascript: urls.
18 |           next if element['src'].to_s.strip =~ /\Ajavascript/i
19 | 
20 |           element['style'] = "max-width:100%;"
21 | 
22 |           if !has_ancestor?(element, %w(a))
23 |             link_image element
24 |           end
25 |         end
26 | 
27 |         doc
28 |       end
29 | 
30 |       def link_image(element)
31 |         link = doc.document.create_element('a', :href => element['src'], :target => '_blank')
32 |         link.add_child(element.dup)
33 |         element.replace(link)
34 |       end
35 |     end
36 |   end
37 | end


--------------------------------------------------------------------------------
/lib/html/pipeline/syntax_highlight_filter.rb:
--------------------------------------------------------------------------------
 1 | begin
 2 |   require "linguist"
 3 | rescue LoadError => _
 4 |   abort "Missing dependency 'github-linguist' for SyntaxHighlightFilter. See README.md for details."
 5 | end
 6 | 
 7 | module HTML
 8 |   class Pipeline
 9 |     # HTML Filter that syntax highlights code blocks wrapped
10 |     # in <pre lang="...">.
11 |     class SyntaxHighlightFilter < Filter
12 |       def call
13 |         doc.search('pre').each do |node|
14 |           default = context[:highlight] && context[:highlight].to_s
15 |           next unless lang = node['lang'] || default
16 |           next unless lexer = lexer_for(lang)
17 |           text = node.inner_text
18 | 
19 |           html = highlight_with_timeout_handling(lexer, text)
20 |           next if html.nil?
21 | 
22 |           if (node = node.replace(html).first)
23 |             klass = node["class"]
24 |             klass = [klass, "highlight-#{lang}"].compact.join " "
25 | 
26 |             node["class"] = klass
27 |           end
28 |         end
29 |         doc
30 |       end
31 | 
32 |       def highlight_with_timeout_handling(lexer, text)
33 |         lexer.highlight(text)
34 |       rescue Timeout::Error => boom
35 |         nil
36 |       end
37 | 
38 |       def lexer_for(lang)
39 |         (Linguist::Language[lang] && Linguist::Language[lang].lexer) || Pygments::Lexer[lang]
40 |       end
41 |     end
42 |   end
43 | end
44 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/body_content.rb:
--------------------------------------------------------------------------------
 1 | module HTML
 2 |   class Pipeline
 3 |     # Public: Runs a String of content through an HTML processing pipeline,
 4 |     # providing easy access to a generated DocumentFragment.
 5 |     class BodyContent
 6 |       attr_reader :result
 7 | 
 8 |       # Public: Initialize a BodyContent.
 9 |       #
10 |       # body     - A String body.
11 |       # context  - A Hash of context options for the filters.
12 |       # pipeline - A HTML::Pipeline object with one or more Filters.
13 |       def initialize(body, context, pipeline)
14 |         @body = body
15 |         @context = context
16 |         @pipeline = pipeline
17 |       end
18 | 
19 |       # Public: Gets the memoized result of the body content as it passed through
20 |       # the Pipeline.
21 |       #
22 |       # Returns a Hash, or something similar as defined by @pipeline.result_class.
23 |       def result
24 |         @result ||= @pipeline.call @body, @context
25 |       end
26 | 
27 |       # Public: Gets the updated body from the Pipeline result.
28 |       #
29 |       # Returns a String or DocumentFragment.
30 |       def output
31 |         @output ||= result[:output]
32 |       end
33 | 
34 |       # Public: Parses the output into a DocumentFragment.
35 |       #
36 |       # Returns a DocumentFragment.
37 |       def document
38 |         @document ||= HTML::Pipeline.parse output
39 |       end
40 |     end
41 |   end
42 | end
43 | 


--------------------------------------------------------------------------------
/test/html/pipeline/autolink_filter_test.rb:
--------------------------------------------------------------------------------
 1 | require "test_helper"
 2 | 
 3 | AutolinkFilter = HTML::Pipeline::AutolinkFilter
 4 | 
 5 | class HTML::Pipeline::AutolinkFilterTest < Minitest::Test
 6 |   def test_uses_rinku_for_autolinking
 7 |     # just try to parse a complicated piece of HTML
 8 |     # that Rails auto_link cannot handle
 9 |     assert_equal '<p>"<a href="http://www.github.com">http://www.github.com</a>"</p>',
10 |       AutolinkFilter.to_html('<p>"http://www.github.com"</p>')
11 |   end
12 | 
13 |   def test_autolink_option
14 |     assert_equal '<p>"http://www.github.com"</p>',
15 |       AutolinkFilter.to_html('<p>"http://www.github.com"</p>', :autolink => false)
16 |   end
17 | 
18 |   def test_autolink_link_attr
19 |     assert_equal '<p>"<a href="http://www.github.com" target="_blank">http://www.github.com</a>"</p>',
20 |       AutolinkFilter.to_html('<p>"http://www.github.com"</p>', :link_attr => 'target="_blank"')
21 |   end
22 | 
23 |   def test_autolink_flags
24 |     assert_equal '<p>"<a href="http://github">http://github</a>"</p>',
25 |       AutolinkFilter.to_html('<p>"http://github"</p>', :flags => Rinku::AUTOLINK_SHORT_DOMAINS)
26 |   end
27 | 
28 |   def test_autolink_skip_tags
29 |     assert_equal '<code>"http://github.com"</code>',
30 |       AutolinkFilter.to_html('<code>"http://github.com"</code>')
31 | 
32 |     assert_equal '<code>"<a href="http://github.com">http://github.com</a>"</code>',
33 |       AutolinkFilter.to_html('<code>"http://github.com"</code>', :skip_tags => %w(kbd script))
34 |   end
35 | end
36 | 


--------------------------------------------------------------------------------
/script/changelog:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Usage: script/changelog [-r <repo>] [-b <base>] [-h <head>]
 3 | #
 4 | #  repo: base string of GitHub repository url. e.g. "user_or_org/repository". Defaults to git remote url.
 5 | #  base: git ref to compare from. e.g. "v1.3.1". Defaults to latest git tag.
 6 | #  head: git ref to compare to. Defaults to "HEAD".
 7 | #
 8 | # Generate a changelog preview from pull requests merged between `base` and
 9 | # `head`.
10 | #
11 | # https://github.com/jch/release-scripts/blob/master/changelog
12 | set -e
13 | 
14 | [ $# -eq 0 ] && set -- --help # NOTE(review): --help is not handled by the case below - confirm intent
15 | while [[ $# -gt 1 ]] # numeric test; ">" inside [[ ]] compares strings
16 | do
17 |   key="$1"
18 |   case $key in
19 |     -r|--repo)
20 |     repo="$2"
21 |     shift
22 |     ;;
23 |     -b|--base)
24 |     base="$2"
25 |     shift
26 |     ;;
27 |     -h|--head)
28 |     head="$2"
29 |     shift
30 |     ;;
31 |     *)
32 |     ;;
33 |   esac
34 |   shift
35 | done
36 | 
37 | repo="${repo:-$(git remote -v | grep push | awk '{print $2}' | cut -d'/' -f4- | sed 's/\.git$//')}" # strip only a trailing ".git", so names like foo.github.io survive
38 | base="${base:-$(git tag -l | sort -t. -k 1,1n -k 2,2n -k 3,3n | tail -n 1)}"
39 | head="${head:-HEAD}"
40 | api_url="https://api.github.com"
41 | 
42 | # get merged PR's. Better way is to query the API for these, but this is easier
43 | for pr in $(git log --oneline "$base..$head" | grep "Merge pull request" | awk '{gsub("#",""); print $5}')
44 | do
45 |   # frustrated with trying to pull out the right values, fell back to ruby
46 |   curl -s "$api_url/repos/$repo/pulls/$pr" | ruby -rjson -e 'pr=JSON.parse(STDIN.read); puts "* #{pr[%q(title)]} [##{pr[%q(number)]}](#{pr[%q(html_url)]})"'
47 | done
48 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/absolute_source_filter.rb:
--------------------------------------------------------------------------------
 1 | require 'uri'
 2 | 
 3 | module HTML
 4 |   class Pipeline
 5 | 
 6 |     class AbsoluteSourceFilter < Filter
 7 |       # HTML Filter for replacing relative and root relative image URLs with
 8 |       # fully qualified URLs
 9 |       #
10 |       # This is useful if an image is root relative but should really be going
11 |       # through a cdn, or if the content for the page assumes the host is known
12 |       # i.e. scraped webpages and some RSS feeds.
13 |       #
14 |       # Context options:
15 |       #   :image_base_url - Base URL for image host for root relative src.
16 |       #   :image_subpage_url - For relative src.
17 |       #
18 |       # This filter does not write additional information to the context.
19 |       # This filter would need to be run before CamoFilter.
20 |       def call
21 |         doc.search("img").each do |element| 
22 |           next if element['src'].nil? || element['src'].empty?
23 |           src = element['src'].strip
24 |           unless src.start_with? 'http'
25 |             if src.start_with? '/'
26 |               base = image_base_url
27 |             else
28 |               base = image_subpage_url
29 |             end
30 |             element["src"] = URI.join(base, src).to_s
31 |           end
32 |         end
33 |         doc
34 |       end
35 |       
36 |       # Private: the base url you want to use
37 |       def image_base_url
38 |         context[:image_base_url] or raise "Missing context :image_base_url for #{self.class.name}"
39 |       end
40 | 
41 |       # Private: the relative url you want to use
42 |       def image_subpage_url
43 |         context[:image_subpage_url] or raise "Missing context :image_subpage_url for #{self.class.name}"
44 |       end
45 |     
46 |     end
47 |   end
48 | end


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | Thanks for using and improving `HTML::Pipeline`!
 4 | 
 5 | - [Submitting a New Issue](#submitting-a-new-issue)
 6 | - [Sending a Pull Request](#sending-a-pull-request)
 7 | 
 8 | ## Submitting a New Issue
 9 | 
10 | If there's an idea you'd like to propose, or a design change, feel free to file a new issue. 
11 | 
12 | If you have an implementation question or believe you've found a bug, please provide as many details as possible:
13 | 
14 | - Input document
15 | - Output HTML document
16 | - The exact `HTML::Pipeline` code you are using
17 | - Output of the following from your project
18 | 
19 | ```
20 | ruby -v
21 | bundle exec nokogiri -v
22 | ```
23 | 
24 | ## Sending a Pull Request
25 | 
26 | [Pull requests][pr] are always welcome!
27 | 
28 | Check out [the project's issues list][issues] for ideas on what could be improved.
29 | 
30 | Before sending, please add tests and ensure the test suite passes.
31 | 
32 | ### Running the Tests
33 | 
34 | To run the full suite:
35 | 
36 |   `bundle exec rake`
37 | 
38 | To run a specific test file:
39 | 
40 |   `bundle exec ruby -Itest test/html/pipeline_test.rb`
41 | 
42 | To run a specific test:
43 | 
44 |   `bundle exec ruby -Itest test/html/pipeline/markdown_filter_test.rb -n test_disabling_gfm`  
45 | 
46 | To run the full suite with all [supported rubies][travisyaml] in bash:
47 | 
48 | ```bash
49 | rubies=(ree-1.8.7-2011.03 1.9.2-p290 1.9.3-p429 2.0.0-p247)
50 | for r in ${rubies[*]} 
51 | do 
52 |   rbenv local $r # switch to your version manager of choice
53 |   bundle install 
54 |   bundle exec rake 
55 | done
56 | ```
57 | 
58 | [issues]: https://github.com/jch/html-pipeline/issues
59 | [pr]: https://help.github.com/articles/using-pull-requests
60 | [travisyaml]: https://github.com/jch/html-pipeline/blob/master/.travis.yml
61 | 


--------------------------------------------------------------------------------
/test/html/pipeline/https_filter_test.rb:
--------------------------------------------------------------------------------
 1 | require "test_helper"
 2 | 
 3 | HttpsFilter = HTML::Pipeline::HttpsFilter
 4 | 
 5 | class HTML::Pipeline::AutolinkFilterTest < Minitest::Test
 6 |   def filter(html)
 7 |     HttpsFilter.to_html(html, @options)
 8 |   end
 9 | 
10 |   def setup
11 |     @options = {:base_url => "http://github.com"}
12 |   end
13 | 
14 |   def test_http
15 |     assert_equal %(<a href="https://github.com">github.com</a>),
16 |           filter(%(<a href="http://github.com">github.com</a>))
17 |   end
18 | 
19 |   def test_https
20 |     assert_equal %(<a href="https://github.com">github.com</a>),
21 |           filter(%(<a href="https://github.com">github.com</a>))
22 |   end
23 | 
24 |   def test_subdomain
25 |     assert_equal %(<a href="http://help.github.com">github.com</a>),
26 |           filter(%(<a href="http://help.github.com">github.com</a>))
27 |   end
28 | 
29 |   def test_other
30 |     assert_equal %(<a href="http://github.io">github.io</a>),
31 |           filter(%(<a href="http://github.io">github.io</a>))
32 |   end
33 | 
34 |   def test_uses_http_url_over_base_url
35 |     @options =  {:http_url => "http://github.com", :base_url => "https://github.com"}
36 | 
37 |     assert_equal %(<a href="https://github.com">github.com</a>),
38 |           filter(%(<a href="http://github.com">github.com</a>))
39 |   end
40 | 
41 |   def test_only_http_url
42 |     @options = {:http_url => "http://github.com"}
43 | 
44 |     assert_equal %(<a href="https://github.com">github.com</a>),
45 |           filter(%(<a href="http://github.com">github.com</a>))
46 |   end
47 | 
48 |   def test_validates_http_url
49 |     @options.clear
50 |     exception = assert_raises(ArgumentError) { filter("") }
51 |     assert_match "HTML::Pipeline::HttpsFilter: :http_url", exception.message
52 |   end
53 | end
54 | 


--------------------------------------------------------------------------------
/test/html/pipeline/absolute_source_filter_test.rb:
--------------------------------------------------------------------------------
 1 | require "test_helper"
 2 | 
 3 | class HTML::Pipeline::AbsoluteSourceFilterTest < Minitest::Test
 4 |   AbsoluteSourceFilter = HTML::Pipeline::AbsoluteSourceFilter
 5 | 
 6 |   def setup
 7 |     @image_base_url = 'http://assets.example.com'
 8 |     @image_subpage_url = 'http://blog.example.com/a/post'
 9 |     @options = {
10 |       :image_base_url    => @image_base_url,
11 |       :image_subpage_url => @image_subpage_url
12 |     }
13 |   end
14 | 
15 |   def test_rewrites_root_urls
16 |     orig = %(<p><img src="/img.png"></p>)
17 |     assert_equal "<p><img src=\"#{@image_base_url}/img.png\"></p>",
18 |       AbsoluteSourceFilter.call(orig, @options).to_s
19 |   end
20 | 
21 |   def test_rewrites_relative_urls
22 |     orig = %(<p><img src="post/img.png"></p>)
23 |     assert_equal "<p><img src=\"#{@image_subpage_url}/img.png\"></p>",
24 |       AbsoluteSourceFilter.call(orig, @options).to_s
25 |   end
26 | 
27 |   def test_does_not_rewrite_absolute_urls
28 |     orig = %(<p><img src="http://other.example.com/img.png"></p>)
29 |     result = AbsoluteSourceFilter.call(orig, @options).to_s
30 |     refute_match /@image_base_url/, result
31 |     refute_match /@image_subpage_url/, result
32 |   end
33 | 
34 |   def test_fails_when_context_is_missing
35 |     assert_raises RuntimeError do
36 |       AbsoluteSourceFilter.call("<img src=\"img.png\">", {})
37 |     end
38 |     assert_raises RuntimeError do
39 |       AbsoluteSourceFilter.call("<img src=\"/img.png\">", {})
40 |     end
41 |   end
42 |   
43 |   def test_tells_you_where_context_is_required
44 |     exception = assert_raises(RuntimeError) { 
45 |       AbsoluteSourceFilter.call("<img src=\"img.png\">", {}) 
46 |     }
47 |     assert_match 'HTML::Pipeline::AbsoluteSourceFilter', exception.message
48 | 
49 |     exception = assert_raises(RuntimeError) { 
50 |       AbsoluteSourceFilter.call("<img src=\"/img.png\">", {}) 
51 |     }
52 |     assert_match 'HTML::Pipeline::AbsoluteSourceFilter', exception.message
53 |   end
54 | 
55 | end
56 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/toc_filter.rb:
--------------------------------------------------------------------------------
 1 | module HTML
 2 |   class Pipeline
 3 |     # HTML filter that adds an 'id' attribute to all headers
 4 |     # in a document, so they can be accessed from a table of contents.
 5 |     #
 6 |     # Generates the Table of Contents, with links to each header.
 7 |     #
 8 |     # Examples
 9 |     #
10 |     #  TocPipeline =
11 |     #    HTML::Pipeline.new [
12 |     #      HTML::Pipeline::TableOfContentsFilter
13 |     #    ]
14 |     #  # => #<HTML::Pipeline:0x007fc13c4528d8...>
15 |     #  orig = %(<h1>Ice cube</h1><p>is not for the pop chart</p>)
16 |     #  # => "<h1>Ice cube</h1><p>is not for the pop chart</p>"
17 |     #  result = {}
18 |     #  # => {}
19 |     #  TocPipeline.call(orig, {}, result)
20 |     #  # => {:toc=> ...}
21 |     #  result[:toc]
22 |     #  # => "<ul class=\"section-nav\">\n<li><a href=\"#ice-cube\">...</li><ul>"
23 |     #  result[:output].to_s
24 |     #  # => "<h1>\n<a id=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..."
25 |     class TableOfContentsFilter < Filter
26 |       PUNCTUATION_REGEXP = RUBY_VERSION > "1.9" ? /[^\p{Word}\- ]/u : /[^\w\- ]/
27 | 
28 |       def call
29 |         result[:toc] = ""
30 | 
31 |         headers = Hash.new(0)
32 |         doc.css('h1, h2, h3, h4, h5, h6').each do |node|
33 |           text = node.text
34 |           id = text.downcase
35 |           id.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
36 |           id.gsub!(' ', '-') # replace spaces with dash
37 | 
38 |           uniq = (headers[id] > 0) ? "-#{headers[id]}" : ''
39 |           headers[id] += 1
40 |           if header_content = node.children.first
41 |             result[:toc] << %Q{<li><a href="##{id}#{uniq}">#{text}</a></li>\n}
42 |             header_content.add_previous_sibling(%Q{<a id="#{id}#{uniq}" class="anchor" href="##{id}#{uniq}" aria-hidden="true"><span class="octicon octicon-link"></span></a>})
43 |           end
44 |         end
45 |         result[:toc] = %Q{<ul class="section-nav">\n#{result[:toc]}</ul>} unless result[:toc].empty?
46 |         doc
47 |       end
48 |     end
49 |   end
50 | end
51 | 


--------------------------------------------------------------------------------
/bin/html-pipeline:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'html/pipeline'
 3 | 
 4 | require 'optparse'
 5 | 
 6 | # Accept "help", too
 7 | ARGV.map!{|a| a == "help" ? "--help" : a }
 8 | 
 9 | OptionParser.new do |opts|
10 |   opts.banner = <<-HELP.gsub(/^    /, '')
11 |     Usage: html-pipeline [-h] [-f]
12 |            html-pipeline [FILTER [FILTER [...]]] < file.md
13 |            cat file.md | html-pipeline [FILTER [FILTER [...]]]
14 |   HELP
15 | 
16 |   opts.separator "Options:"
17 | 
18 |   opts.on("-f", "--filters", "List the available filters") do
19 |     filters = HTML::Pipeline.constants.grep(/\w+Filter$/).
20 |       map{|f| f.to_s.gsub(/Filter$/,'') }
21 | 
22 |     # Text filter doesn't work, no call method
23 |     filters -= ["Text"]
24 | 
25 |     abort <<-HELP.gsub(/^      /, '')
26 |       Available filters:
27 |         #{filters.join("\n        ")}
28 |     HELP
29 |   end
30 | end.parse!
31 | 
32 | # Default to a GitHub-ish pipeline
33 | if ARGV.empty?
34 | 
35 |   filters = [
36 |     HTML::Pipeline::MarkdownFilter,
37 |     HTML::Pipeline::SanitizationFilter,
38 |     HTML::Pipeline::ImageMaxWidthFilter,
39 |     HTML::Pipeline::EmojiFilter,
40 |     HTML::Pipeline::AutolinkFilter,
41 |     HTML::Pipeline::TableOfContentsFilter,
42 |   ]
43 | 
44 |   # Add syntax highlighting if linguist is present
45 |   begin
46 |     require 'linguist'
47 |     filters << HTML::Pipeline::SyntaxHighlightFilter
48 |   rescue LoadError
49 |   end
50 | 
51 | else
52 | 
53 |   def filter_named(name)
54 |     case name
55 |     when "Text"
56 |       raise NameError # Text filter doesn't work, no call method
57 |     end
58 | 
59 |     HTML::Pipeline.const_get("#{name}Filter")
60 |   rescue NameError => e
61 |     abort "Unknown filter '#{name}'. List filters with the -f option."
62 |   end
63 | 
64 |   filters = []
65 |   until ARGV.empty?
66 |     name = ARGV.shift
67 |     filters << filter_named(name)
68 |   end
69 | 
70 | end
71 | 
72 | context = {
73 |   :asset_root => "/assets",
74 |   :base_url   => "/",
75 |   :gfm        => true
76 | }
77 | 
78 | puts HTML::Pipeline.new(filters, context).call(ARGF.read)[:output]
79 | 


--------------------------------------------------------------------------------
/test/html/pipeline/image_max_width_filter_test.rb:
--------------------------------------------------------------------------------
 1 | require "test_helper"
 2 | 
 3 | class HTML::Pipeline::ImageMaxWidthFilterTest < Minitest::Test
 4 |   def filter(html)
 5 |     HTML::Pipeline::ImageMaxWidthFilter.call(html)
 6 |   end
 7 | 
 8 |   def test_rewrites_image_style_tags
 9 |     body = "<p>Screenshot: <img src='screenshot.png'></p>"
10 |     doc  = Nokogiri::HTML::DocumentFragment.parse(body)
11 | 
12 |     res = filter(doc)
13 |     assert_equal_html %q(<p>Screenshot: <a target="_blank" href="screenshot.png"><img src="screenshot.png" style="max-width:100%;"></a></p>),
14 |       res.to_html
15 |   end
16 | 
17 |   def test_leaves_existing_image_style_tags_alone
18 |     body = "<p><img src='screenshot.png' style='width:100px;'></p>"
19 |     doc  = Nokogiri::HTML::DocumentFragment.parse(body)
20 | 
21 |     res = filter(doc)
22 |     assert_equal_html '<p><img src="screenshot.png" style="width:100px;"></p>',
23 |       res.to_html
24 |   end
25 | 
26 |   def test_links_to_image
27 |     body = "<p>Screenshot: <img src='screenshot.png'></p>"
28 |     doc  = Nokogiri::HTML::DocumentFragment.parse(body)
29 | 
30 |     res = filter(doc)
31 |     assert_equal_html '<p>Screenshot: <a target="_blank" href="screenshot.png"><img src="screenshot.png" style="max-width:100%;"></a></p>',
32 |       res.to_html
33 |   end
34 | 
35 |   def test_doesnt_link_to_image_when_already_linked
36 |     body = "<p>Screenshot: <a href='blah.png'><img src='screenshot.png'></a></p>"
37 |     doc  = Nokogiri::HTML::DocumentFragment.parse(body)
38 | 
39 |     res = filter(doc)
40 |     assert_equal_html %q(<p>Screenshot: <a href="blah.png"><img src="screenshot.png" style="max-width:100%;"></a></p>),
41 |       res.to_html
42 |   end
43 | 
44 |   def test_doesnt_screw_up_inlined_images
45 |     body = "<p>Screenshot <img src='screenshot.png'>, yes, this is a <b>screenshot</b> indeed.</p>"
46 |     doc  = Nokogiri::HTML::DocumentFragment.parse(body)
47 | 
48 |     assert_equal_html %q(<p>Screenshot <a target="_blank" href="screenshot.png"><img src="screenshot.png" style="max-width:100%;"></a>, yes, this is a <b>screenshot</b> indeed.</p>), filter(doc).to_html
49 |   end
50 | end
51 | 


--------------------------------------------------------------------------------
/test/html/pipeline/emoji_filter_test.rb:
--------------------------------------------------------------------------------
 1 | require 'test_helper'
 2 | 
 3 | class HTML::Pipeline::EmojiFilterTest < Minitest::Test
 4 |   EmojiFilter = HTML::Pipeline::EmojiFilter
 5 | 
 6 |   def test_emojify
 7 |     filter = EmojiFilter.new("<p>:shipit:</p>", {:asset_root => 'https://foo.com'})
 8 |     doc = filter.call
 9 |     assert_match "https://foo.com/emoji/shipit.png", doc.search('img').attr('src').value
10 |   end
11 | 
12 |   def test_uri_encoding
13 |     filter = EmojiFilter.new("<p>:+1:</p>", {:asset_root => 'https://foo.com'})
14 |     doc = filter.call
15 |     assert_match "https://foo.com/emoji/unicode/1f44d.png", doc.search('img').attr('src').value
16 |   end
17 | 
18 |   def test_required_context_validation
19 |     exception = assert_raises(ArgumentError) {
20 |       EmojiFilter.call("", {})
21 |     }
22 |     assert_match /:asset_root/, exception.message
23 |   end
24 | 
25 |   def test_custom_asset_path
26 |     filter = EmojiFilter.new("<p>:+1:</p>", {:asset_path => ':file_name', :asset_root => 'https://foo.com'})
27 |     doc = filter.call
28 |     assert_match "https://foo.com/unicode/1f44d.png", doc.search('img').attr('src').value
29 |   end
30 | 
31 |   def test_not_emojify_in_code_tags
32 |     body = "<code>:shipit:</code>"
33 |     filter = EmojiFilter.new(body, {:asset_root => 'https://foo.com'})
34 |     doc = filter.call
35 |     assert_equal body, doc.to_html
36 |   end
37 | 
38 |   def test_not_emojify_in_tt_tags
39 |     body = "<tt>:shipit:</tt>"
40 |     filter = EmojiFilter.new(body, {:asset_root => 'https://foo.com'})
41 |     doc = filter.call
42 |     assert_equal body, doc.to_html
43 |   end
44 | 
45 |   def test_not_emojify_in_pre_tags
46 |     body = "<pre>:shipit:</pre>"
47 |     filter = EmojiFilter.new(body, {:asset_root => 'https://foo.com'})
48 |     doc = filter.call
49 |     assert_equal body, doc.to_html
50 |   end
51 | 
52 |   def test_not_emojify_in_custom_single_tag_foo
53 |     body = "<foo>:shipit:</foo>"
54 |     filter = EmojiFilter.new(body, {:asset_root => 'https://foo.com', ignored_ancestor_tags: %w(foo)})
55 |     doc = filter.call
56 |     assert_equal body, doc.to_html
57 |   end
58 | 
59 |   def test_not_emojify_in_custom_multiple_tags_foo_and_bar
60 |     body = "<bar>:shipit:</bar>"
61 |     filter = EmojiFilter.new(body, {:asset_root => 'https://foo.com', ignored_ancestor_tags: %w(foo bar)})
62 |     doc = filter.call
63 |     assert_equal body, doc.to_html
64 |   end
65 | end
66 | 


--------------------------------------------------------------------------------
/test/html/pipeline_test.rb:
--------------------------------------------------------------------------------
 1 | require "test_helper"
 2 | require "helpers/mocked_instrumentation_service"
 3 | 
 4 | class HTML::PipelineTest < Minitest::Test
 5 |   Pipeline = HTML::Pipeline
 6 |   class TestFilter
 7 |     def self.call(input, context, result)
 8 |       input.reverse
 9 |     end
10 |   end
11 | 
12 |   def setup
13 |     @context = {}
14 |     @result_class = Hash
15 |     @pipeline = Pipeline.new [TestFilter], @context, @result_class
16 |   end
17 | 
18 |   def test_filter_instrumentation
19 |     service = MockedInstrumentationService.new
20 |     events = service.subscribe "call_filter.html_pipeline"
21 |     @pipeline.instrumentation_service = service
22 |     filter(body = "hello")
23 |     event, payload, res = events.pop
24 |     assert event, "event expected"
25 |     assert_equal "call_filter.html_pipeline", event
26 |     assert_equal TestFilter.name, payload[:filter]
27 |     assert_equal @pipeline.class.name, payload[:pipeline]
28 |     assert_equal body.reverse, payload[:result][:output]
29 |   end
30 | 
31 |   def test_pipeline_instrumentation
32 |     service = MockedInstrumentationService.new
33 |     events = service.subscribe "call_pipeline.html_pipeline"
34 |     @pipeline.instrumentation_service = service
35 |     filter(body = "hello")
36 |     event, payload, res = events.pop
37 |     assert event, "event expected"
38 |     assert_equal "call_pipeline.html_pipeline", event
39 |     assert_equal @pipeline.filters.map(&:name), payload[:filters]
40 |     assert_equal @pipeline.class.name, payload[:pipeline]
41 |     assert_equal body.reverse, payload[:result][:output]
42 |   end
43 | 
44 |   def test_default_instrumentation_service
45 |     service = 'default'
46 |     Pipeline.default_instrumentation_service = service
47 |     pipeline = Pipeline.new [], @context, @result_class
48 |     assert_equal service, pipeline.instrumentation_service
49 |   ensure
50 |     Pipeline.default_instrumentation_service = nil
51 |   end
52 | 
53 |   def test_setup_instrumentation
54 |     assert_nil @pipeline.instrumentation_service
55 | 
56 |     service = MockedInstrumentationService.new
57 |     events = service.subscribe "call_pipeline.html_pipeline"
58 |     @pipeline.setup_instrumentation name = 'foo', service
59 | 
60 |     assert_equal service, @pipeline.instrumentation_service
61 |     assert_equal name, @pipeline.instrumentation_name
62 | 
63 |     filter(body = 'foo')
64 | 
65 |     event, payload, res = events.pop
66 |     assert event, "expected event"
67 |     assert_equal name, payload[:pipeline]
68 |     assert_equal body.reverse, payload[:result][:output]
69 |   end
70 | 
71 |   def filter(input)
72 |     @pipeline.call(input)
73 |   end
74 | end
75 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/email_reply_filter.rb:
--------------------------------------------------------------------------------
 1 | begin
 2 |   require "escape_utils"
 3 | rescue LoadError => _
 4 |   abort "Missing dependency 'escape_utils' for EmailReplyFilter. See README.md for details."
 5 | end
 6 | 
 7 | begin
 8 |   require "email_reply_parser"
 9 | rescue LoadError => _
10 |   abort "Missing dependency 'email_reply_parser' for EmailReplyFilter. See README.md for details."
11 | end
12 | 
13 | module HTML
14 |   class Pipeline
15 |     # HTML Filter that converts email reply text into an HTML DocumentFragment.
16 |     # It must be used as the first filter in a pipeline.
17 |     #
18 |     # Context options:
19 |     #   None
20 |     #
21 |     # This filter does not write any additional information to the context hash.
22 |     class EmailReplyFilter < TextFilter
23 |       include EscapeUtils
24 | 
25 |       EMAIL_HIDDEN_HEADER    = %(<span class="email-hidden-toggle"><a href="#">&hellip;</a></span><div class="email-hidden-reply" style="display:none">).freeze
26 |       EMAIL_QUOTED_HEADER    = %(<div class="email-quoted-reply">).freeze
27 |       EMAIL_SIGNATURE_HEADER = %(<div class="email-signature-reply">).freeze
28 |       EMAIL_FRAGMENT_HEADER  = %(<div class="email-fragment">).freeze
29 |       EMAIL_HEADER_END       = "</div>".freeze
30 | 
31 |       # Scans an email body to determine which bits are quoted and which should
32 |       # be hidden. EmailReplyParser is used to split the comment into an Array
33 |       # of quoted or unquoted Blocks. Now, we loop through them and attempt to
34 |       # add <div> tags around them so we can hide the hidden blocks, and style
35 |       # the quoted blocks differently. Since multiple blocks may be hidden, be
36 |       # sure to keep the "email-hidden-reply" <div>s around "email-quoted-reply"
37 |       # <div> tags. Call this on each comment of a visible thread in the order
38 |       # that they are displayed. Note: all comments are processed so we can
39 |       # maintain a Set of SHAs of paragraphs. Only plaintext comments skip the
40 |       # markdown step.
41 |       #
42 |       # Returns the email comment HTML as a String
43 |       def call
44 |         found_hidden = nil
45 |         paragraphs = EmailReplyParser.read(text.dup).fragments.map do |fragment|
46 |           pieces = [escape_html(fragment.to_s.strip).gsub(/^\s*(>|&gt;)/, '')]
47 |           if fragment.quoted?
48 |             pieces.unshift EMAIL_QUOTED_HEADER
49 |             pieces << EMAIL_HEADER_END
50 |           elsif fragment.signature?
51 |             pieces.unshift EMAIL_SIGNATURE_HEADER
52 |             pieces << EMAIL_HEADER_END
53 |           else
54 |             pieces.unshift EMAIL_FRAGMENT_HEADER
55 |             pieces << EMAIL_HEADER_END
56 |           end
57 |           if fragment.hidden? && !found_hidden
58 |             found_hidden = true
59 |             pieces.unshift EMAIL_HIDDEN_HEADER
60 |           end
61 |           pieces.join
62 |         end
63 |         paragraphs << EMAIL_HEADER_END if found_hidden
64 |         paragraphs.join("\n")
65 |       end
66 |     end
67 |   end
68 | end
69 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/camo_filter.rb:
--------------------------------------------------------------------------------
 1 | require 'openssl'
 2 | require 'uri'
 3 | 
 4 | module HTML
 5 |   class Pipeline
 6 |     # HTML Filter for replacing http image URLs with camo versions. See:
 7 |     #
 8 |     # https://github.com/atmos/camo
 9 |     #
10 |     # All images provided in user content should be run through this
11 |     # filter so that http image sources do not cause mixed-content warnings
12 |     # in browser clients.
13 |     #
14 |     # Context options:
15 |     #   :asset_proxy (required) - Base URL for constructed asset proxy URLs.
16 |     #   :asset_proxy_secret_key (required) - The shared secret used to encode URLs.
17 |     #   :asset_proxy_whitelist - Array of host Strings or Regexps to skip
18 |     #                            src rewriting.
19 |     #
20 |     # This filter does not write additional information to the context.
21 |     class CamoFilter < Filter
22 |       # Hijacks images in the markup provided, replacing them with URLs that
23 |       # go through the github asset proxy.
24 |       def call
25 |         return doc unless asset_proxy_enabled?
26 | 
27 |         doc.search("img").each do |element|
28 |           original_src = element['src']
29 |           next unless original_src
30 | 
31 |           begin
32 |             uri = URI.parse(original_src)
33 |           rescue Exception
34 |             next
35 |           end
36 | 
37 |           next if uri.host.nil?
38 |           next if asset_host_whitelisted?(uri.host)
39 | 
40 |           element['src'] = asset_proxy_url(original_src)
41 |           element['data-canonical-src'] = original_src
42 |         end
43 |         doc
44 |       end
45 | 
46 |       # Implementation of validate hook.
47 |       # Errors should raise exceptions or use an existing validator.
48 |       def validate
49 |         needs :asset_proxy, :asset_proxy_secret_key
50 |       end
51 | 
52 |       # The camouflaged URL for a given image URL.
53 |       def asset_proxy_url(url)
54 |         "#{asset_proxy_host}/#{asset_url_hash(url)}/#{hexencode(url)}"
55 |       end
56 | 
57 |       # Private: calculate the HMAC digest for a image source URL.
58 |       def asset_url_hash(url)
59 |         OpenSSL::HMAC.hexdigest('sha1', asset_proxy_secret_key, url)
60 |       end
61 | 
62 |       # Private: Return true if asset proxy filter should be enabled
63 |       def asset_proxy_enabled?
64 |         !context[:disable_asset_proxy]
65 |       end
66 | 
67 |       # Private: the host to use for generated asset proxied URLs.
68 |       def asset_proxy_host
69 |         context[:asset_proxy]
70 |       end
71 | 
72 |       def asset_proxy_secret_key
73 |         context[:asset_proxy_secret_key]
74 |       end
75 | 
76 |       def asset_proxy_whitelist
77 |         context[:asset_proxy_whitelist] || []
78 |       end
79 | 
80 |       def asset_host_whitelisted?(host)
81 |         asset_proxy_whitelist.any? do |test|
82 |           test.is_a?(String) ? host == test : test.match(host)
83 |         end
84 |       end
85 | 
86 |       # Private: helper to hexencode a string. Each byte ends up encoded into
87 |       # two characters, zero padded value in the range [0-9a-f].
88 |       def hexencode(str)
89 |         str.to_enum(:each_byte).map { |byte| "%02x" % byte }.join
90 |       end
91 |     end
92 |   end
93 | end
94 | 


--------------------------------------------------------------------------------
/test/html/pipeline/camo_filter_test.rb:
--------------------------------------------------------------------------------
 1 | require "test_helper"
 2 | 
 3 | class HTML::Pipeline::CamoFilterTest < Minitest::Test
 4 |   CamoFilter = HTML::Pipeline::CamoFilter
 5 | 
 6 |   def setup
 7 |     @asset_proxy_url        = 'https//assets.example.org'
 8 |     @asset_proxy_secret_key = 'ssssh-secret'
 9 |     @options = {
10 |       :asset_proxy            => @asset_proxy_url,
11 |       :asset_proxy_secret_key => @asset_proxy_secret_key,
12 |       :asset_proxy_whitelist  => [/(^|\.)github\.com$/]
13 |     }
14 |   end
15 | 
16 |   def test_asset_proxy_disabled
17 |     orig = %(<p><img src="http://twitter.com/img.png"></p>)
18 |     assert_equal orig,
19 |       CamoFilter.call(orig, @options.merge(:disable_asset_proxy => true)).to_s
20 |   end
21 | 
22 |   def test_camouflaging_http_image_urls
23 |     orig = %(<p><img src="http://twitter.com/img.png"></p>)
24 |     assert_equal %(<p><img src="https//assets.example.org/a5ad43494e343b20d745586282be61ff530e6fa0/687474703a2f2f747769747465722e636f6d2f696d672e706e67" data-canonical-src="http://twitter.com/img.png"></p>),
25 |       CamoFilter.call(orig, @options).to_s
26 |   end
27 | 
28 |   def test_doesnt_rewrite_dotcom_image_urls
29 |     orig = %(<p><img src="https://github.com/img.png"></p>)
30 |     assert_equal orig, CamoFilter.call(orig, @options).to_s
31 |   end
32 | 
33 |   def test_doesnt_rewrite_dotcom_subdomain_image_urls
34 |     orig = %(<p><img src="https://raw.github.com/img.png"></p>)
35 |     assert_equal orig, CamoFilter.call(orig, @options).to_s
36 |   end
37 | 
38 |   def test_doesnt_rewrite_dotcom_subsubdomain_image_urls
39 |     orig = %(<p><img src="https://f.assets.github.com/img.png"></p>)
40 |     assert_equal orig, CamoFilter.call(orig, @options).to_s
41 |   end
42 | 
43 |   def test_camouflaging_github_prefixed_image_urls
44 |     orig = %(<p><img src="https://notgithub.com/img.png"></p>)
45 |     assert_equal %(<p><img src="https//assets.example.org/5d4a96c69713f850520538e04cb9661035cfb534/68747470733a2f2f6e6f746769746875622e636f6d2f696d672e706e67" data-canonical-src="https://notgithub.com/img.png"></p>),
46 |       CamoFilter.call(orig, @options).to_s
47 |   end
48 | 
49 |   def test_doesnt_rewrite_absolute_image_urls
50 |     orig = %(<p><img src="/img.png"></p>)
51 |     assert_equal orig, CamoFilter.call(orig, @options).to_s
52 |   end
53 | 
54 |   def test_doesnt_rewrite_relative_image_urls
55 |     orig = %(<p><img src="img.png"></p>)
56 |     assert_equal orig, CamoFilter.call(orig, @options).to_s
57 |   end
58 | 
59 |   def test_camouflaging_https_image_urls
60 |     orig = %(<p><img src="https://foo.com/img.png"></p>)
61 |     assert_equal %(<p><img src="https//assets.example.org/3c5c6dc74fd6592d2596209dfcb8b7e5461383c8/68747470733a2f2f666f6f2e636f6d2f696d672e706e67" data-canonical-src="https://foo.com/img.png"></p>),
62 |       CamoFilter.call(orig, @options).to_s
63 |   end
64 | 
65 |   def test_handling_images_with_no_src_attribute
66 |     orig = %(<p><img></p>)
67 |     assert_equal orig, CamoFilter.call(orig, @options).to_s
68 |   end
69 | 
70 |   def test_required_context_validation
71 |     exception = assert_raises(ArgumentError) {
72 |       CamoFilter.call("", {})
73 |     }
74 |     assert_match /:asset_proxy[^_]/, exception.message
75 |     assert_match /:asset_proxy_secret_key/, exception.message
76 |   end
77 | end
78 | 


--------------------------------------------------------------------------------
/test/html/pipeline/markdown_filter_test.rb:
--------------------------------------------------------------------------------
  1 | require "test_helper"
  2 | 
  3 | MarkdownFilter = HTML::Pipeline::MarkdownFilter
  4 | 
  5 | class HTML::Pipeline::MarkdownFilterTest < Minitest::Test
  6 |   def setup
  7 |     @haiku =
  8 |       "Pointing at the moon\n" +
  9 |       "Reminded of simple things\n" +
 10 |       "Moments matter most"
 11 |     @links =
 12 |       "See http://example.org/ for more info"
 13 |     @code =
 14 |       "```\n" +
 15 |       "def hello()" +
 16 |       "  'world'" +
 17 |       "end" +
 18 |       "```"
 19 |   end
 20 | 
 21 |   def test_fails_when_given_a_documentfragment
 22 |     body = "<p>heyo</p>"
 23 |     doc  = HTML::Pipeline.parse(body)
 24 |     assert_raises(TypeError) { MarkdownFilter.call(doc, {}) }
 25 |   end
 26 | 
 27 |   def test_gfm_enabled_by_default
 28 |     doc = MarkdownFilter.to_document(@haiku, {})
 29 |     assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
 30 |     assert_equal 2, doc.search('br').size
 31 |   end
 32 | 
 33 |   def test_disabling_gfm
 34 |     doc = MarkdownFilter.to_document(@haiku, :gfm => false)
 35 |     assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
 36 |     assert_equal 0, doc.search('br').size
 37 |   end
 38 | 
 39 |   def test_fenced_code_blocks
 40 |     doc = MarkdownFilter.to_document(@code)
 41 |     assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
 42 |     assert_equal 1, doc.search('pre').size
 43 |   end
 44 | 
 45 |   def test_fenced_code_blocks_with_language
 46 |     doc = MarkdownFilter.to_document(@code.sub("```", "``` ruby"))
 47 |     assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
 48 |     assert_equal 1, doc.search('pre').size
 49 |     assert_equal 'ruby', doc.search('pre').first['lang']
 50 |   end
 51 | end
 52 | 
 53 | class GFMTest < Minitest::Test
 54 |   def gfm(text)
 55 |     MarkdownFilter.call(text, :gfm => true)
 56 |   end
 57 | 
 58 |   def test_not_touch_single_underscores_inside_words
 59 |     assert_equal "<p>foo_bar</p>",
 60 |                  gfm("foo_bar")
 61 |   end
 62 | 
 63 |   def test_not_touch_underscores_in_code_blocks
 64 |     assert_equal "<pre><code>foo_bar_baz\n</code></pre>",
 65 |                  gfm("    foo_bar_baz")
 66 |   end
 67 | 
 68 |   def test_not_touch_underscores_in_pre_blocks
 69 |     assert_equal "<pre>\nfoo_bar_baz\n</pre>",
 70 |                  gfm("<pre>\nfoo_bar_baz\n</pre>")
 71 |   end
 72 | 
 73 |   def test_not_touch_two_or_more_underscores_inside_words
 74 |     assert_equal "<p>foo_bar_baz</p>",
 75 |                  gfm("foo_bar_baz")
 76 |   end
 77 | 
 78 |   def test_turn_newlines_into_br_tags_in_simple_cases
 79 |     assert_equal "<p>foo<br>\nbar</p>",
 80 |                  gfm("foo\nbar")
 81 |   end
 82 | 
 83 |   def test_convert_newlines_in_all_groups
 84 |     assert_equal "<p>apple<br>\npear<br>\norange</p>\n\n" +
 85 |                  "<p>ruby<br>\npython<br>\nerlang</p>",
 86 |                  gfm("apple\npear\norange\n\nruby\npython\nerlang")
 87 |   end
 88 | 
 89 |   def test_convert_newlines_in_even_long_groups
 90 |     assert_equal "<p>apple<br>\npear<br>\norange<br>\nbanana</p>\n\n" +
 91 |                  "<p>ruby<br>\npython<br>\nerlang</p>",
 92 |                  gfm("apple\npear\norange\nbanana\n\nruby\npython\nerlang")
 93 |   end
 94 | 
 95 |   def test_not_convert_newlines_in_lists
 96 |     assert_equal "<h1>foo</h1>\n\n<h1>bar</h1>",
 97 |                  gfm("# foo\n# bar")
 98 |     assert_equal "<ul>\n<li>foo</li>\n<li>bar</li>\n</ul>",
 99 |                  gfm("* foo\n* bar")
100 |   end
101 | end
102 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/emoji_filter.rb:
--------------------------------------------------------------------------------
  1 | require "cgi"
  2 | 
  3 | begin
  4 |   require "gemoji"
  5 | rescue LoadError => _
  6 |   abort "Missing dependency 'gemoji' for EmojiFilter. See README.md for details."
  7 | end
  8 | 
  9 | module HTML
 10 |   class Pipeline
 11 |     # HTML filter that replaces :emoji: with images.
 12 |     #
 13 |     # Context:
 14 |     #   :asset_root (required) - base url to link to emoji sprite
 15 |     #   :asset_path (optional) - url path to link to emoji sprite. :file_name can be used as a placeholder for the sprite file name. If no asset_path is set "emoji/:file_name" is used.
 16 |     #   :ignored_ancestor_tags (optional) - Tags to stop the emojification. Node has matched ancestor HTML tags will not be emojified. Default to pre, code, and tt tags. Extra tags please pass in the form of array, e.g., %w(blockquote summary).
 17 |     class EmojiFilter < Filter
 18 | 
 19 |       DEFAULT_IGNORED_ANCESTOR_TAGS = %w(pre code tt).freeze
 20 | 
 21 |       def call
 22 |         doc.search('.//text()').each do |node|
 23 |           content = node.to_html
 24 |           next unless content.include?(':')
 25 |           next if has_ancestor?(node, ignored_ancestor_tags)
 26 |           html = emoji_image_filter(content)
 27 |           next if html == content
 28 |           node.replace(html)
 29 |         end
 30 |         doc
 31 |       end
 32 | 
 33 |       # Implementation of validate hook.
 34 |       # Errors should raise exceptions or use an existing validator.
 35 |       def validate
 36 |         needs :asset_root
 37 |       end
 38 | 
 39 |       # Replace :emoji: with corresponding images.
 40 |       #
 41 |       # text - String text to replace :emoji: in.
 42 |       #
 43 |       # Returns a String with :emoji: replaced with images.
 44 |       def emoji_image_filter(text)
 45 |         text.gsub(emoji_pattern) do |match|
 46 |           emoji_image_tag($1)
 47 |         end
 48 |       end
 49 | 
 50 |       # The base url to link emoji sprites
 51 |       #
 52 |       # Raises ArgumentError if context option has not been provided.
 53 |       # Returns the context's asset_root.
 54 |       def asset_root
 55 |         context[:asset_root]
 56 |       end
 57 | 
 58 |       # The url path to link emoji sprites
 59 |       #
 60 |       # :file_name can be used in the asset_path as a placeholder for the sprite file name. If no asset_path is set in the context "emoji/:file_name" is used.
 61 |       # Returns the context's asset_path or the default path if no context asset_path is given.
 62 |       def asset_path(name)
 63 |         if context[:asset_path]
 64 |           context[:asset_path].gsub(":file_name", emoji_filename(name))
 65 |         else
 66 |           File.join("emoji", emoji_filename(name))
 67 |         end
 68 |       end
 69 | 
 70 |       private
 71 | 
 72 |       # Build an emoji image tag
 73 |       def emoji_image_tag(name)
 74 |         "<img class='emoji' title=':#{name}:' alt=':#{name}:' src='#{emoji_url(name)}' height='20' width='20' align='absmiddle' />"
 75 |       end
 76 | 
 77 |       def emoji_url(name)
 78 |         File.join(asset_root, asset_path(name))
 79 |       end
 80 | 
 81 |       # Build a regexp that matches all valid :emoji: names.
 82 |       def self.emoji_pattern
 83 |         @emoji_pattern ||= /:(#{emoji_names.map { |name| Regexp.escape(name) }.join('|')}):/
 84 |       end
 85 | 
 86 |       def emoji_pattern
 87 |         self.class.emoji_pattern
 88 |       end
 89 | 
 90 |       def self.emoji_names
 91 |         Emoji.all.map(&:aliases).flatten.sort
 92 |       end
 93 | 
 94 |       def emoji_filename(name)
 95 |         Emoji.find_by_alias(name).image_filename
 96 |       end
 97 | 
 98 |       # Return ancestor tags to stop the emojification.
 99 |       #
100 |       # @return [Array<String>] Ancestor tags.
101 |       def ignored_ancestor_tags
102 |         if context[:ignored_ancestor_tags]
103 |           DEFAULT_IGNORED_ANCESTOR_TAGS | context[:ignored_ancestor_tags]
104 |         else
105 |           DEFAULT_IGNORED_ANCESTOR_TAGS
106 |         end
107 |       end
108 |     end
109 |   end
110 | end
111 | 


--------------------------------------------------------------------------------
/test/html/pipeline/toc_filter_test.rb:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | require "test_helper"
  3 | 
  4 | class HTML::Pipeline::TableOfContentsFilterTest < Minitest::Test
  5 |   TocFilter = HTML::Pipeline::TableOfContentsFilter
  6 | 
  7 |   TocPipeline =
  8 |     HTML::Pipeline.new [
  9 |       HTML::Pipeline::TableOfContentsFilter
 10 |     ]
 11 | 
 12 |   def toc
 13 |     result = {}
 14 |     TocPipeline.call(@orig, {}, result)
 15 |     result[:toc]
 16 |   end
 17 | 
 18 |   def test_anchors_are_added_properly
 19 |     orig = %(<h1>Ice cube</h1><p>Will swarm on any motherfucker in a blue uniform</p>)
 20 |     assert_includes TocFilter.call(orig).to_s, '<a id='
 21 |   end
 22 | 
 23 |   def test_toc_list_added_properly
 24 |     @orig = %(<h1>Ice cube</h1><p>Will swarm on any motherfucker in a blue uniform</p>)
 25 |     assert_includes toc, %Q{<ul class="section-nav">\n<li><a href="}
 26 |   end
 27 | 
 28 |   def test_anchors_have_sane_names
 29 |     orig = %(<h1>Dr Dre</h1><h1>Ice Cube</h1><h1>Eazy-E</h1><h1>MC Ren</h1>)
 30 |     result = TocFilter.call(orig).to_s
 31 | 
 32 |     assert_includes result, '"dr-dre"'
 33 |     assert_includes result, '"ice-cube"'
 34 |     assert_includes result, '"eazy-e"'
 35 |     assert_includes result, '"mc-ren"'
 36 |   end
 37 | 
 38 |   def test_anchors_have_aria_hidden
 39 |     orig = "<h1>Straight Outta Compton</h1>"
 40 |     result = TocFilter.call(orig).to_s
 41 |     assert_includes result, 'aria-hidden="true"'
 42 |   end
 43 | 
 44 |   def test_toc_hrefs_have_sane_values
 45 |     @orig = %(<h1>Dr Dre</h1><h1>Ice Cube</h1><h1>Eazy-E</h1><h1>MC Ren</h1>)
 46 |     assert_includes toc, '"#dr-dre"'
 47 |     assert_includes toc, '"#ice-cube"'
 48 |     assert_includes toc, '"#eazy-e"'
 49 |     assert_includes toc, '"#mc-ren"'
 50 |   end
 51 | 
 52 |   def test_dupe_headers_have_unique_trailing_identifiers
 53 |     orig = %(<h1>Straight Outta Compton</h1>
 54 |              <h2>Dopeman</h2>
 55 |              <h3>Express Yourself</h3>
 56 |              <h1>Dopeman</h1>)
 57 | 
 58 |     result = TocFilter.call(orig).to_s
 59 | 
 60 |     assert_includes result, '"dopeman"'
 61 |     assert_includes result, '"dopeman-1"'
 62 |   end
 63 | 
 64 |   def test_dupe_headers_have_unique_toc_anchors
 65 |     @orig = %(<h1>Straight Outta Compton</h1>
 66 |               <h2>Dopeman</h2>
 67 |               <h3>Express Yourself</h3>
 68 |               <h1>Dopeman</h1>)
 69 | 
 70 |     assert_includes toc, '"#dopeman"'
 71 |     assert_includes toc, '"#dopeman-1"'
 72 |   end
 73 | 
 74 |   def test_all_header_tags_are_found_when_adding_anchors
 75 |     orig = %(<h1>"Funky President" by James Brown</h1>
 76 |              <h2>"It's My Thing" by Marva Whitney</h2>
 77 |              <h3>"Boogie Back" by Roy Ayers</h3>
 78 |              <h4>"Feel Good" by Fancy</h4>
 79 |              <h5>"Funky Drummer" by James Brown</h5>
 80 |              <h6>"Ruthless Villain" by Eazy-E</h6>
 81 |              <h7>"Be Thankful for What You Got" by William DeVaughn</h7>)
 82 | 
 83 |     doc = TocFilter.call(orig)
 84 | 
 85 |     assert_equal 6, doc.search('a').size
 86 |   end
 87 | 
 88 |   def test_toc_is_complete
 89 |     @orig = %(<h1>"Funky President" by James Brown</h1>
 90 |               <h2>"It's My Thing" by Marva Whitney</h2>
 91 |               <h3>"Boogie Back" by Roy Ayers</h3>
 92 |               <h4>"Feel Good" by Fancy</h4>
 93 |               <h5>"Funky Drummer" by James Brown</h5>
 94 |               <h6>"Ruthless Villain" by Eazy-E</h6>
 95 |               <h7>"Be Thankful for What You Got" by William DeVaughn</h7>)
 96 | 
 97 |     expected = %Q{<ul class="section-nav">\n<li><a href="#funky-president-by-james-brown">"Funky President" by James Brown</a></li>\n<li><a href="#its-my-thing-by-marva-whitney">"It's My Thing" by Marva Whitney</a></li>\n<li><a href="#boogie-back-by-roy-ayers">"Boogie Back" by Roy Ayers</a></li>\n<li><a href="#feel-good-by-fancy">"Feel Good" by Fancy</a></li>\n<li><a href="#funky-drummer-by-james-brown">"Funky Drummer" by James Brown</a></li>\n<li><a href="#ruthless-villain-by-eazy-e">"Ruthless Villain" by Eazy-E</a></li>\n</ul>}
 98 | 
 99 |     assert_equal expected, toc
100 |   end
101 | 
102 |   if RUBY_VERSION > "1.9" # not sure how to make this work on 1.8.7
103 | 
104 |     def test_anchors_with_utf8_characters
105 |       orig = %(<h1>日本語</h1>
106 |                <h1>Русский</h1)
107 | 
108 |       rendered_h1s = TocFilter.call(orig).search('h1').map(&:to_s)
109 | 
110 |       assert_equal "<h1>\n<a id=\"日本語\" class=\"anchor\" href=\"#%E6%97%A5%E6%9C%AC%E8%AA%9E\" aria-hidden=\"true\"><span class=\"octicon octicon-link\"></span></a>日本語</h1>",
111 |                    rendered_h1s[0]
112 |       assert_equal "<h1>\n<a id=\"Русский\" class=\"anchor\" href=\"#%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9\" aria-hidden=\"true\"><span class=\"octicon octicon-link\"></span></a>Русский</h1>",
113 |                    rendered_h1s[1]
114 |     end
115 | 
116 |     def test_toc_with_utf8_characters
117 |       @orig = %(<h1>日本語</h1>
118 |                 <h1>Русский</h1)
119 | 
120 |       rendered_toc = Nokogiri::HTML::DocumentFragment.parse(toc).to_s
121 | 
122 |       expected = %Q{<ul class="section-nav">\n<li><a href="#%E6%97%A5%E6%9C%AC%E8%AA%9E">日本語</a></li>\n<li><a href="#%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9">Русский</a></li>\n</ul>}
123 | 
124 |       assert_equal expected, rendered_toc
125 |     end
126 |   end
127 | end
128 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/@mention_filter.rb:
--------------------------------------------------------------------------------
  1 | require 'set'
  2 | 
  3 | module HTML
  4 |   class Pipeline
  5 |     # HTML filter that replaces @user mentions with links. Mentions within <pre>,
  6 |     # <code>, and <a> elements are ignored. Mentions that reference users that do
  7 |     # not exist are ignored.
  8 |     #
  9 |     # Context options:
 10 |     #   :base_url - Used to construct links to user profile pages for each
 11 |     #               mention.
 12 |     #   :info_url - Used to link to "more info" when someone mentions @mention
 13 |     #               or @mentioned.
 14 |     #   :username_pattern - Used to provide a custom regular expression to
 15 |     #                       identify usernames
 16 |     #
 17 |     class MentionFilter < Filter
 18 |       # Public: Find user @mentions in text.  See
 19 |       # MentionFilter#mention_link_filter.
 20 |       #
 21 |       #   MentionFilter.mentioned_logins_in(text) do |match, login, is_mentioned|
 22 |       #     "<a href=...>#{login}</a>"
 23 |       #   end
 24 |       #
 25 |       # text - String text to search.
 26 |       #
 27 |       # Yields the String match, the String login name, and a Boolean determining
 28 |       # if the match = "@mention[ed]".  The yield's return replaces the match in
 29 |       # the original text.
 30 |       #
 31 |       # Returns a String replaced with the return of the block.
 32 |       def self.mentioned_logins_in(text, username_pattern=UsernamePattern)
 33 |         text.gsub MentionPatterns[username_pattern] do |match|
 34 |           login = $1
 35 |           yield match, login, MentionLogins.include?(login.downcase)
 36 |         end
 37 |       end
 38 | 
 39 |       # Hash that contains all of the mention patterns used by the pipeline
 40 |       MentionPatterns = Hash.new do |hash, key|
 41 |         hash[key] = /
 42 |           (?:^|\W)                    # beginning of string or non-word char
 43 |           @((?>#{key}))  # @username
 44 |           (?!\/)                      # without a trailing slash
 45 |           (?=
 46 |             \.+[ \t\W]|               # dots followed by space or non-word character
 47 |             \.+$|                     # dots at end of line
 48 |             [^0-9a-zA-Z_.]|           # non-word character except dot
 49 |             $                         # end of line
 50 |           )
 51 |         /ix
 52 |       end
 53 | 
 54 |       # Default pattern used to extract usernames from text. The value can be
 55 |       # overriden by providing the username_pattern variable in the context.
 56 |       UsernamePattern = /[a-z0-9][a-z0-9-]*/
 57 | 
 58 |       # List of username logins that, when mentioned, link to the blog post
 59 |       # about @mentions instead of triggering a real mention.
 60 |       MentionLogins = %w(
 61 |         mention
 62 |         mentions
 63 |         mentioned
 64 |         mentioning
 65 |       )
 66 | 
 67 |       # Don't look for mentions in text nodes that are children of these elements
 68 |       IGNORE_PARENTS = %w(pre code a style).to_set
 69 | 
 70 |       def call
 71 |         result[:mentioned_usernames] ||= []
 72 | 
 73 |         doc.search('.//text()').each do |node|
 74 |           content = node.to_html
 75 |           next if !content.include?('@')
 76 |           next if has_ancestor?(node, IGNORE_PARENTS)
 77 |           html = mention_link_filter(content, base_url, info_url, username_pattern)
 78 |           next if html == content
 79 |           node.replace(html)
 80 |         end
 81 |         doc
 82 |       end
 83 | 
 84 |       # The URL to provide when someone @mentions a "mention" name, such as
 85 |       # @mention or @mentioned, that will give them more info on mentions.
 86 |       def info_url
 87 |         context[:info_url] || nil
 88 |       end
 89 | 
 90 |       def username_pattern
 91 |         context[:username_pattern] || UsernamePattern
 92 |       end
 93 | 
 94 |       # Replace user @mentions in text with links to the mentioned user's
 95 |       # profile page.
 96 |       #
 97 |       # text      - String text to replace @mention usernames in.
 98 |       # base_url  - The base URL used to construct user profile URLs.
 99 |       # info_url  - The "more info" URL used to link to more info on @mentions.
100 |       #             If nil we don't link @mention or @mentioned.
101 |       # username_pattern  - Regular expression used to identify usernames in
102 |       #                     text
103 |       #
104 |       # Returns a string with @mentions replaced with links. All links have a
105 |       # 'user-mention' class name attached for styling.
106 |       def mention_link_filter(text, base_url='/', info_url=nil, username_pattern=UsernamePattern)
107 |         self.class.mentioned_logins_in(text, username_pattern) do |match, login, is_mentioned|
108 |           link =
109 |             if is_mentioned
110 |               link_to_mention_info(login, info_url)
111 |             else
112 |               link_to_mentioned_user(login)
113 |             end
114 | 
115 |           link ? match.sub("@#{login}", link) : match
116 |         end
117 |       end
118 | 
119 |       def link_to_mention_info(text, info_url=nil)
120 |         return "@#{text}" if info_url.nil?
121 |         "<a href='#{info_url}' class='user-mention'>" +
122 |         "@#{text}" +
123 |         "</a>"
124 |       end
125 | 
126 |       def link_to_mentioned_user(login)
127 |         result[:mentioned_usernames] |= [login]
128 | 
129 |         url = base_url.dup
130 |         url << "/" unless url =~ /[\/~]\z/
131 | 
132 |         "<a href='#{url << login}' class='user-mention'>" +
133 |         "@#{login}" +
134 |         "</a>"
135 |       end
136 |     end
137 |   end
138 | end
139 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/filter.rb:
--------------------------------------------------------------------------------
  1 | module HTML
  2 |   class Pipeline
  3 |     # Base class for user content HTML filters. Each filter takes an
  4 |     # HTML string or Nokogiri::HTML::DocumentFragment, performs
  5 |     # modifications and/or writes information to the result hash. Filters must
  6 |     # return a DocumentFragment (typically the same instance provided to the call
  7 |     # method) or a String with HTML markup.
  8 |     #
  9 |     # Example filter that replaces all images with trollface:
 10 |     #
 11 |     #   class FuuuFilter < HTML::Pipeline::Filter
 12 |     #     def call
 13 |     #       doc.search('img').each do |img|
 14 |     #         img['src'] = "http://paradoxdgn.com/junk/avatars/trollface.jpg"
 15 |     #       end
 16 |     #     end
 17 |     #   end
 18 |     #
 19 |     # The context Hash passes options to filters and should not be changed in
 20 |     # place.  A Result Hash allows filters to make extracted information
 21 |     # available to the caller and is mutable.
 22 |     #
 23 |     # Common context options:
 24 |     #   :base_url   - The site's base URL
 25 |     #   :repository - A Repository providing context for the HTML being processed
 26 |     #
 27 |     # Each filter may define additional options and output values. See the class
 28 |     # docs for more info.
 29 |     class Filter
 30 |       class InvalidDocumentException < StandardError; end
 31 | 
 32 |       def initialize(doc, context = nil, result = nil)
 33 |         if doc.kind_of?(String)
 34 |           @html = doc.to_str
 35 |           @doc = nil
 36 |         else
 37 |           @doc = doc
 38 |           @html = nil
 39 |         end
 40 |         @context = context || {}
 41 |         @result = result || {}
 42 |         validate
 43 |       end
 44 | 
 45 |       # Public: Returns a simple Hash used to pass extra information into filters
 46 |       # and also to allow filters to make extracted information available to the
 47 |       # caller.
 48 |       attr_reader :context
 49 | 
 50 |       # Public: Returns a Hash used to allow filters to pass back information
 51 |       # to callers of the various Pipelines.  This can be used for
 52 |       # #mentioned_users, for example.
 53 |       attr_reader :result
 54 | 
 55 |       # The Nokogiri::HTML::DocumentFragment to be manipulated. If the filter was
 56 |       # provided a String, parse into a DocumentFragment the first time this
 57 |       # method is called.
 58 |       def doc
 59 |         @doc ||= parse_html(html)
 60 |       end
 61 | 
 62 |       # The String representation of the document. If a DocumentFragment was
 63 |       # provided to the Filter, it is serialized into a String when this method is
 64 |       # called.
 65 |       def html
 66 |         raise InvalidDocumentException if @html.nil? && @doc.nil?
 67 |         @html || doc.to_html
 68 |       end
 69 | 
 70 |       # The main filter entry point. The doc attribute is guaranteed to be a
 71 |       # Nokogiri::HTML::DocumentFragment when invoked. Subclasses should modify
 72 |       # this document in place or extract information and add it to the context
 73 |       # hash.
 74 |       def call
 75 |         raise NotImplementedError
 76 |       end
 77 | 
 78 |       # Make sure the context has everything we need. Noop: Subclasses can override.
 79 |       def validate
 80 |       end
 81 | 
 82 |       # The Repository object provided in the context hash, or nil when no
 83 |       # :repository was specified.
 84 |       #
 85 |       # It's assumed that the repository context has already been checked
 86 |       # for permissions
 87 |       def repository
 88 |         context[:repository]
 89 |       end
 90 | 
 91 |       # The User object provided in the context hash, or nil when no user
 92 |       # was specified
 93 |       def current_user
 94 |         context[:current_user]
 95 |       end
 96 | 
 97 |       # The site's base URL provided in the context hash, or '/' when no
 98 |       # base URL was specified.
 99 |       def base_url
100 |         context[:base_url] || '/'
101 |       end
102 | 
103 |       # Ensure the passed argument is a DocumentFragment. When a string is
104 |       # provided, it is parsed and returned; otherwise, the DocumentFragment is
105 |       # returned unmodified.
106 |       def parse_html(html)
107 |         HTML::Pipeline.parse(html)
108 |       end
109 | 
110 |       # Helper method for filter subclasses used to determine if any of a node's
111 |       # ancestors have one of the tag names specified.
112 |       #
113 |       # node - The Node object to check.
114 |       # tags - An array of tag name strings to check. These should be downcase.
115 |       #
116 |       # Returns true when the node has a matching ancestor.
117 |       def has_ancestor?(node, tags)
118 |         while node = node.parent
119 |           if tags.include?(node.name.downcase)
120 |             break true
121 |           end
122 |         end
123 |       end
124 | 
125 |       # Perform a filter on doc with the given context.
126 |       #
127 |       # Returns a HTML::Pipeline::DocumentFragment or a String containing HTML
128 |       # markup.
129 |       def self.call(doc, context = nil, result = nil)
130 |         new(doc, context, result).call
131 |       end
132 | 
133 |       # Like call but guarantees that a DocumentFragment is returned, even when
134 |       # the last filter returns a String.
135 |       def self.to_document(input, context = nil)
136 |         html = call(input, context)
137 |         HTML::Pipeline::parse(html)
138 |       end
139 | 
140 |       # Like call but guarantees that a string of HTML markup is returned.
141 |       def self.to_html(input, context = nil)
142 |         output = call(input, context)
143 |         if output.respond_to?(:to_html)
144 |           output.to_html
145 |         else
146 |           output.to_s
147 |         end
148 |       end
149 | 
150 |       # Validator for required context. This will check that anything passed in
151 |       # contexts exists in @contexts
152 |       #
153 |       # If any errors are found an ArgumentError will be raised with a
154 |       # message listing all the missing contexts and the filters that
155 |       # require them.
156 |       def needs(*keys)
157 |         missing = keys.reject { |key| context.include? key }
158 | 
159 |         if missing.any?
160 |           raise ArgumentError,
161 |             "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}"
162 |         end
163 |       end
164 |     end
165 |   end
166 | end
167 | 


--------------------------------------------------------------------------------
/test/html/pipeline/sanitization_filter_test.rb:
--------------------------------------------------------------------------------
  1 | require "test_helper"
  2 | 
  3 | class HTML::Pipeline::SanitizationFilterTest < Minitest::Test
  4 |   SanitizationFilter = HTML::Pipeline::SanitizationFilter
  5 | 
  6 |   def test_removing_script_tags
  7 |     orig = %(<p><img src="http://github.com/img.png" /><script></script></p>)
  8 |     html = SanitizationFilter.call(orig).to_s
  9 |     refute_match /script/, html
 10 |   end
 11 | 
 12 |   def test_removing_style_tags
 13 |     orig = %(<p><style>hey now</style></p>)
 14 |     html = SanitizationFilter.call(orig).to_s
 15 |     refute_match /style/, html
 16 |   end
 17 | 
 18 |   def test_removing_style_attributes
 19 |     orig = %(<p style='font-size:1000%'>YO DAWG</p>)
 20 |     html = SanitizationFilter.call(orig).to_s
 21 |     refute_match /font-size/, html
 22 |     refute_match /style/, html
 23 |   end
 24 | 
 25 |   def test_removing_script_event_handler_attributes
 26 |     orig = %(<a onclick='javascript:alert(0)'>YO DAWG</a>)
 27 |     html = SanitizationFilter.call(orig).to_s
 28 |     refute_match /javscript/, html
 29 |     refute_match /onclick/, html
 30 |   end
 31 | 
 32 |   def test_sanitizes_li_elements_not_contained_in_ul_or_ol
 33 |     stuff = "a\n<li>b</li>\nc"
 34 |     html  = SanitizationFilter.call(stuff).to_s
 35 |     assert_equal "a\nb\nc", html
 36 |   end
 37 | 
 38 |   def test_does_not_sanitize_li_elements_contained_in_ul_or_ol
 39 |     stuff = "a\n<ul><li>b</li></ul>\nc"
 40 |     assert_equal stuff, SanitizationFilter.call(stuff).to_s
 41 |   end
 42 | 
 43 |   def test_github_specific_protocols_are_not_removed
 44 |     stuff = '<a href="github-windows://spillthelog">Spill this yo</a> and so on'
 45 |     assert_equal stuff, SanitizationFilter.call(stuff).to_s
 46 |   end
 47 | 
 48 |   def test_unknown_schemes_are_removed
 49 |     stuff = '<a href="something-weird://heyyy">Wat</a> is this'
 50 |     html  = SanitizationFilter.call(stuff).to_s
 51 |     assert_equal '<a>Wat</a> is this', html
 52 |   end
 53 | 
 54 |   def test_whitelisted_longdesc_schemes_are_allowed
 55 |     stuff = '<img src="./foo.jpg" longdesc="http://longdesc.com">'
 56 |     html  = SanitizationFilter.call(stuff).to_s
 57 |     assert_equal '<img src="./foo.jpg" longdesc="http://longdesc.com">', html
 58 |   end
 59 | 
 60 |   def test_weird_longdesc_schemes_are_removed
 61 |     stuff = '<img src="./foo.jpg" longdesc="javascript:alert(1)">'
 62 |     html  = SanitizationFilter.call(stuff).to_s
 63 |     assert_equal '<img src="./foo.jpg">', html
 64 |   end
 65 | 
 66 |   def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes
 67 |     stuff  = '<a href="http://www.example.com/">No href for you</a>'
 68 |     filter = SanitizationFilter.new(stuff, {:anchor_schemes => []})
 69 |     html   = filter.call.to_s
 70 |     assert_equal '<a>No href for you</a>', html
 71 |   end
 72 | 
 73 |   def test_custom_anchor_schemes_are_not_removed
 74 |     stuff  = '<a href="something-weird://heyyy">Wat</a> is this'
 75 |     filter = SanitizationFilter.new(stuff, {:anchor_schemes => ['something-weird']})
 76 |     html   = filter.call.to_s
 77 |     assert_equal stuff, html
 78 |   end
 79 | 
 80 |   def test_anchor_schemes_are_merged_with_other_anchor_restrictions
 81 |     stuff  = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
 82 |     whitelist = {
 83 |       :elements   => ['a'],
 84 |       :attributes => {'a' => ['href', 'ping']},
 85 |       :protocols  => {'a' => {'ping' => ['http']}}
 86 |     }
 87 |     filter = SanitizationFilter.new(stuff, {:whitelist => whitelist, :anchor_schemes => ['something-weird']})
 88 |     html   = filter.call.to_s
 89 |     assert_equal '<a href="something-weird://heyyy">Wat</a> is this', html
 90 |   end
 91 | 
 92 |   def test_uses_anchor_schemes_from_whitelist_when_not_separately_specified
 93 |     stuff  = '<a href="something-weird://heyyy">Wat</a> is this'
 94 |     whitelist = {
 95 |       :elements   => ['a'],
 96 |       :attributes => {'a' => ['href']},
 97 |       :protocols  => {'a' => {'href' => ['something-weird']}}
 98 |     }
 99 |     filter = SanitizationFilter.new(stuff, {:whitelist => whitelist})
100 |     html   = filter.call.to_s
101 |     assert_equal stuff, html
102 |   end
103 | 
104 |   def test_whitelist_contains_default_anchor_schemes
105 |     assert_equal SanitizationFilter::WHITELIST[:protocols]['a']['href'], ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
106 |   end
107 | 
108 |   def test_whitelist_from_full_constant
109 |     stuff  = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'
110 |     filter = SanitizationFilter.new(stuff, :whitelist => SanitizationFilter::FULL)
111 |     html   = filter.call.to_s
112 |     assert_equal 'Wat is this', html
113 |   end
114 | 
115 |   def test_exports_default_anchor_schemes
116 |     assert_equal SanitizationFilter::ANCHOR_SCHEMES, ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
117 |   end
118 | 
119 |   def test_script_contents_are_removed
120 |     orig = '<script>JavaScript!</script>'
121 |     assert_equal "", SanitizationFilter.call(orig).to_s
122 |   end
123 | 
124 |   def test_table_rows_and_cells_removed_if_not_in_table
125 |     orig = %(<tr><td>Foo</td></tr><td>Bar</td>)
126 |     assert_equal 'FooBar', SanitizationFilter.call(orig).to_s
127 |   end
128 | 
129 |   def test_table_sections_removed_if_not_in_table
130 |     orig = %(<thead><tr><td>Foo</td></tr></thead>)
131 |     assert_equal 'Foo', SanitizationFilter.call(orig).to_s
132 |   end
133 | 
134 |   def test_table_sections_are_not_removed
135 |     orig = %(<table>
136 | <thead><tr><th>Column 1</th></tr></thead>
137 | <tfoot><tr><td>Sum</td></tr></tfoot>
138 | <tbody><tr><td>1</td></tr></tbody>
139 | </table>)
140 |     assert_equal orig, SanitizationFilter.call(orig).to_s
141 |   end
142 | 
143 |   def test_summary_tag_are_not_removed
144 |     orig = %(<summary>Foo</summary>)
145 |     assert_equal orig, SanitizationFilter.call(orig).to_s
146 |   end
147 | 
148 |   def test_details_tag_and_open_attribute_are_not_removed
149 |     orig = %(<details open>Foo</details>)
150 |     assert_equal orig, SanitizationFilter.call(orig).to_s
151 |   end
152 | 
153 |   def test_nested_details_tag_are_not_removed
154 |     orig = <<-NESTED
155 |       <details>
156 |         <summary>Foo</summary>
157 |         <details>
158 |           Bar
159 |           <summary>Baz</summary>
160 |         </details>
161 |         Qux
162 |       </details>
163 |     NESTED
164 |     assert_equal orig, SanitizationFilter.call(orig).to_s
165 |   end
166 | end
167 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/sanitization_filter.rb:
--------------------------------------------------------------------------------
  1 | begin
  2 |   require "sanitize"
  3 | rescue LoadError => _
  4 |   abort "Missing dependency 'sanitize' for SanitizationFilter. See README.md for details."
  5 | end
  6 | 
  7 | module HTML
  8 |   class Pipeline
  9 |     # HTML filter with sanization routines and whitelists. This module defines
 10 |     # what HTML is allowed in user provided content and fixes up issues with
 11 |     # unbalanced tags and whatnot.
 12 |     #
 13 |     # See the Sanitize docs for more information on the underlying library:
 14 |     #
 15 |     # https://github.com/rgrove/sanitize/#readme
 16 |     #
 17 |     # Context options:
 18 |     #   :whitelist      - The sanitizer whitelist configuration to use. This
 19 |     #                     can be one of the options constants defined in this
 20 |     #                     class or a custom sanitize options hash.
 21 |     #   :anchor_schemes - The URL schemes to allow in <a href> attributes. The
 22 |     #                     default set is provided in the ANCHOR_SCHEMES
 23 |     #                     constant in this class. If passed, this overrides any
 24 |     #                     schemes specified in the whitelist configuration.
 25 |     #
 26 |     # This filter does not write additional information to the context.
 27 |     class SanitizationFilter < Filter
 28 |       LISTS     = Set.new(%w(ul ol).freeze)
 29 |       LIST_ITEM = 'li'.freeze
 30 | 
 31 |       # List of table child elements. These must be contained by a <table> element
 32 |       # or they are not allowed through. Otherwise they can be used to break out
 33 |       # of places we're using tables to contain formatted user content (like pull
 34 |       # request review comments).
 35 |       TABLE_ITEMS = Set.new(%w(tr td th).freeze)
 36 |       TABLE = 'table'.freeze
 37 |       TABLE_SECTIONS = Set.new(%w(thead tbody tfoot).freeze)
 38 | 
 39 |       # These schemes are the only ones allowed in <a href> attributes by default.
 40 |       ANCHOR_SCHEMES = ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac'].freeze
 41 | 
 42 |       # The main sanitization whitelist. Only these elements and attributes are
 43 |       # allowed through by default.
 44 |       WHITELIST = {
 45 |         :elements => %w(
 46 |           h1 h2 h3 h4 h5 h6 h7 h8 br b i strong em a pre code img tt
 47 |           div ins del sup sub p ol ul table thead tbody tfoot blockquote
 48 |           dl dt dd kbd q samp var hr ruby rt rp li tr td th s strike summary details
 49 |         ),
 50 |         :remove_contents => ['script'],
 51 |         :attributes => {
 52 |           'a'          => ['href'],
 53 |           'img'        => ['src', 'longdesc'],
 54 |           'div'        => ['itemscope', 'itemtype'],
 55 |           'blockquote' => ['cite'],
 56 |           'del'        => ['cite'],
 57 |           'ins'        => ['cite'],
 58 |           'q'          => ['cite'],
 59 |           :all         => ['abbr', 'accept', 'accept-charset',
 60 |                            'accesskey', 'action', 'align', 'alt', 'axis',
 61 |                            'border', 'cellpadding', 'cellspacing', 'char',
 62 |                            'charoff', 'charset', 'checked',
 63 |                            'clear', 'cols', 'colspan', 'color',
 64 |                            'compact', 'coords', 'datetime', 'dir',
 65 |                            'disabled', 'enctype', 'for', 'frame',
 66 |                            'headers', 'height', 'hreflang',
 67 |                            'hspace', 'ismap', 'label', 'lang',
 68 |                            'maxlength', 'media', 'method',
 69 |                            'multiple', 'name', 'nohref', 'noshade',
 70 |                            'nowrap', 'open', 'prompt', 'readonly', 'rel', 'rev',
 71 |                            'rows', 'rowspan', 'rules', 'scope',
 72 |                            'selected', 'shape', 'size', 'span',
 73 |                            'start', 'summary', 'tabindex', 'target',
 74 |                            'title', 'type', 'usemap', 'valign', 'value',
 75 |                            'vspace', 'width', 'itemprop']
 76 |         },
 77 |         :protocols => {
 78 |           'a'          => {'href' => ANCHOR_SCHEMES},
 79 |           'blockquote' => {'cite' => ['http', 'https', :relative]},
 80 |           'del'        => {'cite' => ['http', 'https', :relative]},
 81 |           'ins'        => {'cite' => ['http', 'https', :relative]},
 82 |           'q'          => {'cite' => ['http', 'https', :relative]},
 83 |           'img'        => {
 84 |             'src'      => ['http', 'https', :relative],
 85 |             'longdesc' => ['http', 'https', :relative]
 86 |           }
 87 |         },
 88 |         :transformers => [
 89 |           # Top-level <li> elements are removed because they can break out of
 90 |           # containing markup.
 91 |           lambda { |env|
 92 |             name, node = env[:node_name], env[:node]
 93 |             if name == LIST_ITEM && !node.ancestors.any?{ |n| LISTS.include?(n.name) }
 94 |               node.replace(node.children)
 95 |             end
 96 |           },
 97 | 
 98 |           # Table child elements that are not contained by a <table> are removed.
 99 |           lambda { |env|
100 |             name, node = env[:node_name], env[:node]
101 |             if (TABLE_SECTIONS.include?(name) || TABLE_ITEMS.include?(name)) && !node.ancestors.any? { |n| n.name == TABLE }
102 |               node.replace(node.children)
103 |             end
104 |           }
105 |         ]
106 |       }
107 | 
108 |       # A more limited sanitization whitelist. This includes all attributes,
109 |       # protocols, and transformers from WHITELIST but with a more locked down
110 |       # set of allowed elements.
111 |       LIMITED = WHITELIST.merge(
112 |         :elements => %w(b i strong em a pre code img ins del sup sub p ol ul li))
113 | 
114 |       # Strip all HTML tags from the document.
115 |       FULL = { :elements => [] }
116 | 
      # Sanitize markup using the Sanitize library.
      #
      # Hands this filter's doc and the configuration returned by #whitelist
      # to Sanitize.clean_node!.
      #
      # Returns whatever Sanitize.clean_node! returns.
      def call
        Sanitize.clean_node!(doc, whitelist)
      end
121 | 
122 |       # The whitelist to use when sanitizing. This can be passed in the context
123 |       # hash to the filter but defaults to WHITELIST constant value above.
124 |       def whitelist
125 |         whitelist = context[:whitelist] || WHITELIST
126 |         anchor_schemes = context[:anchor_schemes]
127 |         return whitelist unless anchor_schemes
128 |         whitelist = whitelist.dup
129 |         whitelist[:protocols] = (whitelist[:protocols] || {}).dup
130 |         whitelist[:protocols]['a'] = (whitelist[:protocols]['a'] || {}).merge('href' => anchor_schemes)
131 |         whitelist
132 |       end
133 |     end
134 |   end
135 | end
136 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # CHANGELOG
  2 | 
  3 | ## 2.2.2
  4 | 
  5 | * Fix for calling mention_link_filter with only one argument [#230](https://github.com/jch/html-pipeline/pull/230)
  6 | * Add html-pipeline-linkify_github to 3rd Party Extensions in README [#228](https://github.com/jch/html-pipeline/pull/228)
  7 | 
  8 | ## 2.2.1
  9 | 
 10 | * Soften Nokogiri dependency to versions ">= 1.4" [#208](https://github.com/jch/html-pipeline/pull/208)
 11 | 
 12 | ## 2.2.0
 13 | 
 14 | * Only allow cite attribute on blockquote and restrict schemes [#223](https://github.com/jch/html-pipeline/pull/223)
 15 | 
 16 | ## 2.1.0
 17 | 
 18 | * Whitelist schemes for longdesc [#221](https://github.com/jch/html-pipeline/pull/221)
 19 | * Extract emoji image tag generation to own method [#195](https://github.com/jch/html-pipeline/pull/195)
 20 | * Update README.md [#211](https://github.com/jch/html-pipeline/pull/211)
 21 | * Add ImageFilter for image url to img tag conversion [#207](https://github.com/jch/html-pipeline/pull/207)
 22 | 
 23 | ## 2.0
 24 | 
 25 | **New**
 26 | 
 27 | * Implement new EmojiFilter context option: ignored_ancestor_tags to accept more ignored tags. [#170](https://github.com/jch/html-pipeline/pull/170) @JuanitoFatas
 28 | * Add GitHub flavor Markdown Task List extension [#162](https://github.com/jch/html-pipeline/pull/162) @simeonwillbanks
 29 | * @mention allow for custom regex to identify usernames. [#157](https://github.com/jch/html-pipeline/pull/157) @brittballard
 30 | * EmojiFilter now requires gemoji ~> 2. [#159](https://github.com/jch/html-pipeline/pull/159) @jch
 31 | 
 32 | **Changes**
 33 | 
 34 | * Restrict nokogiri to >= 1.4, <= 1.6.5 [#176](https://github.com/jch/html-pipeline/pull/176) @simeonwillbanks
 35 | * MentionFilter#link_to_mentioned_user: Replace String introspection with Regexp match [#172](https://github.com/jch/html-pipeline/pull/172) @simeonwillbanks
 36 | * Whitelist summary and details element. [#171](https://github.com/jch/html-pipeline/pull/171) @JuanitoFatas
 37 | * Support ~login for MentionFilter. [#167](https://github.com/jch/html-pipeline/pull/167) @JuanitoFatas
 38 | * Revert "Search for text nodes on DocumentFragments without root tags" [#158](https://github.com/jch/html-pipeline/pull/158) @jch
 39 | * Drop support for ruby ree, 1.9.2, 1.9.3 [#156](https://github.com/jch/html-pipeline/pull/156) @jch
 40 | * Skip EmojiFilter in `<tt>` tags [#147](https://github.com/jch/html-pipeline/pull/147) @moskvax
 41 | * Use Linguist lexers [#153](https://github.com/jch/html-pipeline/pull/153) @pchaigno
 42 | * Constrain Active Support >= 2, < 5 [#180](https://github.com/jch/html-pipeline/pull/180) @jch
 43 | 
 44 | ## 1.11.0
 45 | 
 46 |   * Search for text nodes on DocumentFragments without root tags #146 Razer6
 47 |   * Don't filter @mentions in `<style>` tags #145 jch
 48 |   * Prefer `http_url` in HttpsFilter. `base_url` still works. #142 bkeepers
 49 |   * Remove duplicate check in EmojiFilter #141 Razer6
 50 | 
 51 | ## 1.10.0
 52 | 
 53 |   * Anchor TOCFilter with id's instead of name's #140 bkeepers
 54 |   * Add `details` to sanitization whitelist #139 tansaku
 55 |   * Fix README spelling #137 Razer6
 56 |   * Remove ActiveSupport `try` dependency #132 simeonwillbanks
 57 | 
 58 | ## 1.9.0
 59 | 
 60 |   * Generalize https filter with :base_url #124 #131 rymohr
 61 |   * Clean up gemspec dependencies #130 mislav
 62 |   * EmojiFilter compatibility with gemoji v2 #129 mislav
 63 |   * Now using Minitest #126 simeonwillbanks
 64 | 
 65 | ## 1.8.0
 66 | 
 67 |   * Add custom path support for EmojiFilter #122 bradly
 68 |   * Reorganize README and add table of contents #118 simeonwillbanks
 69 | 
 70 | ## 1.7.0
 71 | 
 72 |   * SanitizationFilter whitelists <s> and <strike> elements #120 charliesome
 73 |   * ruby 2.1.1 support #119 simeonwillbanks
 74 | 
 75 | ## 1.6.0
 76 | 
 77 |   * Doc update for syntax highlighting #108 simeonwillbanks
 78 |   * Add missing dependency for EmailReplyFilter #110 foca
 79 |   * Fix deprecation warning for Digest::Digest #103 chrishunt
 80 | 
 81 | ## 1.5.0
 82 | 
 83 |   * More flexible whitelist configuration for SanitizationFilter #98 aroben
 84 | 
 85 | ## 1.4.0
 86 | 
 87 |   * Fix CamoFilter double entity encoding. #101 josh
 88 | 
 89 | ## 1.3.0
 90 | 
 91 | 1.2.0 didn't actually include the following changes. Yanked that release.
 92 | 
 93 |   * CamoFilter now camos https images. #96 josh
 94 | 
 95 | ## 1.1.0
 96 | 
 97 |   * escape emoji filenames in urls #92 jayroh
 98 | 
 99 | ## 1.0.0
100 | 
101 | To upgrade to this release, you will need to include separate gems for each of
102 | the filters. See [this section of the README](/README.md#dependencies) for
103 | details.
104 | 
105 |   * filter dependencies are no longer included #80 from simeonwillbanks/simple-dependency-management
106 |   * Add link_attr option to Autolink filter #89 from excid3/master
107 |   * Add ActiveSupport back in as dependency for xml-mini #85 from mojavelinux/xml-mini
108 | 
109 | ## 0.3.1
110 | 
111 |   * Guard against nil node replacement in SyntaxHighlightFilter #84 jbarnette
112 | 
113 | ## 0.3.0
114 | 
115 |   * Add support for manually specified default language in SyntaxHighlightFilter #81 jbarnette
116 | 
117 | ## 0.2.1
118 | 
119 |   * Moves ActiveSupport as a development dependency #79
120 | 
121 | ## 0.2.0
122 | 
123 |   * Fix README typo #74 tricknotes
124 |   * TableOfContentsFilter generates list of sections #75 simeonwillbanks
125 | 
126 | ## 0.1.0
127 | 
128 | I realized I wasn't properly following [semver](http://semver.org) for interface
129 | changes and new features. Starting from this release, semver will be followed.
130 | 
131 |   * Whitelist table section elements in sanitization filter #55 mojavelinux
132 |   * Update readme typo #57 envygeeks
133 |   * TOC unicode characters and anchor names for Ruby > 1.9 #64 jakedouglas/non_english_anchors
134 |   * Add :skip_tags option for AutolinkFilter #65 pengwynn
135 |   * Fix CI dependency issues #67 jch
136 |   * Fix ignored test and add Ruby 2.0 to CI. #71, #72 tricknotes
137 | 
138 | ## 0.0.14
139 | 
140 |   * Remove unused can_access_repo? method jch
141 | 
142 | ## 0.0.13
143 | 
144 |   * Update icon class name (only affects TOC pipeline) cameronmcefee #52
145 | 
146 | ## 0.0.12
147 | 
148 |   * add additional payload information for instrumentation mtodd #46
149 |   * generate and link to gem docs in README
150 | 
151 | ## 0.0.11
152 | 
153 |   * add instrumentation support. readme cleanup mtodd #45
154 | 
155 | ## 0.0.10
156 | 
157 |   * add bin/html-pipeline util indirect #44
158 |   * add result[:mentioned_usernames] for MentionFilter fachen #42
159 | 
160 | ## 0.0.9
161 | 
162 |   * bump escape_utils ~> 0.3, github-linguist ~> 2.6.2 brianmario #41
163 |   * remove nokogiri monkey patch for ruby >= 1.9 defunkt #40
164 | 
165 | ## 0.0.8
166 | 
167 |   * raise LoadError instead of printing to stderr if linguist is missing. gjtorikian #36
168 | 
169 | ## 0.0.7
170 | 
171 |   * optionally require github-linguist chrislloyd #33
172 | 
173 | ## 0.0.6
174 | 
175 |   * don't mutate markdown strings: jakedouglas #32
176 | 
177 | ## 0.0.5
178 | 
179 |   * fix li xss vulnerability in sanitization filter: vmg #31
180 |   * gemspec cleanup: nbibler #23, jbarnette #24
181 |   * doc updates: jch #16, pborreli #17, wickedshimmy #18, benubois #19, blackerby #21
182 |   * loosen gemoji dependency: josh #15
183 | 
184 | ## 0.0.4
185 | 
186 |   * initial public release
187 | 


--------------------------------------------------------------------------------
/test/html/pipeline/mention_filter_test.rb:
--------------------------------------------------------------------------------
  1 | require "test_helper"
  2 | 
# Tests for HTML::Pipeline::MentionFilter: linking of @mentions inside HTML
# and the :mentioned_usernames entry written to the pipeline result.
class HTML::Pipeline::MentionFilterTest < Minitest::Test
  # Helper: run MentionFilter on +html+ with the given context values.
  def filter(html, base_url='/', info_url=nil, username_pattern=nil)
    HTML::Pipeline::MentionFilter.call(html, :base_url => base_url, :info_url => info_url, :username_pattern => username_pattern)
  end

  # A DocumentFragment input is modified in place and returned as-is.
  def test_filtering_a_documentfragment
    body = "<p>@kneath: check it out.</p>"
    doc  = Nokogiri::HTML::DocumentFragment.parse(body)

    res  = filter(doc, '/')
    assert_same doc, res

    link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
    assert_equal "<p>#{link}: check it out.</p>",
      res.to_html
  end

  def test_filtering_plain_text
    body = "<p>@kneath: check it out.</p>"
    res  = filter(body, '/')

    link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
    assert_equal "<p>#{link}: check it out.</p>",
      res.to_html
  end

  # Mentions inside <pre>, <code>, <style> and <a> must be left untouched.
  def test_not_replacing_mentions_in_pre_tags
    body = "<pre>@kneath: okay</pre>"
    assert_equal body, filter(body).to_html
  end

  def test_not_replacing_mentions_in_code_tags
    body = "<p><code>@kneath:</code> okay</p>"
    assert_equal body, filter(body).to_html
  end

  def test_not_replacing_mentions_in_style_tags
    body = "<style>@media (min-width: 768px) { color: red; }</style>"
    assert_equal body, filter(body).to_html
  end

  def test_not_replacing_mentions_in_links
    body = "<p><a>@kneath</a> okay</p>"
    assert_equal body, filter(body).to_html
  end

  # The username is written with a character entity (&#x6b; is "k").
  def test_entity_encoding_and_whatnot
    body = "<p>@&#x6b;neath what's up</p>"
    link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
    assert_equal "<p>#{link} what's up</p>", filter(body, '/').to_html
  end

  # Escaped markup next to a mention must stay escaped in the output.
  def test_html_injection
    body = "<p>@kneath &lt;script>alert(0)&lt;/script></p>"
    link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
    assert_equal "<p>#{link} &lt;script&gt;alert(0)&lt;/script&gt;</p>",
      filter(body, '/').to_html
  end

  # The literal text "@mention" is only linked when an info_url is supplied.
  def test_links_to_nothing_when_no_info_url_given
    body = "<p>How do I @mention someone?</p>"
    assert_equal "<p>How do I @mention someone?</p>",
      filter(body, '/').to_html
  end

  def test_links_to_more_info_when_info_url_given
    body = "<p>How do I @mention someone?</p>"
    link = "<a href=\"https://github.com/blog/821\" class=\"user-mention\">@mention</a>"
    assert_equal "<p>How do I #{link} someone?</p>",
      filter(body, '/', 'https://github.com/blog/821').to_html
  end

  # The base_url variants below check how the link href is assembled.
  def test_base_url_slash
    body = "<p>Hi, @jch!</p>"
    link = "<a href=\"/jch\" class=\"user-mention\">@jch</a>"
    assert_equal "<p>Hi, #{link}!</p>",
      filter(body, '/').to_html
  end

  def test_base_url_under_custom_route
    body = "<p>Hi, @jch!</p>"
    link = "<a href=\"/userprofile/jch\" class=\"user-mention\">@jch</a>"
    assert_equal "<p>Hi, #{link}!</p>",
      filter(body, '/userprofile').to_html
  end

  def test_base_url_slash_with_tilde
    body = "<p>Hi, @jch!</p>"
    link = "<a href=\"/~jch\" class=\"user-mention\">@jch</a>"
    assert_equal "<p>Hi, #{link}!</p>",
      filter(body, '/~').to_html
  end

  # Pipeline used by the username-extraction tests below: Markdown rendering
  # followed by the mention filter.
  MarkdownPipeline =
    HTML::Pipeline.new [
      HTML::Pipeline::MarkdownFilter,
      HTML::Pipeline::MentionFilter
    ]

  # Helper: run @body through MarkdownPipeline and return the
  # :mentioned_usernames entry of the result hash.
  def mentioned_usernames
    result = {}
    MarkdownPipeline.call(@body, {}, result)
    result[:mentioned_usernames]
  end

  def test_matches_usernames_in_body
    @body = "@test how are you?"
    assert_equal %w[test], mentioned_usernames
  end

  def test_matches_usernames_with_dashes
    @body = "hi @some-user"
    assert_equal %w[some-user], mentioned_usernames
  end

  def test_matches_usernames_followed_by_a_single_dot
    @body = "okay @some-user."
    assert_equal %w[some-user], mentioned_usernames
  end

  def test_matches_usernames_followed_by_multiple_dots
    @body = "okay @some-user..."
    assert_equal %w[some-user], mentioned_usernames
  end

  def test_does_not_match_email_addresses
    @body = "aman@tmm1.net"
    assert_equal [], mentioned_usernames
  end

  def test_does_not_match_domain_name_looking_things
    @body = "we need a @github.com email"
    assert_equal [], mentioned_usernames
  end

  def test_does_not_match_organization_team_mentions
    @body = "we need to @github/enterprise know"
    assert_equal [], mentioned_usernames
  end

  def test_matches_colon_suffixed_names
    @body = "@tmm1: what do you think?"
    assert_equal %w[tmm1], mentioned_usernames
  end

  def test_matches_list_of_names
    @body = "@defunkt @atmos @kneath"
    assert_equal %w[defunkt atmos kneath], mentioned_usernames
  end

  def test_matches_list_of_names_with_commas
    @body = "/cc @defunkt, @atmos, @kneath"
    assert_equal %w[defunkt atmos kneath], mentioned_usernames
  end

  def test_matches_inside_brackets
    @body = "(@mislav) and [@rtomayko]"
    assert_equal %w[mislav rtomayko], mentioned_usernames
  end

  def test_doesnt_ignore_invalid_users
    @body = "@defunkt @mojombo and @somedude"
    assert_equal ['defunkt', 'mojombo', 'somedude'], mentioned_usernames
  end

  # Repeated mentions are reported once, in order of first appearance.
  def test_returns_distinct_set
    @body = "/cc @defunkt, @atmos, @kneath, @defunkt, @defunkt"
    assert_equal %w[defunkt atmos kneath], mentioned_usernames
  end

  def test_does_not_match_inline_code_block_with_multiple_code_blocks
    @body = "something\n\n`/cc @defunkt @atmos @kneath` `/cc @atmos/atmos`"
    assert_equal %w[], mentioned_usernames
  end

  def test_mention_at_end_of_parenthetical_sentence
    @body = "(We're talking 'bout @ymendel.)"
    assert_equal %w[ymendel], mentioned_usernames
  end

  def test_username_pattern_can_be_customized
    body = "<p>@_abc: test.</p>"
    doc  = Nokogiri::HTML::DocumentFragment.parse(body)

    res  = filter(doc, '/', nil, /(_[a-z]{3})/)

    link = "<a href=\"/_abc\" class=\"user-mention\">@_abc</a>"
    assert_equal "<p>#{link}: test.</p>",
      res.to_html
  end

  # MentionPatterns must keep its size across runs with the default username
  # pattern and grow by exactly one entry for a new custom pattern.
  def test_filter_does_not_create_a_new_object_for_default_username_pattern
    body = "<div>@test</div>"
    doc = Nokogiri::HTML::DocumentFragment.parse(body)

    filter(doc.clone, '/', nil)
    pattern_count = HTML::Pipeline::MentionFilter::MentionPatterns.length
    filter(doc.clone, '/', nil)

    assert_equal pattern_count, HTML::Pipeline::MentionFilter::MentionPatterns.length
    filter(doc.clone, '/', nil, /test/)
    assert_equal pattern_count + 1, HTML::Pipeline::MentionFilter::MentionPatterns.length
  end

  # mention_link_filter called with only the text argument.
  def test_mention_link_filter
    filter = HTML::Pipeline::MentionFilter.new nil
    expected = "<a href='/hubot' class='user-mention'>@hubot</a>"
    assert_equal expected, filter.mention_link_filter("@hubot")
  end
end
213 | 


--------------------------------------------------------------------------------
/lib/html/pipeline.rb:
--------------------------------------------------------------------------------
  1 | require "nokogiri"
  2 | require "active_support/xml_mini/nokogiri" # convert Documents to hashes
  3 | 
  4 | module HTML
  5 |   # GitHub HTML processing filters and utilities. This module includes a small
  6 |   # framework for defining DOM based content filters and applying them to user
  7 |   # provided content.
  8 |   #
  9 |   # See HTML::Pipeline::Filter for information on building filters.
 10 |   #
 11 |   # Construct a Pipeline for running multiple HTML filters.  A pipeline is created once
 12 |   # with one to many filters, and it then can be `call`ed many times over the course
 13 |   # of its lifetime with input.
 14 |   #
 15 |   # filters         - Array of Filter objects. Each must respond to call(doc,
 16 |   #                   context) and return the modified DocumentFragment or a
 17 |   #                   String containing HTML markup. Filters are performed in the
 18 |   #                   order provided.
 19 |   # default_context - The default context hash. Values specified here will be merged
  20 |   #                   into values from each individual pipeline run.  Can NOT be
 21 |   #                   nil.  Default: empty Hash.
 22 |   # result_class    - The default Class of the result object for individual
 23 |   #                   calls.  Default: Hash.  Protip:  Pass in a Struct to get
 24 |   #                   some semblance of type safety.
 25 |   class Pipeline
 26 |     autoload :VERSION,               'html/pipeline/version'
 27 |     autoload :Filter,                'html/pipeline/filter'
 28 |     autoload :AbsoluteSourceFilter,  'html/pipeline/absolute_source_filter'
 29 |     autoload :BodyContent,           'html/pipeline/body_content'
 30 |     autoload :AutolinkFilter,        'html/pipeline/autolink_filter'
 31 |     autoload :CamoFilter,            'html/pipeline/camo_filter'
 32 |     autoload :EmailReplyFilter,      'html/pipeline/email_reply_filter'
 33 |     autoload :EmojiFilter,           'html/pipeline/emoji_filter'
 34 |     autoload :HttpsFilter,           'html/pipeline/https_filter'
 35 |     autoload :ImageFilter,           'html/pipeline/image_filter'
 36 |     autoload :ImageMaxWidthFilter,   'html/pipeline/image_max_width_filter'
 37 |     autoload :MarkdownFilter,        'html/pipeline/markdown_filter'
 38 |     autoload :MentionFilter,         'html/pipeline/@mention_filter'
 39 |     autoload :PlainTextInputFilter,  'html/pipeline/plain_text_input_filter'
 40 |     autoload :SanitizationFilter,    'html/pipeline/sanitization_filter'
 41 |     autoload :SyntaxHighlightFilter, 'html/pipeline/syntax_highlight_filter'
 42 |     autoload :TextileFilter,         'html/pipeline/textile_filter'
 43 |     autoload :TableOfContentsFilter, 'html/pipeline/toc_filter'
 44 |     autoload :TextFilter,            'html/pipeline/text_filter'
 45 | 
 46 |     # Our DOM implementation.
 47 |     DocumentFragment = Nokogiri::HTML::DocumentFragment
 48 | 
 49 |     # Parse a String into a DocumentFragment object. When a DocumentFragment is
 50 |     # provided, return it verbatim.
 51 |     def self.parse(document_or_html)
 52 |       document_or_html ||= ''
 53 |       if document_or_html.is_a?(String)
 54 |         DocumentFragment.parse(document_or_html)
 55 |       else
 56 |         document_or_html
 57 |       end
 58 |     end
 59 | 
 60 |     # Public: Returns an Array of Filter objects for this Pipeline.
 61 |     attr_reader :filters
 62 | 
 63 |     # Public: Instrumentation service for the pipeline.
 64 |     # Set an ActiveSupport::Notifications compatible object to enable.
 65 |     attr_accessor :instrumentation_service
 66 | 
 67 |     # Public: String name for this Pipeline. Defaults to Class name.
 68 |     attr_writer :instrumentation_name
 69 |     def instrumentation_name
 70 |       @instrumentation_name || self.class.name
 71 |     end
 72 | 
 73 |     class << self
 74 |       # Public: Default instrumentation service for new pipeline objects.
 75 |       attr_accessor :default_instrumentation_service
 76 |     end
 77 | 
    # Build a pipeline. The arguments are described in the class
    # documentation.
    #
    # Raises ArgumentError if default_context is nil.
    def initialize(filters, default_context = {}, result_class = nil)
      raise ArgumentError, "default_context cannot be nil" if default_context.nil?
      # Nested arrays of filters are flattened into a single frozen list.
      @filters = filters.flatten.freeze
      # freeze acts on the object that was passed in, so the caller's own
      # hash becomes frozen as well.
      @default_context = default_context.freeze
      @result_class = result_class || Hash
      @instrumentation_service = self.class.default_instrumentation_service
    end
 85 | 
 86 |     # Apply all filters in the pipeline to the given HTML.
 87 |     #
 88 |     # html    - A String containing HTML or a DocumentFragment object.
 89 |     # context - The context hash passed to each filter. See the Filter docs
 90 |     #           for more info on possible values. This object MUST NOT be modified
 91 |     #           in place by filters.  Use the Result for passing state back.
 92 |     # result  - The result Hash passed to each filter for modification.  This
 93 |     #           is where Filters store extracted information from the content.
 94 |     #
 95 |     # Returns the result Hash after being filtered by this Pipeline.  Contains an
 96 |     # :output key with the DocumentFragment or String HTML markup based on the
 97 |     # output of the last filter in the pipeline.
 98 |     def call(html, context = {}, result = nil)
 99 |       context = @default_context.merge(context)
100 |       context = context.freeze
101 |       result ||= @result_class.new
102 |       payload = default_payload :filters => @filters.map(&:name),
103 |         :context => context, :result => result
104 |       instrument "call_pipeline.html_pipeline", payload do
105 |         result[:output] =
106 |           @filters.inject(html) do |doc, filter|
107 |             perform_filter(filter, doc, context, result)
108 |           end
109 |       end
110 |       result
111 |     end
112 | 
113 |     # Internal: Applies a specific filter to the supplied doc.
114 |     #
115 |     # The filter is instrumented.
116 |     #
117 |     # Returns the result of the filter.
118 |     def perform_filter(filter, doc, context, result)
119 |       payload = default_payload :filter => filter.name,
120 |         :context => context, :result => result
121 |       instrument "call_filter.html_pipeline", payload do
122 |         filter.call(doc, context, result)
123 |       end
124 |     end
125 | 
126 |     # Like call but guarantee the value returned is a DocumentFragment.
127 |     # Pipelines may return a DocumentFragment or a String. Callers that need a
128 |     # DocumentFragment should use this method.
129 |     def to_document(input, context = {}, result = nil)
130 |       result = call(input, context, result)
131 |       HTML::Pipeline.parse(result[:output])
132 |     end
133 | 
134 |     # Like call but guarantee the value returned is a string of HTML markup.
135 |     def to_html(input, context = {}, result = nil)
136 |       result = call(input, context, result = nil)
137 |       output = result[:output]
138 |       if output.respond_to?(:to_html)
139 |         output.to_html
140 |       else
141 |         output.to_s
142 |       end
143 |     end
144 | 
    # Public: setup instrumentation for this pipeline.
    #
    # name    - Name assigned to instrumentation_name. nil (the default)
    #           clears any custom name, so instrumentation_name falls back to
    #           the class name.
    # service - Instrumentation service to use. When nil, the class-level
    #           default_instrumentation_service is used instead.
    #
    # Returns nothing.
    def setup_instrumentation(name = nil, service = nil)
      self.instrumentation_name = name
      self.instrumentation_service =
        service || self.class.default_instrumentation_service
    end
153 | 
154 |     # Internal: if the `instrumentation_service` object is set, instruments the
155 |     # block, otherwise the block is ran without instrumentation.
156 |     #
157 |     # Returns the result of the provided block.
158 |     def instrument(event, payload = nil)
159 |       payload ||= default_payload
160 |       return yield(payload) unless instrumentation_service
161 |       instrumentation_service.instrument event, payload do |payload|
162 |         yield payload
163 |       end
164 |     end
165 | 
166 |     # Internal: Default payload for instrumentation.
167 |     #
168 |     # Accepts a Hash of additional payload data to be merged.
169 |     #
170 |     # Returns a Hash.
171 |     def default_payload(payload = {})
172 |       {:pipeline => instrumentation_name}.merge(payload)
173 |     end
174 |   end
175 | end
176 | 
# XXX nokogiri monkey patches for 1.8
#
# Only applied when String lacks force_encoding.
unless ''.respond_to?(:force_encoding)
  class Nokogiri::XML::Node
    # Work around an issue with utf-8 encoded data being erroneously converted
    # to some other encoding when replacing text nodes. See 'utf-8 output 2'
    # in user_content_test.rb for details.
    #
    # String-like replacements are first parsed inside a temporary <div> and
    # the resulting child nodes are passed to the original replace.
    def replace_with_encoding_fix(replacement)
      if replacement.respond_to?(:to_str)
        wrapper = document.fragment("<div>#{replacement}</div>").children.first
        replacement = wrapper.children
      end
      replace_without_encoding_fix(replacement)
    end

    alias_method :replace_without_encoding_fix, :replace
    alias_method :replace, :replace_with_encoding_fix

    # Replace this node and return self.
    def swap(replacement)
      replace(replacement)
      self
    end
  end
end
199 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # HTML::Pipeline [![Build Status](https://travis-ci.org/jch/html-pipeline.svg?branch=master)](https://travis-ci.org/jch/html-pipeline)
  2 | 
  3 | GitHub HTML processing filters and utilities. This module includes a small
  4 | framework for defining DOM based content filters and applying them to user
  5 | provided content. Read an introduction about this project in
  6 | [this blog post](https://github.com/blog/1311-html-pipeline-chainable-content-filters).
  7 | 
  8 | - [Installation](#installation)
  9 | - [Usage](#usage)
 10 |   - [Examples](#examples)
 11 | - [Filters](#filters)
 12 | - [Dependencies](#dependencies)
 13 | - [Documentation](#documentation)
 14 | - [Extending](#extending)
 15 |   - [3rd Party Extensions](#3rd-party-extensions)
 16 | - [Instrumenting](#instrumenting)
 17 | - [Contributing](#contributing)
 18 |   - [Contributors](#contributors)
 19 |   - [Releasing A New Version](#releasing-a-new-version)
 20 | 
 21 | ## Installation
 22 | 
 23 | Add this line to your application's Gemfile:
 24 | 
 25 | ```ruby
 26 | gem 'html-pipeline'
 27 | ```
 28 | 
 29 | And then execute:
 30 | 
 31 | ```sh
 32 | $ bundle
 33 | ```
 34 | 
 35 | Or install it yourself as:
 36 | 
 37 | ```sh
 38 | $ gem install html-pipeline
 39 | ```
 40 | 
 41 | ## Usage
 42 | 
 43 | This library provides a handful of chainable HTML filters to transform user
 44 | content into markup. A filter takes an HTML string or
 45 | `Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then
 46 | outputs the result.
 47 | 
 48 | For example, to transform Markdown source into Markdown HTML:
 49 | 
 50 | ```ruby
 51 | require 'html/pipeline'
 52 | 
 53 | filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!")
 54 | filter.call
 55 | ```
 56 | 
 57 | Filters can be combined into a pipeline which causes each filter to hand its
 58 | output to the next filter's input. So if you wanted to have content be
 59 | filtered through Markdown and be syntax highlighted, you can create the
 60 | following pipeline:
 61 | 
 62 | ```ruby
 63 | pipeline = HTML::Pipeline.new [
 64 |   HTML::Pipeline::MarkdownFilter,
 65 |   HTML::Pipeline::SyntaxHighlightFilter
 66 | ]
 67 | result = pipeline.call <<-CODE
 68 | This is *great*:
 69 | 
 70 |     some_code(:first)
 71 | 
 72 | CODE
 73 | result[:output].to_s
 74 | ```
 75 | 
 76 | Prints:
 77 | 
 78 | ```html
 79 | <p>This is <em>great</em>:</p>
 80 | 
 81 | <pre><code>some_code(:first)
 82 | </code></pre>
 83 | ```
 84 | 
 85 | To generate CSS for HTML formatted code, use the [pygments.rb](https://github.com/tmm1/pygments.rb#usage) `#css` method. `pygments.rb` is a dependency of the `SyntaxHighlightFilter`.
 86 | 
 87 | Some filters take an optional **context** and/or **result** hash. These are
 88 | used to pass around arguments and metadata between filters in a pipeline. For
 89 | example, if you don't want to use GitHub formatted Markdown, you can pass an
 90 | option in the context hash:
 91 | 
 92 | ```ruby
 93 | filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false)
 94 | filter.call
 95 | ```
 96 | 
 97 | ### Examples
 98 | 
 99 | We define different pipelines for different parts of our app. Here are a few
100 | paraphrased snippets to get you started:
101 | 
102 | ```ruby
103 | # The context hash is how you pass options between different filters.
104 | # See individual filter source for explanation of options.
105 | context = {
106 |   :asset_root => "http://your-domain.com/where/your/images/live/icons",
107 |   :base_url   => "http://your-domain.com"
108 | }
109 | 
110 | # Pipeline providing sanitization and image hijacking but no mention
111 | # related features.
112 | SimplePipeline = Pipeline.new [
113 |   SanitizationFilter,
114 |   TableOfContentsFilter, # add 'name' anchors to all headers and generate toc list
115 |   CamoFilter,
116 |   ImageMaxWidthFilter,
117 |   SyntaxHighlightFilter,
118 |   EmojiFilter,
119 |   AutolinkFilter
120 | ], context
121 | 
122 | # Pipeline used for user provided content on the web
123 | MarkdownPipeline = Pipeline.new [
124 |   MarkdownFilter,
125 |   SanitizationFilter,
126 |   CamoFilter,
127 |   ImageMaxWidthFilter,
128 |   HttpsFilter,
129 |   MentionFilter,
130 |   EmojiFilter,
131 |   SyntaxHighlightFilter
132 | ], context.merge(:gfm => true) # enable github formatted markdown
133 | 
134 | 
135 | # Define a pipeline based on another pipeline's filters
136 | NonGFMMarkdownPipeline = Pipeline.new(MarkdownPipeline.filters,
137 |   context.merge(:gfm => false))
138 | 
139 | # Pipelines aren't limited to the web. You can use them for email
140 | # processing also.
141 | HtmlEmailPipeline = Pipeline.new [
142 |   PlainTextInputFilter,
143 |   ImageMaxWidthFilter
144 | ], {}
145 | 
146 | # Just emoji.
147 | EmojiPipeline = Pipeline.new [
148 |   PlainTextInputFilter,
149 |   EmojiFilter
150 | ], context
151 | ```
152 | 
153 | ## Filters
154 | 
155 | * `MentionFilter` - replace `@user` mentions with links
156 | * `AbsoluteSourceFilter` - replace relative image urls with fully qualified versions
157 | * `AutolinkFilter` - auto_linking urls in HTML
158 | * `CamoFilter` - replace http image urls with [camo-fied](https://github.com/atmos/camo) https versions
159 | * `EmailReplyFilter` - util filter for working with emails
160 | * `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
161 | * `HttpsFilter` - HTML Filter for replacing http github urls with https versions.
162 | * `ImageMaxWidthFilter` - link to full size image for large images
163 | * `MarkdownFilter` - convert markdown to html
164 | * `PlainTextInputFilter` - html escape text and wrap the result in a div
165 | * `SanitizationFilter` - whitelist sanitize user markup
166 | * `SyntaxHighlightFilter` - [code syntax highlighter](#dependencies)
167 | * `TextileFilter` - convert textile to html
168 | * `TableOfContentsFilter` - anchor headings with name attributes and generate Table of Contents html unordered list linking headings
169 | 
170 | ## Dependencies
171 | 
172 | Filter gem dependencies are not bundled; you must bundle each filter's gem
173 | dependencies yourself. The list below details the filters that have dependencies.
174 | `SyntaxHighlightFilter`, for instance, uses [github-linguist](https://github.com/github/linguist)
175 | to detect and highlight languages. To use the `SyntaxHighlightFilter`,
176 | add the following to your Gemfile:
177 | 
178 | ```ruby
179 | gem 'github-linguist'
180 | ```
181 | 
182 | * `AutolinkFilter` - `rinku`
183 | * `EmailReplyFilter` - `escape_utils`, `email_reply_parser`
184 | * `EmojiFilter` - `gemoji`
185 | * `MarkdownFilter` - `github-markdown`
186 | * `PlainTextInputFilter` - `escape_utils`
187 | * `SanitizationFilter` - `sanitize`
188 | * `SyntaxHighlightFilter` - `github-linguist`
189 | * `TextileFilter` - `RedCloth`
190 | 
191 | _Note:_ See [Gemfile](/Gemfile) `:test` block for version requirements.
192 | 
193 | ## Documentation
194 | 
195 | Full reference documentation can be [found here](http://rubydoc.info/gems/html-pipeline/frames).
196 | 
197 | ## Extending
198 | To write a custom filter, you need a class with a `call` method that inherits
199 | from `HTML::Pipeline::Filter`.
200 | 
201 | For example this filter adds a base url to images that are root relative:
202 | 
203 | ```ruby
204 | require 'uri'
205 | 
206 | class RootRelativeFilter < HTML::Pipeline::Filter
207 | 
208 |   def call
209 |     doc.search("img").each do |img|
210 |       next if img['src'].nil?
211 |       src = img['src'].strip
212 |       if src.start_with? '/'
213 |         img["src"] = URI.join(context[:base_url], src).to_s
214 |       end
215 |     end
216 |     doc
217 |   end
218 | 
219 | end
220 | ```
221 | 
222 | Now this filter can be used in a pipeline:
223 | 
224 | ```ruby
225 | Pipeline.new [ RootRelativeFilter ], { :base_url => 'http://somehost.com' }
226 | ```
227 | 
228 | ### 3rd Party Extensions
229 | 
230 | If you have an idea for a filter, propose it as
231 | [an issue](https://github.com/jch/html-pipeline/issues) first. This allows us to discuss
232 | whether the filter is a common enough use case to belong in this gem, or should be
233 | built as an external gem.
234 | 
235 | Here are some extensions people have built:
236 | 
237 | * [html-pipeline-asciidoc_filter](https://github.com/asciidoctor/html-pipeline-asciidoc_filter)
238 | * [jekyll-html-pipeline](https://github.com/gjtorikian/jekyll-html-pipeline)
239 | * [nanoc-html-pipeline](https://github.com/burnto/nanoc-html-pipeline)
240 | * [html-pipeline-bitly](https://github.com/dewski/html-pipeline-bitly)
241 | * [html-pipeline-cite](https://github.com/lifted-studios/html-pipeline-cite)
242 | * [tilt-html-pipeline](https://github.com/bradgessler/tilt-html-pipeline)
243 | * [html-pipeline-wiki-link](https://github.com/lifted-studios/html-pipeline-wiki-link) - WikiMedia-style wiki links
244 | * [task_list](https://github.com/github/task_list) - GitHub Flavored Markdown Task List
245 | * [html-pipeline-rouge_filter](https://github.com/JuanitoFatas/html-pipeline-rouge_filter) - Syntax highlight with [Rouge](https://github.com/jneen/rouge/)
246 | * [html-pipeline-nico_link](https://github.com/rutan/html-pipeline-nico_link) - An HTML::Pipeline filter for [niconico](http://www.nicovideo.jp) description links
247 | * [html-pipeline-gitlab](https://gitlab.com/gitlab-org/html-pipeline-gitlab) - This gem implements various filters for html-pipeline used by GitLab
248 | * [html-pipeline-youtube](https://github.com/st0012/html-pipeline-youtube) - An HTML::Pipeline filter for YouTube links
249 | * [html-pipeline-flickr](https://github.com/st0012/html-pipeline-flickr) - An HTML::Pipeline filter for Flickr links
250 | * [html-pipeline-vimeo](https://github.com/dlackty/html-pipeline-vimeo) - An HTML::Pipeline filter for Vimeo links
251 | * [html-pipeline-hashtag](https://github.com/mr-dxdy/html-pipeline-hashtag) - An HTML::Pipeline filter for hashtags
252 | * [html-pipeline-linkify_github](https://github.com/jollygoodcode/html-pipeline-linkify_github) - An HTML::Pipeline filter to autolink GitHub urls
253 | 
254 | ## Instrumenting
255 | 
256 | Filters and Pipelines can be set up to be instrumented when called. The pipeline
257 | must be set up with an
258 | [ActiveSupport::Notifications](http://api.rubyonrails.org/classes/ActiveSupport/Notifications.html)
259 | compatible service object and a name. New pipeline objects will default to the
260 | `HTML::Pipeline.default_instrumentation_service` object.
261 | 
262 | ``` ruby
263 | # the AS::Notifications-compatible service object
264 | service = ActiveSupport::Notifications
265 | 
266 | # instrument a specific pipeline
267 | pipeline = HTML::Pipeline.new [MarkdownFilter], context
268 | pipeline.setup_instrumentation "MarkdownPipeline", service
269 | 
270 | # or set default instrumentation service for all new pipelines
271 | HTML::Pipeline.default_instrumentation_service = service
272 | pipeline = HTML::Pipeline.new [MarkdownFilter], context
273 | pipeline.setup_instrumentation "MarkdownPipeline"
274 | ```
275 | 
276 | Filters are instrumented when they are run through the pipeline. A
277 | `call_filter.html_pipeline` event is published once the filter finishes. The
278 | `payload` should include the `filter` name. Each filter will trigger its own
279 | instrumentation call.
280 | 
281 | ``` ruby
282 | service.subscribe "call_filter.html_pipeline" do |event, start, ending, transaction_id, payload|
283 |   payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation`
284 |   payload[:filter] #=> "MarkdownFilter"
285 |   payload[:context] #=> context Hash
286 |   payload[:result] #=> instance of result class
287 |   payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment
288 | end
289 | ```
290 | 
291 | The full pipeline is also instrumented:
292 | 
293 | ``` ruby
294 | service.subscribe "call_pipeline.html_pipeline" do |event, start, ending, transaction_id, payload|
295 |   payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation`
296 |   payload[:filters] #=> ["MarkdownFilter"]
297 |   payload[:doc] #=> HTML String or Nokogiri::DocumentFragment
298 |   payload[:context] #=> context Hash
299 |   payload[:result] #=> instance of result class
300 |   payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment
301 | end
302 | ```
303 | 
304 | ## FAQ
305 | 
306 | ### 1. Why doesn't my pipeline work when there's no root element in the document?
307 | 
308 | To make a pipeline work on a plain text document, put the `PlainTextInputFilter`
309 | at the beginning of your pipeline. This will wrap the content in a `div` so the
310 | filters have a root element to work with. If you're passing in an HTML fragment,
311 | but it doesn't have a root element, you can wrap the content in a `div`
312 | yourself. For example:
313 | 
314 | ```ruby
315 | EmojiPipeline = Pipeline.new [
316 |   PlainTextInputFilter,  # <- Wraps input in a div and escapes html tags
317 |   EmojiFilter
318 | ], context
319 | 
320 | plain_text = "Gutentag! :wave:"
321 | EmojiPipeline.call(plain_text)
322 | 
323 | html_fragment = "This is outside of an html element, but <strong>this isn't. :+1:</strong>"
324 | EmojiPipeline.call("<div>#{html_fragment}</div>") # <- Wrap your own html fragments to avoid escaping
325 | ```
326 | 
327 | ### 2. How do I customize a whitelist for `SanitizationFilter`s?
328 | 
329 | `SanitizationFilter::WHITELIST` is the default whitelist used if no `:whitelist`
330 | argument is given in the context. The default is a good starting template for
331 | you to add additional elements. You can either modify the constant's value, or
332 | re-define your own constant and pass that in via the context.
333 | 
334 | ## Contributing
335 | 
336 | Please review the [Contributing Guide](https://github.com/jch/html-pipeline/blob/master/CONTRIBUTING.md).
337 | 
338 | 1. [Fork it](https://help.github.com/articles/fork-a-repo)
339 | 2. Create your feature branch (`git checkout -b my-new-feature`)
340 | 3. Commit your changes (`git commit -am 'Added some feature'`)
341 | 4. Push to the branch (`git push origin my-new-feature`)
342 | 5. Create new [Pull Request](https://help.github.com/articles/using-pull-requests)
343 | 
344 | To see what has changed in recent versions, see the [CHANGELOG](https://github.com/jch/html-pipeline/blob/master/CHANGELOG.md).
345 | 
346 | ### Contributors
347 | 
348 | Thanks to all of [these contributors](https://github.com/jch/html-pipeline/graphs/contributors).
349 | 
350 | Project is a member of the [OSS Manifesto](http://ossmanifesto.org/).
351 | 
352 | ### Releasing A New Version
353 | 
354 | This section is for gem maintainers to cut a new version of the gem.
355 | 
356 | * create a new branch named `release-x.y.z` where `x.y.z` follows [semver](http://semver.org)
357 | * update `lib/html/pipeline/version.rb` to the next version number `x.y.z`
358 | * update CHANGELOG.md. Prepare a draft with `script/changelog`
359 | * push branch and create a new pull request
360 | * after tests are green, merge to master
361 | * on the master branch, run `script/release`
362 | 


--------------------------------------------------------------------------------