tag that causes the full size image to be opened in a
5 | # new tab.
6 | #
7 | # The max-width inline styles are especially useful in HTML emails, which
8 | # don't use global stylesheets.
class ImageMaxWidthFilter < Filter
  # Adds a max-width inline style to every eligible <img> and wraps images
  # that are not already inside a link in an anchor pointing at their src.
  #
  # Returns the (mutated) document.
  def call
    doc.search('img').each do |image|
      # Images that already carry an inline style are left exactly as they are.
      next if image['style']

      # Never touch (or link to) javascript: sources.
      next if image['src'].to_s.strip =~ /\Ajavascript/i

      image['style'] = "max-width:100%;"
      link_image(image) unless has_ancestor?(image, %w(a))
    end

    doc
  end

  # Replaces the image with an <a target="_blank"> element that links to the
  # image src and contains a copy of the image.
  def link_image(element)
    anchor = doc.document.create_element('a', :href => element['src'], :target => '_blank')
    anchor.add_child(element.dup)
    element.replace(anchor)
  end
end
36 | end
37 | end
--------------------------------------------------------------------------------
/lib/html/pipeline/syntax_highlight_filter.rb:
--------------------------------------------------------------------------------
1 | begin
2 | require "linguist"
3 | rescue LoadError => _
4 | abort "Missing dependency 'github-linguist' for SyntaxHighlightFilter. See README.md for details."
5 | end
6 |
module HTML
  class Pipeline
    # HTML Filter that syntax highlights code blocks wrapped
    # in <pre lang="...">.
    #
    # Context options:
    #   :highlight - language name used for <pre> blocks that have no lang
    #                attribute (converted with to_s).
    class SyntaxHighlightFilter < Filter
      # Replaces every <pre> block for which a lexer can be found with its
      # highlighted markup and tags the replacement with a
      # "highlight-<lang>" class.
      #
      # Returns the (mutated) document.
      def call
        # The fallback language does not depend on the node, so compute it once.
        default = context[:highlight] && context[:highlight].to_s

        doc.search('pre').each do |node|
          lang = node['lang'] || default
          next unless lang

          lexer = lexer_for(lang)
          next unless lexer

          html = highlight_with_timeout_handling(lexer, node.inner_text)
          next if html.nil?

          highlighted = node.replace(html).first
          next unless highlighted

          highlighted["class"] = [highlighted["class"], "highlight-#{lang}"].compact.join(" ")
        end
        doc
      end

      # Highlights text with the given lexer.
      #
      # Returns the highlighted markup, or nil when highlighting timed out.
      def highlight_with_timeout_handling(lexer, text)
        lexer.highlight(text)
      rescue Timeout::Error
        nil
      end

      # Looks up a lexer by language name, preferring the Linguist language's
      # lexer and falling back to a direct Pygments lookup.
      def lexer_for(lang)
        language = Linguist::Language[lang]
        (language && language.lexer) || Pygments::Lexer[lang]
      end
    end
  end
end
44 |
--------------------------------------------------------------------------------
/lib/html/pipeline/body_content.rb:
--------------------------------------------------------------------------------
module HTML
  class Pipeline
    # Public: Runs a String of content through an HTML processing pipeline,
    # providing easy access to a generated DocumentFragment.
    class BodyContent
      # Public: Initialize a BodyContent.
      #
      # body     - A String body.
      # context  - A Hash of context options for the filters.
      # pipeline - A HTML::Pipeline object with one or more Filters.
      def initialize(body, context, pipeline)
        @body = body
        @context = context
        @pipeline = pipeline
      end

      # Public: Gets the memoized result of the body content as it passed through
      # the Pipeline. The pipeline is only invoked on the first call.
      #
      # Returns a Hash, or something similar as defined by @pipeline.result_class.
      def result
        @result ||= @pipeline.call @body, @context
      end

      # Public: Gets the updated body from the Pipeline result.
      #
      # Returns a String or DocumentFragment.
      def output
        @output ||= result[:output]
      end

      # Public: Parses the output into a DocumentFragment.
      #
      # Returns a DocumentFragment.
      def document
        @document ||= HTML::Pipeline.parse output
      end
    end
  end
end
43 |
--------------------------------------------------------------------------------
/test/html/pipeline/autolink_filter_test.rb:
--------------------------------------------------------------------------------
1 | require "test_helper"
2 |
3 | AutolinkFilter = HTML::Pipeline::AutolinkFilter
4 |
# Exercises AutolinkFilter.to_html with no options and with the :autolink,
# :link_attr, :flags and :skip_tags context options.
# NOTE(review): the string literals below span lines and look as if their HTML
# markup was stripped from this copy; confirm against the original file.
5 | class HTML::Pipeline::AutolinkFilterTest < Minitest::Test
6 | def test_uses_rinku_for_autolinking
7 | # just try to parse a complicated piece of HTML
8 | # that Rails auto_link cannot handle
9 | assert_equal '"http://www.github.com"
',
10 | AutolinkFilter.to_html('"http://www.github.com"
')
11 | end
12 |
13 | def test_autolink_option
14 | assert_equal '"http://www.github.com"
',
15 | AutolinkFilter.to_html('"http://www.github.com"
', :autolink => false)
16 | end
17 |
18 | def test_autolink_link_attr
19 | assert_equal '"http://www.github.com"
',
20 | AutolinkFilter.to_html('"http://www.github.com"
', :link_attr => 'target="_blank"')
21 | end
22 |
23 | def test_autolink_flags
24 | assert_equal '"http://github"
',
25 | AutolinkFilter.to_html('"http://github"
', :flags => Rinku::AUTOLINK_SHORT_DOMAINS)
26 | end
27 |
28 | def test_autolink_skip_tags
29 | assert_equal '"http://github.com"',
30 | AutolinkFilter.to_html('"http://github.com"')
31 |
32 | assert_equal '"http://github.com"',
33 | AutolinkFilter.to_html('"http://github.com"', :skip_tags => %w(kbd script))
34 | end
35 | end
36 |
--------------------------------------------------------------------------------
/script/changelog:
--------------------------------------------------------------------------------
#!/bin/bash
# Usage: script/changelog [-r <repo>] [-b <base>] [-h <head>]
#
# repo: base string of GitHub repository url. e.g. "user_or_org/repository". Defaults to git remote url.
# base: git ref to compare from. e.g. "v1.3.1". Defaults to latest git tag.
# head: git ref to compare to. Defaults to "HEAD".
#
# Generate a changelog preview from pull requests merged between `base` and
# `head`.
#
# https://github.com/jch/release-scripts/blob/master/changelog
set -e

# Option parsing: every recognised flag consumes the value that follows it.
# The loop needs at least two remaining arguments (flag + value), so a lone
# trailing argument -- including the "--help" injected below when the script
# is called without arguments -- is ignored and the defaults apply.
[ $# -eq 0 ] && set -- --help
while [[ $# -gt 1 ]]
do
  key="$1"
  case "$key" in
    -r|--repo)
      repo="$2"
      shift
      ;;
    -b|--base)
      base="$2"
      shift
      ;;
    -h|--head)
      head="$2"
      shift
      ;;
    *)
      ;;
  esac
  shift
done

# Fall back to values derived from the local git checkout.
repo="${repo:-$(git remote -v | grep push | awk '{print $2}' | cut -d'/' -f4- | sed 's/\.git//')}"
base="${base:-$(git tag -l | sort -t. -k 1,1n -k 2,2n -k 3,3n | tail -n 1)}"
head="${head:-HEAD}"
api_url="https://api.github.com"

# get merged PR's. Better way is to query the API for these, but this is easier
for pr in $(git log --oneline "$base..$head" | grep "Merge pull request" | awk '{gsub("#",""); print $5}')
do
  # frustrated with trying to pull out the right values, fell back to ruby
  curl -s "$api_url/repos/$repo/pulls/$pr" | ruby -rjson -e 'pr=JSON.parse(STDIN.read); puts "* #{pr[%q(title)]} [##{pr[%q(number)]}](#{pr[%q(html_url)]})"'
done
48 |
--------------------------------------------------------------------------------
/lib/html/pipeline/absolute_source_filter.rb:
--------------------------------------------------------------------------------
1 | require 'uri'
2 |
module HTML
  class Pipeline

    # HTML Filter for replacing relative and root relative image URLs with
    # fully qualified URLs
    #
    # This is useful if an image is root relative but should really be going
    # through a cdn, or if the content for the page assumes the host is known
    # i.e. scraped webpages and some RSS feeds.
    #
    # Context options:
    #   :image_base_url - Base URL for image host for root relative src.
    #   :image_subpage_url - For relative src.
    #
    # This filter does not write additional information to the context.
    # This filter would need to be run before CamoFilter.
    class AbsoluteSourceFilter < Filter
      # Rewrites the src of every <img> whose src does not start with "http".
      #
      # Returns the (mutated) document.
      # Raises RuntimeError when the context option needed for a src is missing.
      def call
        doc.search("img").each do |element|
          src = element['src'].to_s.strip
          # Check emptiness after stripping so that a whitespace-only src is
          # skipped instead of being replaced by the subpage URL itself.
          next if src.empty?
          next if src.start_with?('http')

          base = src.start_with?('/') ? image_base_url : image_subpage_url
          element["src"] = URI.join(base, src).to_s
        end
        doc
      end

      # Private: the base url you want to use
      def image_base_url
        context[:image_base_url] || raise("Missing context :image_base_url for #{self.class.name}")
      end

      # Private: the relative url you want to use
      def image_subpage_url
        context[:image_subpage_url] || raise("Missing context :image_subpage_url for #{self.class.name}")
      end

    end
  end
end
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | Thanks for using and improving `HTML::Pipeline`!
4 |
5 | - [Submitting a New Issue](#submitting-a-new-issue)
6 | - [Sending a Pull Request](#sending-a-pull-request)
7 |
8 | ## Submitting a New Issue
9 |
10 | If there's an idea you'd like to propose, or a design change, feel free to file a new issue.
11 |
12 | If you have an implementation question or believe you've found a bug, please provide as many details as possible:
13 |
14 | - Input document
15 | - Output HTML document
16 | - the exact `HTML::Pipeline` code you are using
17 | - output of the following from your project
18 |
19 | ```
20 | ruby -v
21 | bundle exec nokogiri -v
22 | ```
23 |
24 | ## Sending a Pull Request
25 |
26 | [Pull requests][pr] are always welcome!
27 |
28 | Check out [the project's issues list][issues] for ideas on what could be improved.
29 |
30 | Before sending, please add tests and ensure the test suite passes.
31 |
32 | ### Running the Tests
33 |
34 | To run the full suite:
35 |
36 | `bundle exec rake`
37 |
38 | To run a specific test file:
39 |
40 | `bundle exec ruby -Itest test/html/pipeline_test.rb`
41 |
42 | To run a specific test:
43 |
44 | `bundle exec ruby -Itest test/html/pipeline/markdown_filter_test.rb -n test_disabling_gfm`
45 |
46 | To run the full suite with all [supported rubies][travisyaml] in bash:
47 |
48 | ```bash
49 | rubies=(ree-1.8.7-2011.03 1.9.2-p290 1.9.3-p429 2.0.0-p247)
50 | for r in ${rubies[*]}
51 | do
52 | rbenv local $r # switch to your version manager of choice
53 | bundle install
54 | bundle exec rake
55 | done
56 | ```
57 |
58 | [issues]: https://github.com/jch/html-pipeline/issues
59 | [pr]: https://help.github.com/articles/using-pull-requests
60 | [travisyaml]: https://github.com/jch/html-pipeline/blob/master/.travis.yml
61 |
--------------------------------------------------------------------------------
/test/html/pipeline/https_filter_test.rb:
--------------------------------------------------------------------------------
1 | require "test_helper"
2 |
3 | HttpsFilter = HTML::Pipeline::HttpsFilter
4 |
# Tests for HttpsFilter. The class has its own name so that it no longer
# reopens HTML::Pipeline::AutolinkFilterTest and adds methods to it.
class HTML::Pipeline::HttpsFilterTest < Minitest::Test
  def setup
    @options = {:base_url => "http://github.com"}
  end

  # Runs the html through HttpsFilter with the options of the current test.
  def filter(html)
    HttpsFilter.to_html(html, @options)
  end

  def test_http
    assert_equal %(github.com),
      filter(%(github.com))
  end

  def test_https
    assert_equal %(github.com),
      filter(%(github.com))
  end

  def test_subdomain
    assert_equal %(github.com),
      filter(%(github.com))
  end

  def test_other
    assert_equal %(github.io),
      filter(%(github.io))
  end

  def test_uses_http_url_over_base_url
    @options = {:http_url => "http://github.com", :base_url => "https://github.com"}

    assert_equal %(github.com),
      filter(%(github.com))
  end

  def test_only_http_url
    @options = {:http_url => "http://github.com"}

    assert_equal %(github.com),
      filter(%(github.com))
  end

  def test_validates_http_url
    @options.clear
    exception = assert_raises(ArgumentError) { filter("") }
    assert_match "HTML::Pipeline::HttpsFilter: :http_url", exception.message
  end
end
54 |
--------------------------------------------------------------------------------
/test/html/pipeline/absolute_source_filter_test.rb:
--------------------------------------------------------------------------------
1 | require "test_helper"
2 |
# Tests AbsoluteSourceFilter.call with :image_base_url and :image_subpage_url
# set, and checks that a RuntimeError naming the filter class is raised when
# the context is empty.
# NOTE(review): the refute_match patterns in test_does_not_rewrite_absolute_urls
# are regexp literals without interpolation, so they match the literal ivar
# names rather than the configured URLs; confirm whether that is intended.
# NOTE(review): the multi-line string literals look as if their HTML markup
# was stripped from this copy; confirm against the original file.
3 | class HTML::Pipeline::AbsoluteSourceFilterTest < Minitest::Test
4 | AbsoluteSourceFilter = HTML::Pipeline::AbsoluteSourceFilter
5 |
6 | def setup
7 | @image_base_url = 'http://assets.example.com'
8 | @image_subpage_url = 'http://blog.example.com/a/post'
9 | @options = {
10 | :image_base_url => @image_base_url,
11 | :image_subpage_url => @image_subpage_url
12 | }
13 | end
14 |
15 | def test_rewrites_root_urls
16 | orig = %(
)
17 | assert_equal "
",
18 | AbsoluteSourceFilter.call(orig, @options).to_s
19 | end
20 |
21 | def test_rewrites_relative_urls
22 | orig = %(
)
23 | assert_equal "
",
24 | AbsoluteSourceFilter.call(orig, @options).to_s
25 | end
26 |
27 | def test_does_not_rewrite_absolute_urls
28 | orig = %(
)
29 | result = AbsoluteSourceFilter.call(orig, @options).to_s
30 | refute_match /@image_base_url/, result
31 | refute_match /@image_subpage_url/, result
32 | end
33 |
34 | def test_fails_when_context_is_missing
35 | assert_raises RuntimeError do
36 | AbsoluteSourceFilter.call("
", {})
37 | end
38 | assert_raises RuntimeError do
39 | AbsoluteSourceFilter.call("
", {})
40 | end
41 | end
42 |
43 | def test_tells_you_where_context_is_required
44 | exception = assert_raises(RuntimeError) {
45 | AbsoluteSourceFilter.call("
", {})
46 | }
47 | assert_match 'HTML::Pipeline::AbsoluteSourceFilter', exception.message
48 |
49 | exception = assert_raises(RuntimeError) {
50 | AbsoluteSourceFilter.call("
", {})
51 | }
52 | assert_match 'HTML::Pipeline::AbsoluteSourceFilter', exception.message
53 | end
54 |
55 | end
56 |
--------------------------------------------------------------------------------
/lib/html/pipeline/toc_filter.rb:
--------------------------------------------------------------------------------
# TableOfContentsFilter#call visits every h1-h6 node, derives an id from the
# downcased header text (characters matching PUNCTUATION_REGEXP removed, spaces
# replaced with dashes), counts repeated ids in a Hash, appends one entry per
# header to result[:toc] and returns doc.
# NOTE(review): the markup inside the %Q{} literals and in the usage example
# appears to have been stripped from this copy, so how id and uniq end up in
# the generated anchors cannot be confirmed from here.
# NOTE(review): RUBY_VERSION > 1.9 is a String comparison; verify it still
# selects the intended regexp on current Ruby versions.
1 | module HTML
2 | class Pipeline
3 | # HTML filter that adds an 'id' attribute to all headers
4 | # in a document, so they can be accessed from a table of contents.
5 | #
6 | # Generates the Table of Contents, with links to each header.
7 | #
8 | # Examples
9 | #
10 | # TocPipeline =
11 | # HTML::Pipeline.new [
12 | # HTML::Pipeline::TableOfContentsFilter
13 | # ]
14 | # # => #
15 | # orig = %(Ice cube
is not for the pop chart
)
16 | # # => "Ice cube
is not for the pop chart
"
17 | # result = {}
18 | # # => {}
19 | # TocPipeline.call(orig, {}, result)
20 | # # => {:toc=> ...}
21 | # result[:toc]
22 | # # => "\n- ...
"
23 | # result[:output].to_s
24 | # # => "\n..."
25 | class TableOfContentsFilter < Filter
26 | PUNCTUATION_REGEXP = RUBY_VERSION > "1.9" ? /[^\p{Word}\- ]/u : /[^\w\- ]/
27 |
28 | def call
29 | result[:toc] = ""
30 |
31 | headers = Hash.new(0)
32 | doc.css('h1, h2, h3, h4, h5, h6').each do |node|
33 | text = node.text
34 | id = text.downcase
35 | id.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
36 | id.gsub!(' ', '-') # replace spaces with dash
37 |
38 | uniq = (headers[id] > 0) ? "-#{headers[id]}" : ''
39 | headers[id] += 1
40 | if header_content = node.children.first
41 | result[:toc] << %Q{- #{text}
\n}
42 | header_content.add_previous_sibling(%Q{})
43 | end
44 | end
45 | result[:toc] = %Q{} unless result[:toc].empty?
46 | doc
47 | end
48 | end
49 | end
50 | end
51 |
51 |
--------------------------------------------------------------------------------
/bin/html-pipeline:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby
# Command line front end for HTML::Pipeline. Filter names given as arguments
# select the pipeline (a GitHub-ish default is used when none are given); the
# document is read through ARGF and the pipeline's :output is printed.
require 'html/pipeline'

require 'optparse'

# Accept "help", too
ARGV.map! { |a| a == "help" ? "--help" : a }

OptionParser.new do |opts|
  opts.banner = <<-HELP.gsub(/^ /, '')
Usage: html-pipeline [-h] [-f]
html-pipeline [FILTER [FILTER [...]]] < file.md
cat file.md | html-pipeline [FILTER [FILTER [...]]]
  HELP

  opts.separator "Options:"

  opts.on("-f", "--filters", "List the available filters") do
    filters = HTML::Pipeline.constants.grep(/\w+Filter$/).
      map { |f| f.to_s.gsub(/Filter$/, '') }

    # Text filter doesn't work, no call method
    filters -= ["Text"]

    abort <<-HELP.gsub(/^ /, '')
Available filters:
#{filters.join("\n ")}
    HELP
  end
end.parse!

# Resolves a filter name such as "Markdown" to HTML::Pipeline::MarkdownFilter.
# Aborts with a hint when the name is unknown (or is the unusable "Text").
def filter_named(name)
  # Text filter doesn't work, no call method
  raise NameError if name == "Text"

  HTML::Pipeline.const_get("#{name}Filter")
rescue NameError
  abort "Unknown filter '#{name}'. List filters with the -f option."
end

# Default to a GitHub-ish pipeline
if ARGV.empty?

  filters = [
    HTML::Pipeline::MarkdownFilter,
    HTML::Pipeline::SanitizationFilter,
    HTML::Pipeline::ImageMaxWidthFilter,
    HTML::Pipeline::EmojiFilter,
    HTML::Pipeline::AutolinkFilter,
    HTML::Pipeline::TableOfContentsFilter,
  ]

  # Add syntax highlighting if linguist is present
  begin
    require 'linguist'
    filters << HTML::Pipeline::SyntaxHighlightFilter
  rescue LoadError
  end

else

  filters = ARGV.map { |name| filter_named(name) }
  # Every argument was a filter name; empty ARGV so that ARGF reads STDIN.
  ARGV.clear

end

context = {
  :asset_root => "/assets",
  :base_url => "/",
  :gfm => true
}

puts HTML::Pipeline.new(filters, context).call(ARGF.read)[:output]
79 |
--------------------------------------------------------------------------------
/test/html/pipeline/image_max_width_filter_test.rb:
--------------------------------------------------------------------------------
1 | require "test_helper"
2 |
# Tests ImageMaxWidthFilter.call on parsed Nokogiri fragments: styling of
# images, existing style attributes, linking of images, images that are
# already linked and images that sit inline in text.
# NOTE(review): the body and expected strings span lines and look as if their
# HTML markup was stripped from this copy; confirm against the original file.
3 | class HTML::Pipeline::ImageMaxWidthFilterTest < Minitest::Test
4 | def filter(html)
5 | HTML::Pipeline::ImageMaxWidthFilter.call(html)
6 | end
7 |
8 | def test_rewrites_image_style_tags
9 | body = "
Screenshot: 
"
10 | doc = Nokogiri::HTML::DocumentFragment.parse(body)
11 |
12 | res = filter(doc)
13 | assert_equal_html %q(Screenshot: 
),
14 | res.to_html
15 | end
16 |
17 | def test_leaves_existing_image_style_tags_alone
18 | body = "
"
19 | doc = Nokogiri::HTML::DocumentFragment.parse(body)
20 |
21 | res = filter(doc)
22 | assert_equal_html '
',
23 | res.to_html
24 | end
25 |
26 | def test_links_to_image
27 | body = "Screenshot: 
"
28 | doc = Nokogiri::HTML::DocumentFragment.parse(body)
29 |
30 | res = filter(doc)
31 | assert_equal_html 'Screenshot: 
',
32 | res.to_html
33 | end
34 |
35 | def test_doesnt_link_to_image_when_already_linked
36 | body = "Screenshot: 
"
37 | doc = Nokogiri::HTML::DocumentFragment.parse(body)
38 |
39 | res = filter(doc)
40 | assert_equal_html %q(Screenshot: 
),
41 | res.to_html
42 | end
43 |
44 | def test_doesnt_screw_up_inlined_images
45 | body = "Screenshot
, yes, this is a screenshot indeed.
"
46 | doc = Nokogiri::HTML::DocumentFragment.parse(body)
47 |
48 | assert_equal_html %q(Screenshot
, yes, this is a screenshot indeed.
), filter(doc).to_html
49 | end
50 | end
51 |
--------------------------------------------------------------------------------
/test/html/pipeline/emoji_filter_test.rb:
--------------------------------------------------------------------------------
1 | require 'test_helper'
2 |
# Tests EmojiFilter: image src generation from :asset_root and :asset_path,
# validation of the required :asset_root context, and the cases where the
# emoji text must be left untouched (including :ignored_ancestor_tags).
# NOTE(review): the input strings look as if their HTML markup was stripped
# from this copy; confirm against the original file.
3 | class HTML::Pipeline::EmojiFilterTest < Minitest::Test
4 | EmojiFilter = HTML::Pipeline::EmojiFilter
5 |
6 | def test_emojify
7 | filter = EmojiFilter.new(":shipit:
", {:asset_root => 'https://foo.com'})
8 | doc = filter.call
9 | assert_match "https://foo.com/emoji/shipit.png", doc.search('img').attr('src').value
10 | end
11 |
12 | def test_uri_encoding
13 | filter = EmojiFilter.new(":+1:
", {:asset_root => 'https://foo.com'})
14 | doc = filter.call
15 | assert_match "https://foo.com/emoji/unicode/1f44d.png", doc.search('img').attr('src').value
16 | end
17 |
18 | def test_required_context_validation
19 | exception = assert_raises(ArgumentError) {
20 | EmojiFilter.call("", {})
21 | }
22 | assert_match /:asset_root/, exception.message
23 | end
24 |
25 | def test_custom_asset_path
26 | filter = EmojiFilter.new(":+1:
", {:asset_path => ':file_name', :asset_root => 'https://foo.com'})
27 | doc = filter.call
28 | assert_match "https://foo.com/unicode/1f44d.png", doc.search('img').attr('src').value
29 | end
30 |
31 | def test_not_emojify_in_code_tags
32 | body = ":shipit:"
33 | filter = EmojiFilter.new(body, {:asset_root => 'https://foo.com'})
34 | doc = filter.call
35 | assert_equal body, doc.to_html
36 | end
37 |
38 | def test_not_emojify_in_tt_tags
39 | body = ":shipit:"
40 | filter = EmojiFilter.new(body, {:asset_root => 'https://foo.com'})
41 | doc = filter.call
42 | assert_equal body, doc.to_html
43 | end
44 |
45 | def test_not_emojify_in_pre_tags
46 | body = "
:shipit:
"
47 | filter = EmojiFilter.new(body, {:asset_root => 'https://foo.com'})
48 | doc = filter.call
49 | assert_equal body, doc.to_html
50 | end
51 |
52 | def test_not_emojify_in_custom_single_tag_foo
53 | body = ":shipit:"
54 | filter = EmojiFilter.new(body, {:asset_root => 'https://foo.com', ignored_ancestor_tags: %w(foo)})
55 | doc = filter.call
56 | assert_equal body, doc.to_html
57 | end
58 |
59 | def test_not_emojify_in_custom_multiple_tags_foo_and_bar
60 | body = ":shipit:"
61 | filter = EmojiFilter.new(body, {:asset_root => 'https://foo.com', ignored_ancestor_tags: %w(foo bar)})
62 | doc = filter.call
63 | assert_equal body, doc.to_html
64 | end
65 | end
66 |
--------------------------------------------------------------------------------
/test/html/pipeline_test.rb:
--------------------------------------------------------------------------------
1 | require "test_helper"
2 | require "helpers/mocked_instrumentation_service"
3 |
# Tests the instrumentation hooks of HTML::Pipeline using a mocked
# instrumentation service and a trivial filter.
class HTML::PipelineTest < Minitest::Test
  Pipeline = HTML::Pipeline

  # Minimal filter used by the tests: returns the reversed input.
  class TestFilter
    def self.call(input, context, result)
      input.reverse
    end
  end

  def setup
    @context = {}
    @result_class = Hash
    @pipeline = Pipeline.new [TestFilter], @context, @result_class
  end

  def test_filter_instrumentation
    service = MockedInstrumentationService.new
    events = service.subscribe "call_filter.html_pipeline"
    @pipeline.instrumentation_service = service

    body = "hello"
    filter(body)

    event, payload = events.pop
    assert event, "event expected"
    assert_equal "call_filter.html_pipeline", event
    assert_equal TestFilter.name, payload[:filter]
    assert_equal @pipeline.class.name, payload[:pipeline]
    assert_equal body.reverse, payload[:result][:output]
  end

  def test_pipeline_instrumentation
    service = MockedInstrumentationService.new
    events = service.subscribe "call_pipeline.html_pipeline"
    @pipeline.instrumentation_service = service

    body = "hello"
    filter(body)

    event, payload = events.pop
    assert event, "event expected"
    assert_equal "call_pipeline.html_pipeline", event
    assert_equal @pipeline.filters.map(&:name), payload[:filters]
    assert_equal @pipeline.class.name, payload[:pipeline]
    assert_equal body.reverse, payload[:result][:output]
  end

  def test_default_instrumentation_service
    service = 'default'
    Pipeline.default_instrumentation_service = service
    pipeline = Pipeline.new [], @context, @result_class
    assert_equal service, pipeline.instrumentation_service
  ensure
    # Reset the class-level default so other tests are not affected.
    Pipeline.default_instrumentation_service = nil
  end

  def test_setup_instrumentation
    assert_nil @pipeline.instrumentation_service

    service = MockedInstrumentationService.new
    events = service.subscribe "call_pipeline.html_pipeline"
    name = 'foo'
    @pipeline.setup_instrumentation name, service

    assert_equal service, @pipeline.instrumentation_service
    assert_equal name, @pipeline.instrumentation_name

    body = 'foo'
    filter(body)

    event, payload = events.pop
    assert event, "expected event"
    assert_equal name, payload[:pipeline]
    assert_equal body.reverse, payload[:result][:output]
  end

  # Runs the input through the pipeline built in setup.
  def filter(input)
    @pipeline.call(input)
  end
end
75 |
--------------------------------------------------------------------------------
/lib/html/pipeline/email_reply_filter.rb:
--------------------------------------------------------------------------------
1 | begin
2 | require "escape_utils"
3 | rescue LoadError => _
4 | abort "Missing dependency 'escape_utils' for EmailReplyFilter. See README.md for details."
5 | end
6 |
7 | begin
8 | require "email_reply_parser"
9 | rescue LoadError => _
10 | abort "Missing dependency 'email_reply_parser' for EmailReplyFilter. See README.md for details."
11 | end
12 |
module HTML
  class Pipeline
    # HTML Filter that converts email reply text into an HTML DocumentFragment.
    # It must be used as the first filter in a pipeline.
    #
    # Context options:
    #   None
    #
    # This filter does not write any additional information to the context hash.
    class EmailReplyFilter < TextFilter
      include EscapeUtils

      EMAIL_HIDDEN_HEADER = %(…).freeze
      EMAIL_QUOTED_HEADER = %(
).freeze
      EMAIL_SIGNATURE_HEADER = %(
).freeze
      EMAIL_FRAGMENT_HEADER = %(
).freeze
      EMAIL_HEADER_END = "
".freeze

      # Splits the email body into fragments with EmailReplyParser and wraps
      # each escaped fragment in the header matching its kind (quoted,
      # signature or plain fragment) followed by EMAIL_HEADER_END. The first
      # hidden fragment is additionally prefixed with EMAIL_HIDDEN_HEADER, and
      # one more EMAIL_HEADER_END is appended at the very end to close it.
      #
      # Returns the email comment HTML as a String
      def call
        hidden_opened = nil
        blocks = EmailReplyParser.read(text.dup).fragments.map do |fragment|
          escaped = escape_html(fragment.to_s.strip).gsub(/^\s*(>|>)/, '')

          header =
            if fragment.quoted?
              EMAIL_QUOTED_HEADER
            elsif fragment.signature?
              EMAIL_SIGNATURE_HEADER
            else
              EMAIL_FRAGMENT_HEADER
            end
          parts = [header, escaped, EMAIL_HEADER_END]

          # Only the first hidden fragment opens the hidden section.
          if fragment.hidden? && !hidden_opened
            hidden_opened = true
            parts.unshift EMAIL_HIDDEN_HEADER
          end

          parts.join
        end

        blocks << EMAIL_HEADER_END if hidden_opened
        blocks.join("\n")
      end
    end
  end
end
69 |
--------------------------------------------------------------------------------
/lib/html/pipeline/camo_filter.rb:
--------------------------------------------------------------------------------
1 | require 'openssl'
2 | require 'uri'
3 |
module HTML
  class Pipeline
    # HTML Filter for replacing http image URLs with camo versions. See:
    #
    # https://github.com/atmos/camo
    #
    # All images provided in user content should be run through this
    # filter so that http image sources do not cause mixed-content warnings
    # in browser clients.
    #
    # Context options:
    #   :asset_proxy (required) - Base URL for constructed asset proxy URLs.
    #   :asset_proxy_secret_key (required) - The shared secret used to encode URLs.
    #   :asset_proxy_whitelist - Array of host Strings or Regexps to skip
    #                            src rewriting.
    #   :disable_asset_proxy - When truthy, the document is returned unchanged.
    #
    # This filter does not write additional information to the context.
    class CamoFilter < Filter
      # Hijacks images in the markup provided, replacing them with URLs that
      # go through the github asset proxy. The original src is kept in the
      # data-canonical-src attribute.
      #
      # Returns the (mutated) document.
      def call
        return doc unless asset_proxy_enabled?

        doc.search("img").each do |element|
          original_src = element['src']
          next unless original_src

          begin
            uri = URI.parse(original_src)
          rescue URI::InvalidURIError
            # Sources that cannot be parsed are left untouched. Only the parse
            # error is rescued so that signals and fatal errors still propagate.
            next
          end

          # Sources without a host (relative and root relative) are not proxied.
          next if uri.host.nil?
          next if asset_host_whitelisted?(uri.host)

          element['src'] = asset_proxy_url(original_src)
          element['data-canonical-src'] = original_src
        end
        doc
      end

      # Implementation of validate hook.
      # Errors should raise exceptions or use an existing validator.
      def validate
        needs :asset_proxy, :asset_proxy_secret_key
      end

      # The camouflaged URL for a given image URL.
      def asset_proxy_url(url)
        "#{asset_proxy_host}/#{asset_url_hash(url)}/#{hexencode(url)}"
      end

      # Private: calculate the HMAC digest for a image source URL.
      def asset_url_hash(url)
        OpenSSL::HMAC.hexdigest('sha1', asset_proxy_secret_key, url)
      end

      # Private: Return true if asset proxy filter should be enabled
      def asset_proxy_enabled?
        !context[:disable_asset_proxy]
      end

      # Private: the host to use for generated asset proxied URLs.
      def asset_proxy_host
        context[:asset_proxy]
      end

      # Private: the shared secret used to sign proxied URLs.
      def asset_proxy_secret_key
        context[:asset_proxy_secret_key]
      end

      # Private: hosts (Strings or Regexps) whose images are not proxied.
      def asset_proxy_whitelist
        context[:asset_proxy_whitelist] || []
      end

      # Private: true when the host equals a whitelisted String or matches a
      # whitelisted pattern.
      def asset_host_whitelisted?(host)
        asset_proxy_whitelist.any? do |entry|
          entry.is_a?(String) ? host == entry : entry.match(host)
        end
      end

      # Private: helper to hexencode a string. Each byte ends up encoded into
      # two characters, zero padded value in the range [0-9a-f].
      def hexencode(str)
        str.unpack('H*').first
      end
    end
  end
end
94 |
--------------------------------------------------------------------------------
/test/html/pipeline/camo_filter_test.rb:
--------------------------------------------------------------------------------
1 | require "test_helper"
2 |
# Tests CamoFilter.call: the :disable_asset_proxy switch, rewriting of image
# sources, the host whitelist, sources that must stay untouched and the
# validation of the required context options.
# NOTE(review): the value assigned to @asset_proxy_url in setup has no colon
# after https; confirm whether that is intended.
# NOTE(review): the orig and expected strings look as if their HTML markup was
# stripped from this copy; confirm against the original file.
3 | class HTML::Pipeline::CamoFilterTest < Minitest::Test
4 | CamoFilter = HTML::Pipeline::CamoFilter
5 |
6 | def setup
7 | @asset_proxy_url = 'https//assets.example.org'
8 | @asset_proxy_secret_key = 'ssssh-secret'
9 | @options = {
10 | :asset_proxy => @asset_proxy_url,
11 | :asset_proxy_secret_key => @asset_proxy_secret_key,
12 | :asset_proxy_whitelist => [/(^|\.)github\.com$/]
13 | }
14 | end
15 |
16 | def test_asset_proxy_disabled
17 | orig = %(

)
18 | assert_equal orig,
19 | CamoFilter.call(orig, @options.merge(:disable_asset_proxy => true)).to_s
20 | end
21 |
22 | def test_camouflaging_http_image_urls
23 | orig = %(

)
24 | assert_equal %(

),
25 | CamoFilter.call(orig, @options).to_s
26 | end
27 |
28 | def test_doesnt_rewrite_dotcom_image_urls
29 | orig = %(

)
30 | assert_equal orig, CamoFilter.call(orig, @options).to_s
31 | end
32 |
33 | def test_doesnt_rewrite_dotcom_subdomain_image_urls
34 | orig = %(

)
35 | assert_equal orig, CamoFilter.call(orig, @options).to_s
36 | end
37 |
38 | def test_doesnt_rewrite_dotcom_subsubdomain_image_urls
39 | orig = %(

)
40 | assert_equal orig, CamoFilter.call(orig, @options).to_s
41 | end
42 |
43 | def test_camouflaging_github_prefixed_image_urls
44 | orig = %(

)
45 | assert_equal %(

),
46 | CamoFilter.call(orig, @options).to_s
47 | end
48 |
49 | def test_doesnt_rewrite_absolute_image_urls
50 | orig = %(

)
51 | assert_equal orig, CamoFilter.call(orig, @options).to_s
52 | end
53 |
54 | def test_doesnt_rewrite_relative_image_urls
55 | orig = %(

)
56 | assert_equal orig, CamoFilter.call(orig, @options).to_s
57 | end
58 |
59 | def test_camouflaging_https_image_urls
60 | orig = %(

)
61 | assert_equal %(

),
62 | CamoFilter.call(orig, @options).to_s
63 | end
64 |
65 | def test_handling_images_with_no_src_attribute
66 | orig = %(
![]()
)
67 | assert_equal orig, CamoFilter.call(orig, @options).to_s
68 | end
69 |
70 | def test_required_context_validation
71 | exception = assert_raises(ArgumentError) {
72 | CamoFilter.call("", {})
73 | }
74 | assert_match /:asset_proxy[^_]/, exception.message
75 | assert_match /:asset_proxy_secret_key/, exception.message
76 | end
77 | end
78 |
--------------------------------------------------------------------------------
/test/html/pipeline/markdown_filter_test.rb:
--------------------------------------------------------------------------------
1 | require "test_helper"
2 |
3 | MarkdownFilter = HTML::Pipeline::MarkdownFilter
4 |
5 | class HTML::Pipeline::MarkdownFilterTest < Minitest::Test
6 | def setup
7 | @haiku =
8 | "Pointing at the moon\n" +
9 | "Reminded of simple things\n" +
10 | "Moments matter most"
11 | @links =
12 | "See http://example.org/ for more info"
13 | @code =
14 | "```\n" +
15 | "def hello()" +
16 | " 'world'" +
17 | "end" +
18 | "```"
19 | end
20 |
21 | def test_fails_when_given_a_documentfragment
22 | body = "
heyo
"
23 | doc = HTML::Pipeline.parse(body)
24 | assert_raises(TypeError) { MarkdownFilter.call(doc, {}) }
25 | end
26 |
27 | def test_gfm_enabled_by_default
28 | doc = MarkdownFilter.to_document(@haiku, {})
29 | assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
30 | assert_equal 2, doc.search('br').size
31 | end
32 |
33 | def test_disabling_gfm
34 | doc = MarkdownFilter.to_document(@haiku, :gfm => false)
35 | assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
36 | assert_equal 0, doc.search('br').size
37 | end
38 |
39 | def test_fenced_code_blocks
40 | doc = MarkdownFilter.to_document(@code)
41 | assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
42 | assert_equal 1, doc.search('pre').size
43 | end
44 |
45 | def test_fenced_code_blocks_with_language
46 | doc = MarkdownFilter.to_document(@code.sub("```", "``` ruby"))
47 | assert doc.kind_of?(HTML::Pipeline::DocumentFragment)
48 | assert_equal 1, doc.search('pre').size
49 | assert_equal 'ruby', doc.search('pre').first['lang']
50 | end
51 | end
52 |
53 | class GFMTest < Minitest::Test
54 | def gfm(text)
55 | MarkdownFilter.call(text, :gfm => true)
56 | end
57 |
58 | def test_not_touch_single_underscores_inside_words
59 | assert_equal "
foo_bar
",
60 | gfm("foo_bar")
61 | end
62 |
63 | def test_not_touch_underscores_in_code_blocks
64 | assert_equal "
foo_bar_baz\n
",
65 | gfm(" foo_bar_baz")
66 | end
67 |
68 | def test_not_touch_underscores_in_pre_blocks
69 | assert_equal "
\nfoo_bar_baz\n
",
70 | gfm("
\nfoo_bar_baz\n
")
71 | end
72 |
73 | def test_not_touch_two_or_more_underscores_inside_words
74 | assert_equal "
foo_bar_baz
",
75 | gfm("foo_bar_baz")
76 | end
77 |
78 | def test_turn_newlines_into_br_tags_in_simple_cases
79 | assert_equal "
foo
\nbar
",
80 | gfm("foo\nbar")
81 | end
82 |
83 | def test_convert_newlines_in_all_groups
84 | assert_equal "
apple
\npear
\norange
\n\n" +
85 | "
ruby
\npython
\nerlang
",
86 | gfm("apple\npear\norange\n\nruby\npython\nerlang")
87 | end
88 |
89 | def test_convert_newlines_in_even_long_groups
90 | assert_equal "
apple
\npear
\norange
\nbanana
\n\n" +
91 | "
ruby
\npython
\nerlang
",
92 | gfm("apple\npear\norange\nbanana\n\nruby\npython\nerlang")
93 | end
94 |
95 | def test_not_convert_newlines_in_lists
96 | assert_equal "
foo
\n\n
bar
",
97 | gfm("# foo\n# bar")
98 | assert_equal "
",
99 | gfm("* foo\n* bar")
100 | end
101 | end
102 |
--------------------------------------------------------------------------------
/lib/html/pipeline/emoji_filter.rb:
--------------------------------------------------------------------------------
1 | require "cgi"
2 |
3 | begin
4 | require "gemoji"
5 | rescue LoadError => _
6 | abort "Missing dependency 'gemoji' for EmojiFilter. See README.md for details."
7 | end
8 |
9 | module HTML
10 | class Pipeline
11 | # HTML filter that replaces :emoji: with images.
12 | #
13 | # Context:
14 | # :asset_root (required) - base url to link to emoji sprite
15 | # :asset_path (optional) - url path to link to emoji sprite. :file_name can be used as a placeholder for the sprite file name. If no asset_path is set "emoji/:file_name" is used.
16 | # :ignored_ancestor_tags (optional) - Tags that stop the emojification. Text nodes with a matching ancestor HTML tag will not be emojified. Defaults to pre, code, and tt tags. Pass extra tags as an array, e.g., %w(blockquote summary).
17 | class EmojiFilter < Filter
18 |
19 | DEFAULT_IGNORED_ANCESTOR_TAGS = %w(pre code tt).freeze
20 |
21 | def call
22 | doc.search('.//text()').each do |node| # visit every text node of the document
23 | content = node.to_html
24 | next unless content.include?(':') # cheap pre-check: without a colon there is no :emoji: candidate
25 | next if has_ancestor?(node, ignored_ancestor_tags) # leave text under ignored tags untouched
26 | html = emoji_image_filter(content)
27 | next if html == content # nothing was substituted, so skip the node replacement
28 | node.replace(html)
29 | end
30 | doc # the document, modified in place, is the filter's return value
31 | end
32 |
33 | # Implementation of validate hook.
34 | # Errors should raise exceptions or use an existing validator.
35 | def validate
36 | needs :asset_root
37 | end
38 |
39 | # Replace :emoji: with corresponding images.
40 | #
41 | # text - String text to replace :emoji: in.
42 | #
43 | # Returns a String with :emoji: replaced with images.
44 | def emoji_image_filter(text)
45 | text.gsub(emoji_pattern) do |match|
46 | emoji_image_tag($1)
47 | end
48 | end
49 |
50 | # The base url to link emoji sprites
51 | #
52 | # Does not raise itself; #validate raises ArgumentError when the :asset_root key is missing.
53 | # Returns the context's asset_root.
54 | def asset_root
55 | context[:asset_root]
56 | end
57 |
58 | # The url path to link emoji sprites
59 | #
60 | # :file_name can be used in the asset_path as a placeholder for the sprite file name. If no asset_path is set in the context "emoji/:file_name" is used.
61 | # Returns the context's asset_path or the default path if no context asset_path is given.
62 | def asset_path(name)
63 | if context[:asset_path]
64 | context[:asset_path].gsub(":file_name", emoji_filename(name)) # fill in every :file_name placeholder
65 | else
66 | File.join("emoji", emoji_filename(name)) # default: "emoji" joined with the image file name
67 | end
68 | end
69 |
70 | private
71 |
72 | # Build an emoji image tag
73 | def emoji_image_tag(name)
74 | "
})
"
75 | end
76 |
77 | def emoji_url(name)
78 | File.join(asset_root, asset_path(name))
79 | end
80 |
81 | # Build a regexp that matches all valid :emoji: names.
82 | def self.emoji_pattern
83 | @emoji_pattern ||= /:(#{emoji_names.map { |name| Regexp.escape(name) }.join('|')}):/ # built on first use, then cached on the class; group 1 is the bare emoji name
84 | end
85 |
86 | def emoji_pattern
87 | self.class.emoji_pattern
88 | end
89 |
90 | def self.emoji_names
91 | Emoji.all.map(&:aliases).flatten.sort
92 | end
93 |
94 | def emoji_filename(name)
95 | Emoji.find_by_alias(name).image_filename
96 | end
97 |
98 | # Return ancestor tags to stop the emojification.
99 | #
100 | # @return [Array<String>] Ancestor tags.
101 | def ignored_ancestor_tags
102 | if context[:ignored_ancestor_tags]
103 | DEFAULT_IGNORED_ANCESTOR_TAGS | context[:ignored_ancestor_tags]
104 | else
105 | DEFAULT_IGNORED_ANCESTOR_TAGS
106 | end
107 | end
108 | end
109 | end
110 | end
111 |
--------------------------------------------------------------------------------
/test/html/pipeline/toc_filter_test.rb:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | require "test_helper"
3 |
4 | class HTML::Pipeline::TableOfContentsFilterTest < Minitest::Test
5 | TocFilter = HTML::Pipeline::TableOfContentsFilter
6 |
7 | TocPipeline =
8 | HTML::Pipeline.new [
9 | HTML::Pipeline::TableOfContentsFilter
10 | ]
11 |
12 | def toc
13 | result = {}
14 | TocPipeline.call(@orig, {}, result)
15 | result[:toc]
16 | end
17 |
18 | def test_anchors_are_added_properly
19 | orig = %(Ice cube
Will swarm on any motherfucker in a blue uniform
)
20 | assert_includes TocFilter.call(orig).to_s, 'Dr DreIce Cube
Eazy-E
MC Ren
)
46 | assert_includes toc, '"#dr-dre"'
47 | assert_includes toc, '"#ice-cube"'
48 | assert_includes toc, '"#eazy-e"'
49 | assert_includes toc, '"#mc-ren"'
50 | end
51 |
52 | def test_dupe_headers_have_unique_trailing_identifiers
53 | orig = %(Straight Outta Compton
54 | Dopeman
55 | Express Yourself
56 | Dopeman
)
57 |
58 | result = TocFilter.call(orig).to_s
59 |
60 | assert_includes result, '"dopeman"'
61 | assert_includes result, '"dopeman-1"'
62 | end
63 |
64 | def test_dupe_headers_have_unique_toc_anchors
65 | @orig = %(Straight Outta Compton
66 | Dopeman
67 | Express Yourself
68 | Dopeman
)
69 |
70 | assert_includes toc, '"#dopeman"'
71 | assert_includes toc, '"#dopeman-1"'
72 | end
73 |
74 | def test_all_header_tags_are_found_when_adding_anchors
75 | orig = %("Funky President" by James Brown
76 | "It's My Thing" by Marva Whitney
77 | "Boogie Back" by Roy Ayers
78 | "Feel Good" by Fancy
79 | "Funky Drummer" by James Brown
80 | "Ruthless Villain" by Eazy-E
81 | "Be Thankful for What You Got" by William DeVaughn)
82 |
83 | doc = TocFilter.call(orig)
84 |
85 | assert_equal 6, doc.search('a').size
86 | end
87 |
88 | def test_toc_is_complete
89 | @orig = %("Funky President" by James Brown
90 | "It's My Thing" by Marva Whitney
91 | "Boogie Back" by Roy Ayers
92 | "Feel Good" by Fancy
93 | "Funky Drummer" by James Brown
94 | "Ruthless Villain" by Eazy-E
95 | "Be Thankful for What You Got" by William DeVaughn)
96 |
97 | expected = %Q{}
98 |
99 | assert_equal expected, toc
100 | end
101 |
102 | if RUBY_VERSION > "1.9" # not sure how to make this work on 1.8.7
103 |
104 | def test_anchors_with_utf8_characters
105 | orig = %(日本語
106 | Русский
\n日本語",
111 | rendered_h1s[0]
112 | assert_equal "\nРусский
",
113 | rendered_h1s[1]
114 | end
115 |
116 | def test_toc_with_utf8_characters
117 | @orig = %(日本語
118 | Русский
\n日本語\nРусский\n}
123 |
124 | assert_equal expected, rendered_toc
125 | end
126 | end
127 | end
128 |
--------------------------------------------------------------------------------
/lib/html/pipeline/@mention_filter.rb:
--------------------------------------------------------------------------------
1 | require 'set'
2 |
3 | module HTML
4 | class Pipeline
5 | # HTML filter that replaces @user mentions with links. Mentions within <pre>,
6 | # <code>, <a>, and <style> elements are ignored. Mentions that reference users that do
7 | # not exist are ignored.
8 | #
9 | # Context options:
10 | # :base_url - Used to construct links to user profile pages for each
11 | # mention.
12 | # :info_url - Used to link to "more info" when someone mentions @mention
13 | # or @mentioned.
14 | # :username_pattern - Used to provide a custom regular expression to
15 | # identify usernames
16 | #
17 | class MentionFilter < Filter
18 | # Public: Find user @mentions in text. See
19 | # MentionFilter#mention_link_filter.
20 | #
21 | # MentionFilter.mentioned_logins_in(text) do |match, login, is_mentioned|
22 | # "#{login}"
23 | # end
24 | #
25 | # text - String text to search.
26 | #
27 | # Yields the String match, the String login name, and a Boolean determining
28 | # if the match = "@mention[ed]". The yield's return replaces the match in
29 | # the original text.
30 | #
31 | # Returns a String replaced with the return of the block.
32 | def self.mentioned_logins_in(text, username_pattern=UsernamePattern)
33 | text.gsub MentionPatterns[username_pattern] do |match| # looks up (or builds on first use) the regexp for this username pattern
34 | login = $1 # first capture group: the username without the leading "@"
35 | yield match, login, MentionLogins.include?(login.downcase) # the caller's block result becomes the gsub replacement
36 | end
37 | end
38 |
39 | # Hash that contains all of the mention patterns used by the pipeline
40 | MentionPatterns = Hash.new do |hash, key| # default block: builds the regexp for a username pattern on first lookup and stores it
41 | hash[key] = /
42 | (?:^|\W) # beginning of string or non-word char
43 | @((?>#{key})) # @username
44 | (?!\/) # without a trailing slash
45 | (?=
46 | \.+[ \t\W]| # dots followed by space or non-word character
47 | \.+$| # dots at end of line
48 | [^0-9a-zA-Z_.]| # non-word character except dot
49 | $ # end of line
50 | )
51 | /ix
52 | end # NOTE(review): a Regexp key is interpolated with its own flags, so the outer /i may not reach the username part - confirm
53 |
54 | # Default pattern used to extract usernames from text. The value can be
55 | # overriden by providing the username_pattern variable in the context.
56 | UsernamePattern = /[a-z0-9][a-z0-9-]*/
57 |
58 | # List of username logins that, when mentioned, link to the blog post
59 | # about @mentions instead of triggering a real mention.
60 | MentionLogins = %w(
61 | mention
62 | mentions
63 | mentioned
64 | mentioning
65 | )
66 |
67 | # Don't look for mentions in text nodes that are children of these elements
68 | IGNORE_PARENTS = %w(pre code a style).to_set
69 |
70 | def call
71 | result[:mentioned_usernames] ||= [] # make sure the key exists even when nothing is mentioned
72 |
73 | doc.search('.//text()').each do |node| # visit every text node of the document
74 | content = node.to_html
75 | next if !content.include?('@') # cheap pre-check before running the regexp
76 | next if has_ancestor?(node, IGNORE_PARENTS) # skip text nested under any of the ignored elements
77 | html = mention_link_filter(content, base_url, info_url, username_pattern)
78 | next if html == content # no mention was linked; leave the node alone
79 | node.replace(html)
80 | end
81 | doc # the document, modified in place, is the filter's return value
82 | end
83 |
84 | # The URL to provide when someone @mentions a "mention" name, such as
85 | # @mention or @mentioned, that will give them more info on mentions.
86 | def info_url
87 | context[:info_url] || nil
88 | end
89 |
90 | def username_pattern
91 | context[:username_pattern] || UsernamePattern
92 | end
93 |
94 | # Replace user @mentions in text with links to the mentioned user's
95 | # profile page.
96 | #
97 | # text - String text to replace @mention usernames in.
98 | # base_url - The base URL used to construct user profile URLs.
99 | # info_url - The "more info" URL used to link to more info on @mentions.
100 | # If nil we don't link @mention or @mentioned.
101 | # username_pattern - Regular expression used to identify usernames in
102 | # text
103 | #
104 | # Returns a string with @mentions replaced with links. All links have a
105 | # 'user-mention' class name attached for styling.
106 | def mention_link_filter(text, base_url='/', info_url=nil, username_pattern=UsernamePattern) # NOTE: the base_url argument is not referenced in this body
107 | self.class.mentioned_logins_in(text, username_pattern) do |match, login, is_mentioned|
108 | link =
109 | if is_mentioned # login is one of the MentionLogins placeholder names
110 | link_to_mention_info(login, info_url)
111 | else
112 | link_to_mentioned_user(login)
113 | end
114 |
115 | link ? match.sub("@#{login}", link) : match # swap only the "@login" part so any leading character matched by the pattern is kept
116 | end
117 | end
118 |
119 | def link_to_mention_info(text, info_url=nil)
120 | return "@#{text}" if info_url.nil?
121 | "" +
122 | "@#{text}" +
123 | ""
124 | end
125 |
126 | def link_to_mentioned_user(login)
127 | result[:mentioned_usernames] |= [login] # array union: records the login without creating duplicates
128 |
129 | url = base_url.dup # dup so the in-place << below cannot mutate the string returned by base_url
130 | url << "/" unless url =~ /[\/~]\z/ # add a separator unless the base already ends in "/" or "~"
131 |
132 | "" +
133 | "@#{login}" +
134 | ""
135 | end
136 | end
137 | end
138 | end
139 |
--------------------------------------------------------------------------------
/lib/html/pipeline/filter.rb:
--------------------------------------------------------------------------------
1 | module HTML
2 | class Pipeline
3 | # Base class for user content HTML filters. Each filter takes an
4 | # HTML string or Nokogiri::HTML::DocumentFragment, performs
5 | # modifications and/or writes information to the result hash. Filters must
6 | # return a DocumentFragment (typically the same instance provided to the call
7 | # method) or a String with HTML markup.
8 | #
9 | # Example filter that replaces all images with trollface:
10 | #
11 | # class FuuuFilter < HTML::Pipeline::Filter
12 | # def call
13 | # doc.search('img').each do |img|
14 | # img['src'] = "http://paradoxdgn.com/junk/avatars/trollface.jpg"
15 | # end
16 | # end
17 | # end
18 | #
19 | # The context Hash passes options to filters and should not be changed in
20 | # place. A Result Hash allows filters to make extracted information
21 | # available to the caller and is mutable.
22 | #
23 | # Common context options:
24 | # :base_url - The site's base URL
25 | # :repository - A Repository providing context for the HTML being processed
26 | #
27 | # Each filter may define additional options and output values. See the class
28 | # docs for more info.
29 | class Filter
30 | class InvalidDocumentException < StandardError; end
31 |
32 | def initialize(doc, context = nil, result = nil)
33 | if doc.kind_of?(String) # String input: keep the markup; #doc parses it on first access
34 | @html = doc.to_str
35 | @doc = nil
36 | else # anything else is stored as the document itself
37 | @doc = doc
38 | @html = nil
39 | end
40 | @context = context || {} # nil falls back to a fresh empty Hash
41 | @result = result || {}
42 | validate # subclass hook; an exception raised here aborts construction
43 | end
44 |
45 | # Public: Returns a simple Hash used to pass extra information (options) into
46 | # filters. It should not be changed in place; information extracted by a
47 | # filter belongs in the result Hash.
48 | attr_reader :context
49 |
50 | # Public: Returns a Hash used to allow filters to pass back information
51 | # to callers of the various Pipelines. This can be used for
52 | # #mentioned_users, for example.
53 | attr_reader :result
54 |
55 | # The Nokogiri::HTML::DocumentFragment to be manipulated. If the filter was
56 | # provided a String, parse into a DocumentFragment the first time this
57 | # method is called.
58 | def doc
59 | @doc ||= parse_html(html)
60 | end
61 |
62 | # The String representation of the document. If a DocumentFragment was
63 | # provided to the Filter, it is serialized into a String when this method is
64 | # called.
65 | def html
66 | raise InvalidDocumentException if @html.nil? && @doc.nil? # neither form of the document is available
67 | @html || doc.to_html # prefer the stored String; otherwise serialize the document
68 | end
69 |
70 | # The main filter entry point. The doc attribute is guaranteed to be a
71 | # Nokogiri::HTML::DocumentFragment when invoked. Subclasses should modify
72 | # this document in place or extract information and add it to the result
73 | # hash.
74 | def call
75 | raise NotImplementedError
76 | end
77 |
78 | # Make sure the context has everything we need. Noop: Subclasses can override.
79 | def validate
80 | end
81 |
82 | # The Repository object provided in the context hash, or nil when no
83 | # :repository was specified.
84 | #
85 | # It's assumed that the repository context has already been checked
86 | # for permissions
87 | def repository
88 | context[:repository]
89 | end
90 |
91 | # The User object provided in the context hash, or nil when no user
92 | # was specified
93 | def current_user
94 | context[:current_user]
95 | end
96 |
97 | # The site's base URL provided in the context hash, or '/' when no
98 | # base URL was specified.
99 | def base_url
100 | context[:base_url] || '/'
101 | end
102 |
103 | # Ensure the passed argument is a DocumentFragment. When a string is
104 | # provided, it is parsed and returned; otherwise, the DocumentFragment is
105 | # returned unmodified.
106 | def parse_html(html)
107 | HTML::Pipeline.parse(html)
108 | end
109 |
110 | # Helper method for filter subclasses used to determine if any of a node's
111 | # ancestors have one of the tag names specified.
112 | #
113 | # node - The Node object to check.
114 | # tags - An array of tag name strings to check. These should be downcase.
115 | #
116 | # Returns true when the node has a matching ancestor, nil otherwise.
117 | def has_ancestor?(node, tags)
118 | while node = node.parent # intentional assignment: climb one ancestor per pass, stop when there is no parent
119 | if tags.include?(node.name.downcase)
120 | break true # becomes the value of the while expression and therefore of the method
121 | end
122 | end # without a break the loop, and so the method, evaluates to nil
123 | end
124 |
125 | # Perform a filter on doc with the given context.
126 | #
127 | # Returns a HTML::Pipeline::DocumentFragment or a String containing HTML
128 | # markup.
129 | def self.call(doc, context = nil, result = nil)
130 | new(doc, context, result).call
131 | end
132 |
133 | # Like call but guarantees that a DocumentFragment is returned, even when
134 | # the last filter returns a String.
135 | def self.to_document(input, context = nil)
136 | html = call(input, context)
137 | HTML::Pipeline::parse(html)
138 | end
139 |
140 | # Like call but guarantees that a string of HTML markup is returned.
141 | def self.to_html(input, context = nil)
142 | output = call(input, context)
143 | if output.respond_to?(:to_html) # document-like result: serialize it
144 | output.to_html
145 | else # otherwise coerce whatever was returned with to_s
146 | output.to_s
147 | end
148 | end
149 |
150 | # Validator for required context. This will check that every key passed in
151 | # exists in the context hash.
152 | #
153 | # If any errors are found an ArgumentError will be raised with a
154 | # message listing all the missing contexts and the filters that
155 | # require them.
156 | def needs(*keys)
157 | missing = keys.reject { |key| context.include? key } # only key presence is checked, not the value
158 |
159 | if missing.any?
160 | raise ArgumentError,
161 | "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}"
162 | end
163 | end
164 | end
165 | end
166 | end
167 |
--------------------------------------------------------------------------------
/test/html/pipeline/sanitization_filter_test.rb:
--------------------------------------------------------------------------------
1 | require "test_helper"
2 |
3 | class HTML::Pipeline::SanitizationFilterTest < Minitest::Test
4 | SanitizationFilter = HTML::Pipeline::SanitizationFilter
5 |
6 | def test_removing_script_tags
7 | orig = %(
)
8 | html = SanitizationFilter.call(orig).to_s
9 | refute_match /script/, html
10 | end
11 |
12 | def test_removing_style_tags
13 | orig = %()
14 | html = SanitizationFilter.call(orig).to_s
15 | refute_match /style/, html
16 | end
17 |
18 | def test_removing_style_attributes
19 | orig = %(YO DAWG
)
20 | html = SanitizationFilter.call(orig).to_s
21 | refute_match /font-size/, html
22 | refute_match /style/, html
23 | end
24 |
25 | def test_removing_script_event_handler_attributes
26 | orig = %(YO DAWG)
27 | html = SanitizationFilter.call(orig).to_s
28 | refute_match /javscript/, html
29 | refute_match /onclick/, html
30 | end
31 |
32 | def test_sanitizes_li_elements_not_contained_in_ul_or_ol
33 | stuff = "a\nb\nc"
34 | html = SanitizationFilter.call(stuff).to_s
35 | assert_equal "a\nb\nc", html
36 | end
37 |
38 | def test_does_not_sanitize_li_elements_contained_in_ul_or_ol
39 | stuff = "a\n\nc"
40 | assert_equal stuff, SanitizationFilter.call(stuff).to_s
41 | end
42 |
43 | def test_github_specific_protocols_are_not_removed
44 | stuff = 'Spill this yo and so on'
45 | assert_equal stuff, SanitizationFilter.call(stuff).to_s
46 | end
47 |
48 | def test_unknown_schemes_are_removed
49 | stuff = 'Wat is this'
50 | html = SanitizationFilter.call(stuff).to_s
51 | assert_equal 'Wat is this', html
52 | end
53 |
54 | def test_whitelisted_longdesc_schemes_are_allowed
55 | stuff = '
'
56 | html = SanitizationFilter.call(stuff).to_s
57 | assert_equal '
', html
58 | end
59 |
60 | def test_weird_longdesc_schemes_are_removed
61 | stuff = '
'
62 | html = SanitizationFilter.call(stuff).to_s
63 | assert_equal '
', html
64 | end
65 |
66 | def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes
67 | stuff = 'No href for you'
68 | filter = SanitizationFilter.new(stuff, {:anchor_schemes => []})
69 | html = filter.call.to_s
70 | assert_equal 'No href for you', html
71 | end
72 |
73 | def test_custom_anchor_schemes_are_not_removed
74 | stuff = 'Wat is this'
75 | filter = SanitizationFilter.new(stuff, {:anchor_schemes => ['something-weird']})
76 | html = filter.call.to_s
77 | assert_equal stuff, html
78 | end
79 |
80 | def test_anchor_schemes_are_merged_with_other_anchor_restrictions
81 | stuff = 'Wat is this'
82 | whitelist = {
83 | :elements => ['a'],
84 | :attributes => {'a' => ['href', 'ping']},
85 | :protocols => {'a' => {'ping' => ['http']}}
86 | }
87 | filter = SanitizationFilter.new(stuff, {:whitelist => whitelist, :anchor_schemes => ['something-weird']})
88 | html = filter.call.to_s
89 | assert_equal 'Wat is this', html
90 | end
91 |
92 | def test_uses_anchor_schemes_from_whitelist_when_not_separately_specified
93 | stuff = 'Wat is this'
94 | whitelist = {
95 | :elements => ['a'],
96 | :attributes => {'a' => ['href']},
97 | :protocols => {'a' => {'href' => ['something-weird']}}
98 | }
99 | filter = SanitizationFilter.new(stuff, {:whitelist => whitelist})
100 | html = filter.call.to_s
101 | assert_equal stuff, html
102 | end
103 |
104 | def test_whitelist_contains_default_anchor_schemes
105 | assert_equal SanitizationFilter::WHITELIST[:protocols]['a']['href'], ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
106 | end
107 |
108 | def test_whitelist_from_full_constant
109 | stuff = 'Wat is this'
110 | filter = SanitizationFilter.new(stuff, :whitelist => SanitizationFilter::FULL)
111 | html = filter.call.to_s
112 | assert_equal 'Wat is this', html
113 | end
114 |
115 | def test_exports_default_anchor_schemes
116 | assert_equal SanitizationFilter::ANCHOR_SCHEMES, ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']
117 | end
118 |
119 | def test_script_contents_are_removed
120 | orig = ''
121 | assert_equal "", SanitizationFilter.call(orig).to_s
122 | end
123 |
124 | def test_table_rows_and_cells_removed_if_not_in_table
125 | orig = %(| Foo |
Bar | )
126 | assert_equal 'FooBar', SanitizationFilter.call(orig).to_s
127 | end
128 |
129 | def test_table_sections_removed_if_not_in_table
130 | orig = %(| Foo |
)
131 | assert_equal 'Foo', SanitizationFilter.call(orig).to_s
132 | end
133 |
134 | def test_table_sections_are_not_removed
135 | orig = %(
136 | | Column 1 |
137 | | Sum |
138 | | 1 |
139 |
)
140 | assert_equal orig, SanitizationFilter.call(orig).to_s
141 | end
142 |
143 | def test_summary_tag_are_not_removed
144 | orig = %(Foo)
145 | assert_equal orig, SanitizationFilter.call(orig).to_s
146 | end
147 |
148 | def test_details_tag_and_open_attribute_are_not_removed
149 | orig = %(Foo )
150 | assert_equal orig, SanitizationFilter.call(orig).to_s
151 | end
152 |
153 | def test_nested_details_tag_are_not_removed
154 | orig = <<-NESTED
155 |
156 | Foo
157 |
158 | Bar
159 | Baz
160 |
161 | Qux
162 |
163 | NESTED
164 | assert_equal orig, SanitizationFilter.call(orig).to_s
165 | end
166 | end
167 |
--------------------------------------------------------------------------------
/lib/html/pipeline/sanitization_filter.rb:
--------------------------------------------------------------------------------
1 | begin
2 | require "sanitize"
3 | rescue LoadError => _
4 | abort "Missing dependency 'sanitize' for SanitizationFilter. See README.md for details."
5 | end
6 |
7 | module HTML
8 | class Pipeline
9 | # HTML filter with sanitization routines and whitelists. This module defines
10 | # what HTML is allowed in user provided content and fixes up issues with
11 | # unbalanced tags and whatnot.
12 | #
13 | # See the Sanitize docs for more information on the underlying library:
14 | #
15 | # https://github.com/rgrove/sanitize/#readme
16 | #
17 | # Context options:
18 | # :whitelist - The sanitizer whitelist configuration to use. This
19 | # can be one of the options constants defined in this
20 | # class or a custom sanitize options hash.
21 | # :anchor_schemes - The URL schemes to allow in <a href> attributes. The
22 | # default set is provided in the ANCHOR_SCHEMES
23 | # constant in this class. If passed, this overrides any
24 | # schemes specified in the whitelist configuration.
25 | #
26 | # This filter does not write additional information to the context.
27 | class SanitizationFilter < Filter
28 | LISTS = Set.new(%w(ul ol).freeze)
29 | LIST_ITEM = 'li'.freeze
30 |
31 | # List of table child elements. These must be contained by a <table> element
32 | # or they are not allowed through. Otherwise they can be used to break out
33 | # of places we're using tables to contain formatted user content (like pull
34 | # request review comments).
35 | TABLE_ITEMS = Set.new(%w(tr td th).freeze)
36 | TABLE = 'table'.freeze
37 | TABLE_SECTIONS = Set.new(%w(thead tbody tfoot).freeze)
38 |
39 | # These schemes are the only ones allowed in <a href> attributes by default.
40 | ANCHOR_SCHEMES = ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac'].freeze
41 |
42 | # The main sanitization whitelist. Only these elements and attributes are
43 | # allowed through by default.
44 | WHITELIST = {
45 | :elements => %w(
46 | h1 h2 h3 h4 h5 h6 h7 h8 br b i strong em a pre code img tt
47 | div ins del sup sub p ol ul table thead tbody tfoot blockquote
48 | dl dt dd kbd q samp var hr ruby rt rp li tr td th s strike summary details
49 | ),
50 | :remove_contents => ['script'],
51 | :attributes => {
52 | 'a' => ['href'],
53 | 'img' => ['src', 'longdesc'],
54 | 'div' => ['itemscope', 'itemtype'],
55 | 'blockquote' => ['cite'],
56 | 'del' => ['cite'],
57 | 'ins' => ['cite'],
58 | 'q' => ['cite'],
59 | :all => ['abbr', 'accept', 'accept-charset',
60 | 'accesskey', 'action', 'align', 'alt', 'axis',
61 | 'border', 'cellpadding', 'cellspacing', 'char',
62 | 'charoff', 'charset', 'checked',
63 | 'clear', 'cols', 'colspan', 'color',
64 | 'compact', 'coords', 'datetime', 'dir',
65 | 'disabled', 'enctype', 'for', 'frame',
66 | 'headers', 'height', 'hreflang',
67 | 'hspace', 'ismap', 'label', 'lang',
68 | 'maxlength', 'media', 'method',
69 | 'multiple', 'name', 'nohref', 'noshade',
70 | 'nowrap', 'open', 'prompt', 'readonly', 'rel', 'rev',
71 | 'rows', 'rowspan', 'rules', 'scope',
72 | 'selected', 'shape', 'size', 'span',
73 | 'start', 'summary', 'tabindex', 'target',
74 | 'title', 'type', 'usemap', 'valign', 'value',
75 | 'vspace', 'width', 'itemprop']
76 | },
77 | :protocols => {
78 | 'a' => {'href' => ANCHOR_SCHEMES},
79 | 'blockquote' => {'cite' => ['http', 'https', :relative]},
80 | 'del' => {'cite' => ['http', 'https', :relative]},
81 | 'ins' => {'cite' => ['http', 'https', :relative]},
82 | 'q' => {'cite' => ['http', 'https', :relative]},
83 | 'img' => {
84 | 'src' => ['http', 'https', :relative],
85 | 'longdesc' => ['http', 'https', :relative]
86 | }
87 | },
88 | :transformers => [
89 | # Top-level <li> elements are removed because they can break out of
90 | # containing markup.
91 | lambda { |env|
92 | name, node = env[:node_name], env[:node]
93 | if name == LIST_ITEM && !node.ancestors.any?{ |n| LISTS.include?(n.name) }
94 | node.replace(node.children)
95 | end
96 | },
97 |
98 | # Table child elements that are not contained by a <table> are removed.
99 | lambda { |env|
100 | name, node = env[:node_name], env[:node]
101 | if (TABLE_SECTIONS.include?(name) || TABLE_ITEMS.include?(name)) && !node.ancestors.any? { |n| n.name == TABLE }
102 | node.replace(node.children)
103 | end
104 | }
105 | ]
106 | }
107 |
108 | # A more limited sanitization whitelist. This includes all attributes,
109 | # protocols, and transformers from WHITELIST but with a more locked down
110 | # set of allowed elements.
111 | LIMITED = WHITELIST.merge(
112 | :elements => %w(b i strong em a pre code img ins del sup sub p ol ul li))
113 |
114 | # Strip all HTML tags from the document.
115 | FULL = { :elements => [] }
116 |
117 | # Sanitize markup using the Sanitize library.
118 | def call
119 | Sanitize.clean_node!(doc, whitelist)
120 | end
121 |
122 | # The whitelist to use when sanitizing. This can be passed in the context
123 | # hash to the filter but defaults to WHITELIST constant value above.
124 | def whitelist
125 | whitelist = context[:whitelist] || WHITELIST
126 | anchor_schemes = context[:anchor_schemes]
127 | return whitelist unless anchor_schemes # no override requested: use the configuration as is
128 | whitelist = whitelist.dup # copy each level before editing so the original hashes are not modified
129 | whitelist[:protocols] = (whitelist[:protocols] || {}).dup
130 | whitelist[:protocols]['a'] = (whitelist[:protocols]['a'] || {}).merge('href' => anchor_schemes) # merge returns a new Hash; other 'a' protocol entries are kept
131 | whitelist
132 | end
133 | end
134 | end
135 | end
136 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG
2 |
3 | ## 2.2.2
4 |
5 | * Fix for calling mention_link_filter with only one argument [#230](https://github.com/jch/html-pipeline/pull/230)
6 | * Add html-pipeline-linkify_github to 3rd Party Extensions in README [#228](https://github.com/jch/html-pipeline/pull/228)
7 |
8 | ## 2.2.1
9 |
10 | * Soften Nokogiri dependency to versions ">= 1.4" [#208](https://github.com/jch/html-pipeline/pull/208)
11 |
12 | ## 2.2.0
13 |
14 | * Only allow cite attribute on blockquote and restrict schemes [#223](https://github.com/jch/html-pipeline/pull/223)
15 |
16 | ## 2.1.0
17 |
18 | * Whitelist schemes for longdesc [#221](https://github.com/jch/html-pipeline/pull/221)
19 | * Extract emoji image tag generation to own method [#195](https://github.com/jch/html-pipeline/pull/195)
20 | * Update README.md [#211](https://github.com/jch/html-pipeline/pull/211)
21 | * Add ImageFilter for image url to img tag conversion [#207](https://github.com/jch/html-pipeline/pull/207)
22 |
23 | ## 2.0
24 |
25 | **New**
26 |
27 | * Implement new EmojiFilter context option: ignored_ancestor_tags to accept more ignored tags. [#170](https://github.com/jch/html-pipeline/pull/170) @JuanitoFatas
28 | * Add GitHub flavor Markdown Task List extension [#162](https://github.com/jch/html-pipeline/pull/162) @simeonwillbanks
29 | * @mention allow for custom regex to identify usernames. [#157](https://github.com/jch/html-pipeline/pull/157) @brittballard
30 | * EmojiFilter now requires gemoji ~> 2. [#159](https://github.com/jch/html-pipeline/pull/159) @jch
31 |
32 | **Changes**
33 |
34 | * Restrict nokogiri to >= 1.4, <= 1.6.5 [#176](https://github.com/jch/html-pipeline/pull/176) @simeonwillbanks
35 | * MentionFilter#link_to_mentioned_user: Replace String introspection with Regexp match [#172](https://github.com/jch/html-pipeline/pull/172) @simeonwillbanks
36 | * Whitelist summary and details element. [#171](https://github.com/jch/html-pipeline/pull/171) @JuanitoFatas
37 | * Support ~login for MentionFilter. [#167](https://github.com/jch/html-pipeline/pull/167) @JuanitoFatas
38 | * Revert "Search for text nodes on DocumentFragments without root tags" [#158](https://github.com/jch/html-pipeline/pull/158) @jch
39 | * Drop support for ruby ree, 1.9.2, 1.9.3 [#156](https://github.com/jch/html-pipeline/pull/156) @jch
40 | * Skip EmojiFilter in `<tt>` tags [#147](https://github.com/jch/html-pipeline/pull/147) @moskvax
41 | * Use Linguist lexers [#153](https://github.com/jch/html-pipeline/pull/153) @pchaigno
42 | * Constrain Active Support >= 2, < 5 [#180](https://github.com/jch/html-pipeline/pull/180) @jch
43 |
44 | ## 1.11.0
45 |
46 | * Search for text nodes on DocumentFragments without root tags #146 Razer6
47 | * Don't filter @mentions in `"
41 | assert_equal body, filter(body).to_html
42 | end
43 |
# Expects the filter output to equal its input.
# NOTE(review): the literal looks like it lost its HTML markup in this
# dump - confirm against the repository.
def test_not_replacing_mentions_in_links
  markup = "@kneath okay
"
  rendered = filter(markup).to_html
  assert_equal markup, rendered
end
48 |
# Filters the body with base url '/' and compares against the expected text.
# NOTE(review): the literals look like they lost entities/markup in this
# dump - confirm against the repository.
def test_entity_encoding_and_whatnot
  source = "@kneath what's up
"
  mention = "@kneath"
  rendered = filter(source, '/').to_html
  assert_equal "#{mention} what's up
", rendered
end
54 |
# Filters a body containing a script snippet and compares the output text.
# NOTE(review): the literals look like they lost escaping/markup in this
# dump - confirm against the repository.
def test_html_injection
  source = "@kneath <script>alert(0)</script>
"
  mention = "@kneath"
  rendered = filter(source, '/').to_html
  assert_equal "#{mention} <script>alert(0)</script>
", rendered
end
61 |
# Without an info url the "@mention" text is expected to come back unlinked.
# NOTE(review): the literals look like they lost their HTML markup in this
# dump - confirm against the repository.
def test_links_to_nothing_when_no_info_url_given
  source = "How do I @mention someone?
"
  rendered = filter(source, '/').to_html
  assert_equal "How do I @mention someone?
", rendered
end
67 |
# Passes an info url as the third filter argument and compares the output.
# NOTE(review): the literals look like they lost their HTML markup in this
# dump - confirm against the repository.
def test_links_to_more_info_when_info_url_given
  source = "How do I @mention someone?
"
  mention = "@mention"
  rendered = filter(source, '/', 'https://github.com/blog/821').to_html
  assert_equal "How do I #{mention} someone?
", rendered
end
74 |
# Filters with base url '/'.
# NOTE(review): the literals look like they lost their HTML markup in this
# dump - confirm against the repository.
def test_base_url_slash
  source = "Hi, @jch!
"
  mention = "@jch"
  rendered = filter(source, '/').to_html
  assert_equal "Hi, #{mention}!
", rendered
end
81 |
# Filters with base url '/userprofile'.
# NOTE(review): the literals look like they lost their HTML markup in this
# dump - confirm against the repository.
def test_base_url_under_custom_route
  source = "Hi, @jch!
"
  mention = "@jch"
  rendered = filter(source, '/userprofile').to_html
  assert_equal "Hi, #{mention}!
", rendered
end
88 |
# Filters with base url '/~'.
# NOTE(review): the literals look like they lost their HTML markup in this
# dump - confirm against the repository.
def test_base_url_slash_with_tilde
  source = "Hi, @jch!
"
  mention = "@jch"
  rendered = filter(source, '/~').to_html
  assert_equal "Hi, #{mention}!
", rendered
end
95 |
# Two-stage pipeline: MarkdownFilter followed by MentionFilter.
MarkdownPipeline = HTML::Pipeline.new([
  HTML::Pipeline::MarkdownFilter,
  HTML::Pipeline::MentionFilter
])
101 |
# Runs @body through MarkdownPipeline with an empty context and a fresh
# result Hash.
#
# Returns the :mentioned_usernames entry of that result.
def mentioned_usernames
  collected = {}.tap { |store| MarkdownPipeline.call(@body, {}, store) }
  collected[:mentioned_usernames]
end
107 |
# A leading @mention in plain body text is extracted.
def test_matches_usernames_in_body
  @body = "@test how are you?"
  found = mentioned_usernames
  assert_equal %w[test], found
end
112 |
# Usernames containing a dash are extracted whole.
def test_matches_usernames_with_dashes
  @body = "hi @some-user"
  found = mentioned_usernames
  assert_equal %w[some-user], found
end
117 |
# A trailing dot is not part of the username.
def test_matches_usernames_followed_by_a_single_dot
  @body = "okay @some-user."
  found = mentioned_usernames
  assert_equal %w[some-user], found
end
122 |
# Several trailing dots are not part of the username either.
def test_matches_usernames_followed_by_multiple_dots
  @body = "okay @some-user..."
  found = mentioned_usernames
  assert_equal %w[some-user], found
end
127 |
# An email address must not yield any mention.
def test_does_not_match_email_addresses
  @body = "aman@tmm1.net"
  found = mentioned_usernames
  assert_equal [], found
end
132 |
# "@github.com" looks like a domain name and must not yield a mention.
def test_does_not_match_domain_name_looking_things
  @body = "we need a @github.com email"
  found = mentioned_usernames
  assert_equal [], found
end
137 |
# An "@org/team" form must not yield a username.
def test_does_not_match_organization_team_mentions
  @body = "we need to @github/enterprise know"
  found = mentioned_usernames
  assert_equal [], found
end
142 |
# A colon directly after the name does not prevent the match.
def test_matches_colon_suffixed_names
  @body = "@tmm1: what do you think?"
  found = mentioned_usernames
  assert_equal %w[tmm1], found
end
147 |
# Space-separated mentions are all extracted, in order.
def test_matches_list_of_names
  @body = "@defunkt @atmos @kneath"
  found = mentioned_usernames
  assert_equal %w[defunkt atmos kneath], found
end
152 |
# Comma-separated mentions are all extracted, in order.
def test_matches_list_of_names_with_commas
  @body = "/cc @defunkt, @atmos, @kneath"
  found = mentioned_usernames
  assert_equal %w[defunkt atmos kneath], found
end
157 |
# Mentions wrapped in parentheses or square brackets are extracted.
def test_matches_inside_brackets
  @body = "(@mislav) and [@rtomayko]"
  found = mentioned_usernames
  assert_equal %w[mislav rtomayko], found
end
162 |
# Every mentioned name is reported; none of the three is dropped.
def test_doesnt_ignore_invalid_users
  @body = "@defunkt @mojombo and @somedude"
  found = mentioned_usernames
  assert_equal %w[defunkt mojombo somedude], found
end
167 |
# Repeated mentions of the same user are reported once.
def test_returns_distinct_set
  @body = "/cc @defunkt, @atmos, @kneath, @defunkt, @defunkt"
  found = mentioned_usernames
  assert_equal %w[defunkt atmos kneath], found
end
172 |
# Mentions inside backtick-quoted inline code are not reported.
def test_does_not_match_inline_code_block_with_multiple_code_blocks
  @body = "something\n\n`/cc @defunkt @atmos @kneath` `/cc @atmos/atmos`"
  found = mentioned_usernames
  assert_equal %w[], found
end
177 |
# A mention followed by ".)" is still extracted without the punctuation.
def test_mention_at_end_of_parenthetical_sentence
  @body = "(We're talking 'bout @ymendel.)"
  found = mentioned_usernames
  assert_equal %w[ymendel], found
end
182 |
# Passes a custom username Regexp as the fourth filter argument.
# NOTE(review): the literals look like they lost their HTML markup in this
# dump - confirm against the repository.
def test_username_pattern_can_be_customized
  markup = "@_abc: test.
"
  fragment = Nokogiri::HTML::DocumentFragment.parse(markup)
  filtered = filter(fragment, '/', nil, /(_[a-z]{3})/)

  mention = "@_abc"
  rendered = filtered.to_html
  assert_equal "#{mention}: test.
", rendered
end
193 |
# MentionPatterns must keep its length when the default pattern is reused
# and grow by exactly one when a new custom pattern is passed.
# NOTE(review): the body literal looks like it lost its HTML markup in this
# dump - confirm against the repository.
def test_filter_does_not_create_a_new_object_for_default_username_pattern
  markup = "@test
"
  doc = Nokogiri::HTML::DocumentFragment.parse(markup)
  cached_patterns = lambda { HTML::Pipeline::MentionFilter::MentionPatterns.length }

  filter(doc.clone, '/', nil)
  baseline = cached_patterns.call

  filter(doc.clone, '/', nil)
  assert_equal baseline, cached_patterns.call

  filter(doc.clone, '/', nil, /test/)
  assert_equal baseline + 1, cached_patterns.call
end
206 |
# Calls mention_link_filter with a single argument on a filter built from nil.
# NOTE(review): the expected literal looks like it lost its HTML markup in
# this dump - confirm against the repository.
def test_mention_link_filter
  mention_filter = HTML::Pipeline::MentionFilter.new nil
  linked = mention_filter.mention_link_filter("@hubot")
  assert_equal "@hubot", linked
end
212 | end
213 |
--------------------------------------------------------------------------------
/lib/html/pipeline.rb:
--------------------------------------------------------------------------------
1 | require "nokogiri"
2 | require "active_support/xml_mini/nokogiri" # convert Documents to hashes
3 |
4 | module HTML
5 | # GitHub HTML processing filters and utilities. This module includes a small
6 | # framework for defining DOM based content filters and applying them to user
7 | # provided content.
8 | #
9 | # See HTML::Pipeline::Filter for information on building filters.
10 | #
11 | # Construct a Pipeline for running multiple HTML filters. A pipeline is created once
12 | # with one to many filters, and it then can be `call`ed many times over the course
13 | # of its lifetime with input.
14 | #
15 | # filters - Array of Filter objects. Each must respond to call(doc,
16 | # context) and return the modified DocumentFragment or a
17 | # String containing HTML markup. Filters are performed in the
18 | # order provided.
19 | # default_context - The default context hash. Values specified here will be merged
20 | # into values from each individual pipeline run. Can NOT be
21 | # nil. Default: empty Hash.
22 | # result_class - The default Class of the result object for individual
23 | # calls. Default: Hash. Protip: Pass in a Struct to get
24 | # some semblance of type safety.
25 | class Pipeline
26 | autoload :VERSION, 'html/pipeline/version'
27 | autoload :Filter, 'html/pipeline/filter'
28 | autoload :AbsoluteSourceFilter, 'html/pipeline/absolute_source_filter'
29 | autoload :BodyContent, 'html/pipeline/body_content'
30 | autoload :AutolinkFilter, 'html/pipeline/autolink_filter'
31 | autoload :CamoFilter, 'html/pipeline/camo_filter'
32 | autoload :EmailReplyFilter, 'html/pipeline/email_reply_filter'
33 | autoload :EmojiFilter, 'html/pipeline/emoji_filter'
34 | autoload :HttpsFilter, 'html/pipeline/https_filter'
35 | autoload :ImageFilter, 'html/pipeline/image_filter'
36 | autoload :ImageMaxWidthFilter, 'html/pipeline/image_max_width_filter'
37 | autoload :MarkdownFilter, 'html/pipeline/markdown_filter'
38 | autoload :MentionFilter, 'html/pipeline/@mention_filter'
39 | autoload :PlainTextInputFilter, 'html/pipeline/plain_text_input_filter'
40 | autoload :SanitizationFilter, 'html/pipeline/sanitization_filter'
41 | autoload :SyntaxHighlightFilter, 'html/pipeline/syntax_highlight_filter'
42 | autoload :TextileFilter, 'html/pipeline/textile_filter'
43 | autoload :TableOfContentsFilter, 'html/pipeline/toc_filter'
44 | autoload :TextFilter, 'html/pipeline/text_filter'
45 |
46 | # Our DOM implementation.
47 | DocumentFragment = Nokogiri::HTML::DocumentFragment
48 |
# Turn a String of HTML into a DocumentFragment. nil is treated as the
# empty String; any non-String argument is handed back unchanged.
def self.parse(document_or_html)
  source = document_or_html || ''
  return source unless source.is_a?(String)

  DocumentFragment.parse(source)
end
59 |
60 | # Public: Returns an Array of Filter objects for this Pipeline.
61 | attr_reader :filters
62 |
63 | # Public: Instrumentation service for the pipeline.
64 | # Set an ActiveSupport::Notifications compatible object to enable.
65 | attr_accessor :instrumentation_service
66 |
67 | # Public: String name for this Pipeline. Defaults to Class name.
68 | attr_writer :instrumentation_name
# Returns the name assigned through the writer, or the receiver's class
# name when none has been set.
def instrumentation_name
  return @instrumentation_name if @instrumentation_name

  self.class.name
end
72 |
73 | class << self
74 | # Public: Default instrumentation service for new pipeline objects.
75 | attr_accessor :default_instrumentation_service
76 | end
77 |
# Public: Build a pipeline.
#
# filters         - Array (possibly nested) of filters. It is flattened and
#                   the flattened Array is frozen.
# default_context - Hash of context values merged into every run. A frozen
#                   copy is stored, so the caller's own Hash stays mutable.
#                   Must not be nil. Default: empty Hash.
# result_class    - Class instantiated for the result when a call does not
#                   supply one. Default: Hash.
#
# Raises ArgumentError if default_context is nil.
def initialize(filters, default_context = {}, result_class = nil)
  raise ArgumentError, "default_context cannot be nil" if default_context.nil?
  @filters = filters.flatten.freeze
  # dup before freezing: freezing the argument itself would make the
  # caller's Hash immutable as a side effect of constructing a pipeline.
  @default_context = default_context.dup.freeze
  @result_class = result_class || Hash
  @instrumentation_service = self.class.default_instrumentation_service
end
85 |
# Run the given HTML through every filter of the pipeline, in order.
#
# html    - A String containing HTML or a DocumentFragment object.
# context - Hash merged over the pipeline's default context. The merged Hash
#           is frozen before the filters see it, so filters MUST NOT modify
#           it in place; state goes back through the result instead.
# result  - Object the filters store extracted information in. When nil, a
#           new instance of the pipeline's result class is used.
#
# Returns the result, whose :output entry holds the value returned by the
# last filter (or html itself when the pipeline has no filters).
def call(html, context = {}, result = nil)
  context = @default_context.merge(context).freeze
  result ||= @result_class.new
  payload = default_payload(
    :filters => @filters.map(&:name),
    :context => context,
    :result  => result
  )

  instrument("call_pipeline.html_pipeline", payload) do
    output = html
    @filters.each do |filter|
      output = perform_filter(filter, output, context, result)
    end
    result[:output] = output
  end

  result
end
112 |
# Internal: Run one filter against the supplied doc inside a
# "call_filter.html_pipeline" instrumentation event.
#
# Returns whatever the filter's call returns.
def perform_filter(filter, doc, context, result)
  payload = default_payload(
    :filter  => filter.name,
    :context => context,
    :result  => result
  )
  instrument("call_filter.html_pipeline", payload) { filter.call(doc, context, result) }
end
125 |
# Like call, but the value returned is always what HTML::Pipeline.parse
# makes of the run's :output. Use this when a DocumentFragment is needed,
# since a pipeline may produce either a DocumentFragment or a String.
def to_document(input, context = {}, result = nil)
  HTML::Pipeline.parse(call(input, context, result)[:output])
end
133 |
# Like call but guarantee the value returned is a String of HTML markup.
#
# input   - A String containing HTML or a DocumentFragment object.
# context - The context Hash for this run. Default: empty Hash.
# result  - Optional result object handed through to call, so the caller
#           can read back what the filters stored in it.
#
# Returns output.to_html when the run's :output responds to to_html,
# otherwise output.to_s.
def to_html(input, context = {}, result = nil)
  # Forward the caller's result object. The previous `result = nil` written
  # inside the argument list replaced it with nil before the call was made.
  result = call(input, context, result)
  output = result[:output]
  if output.respond_to?(:to_html)
    output.to_html
  else
    output.to_s
  end
end
144 |
# Public: Set up instrumentation for this pipeline.
#
# name    - Value assigned to instrumentation_name (nil when omitted).
# service - Instrumentation service to use. When nil, the class-level
#           default_instrumentation_service is used instead.
#
# Returns nothing.
def setup_instrumentation(name = nil, service = nil)
  self.instrumentation_name = name
  service ||= self.class.default_instrumentation_service
  self.instrumentation_service = service
end
153 |
# Internal: If the `instrumentation_service` object is set, instruments the
# block; otherwise the block is run without instrumentation.
#
# event   - String event name passed to the service.
# payload - Hash passed to the service and yielded to the block. Defaults
#           to default_payload when nil.
#
# Returns the result of the provided block.
def instrument(event, payload = nil)
  payload ||= default_payload
  return yield(payload) unless instrumentation_service
  # The block parameter has its own name so it does not shadow the outer
  # `payload` local.
  instrumentation_service.instrument event, payload do |event_payload|
    yield event_payload
  end
end
165 |
# Internal: Build the payload Hash used for instrumentation.
#
# payload - Hash of extra entries merged over the :pipeline entry, so a
#           :pipeline key given here takes precedence.
#
# Returns a new Hash.
def default_payload(payload = {})
  base = { :pipeline => instrumentation_name }
  base.merge(payload)
end
174 | end
175 | end
176 |
# XXX nokogiri monkey patches for 1.8
# Applied only when String does not respond to force_encoding.
unless ''.respond_to?(:force_encoding)
  class Nokogiri::XML::Node
    # Stand-in for #replace (wired up by the aliases below). Works around
    # utf-8 encoded data being erroneously converted to another encoding
    # when text nodes are replaced: String-like replacements are first
    # parsed through document.fragment. See 'utf-8 output 2' in
    # user_content_test.rb for details.
    # NOTE(review): the fragment literal looks like it lost its wrapper
    # markup in this dump - confirm against the repository.
    def replace_with_encoding_fix(replacement)
      if replacement.respond_to?(:to_str)
        wrapped = "#{replacement}
"
        replacement = document.fragment(wrapped).children.first.children
      end
      replace_without_encoding_fix(replacement)
    end

    # Order matters: keep the original under a new name first, then point
    # #replace at the fixed version.
    alias_method :replace_without_encoding_fix, :replace
    alias_method :replace, :replace_with_encoding_fix

    # Same as replace, but returns the receiver.
    def swap(replacement)
      replace(replacement)
      self
    end
  end
end
199 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HTML::Pipeline [![Build Status](https://travis-ci.org/jch/html-pipeline.svg?branch=master)](https://travis-ci.org/jch/html-pipeline)
2 |
3 | GitHub HTML processing filters and utilities. This module includes a small
4 | framework for defining DOM based content filters and applying them to user
5 | provided content. Read an introduction about this project in
6 | [this blog post](https://github.com/blog/1311-html-pipeline-chainable-content-filters).
7 |
8 | - [Installation](#installation)
9 | - [Usage](#usage)
10 | - [Examples](#examples)
11 | - [Filters](#filters)
12 | - [Dependencies](#dependencies)
13 | - [Documentation](#documentation)
14 | - [Extending](#extending)
15 | - [3rd Party Extensions](#3rd-party-extensions)
16 | - [Instrumenting](#instrumenting)
17 | - [Contributing](#contributing)
18 | - [Contributors](#contributors)
19 | - [Releasing A New Version](#releasing-a-new-version)
20 |
21 | ## Installation
22 |
23 | Add this line to your application's Gemfile:
24 |
25 | ```ruby
26 | gem 'html-pipeline'
27 | ```
28 |
29 | And then execute:
30 |
31 | ```sh
32 | $ bundle
33 | ```
34 |
35 | Or install it yourself as:
36 |
37 | ```sh
38 | $ gem install html-pipeline
39 | ```
40 |
41 | ## Usage
42 |
43 | This library provides a handful of chainable HTML filters to transform user
44 | content into markup. A filter takes an HTML string or
45 | `Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then
46 | outputs the result.
47 |
48 | For example, to transform Markdown source into Markdown HTML:
49 |
50 | ```ruby
51 | require 'html/pipeline'
52 |
53 | filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!")
54 | filter.call
55 | ```
56 |
57 | Filters can be combined into a pipeline which causes each filter to hand its
58 | output to the next filter's input. So if you wanted to have content be
59 | filtered through Markdown and be syntax highlighted, you can create the
60 | following pipeline:
61 |
62 | ```ruby
63 | pipeline = HTML::Pipeline.new [
64 | HTML::Pipeline::MarkdownFilter,
65 | HTML::Pipeline::SyntaxHighlightFilter
66 | ]
67 | result = pipeline.call <<-CODE
68 | This is *great*:
69 |
70 | some_code(:first)
71 |
72 | CODE
73 | result[:output].to_s
74 | ```
75 |
76 | Prints:
77 |
78 | ```html
79 | This is great:
80 |
81 | some_code(:first)
82 |
83 | ```
84 |
85 | To generate CSS for HTML formatted code, use the [pygments.rb](https://github.com/tmm1/pygments.rb#usage) `#css` method. `pygments.rb` is a dependency of the `SyntaxHighlightFilter`.
86 |
87 | Some filters take an optional **context** and/or **result** hash. These are
88 | used to pass around arguments and metadata between filters in a pipeline. For
89 | example, if you don't want to use GitHub formatted Markdown, you can pass an
90 | option in the context hash:
91 |
92 | ```ruby
93 | filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false)
94 | filter.call
95 | ```
96 |
97 | ### Examples
98 |
99 | We define different pipelines for different parts of our app. Here are a few
100 | paraphrased snippets to get you started:
101 |
102 | ```ruby
103 | # The context hash is how you pass options between different filters.
104 | # See individual filter source for explanation of options.
105 | context = {
106 | :asset_root => "http://your-domain.com/where/your/images/live/icons",
107 | :base_url => "http://your-domain.com"
108 | }
109 |
110 | # Pipeline providing sanitization and image hijacking but no mention
111 | # related features.
112 | SimplePipeline = Pipeline.new [
113 | SanitizationFilter,
114 | TableOfContentsFilter, # add 'name' anchors to all headers and generate toc list
115 | CamoFilter,
116 | ImageMaxWidthFilter,
117 | SyntaxHighlightFilter,
118 | EmojiFilter,
119 | AutolinkFilter
120 | ], context
121 |
122 | # Pipeline used for user provided content on the web
123 | MarkdownPipeline = Pipeline.new [
124 | MarkdownFilter,
125 | SanitizationFilter,
126 | CamoFilter,
127 | ImageMaxWidthFilter,
128 | HttpsFilter,
129 | MentionFilter,
130 | EmojiFilter,
131 | SyntaxHighlightFilter
132 | ], context.merge(:gfm => true) # enable github formatted markdown
133 |
134 |
135 | # Define a pipeline based on another pipeline's filters
136 | NonGFMMarkdownPipeline = Pipeline.new(MarkdownPipeline.filters,
137 | context.merge(:gfm => false))
138 |
139 | # Pipelines aren't limited to the web. You can use them for email
140 | # processing also.
141 | HtmlEmailPipeline = Pipeline.new [
142 | PlainTextInputFilter,
143 | ImageMaxWidthFilter
144 | ], {}
145 |
146 | # Just emoji.
147 | EmojiPipeline = Pipeline.new [
148 | PlainTextInputFilter,
149 | EmojiFilter
150 | ], context
151 | ```
152 |
153 | ## Filters
154 |
155 | * `MentionFilter` - replace `@user` mentions with links
156 | * `AbsoluteSourceFilter` - replace relative image urls with fully qualified versions
157 | * `AutolinkFilter` - auto_linking urls in HTML
158 | * `CamoFilter` - replace http image urls with [camo-fied](https://github.com/atmos/camo) https versions
159 | * `EmailReplyFilter` - util filter for working with emails
160 | * `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
161 | * `HttpsFilter` - HTML Filter for replacing http github urls with https versions.
162 | * `ImageMaxWidthFilter` - link to full size image for large images
163 | * `MarkdownFilter` - convert markdown to html
164 | * `PlainTextInputFilter` - html escape text and wrap the result in a div
165 | * `SanitizationFilter` - whitelist sanitize user markup
166 | * `SyntaxHighlightFilter` - [code syntax highlighter](#syntax-highlighting)
167 | * `TextileFilter` - convert textile to html
168 | * `TableOfContentsFilter` - anchor headings with name attributes and generate Table of Contents html unordered list linking headings
169 |
170 | ## Dependencies
171 |
172 | Filter gem dependencies are not bundled; you must bundle the filter's gem
173 | dependencies. The below list details filters with dependencies. For example,
174 | `SyntaxHighlightFilter` uses [github-linguist](https://github.com/github/linguist)
175 | to detect and highlight languages. To use the `SyntaxHighlightFilter`,
176 | add the following to your Gemfile:
177 |
178 | ```ruby
179 | gem 'github-linguist'
180 | ```
181 |
182 | * `AutolinkFilter` - `rinku`
183 | * `EmailReplyFilter` - `escape_utils`, `email_reply_parser`
184 | * `EmojiFilter` - `gemoji`
185 | * `MarkdownFilter` - `github-markdown`
186 | * `PlainTextInputFilter` - `escape_utils`
187 | * `SanitizationFilter` - `sanitize`
188 | * `SyntaxHighlightFilter` - `github-linguist`
189 | * `TextileFilter` - `RedCloth`
190 |
191 | _Note:_ See [Gemfile](/Gemfile) `:test` block for version requirements.
192 |
193 | ## Documentation
194 |
195 | Full reference documentation can be [found here](http://rubydoc.info/gems/html-pipeline/frames).
196 |
197 | ## Extending
198 | To write a custom filter, you need a class with a `call` method that inherits
199 | from `HTML::Pipeline::Filter`.
200 |
201 | For example this filter adds a base url to images that are root relative:
202 |
203 | ```ruby
204 | require 'uri'
205 |
206 | class RootRelativeFilter < HTML::Pipeline::Filter
207 |
208 | def call
209 | doc.search("img").each do |img|
210 | next if img['src'].nil?
211 | src = img['src'].strip
212 | if src.start_with? '/'
213 | img["src"] = URI.join(context[:base_url], src).to_s
214 | end
215 | end
216 | doc
217 | end
218 |
219 | end
220 | ```
221 |
222 | Now this filter can be used in a pipeline:
223 |
224 | ```ruby
225 | Pipeline.new [ RootRelativeFilter ], { :base_url => 'http://somehost.com' }
226 | ```
227 |
228 | ### 3rd Party Extensions
229 |
230 | If you have an idea for a filter, propose it as
231 | [an issue](https://github.com/jch/html-pipeline/issues) first. This allows us to discuss
232 | whether the filter is a common enough use case to belong in this gem, or should be
233 | built as an external gem.
234 |
235 | Here are some extensions people have built:
236 |
237 | * [html-pipeline-asciidoc_filter](https://github.com/asciidoctor/html-pipeline-asciidoc_filter)
238 | * [jekyll-html-pipeline](https://github.com/gjtorikian/jekyll-html-pipeline)
239 | * [nanoc-html-pipeline](https://github.com/burnto/nanoc-html-pipeline)
240 | * [html-pipeline-bitly](https://github.com/dewski/html-pipeline-bitly)
241 | * [html-pipeline-cite](https://github.com/lifted-studios/html-pipeline-cite)
242 | * [tilt-html-pipeline](https://github.com/bradgessler/tilt-html-pipeline)
243 | * [html-pipeline-wiki-link](https://github.com/lifted-studios/html-pipeline-wiki-link) - WikiMedia-style wiki links
244 | * [task_list](https://github.com/github/task_list) - GitHub flavor Markdown Task List
245 | * [html-pipeline-rouge_filter](https://github.com/JuanitoFatas/html-pipeline-rouge_filter) - Syntax highlight with [Rouge](https://github.com/jneen/rouge/)
246 | * [html-pipeline-nico_link](https://github.com/rutan/html-pipeline-nico_link) - An HTML::Pipeline filter for [niconico](http://www.nicovideo.jp) description links
247 | * [html-pipeline-gitlab](https://gitlab.com/gitlab-org/html-pipeline-gitlab) - This gem implements various filters for html-pipeline used by GitLab
248 | * [html-pipeline-youtube](https://github.com/st0012/html-pipeline-youtube) - An HTML::Pipeline filter for YouTube links
249 | * [html-pipeline-flickr](https://github.com/st0012/html-pipeline-flickr) - An HTML::Pipeline filter for Flickr links
250 | * [html-pipeline-vimeo](https://github.com/dlackty/html-pipeline-vimeo) - An HTML::Pipeline filter for Vimeo links
251 | * [html-pipeline-hashtag](https://github.com/mr-dxdy/html-pipeline-hashtag) - An HTML::Pipeline filter for hashtags
252 | * [html-pipeline-linkify_github](https://github.com/jollygoodcode/html-pipeline-linkify_github) - An HTML::Pipeline filter to autolink GitHub urls
253 |
254 | ## Instrumenting
255 |
256 | Filters and Pipelines can be set up to be instrumented when called. The pipeline
257 | must be set up with an
258 | [ActiveSupport::Notifications](http://api.rubyonrails.org/classes/ActiveSupport/Notifications.html)
259 | compatible service object and a name. New pipeline objects will default to the
260 | `HTML::Pipeline.default_instrumentation_service` object.
261 |
262 | ``` ruby
263 | # the AS::Notifications-compatible service object
264 | service = ActiveSupport::Notifications
265 |
266 | # instrument a specific pipeline
267 | pipeline = HTML::Pipeline.new [MarkdownFilter], context
268 | pipeline.setup_instrumentation "MarkdownPipeline", service
269 |
270 | # or set default instrumentation service for all new pipelines
271 | HTML::Pipeline.default_instrumentation_service = service
272 | pipeline = HTML::Pipeline.new [MarkdownFilter], context
273 | pipeline.setup_instrumentation "MarkdownPipeline"
274 | ```
275 |
276 | Filters are instrumented when they are run through the pipeline. A
277 | `call_filter.html_pipeline` event is published once the filter finishes. The
278 | `payload` should include the `filter` name. Each filter will trigger its own
279 | instrumentation call.
280 |
281 | ``` ruby
282 | service.subscribe "call_filter.html_pipeline" do |event, start, ending, transaction_id, payload|
283 | payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation`
284 | payload[:filter] #=> "MarkdownFilter"
285 | payload[:context] #=> context Hash
286 | payload[:result] #=> instance of result class
287 | payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment
288 | end
289 | ```
290 |
291 | The full pipeline is also instrumented:
292 |
293 | ``` ruby
294 | service.subscribe "call_pipeline.html_pipeline" do |event, start, ending, transaction_id, payload|
295 | payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation`
296 | payload[:filters] #=> ["MarkdownFilter"]
297 | payload[:doc] #=> HTML String or Nokogiri::DocumentFragment
298 | payload[:context] #=> context Hash
299 | payload[:result] #=> instance of result class
300 | payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment
301 | end
302 | ```
303 |
304 | ## FAQ
305 |
306 | ### 1. Why doesn't my pipeline work when there's no root element in the document?
307 |
308 | To make a pipeline work on a plain text document, put the `PlainTextInputFilter`
309 | at the beginning of your pipeline. This will wrap the content in a `div` so the
310 | filters have a root element to work with. If you're passing in an HTML fragment,
311 | but it doesn't have a root element, you can wrap the content in a `div`
312 | yourself. For example:
313 |
314 | ```ruby
315 | EmojiPipeline = Pipeline.new [
316 | PlainTextInputFilter, # <- Wraps input in a div and escapes html tags
317 | EmojiFilter
318 | ], context
319 |
320 | plain_text = "Gutentag! :wave:"
321 | EmojiPipeline.call(plain_text)
322 |
323 | html_fragment = "This is outside of an html element, but <strong>this isn't. :+1:</strong>"
324 | EmojiPipeline.call("<div>#{html_fragment}</div>") # <- Wrap your own html fragments to avoid escaping
325 | ```
326 |
327 | ### 2. How do I customize a whitelist for `SanitizationFilter`s?
328 |
329 | `SanitizationFilter::WHITELIST` is the default whitelist used if no `:whitelist`
330 | argument is given in the context. The default is a good starting template for
331 | you to add additional elements. You can either modify the constant's value, or
332 | re-define your own constant and pass that in via the context.
333 |
334 | ## Contributing
335 |
336 | Please review the [Contributing Guide](https://github.com/jch/html-pipeline/blob/master/CONTRIBUTING.md).
337 |
338 | 1. [Fork it](https://help.github.com/articles/fork-a-repo)
339 | 2. Create your feature branch (`git checkout -b my-new-feature`)
340 | 3. Commit your changes (`git commit -am 'Added some feature'`)
341 | 4. Push to the branch (`git push origin my-new-feature`)
342 | 5. Create new [Pull Request](https://help.github.com/articles/using-pull-requests)
343 |
344 | To see what has changed in recent versions, see the [CHANGELOG](https://github.com/jch/html-pipeline/blob/master/CHANGELOG.md).
345 |
346 | ### Contributors
347 |
348 | Thanks to all of [these contributors](https://github.com/jch/html-pipeline/graphs/contributors).
349 |
350 | Project is a member of the [OSS Manifesto](http://ossmanifesto.org/).
351 |
352 | ### Releasing A New Version
353 |
354 | This section is for gem maintainers to cut a new version of the gem.
355 |
356 | * create a new branch named `release-x.y.z` where `x.y.z` follows [semver](http://semver.org)
357 | * update lib/html/pipeline/version.rb to next version number X.X.X
358 | * update CHANGELOG.md. Prepare a draft with `script/changelog`
359 | * push branch and create a new pull request
360 | * after tests are green, merge to master
361 | * on the master branch, run `script/release`
362 |
--------------------------------------------------------------------------------