heyo

" 8 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 9 | assert_raise(TypeError) { PlainTextInputFilter.call(doc, {}) } 10 | end 11 | 12 | def test_wraps_input_in_a_div_element 13 | doc = PlainTextInputFilter.call("howdy pahtner", {}) 14 | assert_equal "

howdy pahtner

", doc.to_s 15 | end 16 | 17 | def test_html_escapes_plain_text_input 18 | doc = PlainTextInputFilter.call("See: ", {}) 19 | assert_equal "

See: <http://example.org>

", 20 | doc.to_s 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /test/html/pipeline/autolink_filter_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | 3 | AutolinkFilter = HTML::Pipeline::AutolinkFilter 4 | 5 | class HTML::Pipeline::AutolinkFilterTest < Test::Unit::TestCase 6 | def test_uses_rinku_for_autolinking 7 | # just try to parse a complicated piece of HTML 8 | # that Rails auto_link cannot handle 9 | assert_equal '

"http://www.github.com"

', 10 | AutolinkFilter.to_html('

"http://www.github.com"

') 11 | end 12 | 13 | def test_autolink_option 14 | assert_equal '

"http://www.github.com"

', 15 | AutolinkFilter.to_html('

"http://www.github.com"

', :autolink => false) 16 | end 17 | 18 | def test_autolink_flags 19 | assert_equal '

"http://github"

', 20 | AutolinkFilter.to_html('

"http://github"

', :flags => Rinku::AUTOLINK_SHORT_DOMAINS) 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /lib/html/pipeline/markdown_filter.rb: -------------------------------------------------------------------------------- 1 | require 'github/markdown' 2 | 3 | module HTML 4 | class Pipeline 5 | # HTML Filter that converts Markdown text into HTML and converts into a 6 | # DocumentFragment. This is different from most filters in that it can take a 7 | # non-HTML as input. It must be used as the first filter in a pipeline. 8 | # 9 | # Context options: 10 | # :gfm => false Disable GFM line-end processing 11 | # 12 | # This filter does not write any additional information to the context hash. 13 | class MarkdownFilter < TextFilter 14 | def initialize(text, context = nil, result = nil) 15 | super text, context, result 16 | @text = @text.gsub "\r", '' 17 | end 18 | 19 | # Convert Markdown to HTML using the best available implementation 20 | # and convert into a DocumentFragment. 21 | def call 22 | mode = (context[:gfm] != false) ? :gfm : :markdown 23 | html = GitHub::Markdown.to_html(@text, mode) 24 | html.rstrip! 25 | html 26 | end 27 | end 28 | end 29 | end -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 GitHub Inc. 
and Jerry Cheung 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /html-pipeline.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | require File.expand_path("../lib/html/pipeline/version", __FILE__) 3 | 4 | Gem::Specification.new do |gem| 5 | gem.name = "html-pipeline" 6 | gem.version = HTML::Pipeline::VERSION 7 | gem.license = "MIT" 8 | gem.authors = ["Ryan Tomayko", "Jerry Cheung"] 9 | gem.email = ["ryan@github.com", "jerry@github.com"] 10 | gem.description = %q{GitHub HTML processing filters and utilities} 11 | gem.summary = %q{Helpers for processing content through a chain of filters} 12 | gem.homepage = "https://github.com/jch/html-pipeline" 13 | 14 | gem.files = `git ls-files`.split $/ 15 | gem.test_files = gem.files.grep(%r{^test}) 16 | gem.require_paths = ["lib"] 17 | 18 | gem.add_dependency "gemoji", "~> 1.0" 19 | gem.add_dependency "nokogiri", "~> 1.4" 20 | gem.add_dependency "github-markdown", "~> 0.5" 21 | gem.add_dependency "sanitize", "~> 2.0" 22 | gem.add_dependency "github-linguist", "~> 2.1" 23 | gem.add_dependency "rinku", "~> 1.7" 24 | gem.add_dependency "escape_utils", "~> 0.2" 25 | gem.add_dependency "activesupport", ">= 2" 26 | end 27 | -------------------------------------------------------------------------------- /lib/html/pipeline/toc_filter.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | # HTML filter that adds a 'name' attribute to all headers 4 | # in a document, so they can be accessed from a table of contents 5 | # 6 | # TODO: besides adding the name attribute, we should get around to 7 | # eventually generating the Table of Contents itself, with links 8 | # to each header 9 | class TableOfContentsFilter < Filter 10 | def call 11 | headers = Hash.new(0) 12 | doc.css('h1, h2, h3, h4, h5, h6').each do |node| 13 | name = node.text.downcase 14 | name.gsub!(/[^\w\- ]/, '') # 
remove punctuation 15 | name.gsub!(' ', '-') # replace spaces with dash 16 | name = EscapeUtils.escape_uri(name) # escape extended UTF-8 chars 17 | 18 | uniq = (headers[name] > 0) ? "-#{headers[name]}" : '' 19 | headers[name] += 1 20 | if header_content = node.children.first 21 | header_content.add_previous_sibling(%Q{}) 22 | end 23 | end 24 | doc 25 | end 26 | end 27 | end 28 | end -------------------------------------------------------------------------------- /lib/html/pipeline/image_max_width_filter.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | # This filter rewrites image tags with a max-width inline style and also wraps 4 | # the image in an tag that causes the full size image to be opened in a 5 | # new tab. 6 | # 7 | # The max-width inline styles are especially useful in HTML email which 8 | # don't use a global stylesheets. 9 | class ImageMaxWidthFilter < Filter 10 | def call 11 | doc.search('img').each do |element| 12 | # Skip if there's already a style attribute. Not sure how this 13 | # would happen but we can reconsider it in the future. 14 | next if element['style'] 15 | 16 | # Bail out if src doesn't look like a valid http url. trying to avoid weird 17 | # js injection via javascript: urls. 
18 | next if element['src'].to_s.strip =~ /\Ajavascript/i 19 | 20 | element['style'] = "max-width:100%;" 21 | 22 | if !has_ancestor?(element, %w(a)) 23 | link_image element 24 | end 25 | end 26 | 27 | doc 28 | end 29 | 30 | def link_image(element) 31 | link = doc.document.create_element('a', :href => element['src'], :target => '_blank') 32 | link.add_child(element.dup) 33 | element.replace(link) 34 | end 35 | end 36 | end 37 | end -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require 'bundler/setup' 2 | require 'html/pipeline' 3 | require 'test/unit' 4 | 5 | require 'active_support/core_ext/object/try' 6 | 7 | module TestHelpers 8 | # Asserts that `needle` is not a member of `haystack`, where 9 | # `haystack` is any object that responds to `include?`. 10 | def assert_doesnt_include(needle, haystack, message = nil) 11 | error = ' included in ' 12 | message = build_message(message, error, needle.to_s, Array(haystack).map(&:to_s)) 13 | 14 | assert_block message do 15 | !haystack.include?(needle) 16 | end 17 | end 18 | 19 | # Asserts that `needle` is a member of `haystack`, where 20 | # `haystack` is any object that responds to `include?`. 21 | def assert_includes(needle, haystack, message = nil) 22 | error = ' not included in ' 23 | message = build_message(message, error, needle.to_s, Array(haystack).map(&:to_s)) 24 | 25 | assert_block message do 26 | haystack.include?(needle) 27 | end 28 | end 29 | 30 | # Asserts that two html fragments are equivalent. Attribute order 31 | # will be ignored. 
32 | def assert_equal_html(expected, actual) 33 | assert_equal Nokogiri::HTML::DocumentFragment.parse(expected).to_hash, 34 | Nokogiri::HTML::DocumentFragment.parse(actual).to_hash 35 | end 36 | end 37 | 38 | Test::Unit::TestCase.send(:include, TestHelpers) -------------------------------------------------------------------------------- /lib/html/pipeline/body_content.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | # Public: Runs a String of content through an HTML processing pipeline, 4 | # providing easy access to a generated DocumentFragment. 5 | class BodyContent 6 | attr_reader :result 7 | 8 | # Public: Initialize a BodyContent. 9 | # 10 | # body - A String body. 11 | # context - A Hash of context options for the filters. 12 | # pipeline - A HTML::Pipeline object with one or more Filters. 13 | def initialize(body, context, pipeline) 14 | @body = body 15 | @context = context 16 | @pipeline = pipeline 17 | end 18 | 19 | # Public: Gets the memoized result of the body content as it passed through 20 | # the Pipeline. 21 | # 22 | # Returns a Hash, or something similar as defined by @pipeline.result_class. 23 | def result 24 | @result ||= @pipeline.call @body, @context 25 | end 26 | 27 | # Public: Gets the updated body from the Pipeline result. 28 | # 29 | # Returns a String or DocumentFragment. 30 | def output 31 | @output ||= result[:output] 32 | end 33 | 34 | # Public: Parses the output into a DocumentFragment. 35 | # 36 | # Returns a DocumentFragment. 
37 | def document 38 | @document ||= HTML::Pipeline.parse output 39 | end 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /test/html/pipeline/camo_filter_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | 3 | class HTML::Pipeline::CamoFilterTest < Test::Unit::TestCase 4 | CamoFilter = HTML::Pipeline::CamoFilter 5 | 6 | def setup 7 | @asset_proxy_url = 'https//assets.example.org' 8 | @asset_proxy_secret_key = 'ssssh-secret' 9 | @options = { 10 | :asset_proxy => @asset_proxy_url, 11 | :asset_proxy_secret_key => @asset_proxy_secret_key 12 | } 13 | end 14 | 15 | def test_camouflaging_http_image_urls 16 | orig = %(

) 17 | assert_includes 'img src="' + @asset_proxy_url, 18 | CamoFilter.call(orig, @options).to_s 19 | end 20 | 21 | def test_rewrites_dotcom_image_urls 22 | orig = %(

) 23 | assert_equal "

", 24 | CamoFilter.call(orig, @options).to_s 25 | end 26 | 27 | def test_not_camouflaging_https_image_urls 28 | orig = %(

) 29 | assert_doesnt_include 'img src="' + @asset_proxy_url, 30 | CamoFilter.call(orig, @options).to_s 31 | end 32 | 33 | def test_handling_images_with_no_src_attribute 34 | orig = %(

Ice cube

Will swarm on any motherfucker in a blue uniform

) 8 | assert_includes 'Straight Outta Compton 23 |

Dopeman

24 |

Express Yourself

25 |

Dopeman

) 26 | 27 | result = TocFilter.call(orig).to_s 28 | 29 | assert_includes '"dopeman"', result 30 | assert_includes '"dopeman-1"', result 31 | end 32 | 33 | def test_all_header_tags_are_found_when_adding_anchors 34 | orig = %(

"Funky President" by James Brown

35 |

"It's My Thing" by Marva Whitney

36 |

YO DAWG

\nc" 34 | html = SanitizationFilter.call(stuff).to_s 35 | assert_equal "a\nb\nc", html 36 | end 37 | 38 | def test_does_not_sanitize_li_elements_contained_in_ul_or_ol 39 | stuff = "a\n

\nc" 40 | assert_equal stuff, SanitizationFilter.call(stuff).to_s 41 | end 42 | 43 | def test_github_specific_protocols_are_not_removed 44 | stuff = 'Spill this yo and so on' 45 | assert_equal stuff, SanitizationFilter.call(stuff).to_s 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/html/pipeline/emoji_filter.rb: -------------------------------------------------------------------------------- 1 | require 'emoji' 2 | 3 | module HTML 4 | class Pipeline 5 | # HTML filter that replaces :emoji: with images. 6 | # 7 | # Context: 8 | # :asset_root (required) - base url to link to emoji sprite 9 | class EmojiFilter < Filter 10 | # Build a regexp that matches all valid :emoji: names. 11 | EmojiPattern = /:(#{Emoji.names.map { |name| Regexp.escape(name) }.join('|')}):/ 12 | 13 | def call 14 | doc.search('text()').each do |node| 15 | content = node.to_html 16 | next if !content.include?(':') 17 | next if has_ancestor?(node, %w(pre code)) 18 | html = emoji_image_filter(content) 19 | next if html == content 20 | node.replace(html) 21 | end 22 | doc 23 | end 24 | 25 | # Implementation of validate hook. 26 | # Errors should raise exceptions or use an existing validator. 27 | def validate 28 | needs :asset_root 29 | end 30 | 31 | # Replace :emoji: with corresponding images. 32 | # 33 | # text - String text to replace :emoji: in. 34 | # 35 | # Returns a String with :emoji: replaced with images. 36 | def emoji_image_filter(text) 37 | return text unless text.include?(':') 38 | 39 | text.gsub EmojiPattern do |match| 40 | name = $1 41 | "

" 42 | end 43 | end 44 | 45 | # The base url to link emoji sprites 46 | # 47 | # Raises ArgumentError if context option has not been provided. 48 | # Returns the context's asset_root. 49 | def asset_root 50 | context[:asset_root] 51 | end 52 | end 53 | end 54 | end -------------------------------------------------------------------------------- /test/html/pipeline/image_max_width_filter_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | 3 | class HTML::Pipeline::ImageMaxWidthFilterTest < Test::Unit::TestCase 4 | def filter(html) 5 | HTML::Pipeline::ImageMaxWidthFilter.call(html) 6 | end 7 | 8 | def test_rewrites_image_style_tags 9 | body = "

Screenshot:

" 10 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 11 | 12 | res = filter(doc) 13 | assert_equal_html %q(

Screenshot:

), 14 | res.to_html 15 | end 16 | 17 | def test_leaves_existing_image_style_tags_alone 18 | body = "

" 19 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 20 | 21 | res = filter(doc) 22 | assert_equal_html '

', 23 | res.to_html 24 | end 25 | 26 | def test_links_to_image 27 | body = "

Screenshot:

" 28 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 29 | 30 | res = filter(doc) 31 | assert_equal_html '

Screenshot:

', 32 | res.to_html 33 | end 34 | 35 | def test_doesnt_link_to_image_when_already_linked 36 | body = "

Screenshot:

" 37 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 38 | 39 | res = filter(doc) 40 | assert_equal_html %q(

Screenshot:

), 41 | res.to_html 42 | end 43 | 44 | def test_doesnt_screw_up_inlined_images 45 | body = "

Screenshot , yes, this is a screenshot indeed.

" 46 | doc = Nokogiri::HTML::DocumentFragment.parse(body) 47 | 48 | assert_equal_html %q(

Screenshot , yes, this is a screenshot indeed.

), filter(doc).to_html 49 | end 50 | end 51 | -------------------------------------------------------------------------------- /lib/html/pipeline/camo_filter.rb: -------------------------------------------------------------------------------- 1 | require 'openssl' 2 | 3 | module HTML 4 | class Pipeline 5 | # HTML Filter for replacing http image URLs with camo versions. See: 6 | # 7 | # https://github.com/atmos/camo 8 | # 9 | # All images provided in user content should be run through this 10 | # filter so that http image sources do not cause mixed-content warnings 11 | # in browser clients. 12 | # 13 | # Context options: 14 | # :asset_proxy (required) - Base URL for constructed asset proxy URLs. 15 | # :asset_proxy_secret_key (required) - The shared secret used to encode URLs. 16 | # 17 | # This filter does not write additional information to the context. 18 | class CamoFilter < Filter 19 | # Hijacks images in the markup provided, replacing them with URLs that 20 | # go through the github asset proxy. 21 | def call 22 | doc.search("img").each do |element| 23 | next if element['src'].nil? 24 | src = element['src'].strip 25 | src = src.sub(%r!^http://github.com!, 'https://github.com') 26 | next if context[:disable_asset_proxy] 27 | 28 | if src =~ /^http:/ || src =~ /^https:\/\/img.skitch.com\// 29 | element['src'] = asset_proxy_url(src) 30 | else 31 | element['src'] = src 32 | end 33 | end 34 | doc 35 | end 36 | 37 | # Implementation of validate hook. 38 | # Errors should raise exceptions or use an existing validator. 39 | def validate 40 | needs :asset_proxy, :asset_proxy_secret_key 41 | end 42 | 43 | # The camouflaged URL for a given image URL. 44 | def asset_proxy_url(url) 45 | "#{asset_proxy_host}/#{asset_url_hash(url)}/#{hexencode(url)}" 46 | end 47 | 48 | # Private: calculate the HMAC digest for a image source URL. 
49 | def asset_url_hash(url) 50 | digest = OpenSSL::Digest::Digest.new('sha1') 51 | OpenSSL::HMAC.hexdigest(digest, asset_proxy_secret_key, url) 52 | end 53 | 54 | # Private: the hostname to use for generated asset proxied URLs. 55 | def asset_proxy_host 56 | context[:asset_proxy] 57 | end 58 | 59 | def asset_proxy_secret_key 60 | context[:asset_proxy_secret_key] 61 | end 62 | 63 | # Private: helper to hexencode a string. Each byte ends up encoded into 64 | # two characters, zero padded value in the range [0-9a-f]. 65 | def hexencode(str) 66 | str.to_enum(:each_byte).map { |byte| "%02x" % byte }.join 67 | end 68 | end 69 | end 70 | end -------------------------------------------------------------------------------- /lib/html/pipeline/email_reply_filter.rb: -------------------------------------------------------------------------------- 1 | module HTML 2 | class Pipeline 3 | # HTML Filter that converts email reply text into an HTML DocumentFragment. 4 | # It must be used as the first filter in a pipeline. 5 | # 6 | # Context options: 7 | # None 8 | # 9 | # This filter does not write any additional information to the context hash. 10 | class EmailReplyFilter < TextFilter 11 | include EscapeUtils 12 | 13 | EMAIL_HIDDEN_HEADER = %(…

).freeze 14 | EMAIL_QUOTED_HEADER = %(

).freeze 15 | EMAIL_SIGNATURE_HEADER = %(

).freeze 16 | EMAIL_FRAGMENT_HEADER = %(

).freeze 17 | EMAIL_HEADER_END = "

tags around them so we can hide the hidden blocks, and style 23 | # the quoted blocks differently. Since multiple blocks may be hidden, be 24 | # sure to keep the "email-hidden-reply"

s around "email-quoted-reply" 25 | #

heyo

" 23 | doc = HTML::Pipeline.parse(body) 24 | assert_raise(TypeError) { MarkdownFilter.call(doc, {}) } 25 | end 26 | 27 | def test_gfm_enabled_by_default 28 | doc = MarkdownFilter.to_document(@haiku, {}) 29 | assert doc.kind_of?(HTML::Pipeline::DocumentFragment) 30 | assert_equal 2, doc.search('br').size 31 | end 32 | 33 | def test_disabling_gfm 34 | doc = MarkdownFilter.to_document(@haiku, :gfm => false) 35 | assert doc.kind_of?(HTML::Pipeline::DocumentFragment) 36 | assert_equal 0, doc.search('br').size 37 | end 38 | 39 | def test_fenced_code_blocks 40 | doc = MarkdownFilter.to_document(@code) 41 | assert doc.kind_of?(HTML::Pipeline::DocumentFragment) 42 | assert_equal 1, doc.search('pre').size 43 | end 44 | 45 | def test_fenced_code_blocks_with_language 46 | doc = MarkdownFilter.to_document(@code.sub("```", "``` ruby")) 47 | assert doc.kind_of?(HTML::Pipeline::DocumentFragment) 48 | assert_equal 1, doc.search('pre').size 49 | assert_equal 'ruby', doc.search('pre').first['lang'] 50 | end 51 | end 52 | 53 | class GFMTest < Test::Unit::TestCase 54 | def gfm(text) 55 | MarkdownFilter.call(text, :gfm => true) 56 | end 57 | 58 | def test_not_touch_single_underscores_inside_words 59 | assert_equal "

foo_bar

", 60 | gfm("foo_bar") 61 | end 62 | 63 | def test_not_touch_underscores_in_code_blocks 64 | assert_equal "

foo_bar_baz\n

", 65 | gfm(" foo_bar_baz") 66 | end 67 | 68 | def test_not_touch_underscores_in_pre_blocks 69 | assert_equal "

\nfoo_bar_baz\n

", 70 | gfm("

\nfoo_bar_baz\n

") 71 | end 72 | 73 | def test_not_touch_two_or_more_underscores_inside_words 74 | assert_equal "

foo_bar_baz

", 75 | gfm("foo_bar_baz") 76 | end 77 | 78 | def test_turn_newlines_into_br_tags_in_simple_cases 79 | assert_equal "

foo
\nbar

", 80 | gfm("foo\nbar") 81 | end 82 | 83 | def test_convert_newlines_in_all_groups 84 | assert_equal "

apple
\npear
\norange

\n\n" + 85 | "

ruby
\npython
\nerlang

", 86 | gfm("apple\npear\norange\n\nruby\npython\nerlang") 87 | end 88 | 89 | def test_convert_newlines_in_even_long_groups 90 | assert_equal "

apple
\npear
\norange
\nbanana

\n\n" + 91 | "

ruby
\npython
\nerlang

", 92 | gfm("apple\npear\norange\nbanana\n\nruby\npython\nerlang") 93 | end 94 | 95 | def test_not_convert_newlines_in_lists 96 | assert_equal "

foo

\n\n

bar

", 97 | gfm("# foo\n# bar") 98 | assert_equal "

", 99 | gfm("* foo\n* bar") 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /lib/html/pipeline/@mention_filter.rb: -------------------------------------------------------------------------------- 1 | require 'set' 2 | 3 | module HTML 4 | class Pipeline 5 | # HTML filter that replaces @user mentions with links. Mentions within

<pre>,
  6 |     # <code>, and <a> elements are ignored. Mentions that reference users that do
  7 |     # not exist are ignored.
  8 |     #
  9 |     # Context options:
 10 |     #   :base_url - Used to construct links to user profile pages for each
 11 |     #               mention.
 12 |     #   :info_url - Used to link to "more info" when someone mentions @mention
 13 |     #               or @mentioned.
 14 |     #
 15 |     class MentionFilter < Filter
 16 |       # Public: Find user @mentions in text.  See
 17 |       # MentionFilter#mention_link_filter.
 18 |       #
 19 |       #   MentionFilter.mentioned_logins_in(text) do |match, login, is_mentioned|
 20 |       #     "#{login}"
 21 |       #   end
 22 |       #
 23 |       # text - String text to search.
 24 |       #
 25 |       # Yields the String match, the String login name, and a Boolean determining
 26 |       # if the match = "@mention[ed]".  The yield's return replaces the match in
 27 |       # the original text.
 28 |       #
 29 |       # Returns a String replaced with the return of the block.
 30 |       def self.mentioned_logins_in(text)
 31 |         text.gsub MentionPattern do |match|
 32 |           login = $1
 33 |           yield match, login, MentionLogins.include?(login.downcase)
 34 |         end
 35 |       end
 36 | 
 37 |       # Pattern used to extract @mentions from text.
 38 |       MentionPattern = /
 39 |         (?:^|\W)                   # beginning of string or non-word char
 40 |         @((?>[a-z0-9][a-z0-9-]*))  # @username
 41 |         (?!\/)                     # without a trailing slash
 42 |         (?=
 43 |           \.+[ \t\W]|              # dots followed by space or non-word character
 44 |           \.+$|                    # dots at end of line
 45 |           [^0-9a-zA-Z_.]|          # non-word character except dot
 46 |           $                        # end of line
 47 |         )
 48 |       /ix
 49 | 
 50 |       # List of username logins that, when mentioned, link to the blog post
 51 |       # about @mentions instead of triggering a real mention.
 52 |       MentionLogins = %w(
 53 |         mention
 54 |         mentions
 55 |         mentioned
 56 |         mentioning
 57 |       )
 58 | 
 59 |       # Don't look for mentions in text nodes that are children of these elements
 60 |       IGNORE_PARENTS = %w(pre code a).to_set
 61 | 
 62 |       def call
 63 |         doc.search('text()').each do |node|
 64 |           content = node.to_html
 65 |           next if !content.include?('@')
 66 |           next if has_ancestor?(node, IGNORE_PARENTS)
 67 |           html = mention_link_filter(content, base_url, info_url)
 68 |           next if html == content
 69 |           node.replace(html)
 70 |         end
 71 |         doc
 72 |       end
 73 | 
 74 |       # The URL to provide when someone @mentions a "mention" name, such as
 75 |       # @mention or @mentioned, that will give them more info on mentions.
 76 |       def info_url
 77 |         context[:info_url] || nil
 78 |       end
 79 | 
 80 |       # Replace user @mentions in text with links to the mentioned user's
 81 |       # profile page.
 82 |       #
 83 |       # text      - String text to replace @mention usernames in.
 84 |       # base_url  - The base URL used to construct user profile URLs.
 85 |       # info_url  - The "more info" URL used to link to more info on @mentions.
 86 |       #             If nil we don't link @mention or @mentioned.
 87 |       #
 88 |       # Returns a string with @mentions replaced with links. All links have a
 89 |       # 'user-mention' class name attached for styling.
 90 |       def mention_link_filter(text, base_url='/', info_url=nil)
 91 |         self.class.mentioned_logins_in(text) do |match, login, is_mentioned|
 92 |           link =
 93 |             if is_mentioned
 94 |               link_to_mention_info(login, info_url)
 95 |             else
 96 |               link_to_mentioned_user(login)
 97 |             end
 98 | 
 99 |           link ? match.sub("@#{login}", link) : match
100 |         end
101 |       end
102 | 
103 |       def link_to_mention_info(text, info_url=nil)
104 |         return "@#{text}" if info_url.nil?
105 |         "" +
106 |         "@#{text}" +
107 |         ""
108 |       end
109 | 
110 |       def link_to_mentioned_user(login)
111 |         url = File.join(base_url, login)
112 |         "" +
113 |         "@#{login}" +
114 |         ""
115 |       end
116 |     end
117 |   end
118 | end


--------------------------------------------------------------------------------
/lib/html/pipeline/sanitization_filter.rb:
--------------------------------------------------------------------------------
  1 | require 'sanitize'
  2 | 
  3 | module HTML
  4 |   class Pipeline
  5 |     # HTML filter with sanitization routines and whitelists. This module defines
  6 |     # what HTML is allowed in user provided content and fixes up issues with
  7 |     # unbalanced tags and whatnot.
  8 |     #
  9 |     # See the Sanitize docs for more information on the underlying library:
 10 |     #
 11 |     # https://github.com/rgrove/sanitize/#readme
 12 |     #
 13 |     # Context options:
 14 |     #   :whitelist - The sanitizer whitelist configuration to use. This can be one
 15 |     #                of the options constants defined in this class or a custom
 16 |     #                sanitize options hash.
 17 |     #
 18 |     # This filter does not write additional information to the context.
 19 |     class SanitizationFilter < Filter
 20 |       LISTS     = Set.new(%w(ul ol).freeze)
 21 |       LIST_ITEM = 'li'.freeze
 22 | 
 23 |       # List of table child elements. These must be contained by a <table> element
 24 |       # or they are not allowed through. Otherwise they can be used to break out
 25 |       # of places we're using tables to contain formatted user content (like pull
 26 |       # request review comments).
 27 |       TABLE_ITEMS = Set.new(%w(tr td th).freeze)
 28 |       TABLE       = 'table'.freeze
 29 | 
 30 |       # The main sanitization whitelist. Only these elements and attributes are
 31 |       # allowed through by default.
 32 |       WHITELIST = {
 33 |         :elements => %w(
 34 |           h1 h2 h3 h4 h5 h6 h7 h8 br b i strong em a pre code img tt
 35 |           div ins del sup sub p ol ul table blockquote dl dt dd
 36 |           kbd q samp var hr ruby rt rp li tr td th
 37 |         ),
 38 |         :attributes => {
 39 |           'a' => ['href'],
 40 |           'img' => ['src'],
 41 |           'div' => ['itemscope', 'itemtype'],
 42 |           :all  => ['abbr', 'accept', 'accept-charset',
 43 |                     'accesskey', 'action', 'align', 'alt', 'axis',
 44 |                     'border', 'cellpadding', 'cellspacing', 'char',
 45 |                     'charoff', 'charset', 'checked', 'cite',
 46 |                     'clear', 'cols', 'colspan', 'color',
 47 |                     'compact', 'coords', 'datetime', 'dir',
 48 |                     'disabled', 'enctype', 'for', 'frame',
 49 |                     'headers', 'height', 'hreflang',
 50 |                     'hspace', 'ismap', 'label', 'lang',
 51 |                     'longdesc', 'maxlength', 'media', 'method',
 52 |                     'multiple', 'name', 'nohref', 'noshade',
 53 |                     'nowrap', 'prompt', 'readonly', 'rel', 'rev',
 54 |                     'rows', 'rowspan', 'rules', 'scope',
 55 |                     'selected', 'shape', 'size', 'span',
 56 |                     'start', 'summary', 'tabindex', 'target',
 57 |                     'title', 'type', 'usemap', 'valign', 'value',
 58 |                     'vspace', 'width', 'itemprop']
 59 |         },
 60 |         :protocols => {
 61 |           'a'   => {'href' => ['http', 'https', 'mailto', :relative, 'github-windows', 'github-mac']},
 62 |           'img' => {'src'  => ['http', 'https', :relative]}
 63 |         },
 64 |         :transformers => [
 65 |           # Top-level <li> elements are removed because they can break out of
 66 |           # containing markup.
 67 |           lambda { |env|
 68 |             name, node = env[:node_name], env[:node]
 69 |             if name == LIST_ITEM && !node.ancestors.any?{ |n| LISTS.include?(n.name) }
 70 |               node.replace(node.children)
 71 |             end
 72 |           },
 73 | 
 74 |           # Table child elements that are not contained by a <table> are removed.
 75 |           lambda { |env|
 76 |             name, node = env[:node_name], env[:node]
 77 |             if TABLE_ITEMS.include?(name) && !node.ancestors.any? { |n| n.name == TABLE }
 78 |               node.replace(node.children)
 79 |             end
 80 |           }
 81 |         ]
 82 |       }
 83 | 
 84 |       # A more limited sanitization whitelist. This includes all attributes,
 85 |       # protocols, and transformers from WHITELIST but with a more locked down
 86 |       # set of allowed elements.
 87 |       LIMITED = WHITELIST.merge(
 88 |         :elements => %w(b i strong em a pre code img ins del sup sub p ol ul li))
 89 | 
 90 |       # Strip all HTML tags from the document.
 91 |       FULL = { :elements => [] }
 92 | 
 93 |       # Sanitize markup using the Sanitize library.
 94 |       def call
 95 |         Sanitize.clean_node!(doc, whitelist)
 96 |       end
 97 | 
 98 |       # The whitelist to use when sanitizing. This can be passed in the context
 99 |       # hash to the filter but defaults to WHITELIST constant value above.
100 |       def whitelist
101 |         context[:whitelist] || WHITELIST
102 |       end
103 |     end
104 |   end
105 | end


--------------------------------------------------------------------------------
/test/html/pipeline/mention_filter_test.rb:
--------------------------------------------------------------------------------
  1 | require "test_helper"
  2 | 
  3 | class HTML::Pipeline::MentionFilterTest < Test::Unit::TestCase
  4 |   def filter(html, base_url='/', info_url=nil)
  5 |     HTML::Pipeline::MentionFilter.call(html, :base_url => base_url, :info_url => info_url)
  6 |   end
  7 | 
  8 |   def test_filtering_a_documentfragment
  9 |     body = "@kneath: check it out."
 10 |     doc  = Nokogiri::HTML::DocumentFragment.parse(body)
 11 | 
 12 |     res  = filter(doc, '/')
 13 |     assert_same doc, res
 14 | 
 15 |     link = "@kneath"
 16 |     assert_equal "#{link}: check it out.",
 17 |       res.to_html
 18 |   end
 19 | 
 20 |   def test_filtering_plain_text
 21 |     body = "@kneath: check it out."
 22 |     res  = filter(body, '/')
 23 | 
 24 |     link = "@kneath"
 25 |     assert_equal "#{link}: check it out.",
 26 |       res.to_html
 27 |   end
 28 | 
 29 |   def test_not_replacing_mentions_in_pre_tags
 30 |     body = "@kneath: okay"
 31 |     assert_equal body, filter(body).to_html
 32 |   end
 33 | 
 34 |   def test_not_replacing_mentions_in_code_tags
 35 |     body = "@kneath: okay"
 36 |     assert_equal body, filter(body).to_html
 37 |   end
 38 | 
 39 |   def test_not_replacing_mentions_in_links
 40 |     body = "@kneath okay"
 41 |     assert_equal body, filter(body).to_html
 42 |   end
 43 | 
 44 |   def test_entity_encoding_and_whatnot
 45 |     body = "@kneath what's up"
 46 |     link = "@kneath"
 47 |     assert_equal "#{link} what's up", filter(body, '/').to_html
 48 |   end
 49 | 
 50 |   def test_html_injection
 51 |     body = "@kneath <script>alert(0)</script>"
 52 |     link = "@kneath"
 53 |     assert_equal "#{link} <script>alert(0)</script>",
 54 |       filter(body, '/').to_html
 55 |   end
 56 | 
 57 |   def test_links_to_nothing_when_no_info_url_given
 58 |     body = "How do I @mention someone?"
 59 |     assert_equal "How do I @mention someone?",
 60 |       filter(body, '/').to_html
 61 |   end
 62 | 
 63 |   def test_links_to_more_info_when_info_url_given
 64 |     body = "How do I @mention someone?"
 65 |     link = "@mention"
 66 |     assert_equal "How do I #{link} someone?",
 67 |       filter(body, '/', 'https://github.com/blog/821').to_html
 68 |   end
 69 | 
 70 |   MarkdownPipeline =
 71 |     HTML::Pipeline.new [
 72 |       HTML::Pipeline::MarkdownFilter,
 73 |       HTML::Pipeline::MentionFilter
 74 |     ]
 75 | 
 76 |   def mentioned_usernames
 77 |     result = {}
 78 |     MarkdownPipeline.call(@body, {}, result)
 79 |     html = result[:output].to_html
 80 |     users = html.scan(/user-mention">@(.+?)#{replacement}").children.first.children
119 |     end
120 |     replace_without_encoding_fix(replacement)
121 |   end
122 | 
123 |   alias_method :replace_without_encoding_fix, :replace
124 |   alias_method :replace, :replace_with_encoding_fix
125 | 
126 |   def swap(replacement)
127 |     replace(replacement)
128 |     self
129 |   end
130 | end
131 | 


--------------------------------------------------------------------------------
/lib/html/pipeline/filter.rb:
--------------------------------------------------------------------------------
  1 | module HTML
  2 |   class Pipeline
  3 |     # Base class for user content HTML filters. Each filter takes an
  4 |     # HTML string or Nokogiri::HTML::DocumentFragment, performs
  5 |     # modifications and/or writes information to the result hash. Filters must
  6 |     # return a DocumentFragment (typically the same instance provided to the call
  7 |     # method) or a String with HTML markup.
  8 |     #
  9 |     # Example filter that replaces all images with trollface:
 10 |     #
 11 |     #   class FuuuFilter < HTML::Pipeline::Filter
 12 |     #     def call
 13 |     #       doc.search('img').each do |img|
 14 |     #         img['src'] = "http://paradoxdgn.com/junk/avatars/trollface.jpg"
 15 |     #       end
 16 |     #     end
 17 |     #   end
 18 |     #
 19 |     # The context Hash passes options to filters and should not be changed in
 20 |     # place.  A Result Hash allows filters to make extracted information
 21 |     # available to the caller and is mutable.
 22 |     #
 23 |     # Common context options:
 24 |     #   :base_url   - The site's base URL
 25 |     #   :repository - A Repository providing context for the HTML being processed
 26 |     #
 27 |     # Each filter may define additional options and output values. See the class
 28 |     # docs for more info.
 29 |     class Filter
 30 |       class InvalidDocumentException < StandardError; end
 31 | 
 32 |       def initialize(doc, context = nil, result = nil)
 33 |         if doc.kind_of?(String)
 34 |           @html = doc.to_str
 35 |           @doc = nil
 36 |         else
 37 |           @doc = doc
 38 |           @html = nil
 39 |         end
 40 |         @context = context || {}
 41 |         @result = result || {}
 42 |         validate
 43 |       end
 44 |       
 45 |       # Public: Returns a simple Hash used to pass extra information into filters.
 46 |       # It should not be changed in place; filters make extracted information
 47 |       # available to the caller through the result Hash instead.
 48 |       attr_reader :context
 49 | 
 50 |       # Public: Returns a Hash used to allow filters to pass back information
 51 |       # to callers of the various Pipelines.  This can be used for
 52 |       # #mentioned_users, for example.
 53 |       attr_reader :result
 54 | 
 55 |       # The Nokogiri::HTML::DocumentFragment to be manipulated. If the filter was
 56 |       # provided a String, parse into a DocumentFragment the first time this
 57 |       # method is called.
 58 |       def doc
 59 |         @doc ||= parse_html(html)
 60 |       end
 61 | 
 62 |       # The String representation of the document. If a DocumentFragment was
 63 |       # provided to the Filter, it is serialized into a String when this method is
 64 |       # called.
 65 |       def html
 66 |         raise InvalidDocumentException if @html.nil? && @doc.nil?
 67 |         @html || doc.to_html
 68 |       end
 69 | 
 70 |       # The main filter entry point. The doc attribute is guaranteed to be a
 71 |       # Nokogiri::HTML::DocumentFragment when invoked. Subclasses should modify
 72 |       # this document in place or extract information and add it to the result
 73 |       # hash.
 74 |       def call
 75 |         raise NotImplementedError
 76 |       end
 77 |       
 78 |       # Make sure the context has everything we need. Noop: Subclasses can override.
 79 |       def validate
 80 |       end
 81 | 
 82 |       # The Repository object provided in the context hash, or nil when no
 83 |       # :repository was specified.
 84 |       #
 85 |       # It's assumed that the repository context has already been checked
 86 |       # for permissions
 87 |       def repository
 88 |         context[:repository]
 89 |       end
 90 | 
 91 |       # The User object provided in the context hash, or nil when no user
 92 |       # was specified
 93 |       def current_user
 94 |         context[:current_user]
 95 |       end
 96 | 
 97 |       # Return whether the filter can access a given repo while
 98 |       # applying a filter
 99 |       #
100 |       # A repo can only be accessed if it's pullable by the user who
101 |       # submitted the content of this filter, or if it's the same as
102 |       # the repository context in which the filter runs
103 |       def can_access_repo?(repo)
104 |         return false if repo.nil?
105 |         return true if repo == repository
106 |         repo.pullable_by?(current_user)
107 |       end
108 | 
109 |       # The site's base URL provided in the context hash, or '/' when no
110 |       # base URL was specified.
111 |       def base_url
112 |         context[:base_url] || '/'
113 |       end
114 | 
115 |       # Ensure the passed argument is a DocumentFragment. When a string is
116 |       # provided, it is parsed and returned; otherwise, the DocumentFragment is
117 |       # returned unmodified.
118 |       def parse_html(html)
119 |         HTML::Pipeline.parse(html)
120 |       end
121 | 
122 |       # Helper method for filter subclasses used to determine if any of a node's
123 |       # ancestors have one of the tag names specified.
124 |       #
125 |       # node - The Node object to check.
126 |       # tags - An array of tag name strings to check. These should be downcase.
127 |       #
128 |       # Returns true when the node has a matching ancestor.
129 |       def has_ancestor?(node, tags)
130 |         while node = node.parent
131 |           if tags.include?(node.name.downcase)
132 |             break true
133 |           end
134 |         end
135 |       end
136 | 
137 |       # Perform a filter on doc with the given context.
138 |       #
139 |       # Returns a Nokogiri::HTML::DocumentFragment or a String containing HTML
140 |       # markup.
141 |       def self.call(doc, context = nil, result = nil)
142 |         new(doc, context, result).call
143 |       end
144 | 
145 |       # Like call but guarantees that a DocumentFragment is returned, even when
146 |       # the last filter returns a String.
147 |       def self.to_document(input, context = nil)
148 |         html = call(input, context)
149 |         HTML::Pipeline::parse(html)
150 |       end
151 | 
152 |       # Like call but guarantees that a string of HTML markup is returned.
153 |       def self.to_html(input, context = nil)
154 |         output = call(input, context)
155 |         if output.respond_to?(:to_html)
156 |           output.to_html
157 |         else
158 |           output.to_s
159 |         end
160 |       end
161 |       
162 |       # Validator for required context. This checks that every key passed in
163 |       # keys is present in the context hash.
164 |       # 
165 |       # If any errors are found an ArgumentError will be raised with a
166 |       # message listing all the missing contexts and the filters that
167 |       # require them.
168 |       def needs(*keys)
169 |         missing = keys.reject { |key| context.include? key }
170 | 
171 |         if missing.any?
172 |           raise ArgumentError,
173 |             "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}"
174 |         end
175 |       end
176 |     end
177 |   end
178 | end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # HTML::Pipeline [![Build Status](https://secure.travis-ci.org/jch/html-pipeline.png)](http://travis-ci.org/jch/html-pipeline)
  2 | 
  3 | GitHub HTML processing filters and utilities. This module includes a small
  4 | framework for defining DOM based content filters and applying them to user
  5 | provided content.
  6 | 
  7 | ## Installation
  8 | 
  9 | Add this line to your application's Gemfile:
 10 | 
 11 | ```ruby
 12 | gem 'html-pipeline'
 13 | ```
 14 | 
 15 | And then execute:
 16 | 
 17 | ```sh
 18 | $ bundle
 19 | ```
 20 | 
 21 | Or install it yourself as:
 22 | 
 23 | ```sh
 24 | $ gem install html-pipeline
 25 | ```
 26 | 
 27 | ## Usage
 28 | 
 29 | This library provides a handful of chainable HTML filters to transform user
 30 | content into markup. A filter takes an HTML string or
 31 | `Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then
 32 | outputs the result.
 33 | 
 34 | For example, to transform Markdown source into Markdown HTML:
 35 | 
 36 | ```ruby
 37 | require 'html/pipeline'
 38 | 
 39 | filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!")
 40 | filter.call
 41 | ```
 42 | 
 43 | Filters can be combined into a pipeline which causes each filter to hand its
 44 | output to the next filter's input. So if you wanted to have content be
 45 | filtered through Markdown and be syntax highlighted, you can create the
 46 | following pipeline:
 47 | 
 48 | ```ruby
 49 | pipeline = HTML::Pipeline.new [
 50 |   HTML::Pipeline::MarkdownFilter,
 51 |   HTML::Pipeline::SyntaxHighlightFilter
 52 | ]
 53 | result = pipeline.call <This is great:
 68 | 
 69 | 
 70 | some_code(:first)
 71 | 
 72 | 
 73 | ```
 74 | 
 75 | Some filters take an optional **context** and/or **result** hash. These are
 76 | used to pass around arguments and metadata between filters in a pipeline. For
 77 | example, if you don't want to use GitHub formatted Markdown, you can
 78 | pass an option in the context hash:
 79 | 
 80 | ```ruby
 81 | filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false)
 82 | filter.call
 83 | ```
 84 | 
 85 | ## Filters
 86 | 
 87 | * `MentionFilter` - replace `@user` mentions with links
 88 | * `AutolinkFilter` - auto_linking urls in HTML
 89 | * `CamoFilter` - replace http image urls with [camo-fied](https://github.com/atmos/camo) https versions
 90 | * `EmailReplyFilter` - util filter for working with emails
 91 | * `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
 92 | * `HttpsFilter` - HTML Filter for replacing http github urls with https versions.
 93 | * `ImageMaxWidthFilter` - link to full size image for large images
 94 | * `MarkdownFilter` - convert markdown to html
 95 | * `PlainTextInputFilter` - html escape text and wrap the result in a div
 96 | * `SanitizationFilter` - whitelist sanitize user markup
 97 | * `SyntaxHighlightFilter` - code syntax highlighter with [linguist](https://github.com/github/linguist)
 98 | * `TextileFilter` - convert textile to html
 99 | * `TableOfContentsFilter` - anchor headings with name attributes
100 | 
101 | ## Examples
102 | 
103 | We define different pipelines for different parts of our app. Here are a few
104 | paraphrased snippets to get you started:
105 | 
106 | ```ruby
107 | # The context hash is how you pass options between different filters.
108 | # See individual filter source for explanation of options.
109 | context = {
110 |   :asset_root => "http://your-domain.com/where/your/images/live/icons",
111 |   :base_url   => "http://your-domain.com"
112 | }
113 | 
114 | # Pipeline providing sanitization and image hijacking but no mention
115 | # related features.
116 | SimplePipeline = Pipeline.new [
117 |   SanitizationFilter,
118 |   TableOfContentsFilter, # add 'name' anchors to all headers
119 |   CamoFilter,
120 |   ImageMaxWidthFilter,
121 |   SyntaxHighlightFilter,
122 |   EmojiFilter,
123 |   AutolinkFilter
124 | ], context, {}
125 | 
126 | # Pipeline used for user provided content on the web
127 | MarkdownPipeline = Pipeline.new [
128 |   MarkdownFilter,
129 |   SanitizationFilter,
130 |   CamoFilter,
131 |   ImageMaxWidthFilter,
132 |   HttpsFilter,
133 |   MentionFilter,
134 |   EmojiFilter,
135 |   SyntaxHighlightFilter
136 | ], context.merge(:gfm => true), {}  # enable github formatted markdown
137 | 
138 | 
139 | # Define a pipeline based on another pipeline's filters
140 | NonGFMMarkdownPipeline = Pipeline.new(MarkdownPipeline.filters,
141 |   context.merge(:gfm => false), {})
142 | 
143 | # Pipelines aren't limited to the web. You can use them for email
144 | # processing also.
145 | HtmlEmailPipeline = Pipeline.new [
146 |   ImageMaxWidthFilter
147 | ], {}, {}
148 | 
149 | # Just emoji.
150 | EmojiPipeline = Pipeline.new [
151 |   HTMLInputFilter,
152 |   EmojiFilter
153 | ], context, {}
154 | ```
155 | 
156 | ## Extending
157 | To write a custom filter, you need a class with a `call` method that inherits
158 | from `HTML::Pipeline::Filter`.
159 | 
160 | For example this filter adds a base url to images that are root relative:
161 | 
162 | ```ruby
163 | require 'uri'
164 | 
165 | class RootRelativeFilter < HTML::Pipeline::Filter
166 | 
167 |   def call
168 |     doc.search("img").each do |img| 
169 |       next if img['src'].nil?
170 |       src = img['src'].strip
171 |       if src.start_with? '/'
172 |         img["src"] = URI.join(context[:base_url], src).to_s
173 |       end
174 |     end
175 |     doc
176 |   end
177 | 
178 | end
179 | ```
180 | 
181 | Now this filter can be used in a pipeline:
182 | 
183 | ```ruby
184 | Pipeline.new [ RootRelativeFilter ], { :base_url => 'http://somehost.com' }
185 | ```
186 | 
187 | ## Development
188 | 
189 | To see what has changed in recent versions, see the [CHANGELOG](https://github.com/jch/html-pipeline/blob/master/CHANGELOG.md).
190 | 
191 | ```sh
192 | bundle
193 | rake test
194 | ```
195 | 
196 | ## Contributing
197 | 
198 | 1. [Fork it](https://help.github.com/articles/fork-a-repo)
199 | 2. Create your feature branch (`git checkout -b my-new-feature`)
200 | 3. Commit your changes (`git commit -am 'Added some feature'`)
201 | 4. Push to the branch (`git push origin my-new-feature`)
202 | 5. Create new [Pull Request](https://help.github.com/articles/using-pull-requests)
203 | 
204 | 
205 | ## TODO
206 | 
207 | * test whether emoji filter works on heroku
208 | * test whether nokogiri monkey patch is still necessary
209 | 
210 | ## Contributors
211 | 
212 | * [Aman Gupta](mailto:aman@tmm1.net)
213 | * [Jake Boxer](mailto:jake@github.com)
214 | * [Joshua Peek](mailto:josh@joshpeek.com)
215 | * [Kyle Neath](mailto:kneath@gmail.com)
216 | * [Rob Sanheim](mailto:rsanheim@gmail.com)
217 | * [Simon Rozet](mailto:simon@rozet.name)
218 | * [Vicent Martí](mailto:tanoku@gmail.com)
219 | * [Risk :danger: Olson](mailto:technoweenie@gmail.com)
220 | 
221 | Project is a member of the [OSS Manifesto](http://ossmanifesto.org/).
222 | 


--------------------------------------------------------------------------------

Ice cube

Dopeman

Express Yourself

Dopeman

"Funky President" by James Brown

"It's My Thing" by Marva Whitney

"Boogie Back" by Roy Ayers

"Feel Good" by Fancy

"Funky Drummer" by James Brown

"Ruthless Villain" by Eazy-E

foo

bar