├── .github └── workflows │ └── test.yml ├── .gitignore ├── .rspec ├── .ruby-version ├── CHANGELOG.md ├── Gemfile ├── LICENSE.md ├── README.md ├── Rakefile ├── bin ├── ci └── upmark ├── coverage └── .gitkeep ├── lib ├── upmark.rb └── upmark │ ├── errors.rb │ ├── parser │ └── xml.rb │ ├── transform │ ├── ignore.rb │ ├── markdown.rb │ ├── normalise.rb │ └── preprocess.rb │ └── transform_helpers.rb ├── spec ├── acceptance │ └── upmark_spec.rb ├── errors_spec.rb ├── spec_helper.rb └── unit │ └── lib │ └── upmark │ ├── parser │ └── xml_spec.rb │ └── transform │ └── markdown_spec.rb └── upmark.gemspec /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Ruby Test 2 | on: [push] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v2 8 | - name: Use Ruby 9 | uses: ruby/setup-ruby@v1 10 | with: 11 | ruby-version: 3.2 12 | bundler-cache: true 13 | - name: Install dependencies 14 | run: bundle install 15 | - name: Run tests 16 | run: bundle exec rake 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | .bundle 3 | coverage 4 | .rvmrc 5 | Gemfile.lock 6 | .examples 7 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --colour 2 | --require spec_helper 3 | -------------------------------------------------------------------------------- /.ruby-version: -------------------------------------------------------------------------------- 1 | 3.2.2 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 1.1.0 / 2024-04-19 4 | * basic handling for nested lists 5 | 6 | ## 
1.0.0 / 2018-03-27 7 | * Delegate `ascii_tree` method to cause object 8 | * Travis CI 9 | * Upgrade deps 10 | * Various cleanups 11 | 12 | ## v0.10.0 (17th June 2016) 13 | * better handling of HTML entities 14 | * better handling of hyperlinks with query strings 15 | 16 | ## v0.9.0 (8th November 2015) 17 | * bump parslet dependency to 1.7.x 18 | * converts some new HTML elements to markdown - h4, h5 and h6 19 | * detects more content that looks like an unordered list and converts 20 | it to a markdown list 21 | * strips some additional HTML tags and leaves the content, including 22 | * table 23 | * span 24 | * strips some HTML elements 25 | * img tags with non http[s] src 26 | * improved error when unbalanced HTML tags are detected 27 | 28 | ## v0.2.0 (21st July 2014) 29 | * Upmark.convert() now raises an Upmark::ParseError exception if the supplied 30 | HTML can't be parsed 31 | * Locked the parslet dependency to 1.4.0 for now 32 | * We depend on the Parslet::ParseError exception which was removed in parslet 1.5.0 33 | 34 | ## v0.1.4 (2nd August 2012) 35 | * BUGFIX: handle single quotes in attribute values 36 | 37 | ## v0.1.3 (28th March 2012) 38 | * BUGFIX: handle ampersands in attribute values 39 | 40 | ## v0.1.2 (26th September 2011) 41 | * BUGFIX: handle newlines after a
element 42 | 43 | ## v0.1.1 (26th September 2011) 44 | * lots of refactoring 45 | 46 | ## v0.1.0 (25th September 2011) 47 | * lots of refactoring 48 | 49 | ## v0.0.1 (23rd September 2011) 50 | * initial release 51 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | # Specify your gem's dependencies in upmark.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 The Conversation Media Group 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Upmark 2 | 3 | An HTML to Markdown converter. 4 | 5 | ## Installation 6 | 7 | > gem install upmark 8 | 9 | ## Usage 10 | 11 | In Ruby: 12 | 13 | ```ruby 14 | require "upmark" 15 | html = "

messenger bag skateboard

" 16 | markdown = Upmark.convert(html) 17 | puts markdown 18 | ``` 19 | 20 | From the command line: 21 | 22 | > upmark foo.html 23 | 24 | You can also pipe poorly formatted HTML documents through `tidy` before piping them into `upmark`: 25 | 26 | > cat bar.html | tidy -asxhtml -indent -quiet --show-errors 0 --show-warnings 0 --show-body-only 1 --wrap 0 | upmark 27 | 28 | ## Features 29 | 30 | Upmark will convert the following (arbitrarily nested) HTML elements to Markdown: 31 | 32 | * `strong` 33 | * `em` 34 | * `p` 35 | * `a` 36 | * `h1`, `h2`, `h3`, `h4`, `h5`, `h6` 37 | * `ul` 38 | * `ol` 39 | * `br` 40 | 41 | It will also pass through block and span-level HTML elements (e.g. `div`, `pre`, etc.) which aren't used by Markdown; `span` and table markup (`table`, `tr`, `td`, etc.) are stripped and only their content is kept. 42 | 43 | ## How it works 44 | 45 | Upmark defines a parsing expression grammar (PEG) using the very awesome [Parslet](https://github.com/kschiess/parslet/) gem. This PEG is then used to convert HTML into Markdown in 4 steps: 46 | 47 | 1. Parse the XHTML into an abstract syntax tree (AST). 48 | 2. Normalize the AST into a nested hash of HTML elements. 49 | 3. Mark the block-level subtrees which should be ignored (`div`, `pre`) and strip wrapper elements such as `span` and `table`, keeping their content. 50 | 4. Convert the AST leaves into Markdown.
51 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rspec/core/rake_task" 3 | 4 | namespace :spec do 5 | desc "Run acceptance specs" 6 | RSpec::Core::RakeTask.new(:acceptance) do |t| 7 | t.pattern = "./spec/acceptance/**/*_spec.rb" 8 | end 9 | 10 | desc "Run unit specs" 11 | RSpec::Core::RakeTask.new(:unit) do |t| 12 | t.pattern = "./spec/unit/**/*_spec.rb" 13 | end 14 | 15 | desc "Run unit and acceptance specs" 16 | task all: [:"spec:unit", :"spec:acceptance"] 17 | end 18 | 19 | task default: :"spec:all" 20 | -------------------------------------------------------------------------------- /bin/ci: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script is the standard way to run a CI build for all TC projects. 4 | # It's primarily used by buildbox 5 | 6 | # Exits the shell immediately if any command fails 7 | set -e 8 | 9 | # Will output commands as they run 10 | set -x 11 | 12 | # prepare the repository state, load dependencies, etc 13 | ruby -v 14 | hostname 15 | bundle check || bundle --local --path=vendor/bundle || bundle --path=vendor/bundle 16 | 17 | # ..
and now start the build 18 | bundle exec rake 19 | -------------------------------------------------------------------------------- /bin/upmark: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | $:.push File.expand_path("../../lib", __FILE__) 4 | 5 | require "upmark" 6 | 7 | if ARGV.length > 0 8 | ARGV.each do |arg| 9 | puts Upmark.convert(File.read(arg)) 10 | end 11 | else 12 | puts Upmark.convert(ARGF.read) 13 | end 14 | 15 | -------------------------------------------------------------------------------- /coverage/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/conversation/upmark/2b805a308d493c33529f19bbb2d1cd183df52a4c/coverage/.gitkeep -------------------------------------------------------------------------------- /lib/upmark.rb: -------------------------------------------------------------------------------- 1 | require "parslet" 2 | 3 | require 'upmark/errors' 4 | require "upmark/parser/xml" 5 | require 'upmark/transform_helpers' 6 | require "upmark/transform/markdown" 7 | require "upmark/transform/normalise" 8 | require "upmark/transform/preprocess" 9 | 10 | module Upmark 11 | def self.convert(html) 12 | xml = Parser::XML.new 13 | normalise = Transform::Normalise.new 14 | preprocess = Transform::Preprocess.new 15 | markdown = Transform::Markdown.new 16 | 17 | ast = xml.parse(html.strip) 18 | ast = normalise.apply(ast) 19 | ast = preprocess.apply(ast) 20 | ast = markdown.apply(ast) 21 | 22 | # The result is either a String or an Array. 23 | ast = ast.join if ast.is_a?(Array) 24 | 25 | # Remove trailing whitespace 26 | ast.gsub!(/ +$/,'') 27 | 28 | # Compress bullet point lists 29 | ast.gsub!(/^•\s*([^•\n]*)\n+(?=•)/,"* #{'\1'}\n") 30 | 31 | # Any more than two consecutive newline characters is superfluous.
32 | ast.gsub!(/\n(\s*\n)+/, "\n\n") 33 | 34 | # Remove other bullet points 35 | ast.gsub!(/^•\s*/,"* ") 36 | 37 | ast.strip 38 | rescue Parslet::ParseFailed => e 39 | raise Upmark::ParseFailed.new('Parse failed', e) 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /lib/upmark/errors.rb: -------------------------------------------------------------------------------- 1 | module Upmark 2 | class ParseFailed < StandardError 3 | attr_reader :cause 4 | 5 | def initialize(message, cause) 6 | @cause = cause 7 | super(message) 8 | end 9 | 10 | def ascii_tree 11 | @cause && @cause.ascii_tree 12 | end 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /lib/upmark/parser/xml.rb: -------------------------------------------------------------------------------- 1 | module Upmark 2 | module Parser 3 | # The XML parser class. 4 | # 5 | # Parses a XML document into an abstract syntax tree (AST). 6 | # 7 | # It's worth referring to the XML spec: 8 | # http://www.w3.org/TR/2000/REC-xml-20001006 9 | # 10 | class XML < Parslet::Parser 11 | root(:node) 12 | 13 | rule(:node) do 14 | ( 15 | empty_element.as(:empty) | 16 | element.as(:element) | 17 | text.as(:text) 18 | ).repeat(0) 19 | end 20 | 21 | rule(:empty_element) do 22 | start_tag.as(:start_tag) >> 23 | match(/\s+/) >> 24 | end_tag.as(:end_tag) 25 | end 26 | 27 | rule(:element) do 28 | empty_br.as(:empty_tag) | 29 | ( 30 | start_tag.as(:start_tag) >> 31 | node.as(:children) >> 32 | end_tag.as(:end_tag) 33 | ) | 34 | empty_tag.as(:empty_tag) 35 | end 36 | 37 | rule(:text) do 38 | match(/\A[\s\n\t ]+\Z/m).absent? >> # ignore entirely empty strings 39 | match(/[^<>]/).repeat(1) 40 | end 41 | 42 | rule(:start_tag) do 43 | str('<') >> 44 | name.as(:name) >> 45 | (space >> attribute).repeat.as(:attributes) >> 46 | space? >> 47 | str('>') 48 | end 49 | 50 | rule(:end_tag) do 51 | str('> 52 | name.as(:name) >> 53 | space? 
>> 54 | str('>') 55 | end 56 | 57 | rule(:empty_br) do 58 | str('<') >> space? >> str('br').as(:name) >> space? >> str('>') 59 | end 60 | 61 | rule(:empty_tag) do 62 | str('<') >> 63 | name.as(:name) >> 64 | (space >> attribute).repeat.as(:attributes) >> 65 | space? >> 66 | str('/>') 67 | end 68 | 69 | rule(:name) do 70 | match(/[a-zA-Z_:]/) >> match(/[\w:\.-]/).repeat 71 | end 72 | 73 | rule(:attribute) do 74 | name.as(:name) >> 75 | str('=') >> ( 76 | (str('"') >> double_quoted_attribute_value.as(:value) >> str('"')) | # double quotes 77 | (str("'") >> single_quoted_attribute_value.as(:value) >> str("'")) # single quotes 78 | ) 79 | end 80 | 81 | rule(:double_quoted_attribute_value) do 82 | (str('"').absent? >> (match(/[^<]/) | string_entity | numeric_entity)).repeat 83 | end 84 | 85 | rule(:single_quoted_attribute_value) do 86 | (str("'").absent? >> (match(/[^<]/) | string_entity | numeric_entity)).repeat 87 | end 88 | 89 | rule(:string_entity) { match("&") >> name >> match(";") } 90 | rule(:numeric_entity) { match(/&#\d+;/) } 91 | 92 | rule(:space) { match(/\s/).repeat(1) } 93 | rule(:space?) { space.maybe } 94 | end 95 | end 96 | end 97 | -------------------------------------------------------------------------------- /lib/upmark/transform/ignore.rb: -------------------------------------------------------------------------------- 1 | module Upmark 2 | module Transform 3 | # A transform class which marks all elements in a subtree as ignored. 
4 | class Ignore < Parslet::Transform 5 | include TransformHelpers 6 | 7 | element(:*) do |element| 8 | { 9 | element: { 10 | name: element[:name], 11 | attributes: element[:attributes], 12 | children: element[:children], 13 | ignore: true 14 | } 15 | } 16 | end 17 | end 18 | end 19 | end 20 | -------------------------------------------------------------------------------- /lib/upmark/transform/markdown.rb: -------------------------------------------------------------------------------- 1 | module Upmark 2 | module Transform 3 | # A transform class which converts an abstract syntax tree (AST) into 4 | # a Markdown document. 5 | class Markdown < Parslet::Transform 6 | include TransformHelpers 7 | 8 | rule(text: simple(:value)) { value.to_s } 9 | 10 | # Pass all unmatched elements through. 11 | rule( 12 | element: { 13 | name: simple(:name), 14 | attributes: subtree(:attributes), 15 | children: sequence(:children), 16 | ignore: simple(:ignore) 17 | } 18 | ) do |element| 19 | attributes = map_attributes_subtree(element[:attributes]) 20 | children = element[:children].join 21 | name = element[:name] 22 | 23 | attributes_list = 24 | if attributes.any? 25 | " " + attributes.map {|name, value| %Q{#{name}="#{value}"} }.join(" ") 26 | else 27 | "" 28 | end 29 | 30 | if children.empty? 
31 | %Q{<#{name}#{attributes_list} />} 32 | else 33 | %Q{<#{name}#{attributes_list}>#{children}} 34 | end 35 | end 36 | 37 | def self.text(element) 38 | element[:children].join.gsub(/(\n)[\n ]+/, '\1') 39 | end 40 | 41 | element(:p) {|element| "#{text(element)}\n\n" } 42 | element(:h1) {|element| "# #{text(element)}" } 43 | element(:h2) {|element| "## #{text(element)}" } 44 | element(:h3) {|element| "### #{text(element)}" } 45 | element(:h4) {|element| "#### #{text(element)}" } 46 | element(:h5) {|element| "##### #{text(element)}" } 47 | element(:h6) {|element| "###### #{text(element)}" } 48 | element(:li) {|element| "#{text(element)}" } 49 | 50 | element(:ul) do |element| 51 | children = element[:children].flatten.map {|value| value.strip != "" ? value : nil }.compact 52 | children.map {|value| "* #{value.gsub(/^\s*•\s*/,'')}\n" } 53 | end 54 | 55 | element(:ol) do |element| 56 | children = element[:children].flatten.map {|value| value.strip != "" ? value : nil }.compact 57 | children.map.with_index {|value, i| "#{i + 1}. 
#{value}\n" } 58 | end 59 | 60 | element(:a) do |element| 61 | attributes = map_attributes_subtree(element[:attributes]) 62 | href = attributes[:href] 63 | title = attributes[:title] 64 | 65 | if /^(?:http|mailto)/ =~ href 66 | %Q{[#{text(element)}](#{href} "#{title}")} 67 | else 68 | text(element) 69 | end 70 | end 71 | 72 | element(:img) do |element| 73 | attributes = map_attributes_subtree(element[:attributes]) 74 | href = attributes[:src] 75 | title = attributes[:title] 76 | alt_text = attributes[:alt] 77 | 78 | if /^http/ =~ href 79 | %Q{![#{alt_text}](#{href} "#{title}")} 80 | else 81 | "#{alt_text || title}" 82 | end 83 | end 84 | 85 | element(:b, :strong) {|element| "**#{text(element)}**" } 86 | element(:i, :em) {|element| "*#{text(element)}*" } 87 | 88 | element(:br) { "\n" } 89 | rule(element: { name: "br"}) { "\n" } 90 | 91 | end 92 | end 93 | end 94 | -------------------------------------------------------------------------------- /lib/upmark/transform/normalise.rb: -------------------------------------------------------------------------------- 1 | module Upmark 2 | module Transform 3 | # A transform class which normalises start/end/empty tags into the 4 | # same structure.
5 | class Normalise < Parslet::Transform 6 | 7 | rule(element: subtree(:invalid)) do 8 | raise Upmark::ParseFailed.new('Invalid parse result', nil) 9 | end 10 | 11 | # Strip empty tags 12 | rule(empty: subtree(:invalid)) do 13 | ' ' 14 | end 15 | 16 | rule( 17 | element: { 18 | start_tag: {name: simple(:name), attributes: subtree(:attributes)}, 19 | end_tag: {name: simple(:end_tag_name)}, 20 | children: subtree(:children) 21 | } 22 | ) do 23 | unless name == end_tag_name 24 | raise Upmark::ParseFailed.new('Mismatched tags', nil) 25 | end 26 | { 27 | element: { 28 | name: name, 29 | attributes: attributes, 30 | children: children, 31 | ignore: false 32 | } 33 | } 34 | end 35 | 36 | rule( 37 | element: { 38 | empty_tag: { name: simple(:name) } 39 | } 40 | ) do 41 | { 42 | element: { 43 | name: name, 44 | attributes: [], 45 | children: [], 46 | ignore: false 47 | } 48 | } 49 | end 50 | 51 | rule( 52 | element: { 53 | empty_tag: {name: simple(:name), attributes: subtree(:attributes)} 54 | } 55 | ) do 56 | { 57 | element: { 58 | name: name, 59 | attributes: attributes, 60 | children: [], 61 | ignore: false 62 | } 63 | } 64 | end 65 | 66 | end 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /lib/upmark/transform/preprocess.rb: -------------------------------------------------------------------------------- 1 | require "upmark/transform/ignore" 2 | 3 | module Upmark 4 | module Transform 5 | # A transform class which marks block-level elements as ignored. 6 | # i.e. These elements should not be converted to Markdown. 
7 | class Preprocess < Parslet::Transform 8 | include TransformHelpers 9 | 10 | element(:div, :pre) do |element| 11 | { 12 | element: { 13 | name: element[:name], 14 | attributes: element[:attributes], 15 | children: Ignore.new.apply(element[:children]), 16 | ignore: true 17 | } 18 | } 19 | end 20 | 21 | element(:span) do |element| 22 | element[:children] 23 | end 24 | 25 | # table content elements are stripped ignoring their spacing 26 | element(:table, :thead, :tbody, :tfoot) do |element| 27 | element[:children].reject! do |c| 28 | Hash === c && c[:text].to_s =~ /\A[\n ]*\Z/m 29 | end 30 | element[:children] 31 | end 32 | 33 | # table content elements are stripped 34 | element(:td, :th) do |element| 35 | element[:children] 36 | end 37 | 38 | # table rows are treated as 'paragraph' blocks 39 | element(:tr) do |element| 40 | element[:children] 41 | .select { |c| Array === c } 42 | .map do |children| 43 | children.map do |child| 44 | if child.is_a?(Hash) # if the td doesn't contain nested elements 45 | if child[:text] 46 | child[:text].to_s.gsub!(/^\n */,'') 47 | end 48 | end 49 | child 50 | end + ["\n"] 51 | end + ["\n"] 52 | end 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /lib/upmark/transform_helpers.rb: -------------------------------------------------------------------------------- 1 | module Upmark 2 | module TransformHelpers 3 | def self.included(base) 4 | base.send :extend, ClassMethods 5 | end 6 | 7 | module ClassMethods 8 | def element(*names, &block) 9 | names.each do |name| 10 | name = name.to_s.downcase 11 | rule( 12 | { 13 | element: { 14 | name: (name != "*" ? 
name : simple(:name)), 15 | attributes: subtree(:attributes), 16 | children: subtree(:children), 17 | ignore: false 18 | } 19 | } 20 | ) do |element| 21 | element[:name] ||= name 22 | block.call(element) 23 | end 24 | end 25 | end 26 | 27 | def map_attributes_subtree(ast) 28 | ast.inject({}) do |hash, attribute| 29 | hash[attribute[:name].to_sym] = attribute[:value] 30 | hash 31 | end 32 | end 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /spec/acceptance/upmark_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe Upmark, ".convert" do 2 | RSpec::Matchers.define :convert_to do |expected| 3 | match do 4 | actual == expected 5 | end 6 | 7 | def actual 8 | @converted_actual ||= Upmark.convert(@actual) 9 | end 10 | 11 | diffable 12 | end 13 | 14 | context "" do 15 | specify 'converts to []()' do 16 | expect(<<-HTML.strip 17 |

messenger bag skateboard

18 | HTML 19 | ).to convert_to <<-MD.strip 20 | [messenger **bag** skateboard](http://helvetica.com/ "art party organic") 21 | MD 22 | end 23 | end 24 | 25 | context " hard" do 26 | specify 'converts as []()' do 27 | expect(<<-HTML.strip 28 |

Manager, Business Solutions

29 | HTML 30 | ).to convert_to <<-MD.strip 31 | [Manager, Business Solutions](http://jobs.latrobe.edu.au/jobDetails.asp?sJobIDs=545808&sKeywords=business "") 32 | MD 33 | end 34 | end 35 | 36 | context " with numeric entity" do 37 | specify 'converts as []()' do 38 | expect(<<-HTML.strip 39 |

blah

40 | HTML 41 | ).to convert_to <<-MD.strip 42 | [blah](http://www.abc.net.au/news/2016-02-18/haylen-we-need-a-drug-summit-because-we're-losing-the-war/7177152 "") 43 | MD 44 | end 45 | end 46 | 47 | context " with query string" do 48 | specify 'converts as []()' do 49 | expect(<<-HTML.strip 50 |

blah

51 | HTML 52 | ).to convert_to <<-MD.strip 53 | [blah](http://www.abc.net.au/news/2016-02-18/blah?blah=lol&lol=rofl "") 54 | MD 55 | end 56 | end 57 | 58 | context " with inline elements, no href" do 59 | specify 'converts as plain text' do 60 | expect(<<-HTML.strip 61 | How Australia can respond to the security challenges posed by climate change in the Asian Century 62 | HTML 63 | ).to convert_to <<-MD.strip 64 | How Australia can respond to the security challenges posed by climate change in the Asian Century 65 | MD 66 | end 67 | end 68 | 69 | context " with id href" do 70 | specify 'converts as plain text' do 71 | expect(<<-HTML.strip 72 | Labor MP calls to end dogs 73 | HTML 74 | ).to convert_to <<-MD.strip 75 | Labor MP calls to end dogs 76 | MD 77 | end 78 | end 79 | 80 | context "" do 81 | specify 'converts as ![]()' do 82 | expect(<<-HTML.strip 83 | messenger bag skateboard 84 | HTML 85 | ).to convert_to <<-MD.strip 86 | ![messenger bag skateboard](http://helvetica.com/image.gif "art party organic") 87 | MD 88 | end 89 | end 90 | 91 | context "

" do 92 | specify 'converts as plaintext' do 93 | expect(<<-HTML.strip 94 |

• Bullet 1

95 |

• Bullet 2

96 |

messenger bag skateboard

97 | 98 |

art party
99 | organic

100 | 101 |

art party
102 | organic

103 | 104 |

105 |

106 | 107 |

• Bullet 3

108 |

• Bullet 4

109 |

• Bullet 5

110 |

• Bullet 6

111 |

• Bullet 7

112 |

Something else

113 | HTML 114 | ).to convert_to <<-MD.strip 115 | * Bullet 1 116 | * Bullet 2 117 | 118 | messenger **bag** skateboard 119 | 120 | art party 121 | organic 122 | 123 | art party 124 | organic 125 | 126 | * Bullet 3 127 | * Bullet 4 128 | * Bullet 5 129 | * Bullet 6 130 | * Bullet 7 131 | 132 | Something else 133 | MD 134 | end 135 | 136 | it 'converts paragraph utf-8 bullet points to a markdown list' do 137 | expect("

• Bullet 1

• Bullet 2

").to convert_to "* Bullet 1\n* Bullet 2" 138 | end 139 | end 140 | 141 | context "