messenger bag skateboard
" do 230 | let(:html) { <<-HTML.strip } 231 |
messenger bag skateboard
232 |
messenger bag skateboard
233 | HTML 234 | 235 | specify 'are left alone' do 236 | expect(html).to convert_to html 237 | end 238 | end 239 | 240 | context "" do 241 | let(:html) { <<-HTML.strip } 242 |
243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 255 | 256 | 257 | 258 | 259 | 260 |
messenger	bag
messenger	bag
skateboarding	is cool with all the kids 254 \| or something
Messenger bags	are in with the hipsters though.
261 | HTML 262 | 263 | specify 'is converted to paragraphs' do 264 | expect(html).to convert_to <<-MD.strip 265 | **messenger** 266 | 267 | bag 268 | 269 | messenger 270 | 271 | **bag** 272 | 273 | skateboarding 274 | is cool with all the kids 275 | or something 276 | 277 | **Messenger bags** 278 | are in with the hipsters though. 279 | MD 280 | end 281 | end 282 | 283 | context "
" do
284 |       let(:html) { <<-HTML.strip }
285 | 286 |   
287 |     messenger bag skateboard
288 |   
289 | 
290 |       HTML
291 | 
292 |       specify 'are left alone' do
293 |         expect(html).to convert_to html
294 |       end
295 |     end
296 |   end
297 | 
298 |   context " elements" do
299 |     specify 'are stripped' do
300 |       expect(<<-HTML.strip
301 | messenger bag skateboard
302 |       HTML
303 |       ).to convert_to <<-MD.strip
304 | messenger **bag** skateboard
305 |       MD
306 |     end
307 |   end
308 | 
309 |   context "plain text" do
310 |     it 'containing plain bullet points converts to markdown' do
311 |       expect(
312 |         "• Bullet 1\n• Bullet 2\n"
313 |       ).to convert_to "* Bullet 1\n* Bullet 2"
314 |     end
315 |   end
316 | 
317 |   context "unbalanced elements" do
318 |     let(:html) { "foo" }
319 | 
320 |     it "should raise an exception" do
321 |       expect {
322 |         Upmark.convert(html)
323 |       }.to raise_error(Upmark::ParseFailed)
324 |     end
325 |   end
326 | 
327 |   context "unbalanced elements" do
328 |     let(:html) { "foo" }
329 | 
330 |     it "should raise an exception" do
331 |       expect {
332 |         Upmark.convert(html)
333 |       }.to raise_error(Upmark::ParseFailed)
334 |     end
335 |   end
336 | 
337 |   context "nested table" do
338 |     let(:html) { "Hi 
there"}
339 | 
340 |     it "should strip both tables" do
341 |       expect(html).to convert_to("Hi\nthere")
342 |     end
343 |   end
344 | 
345 |   context "nested unordered lists" do
346 |   let(:html) do
347 |       <<-HTML
348 |       

349 |         
350 |           List item
351 |         
352 |       
353 |       HTML
354 |     end
355 | 
356 |     it "generates readable output" do
357 |       expect(html).to convert_to("* * List item")
358 |     end
359 |   end
360 | 
361 |   context "nested ordered lists" do
362 |     let(:html) do
363 |         <<-HTML
364 |         
365 |           
366 |             List item
367 |           
368 |         
369 |         HTML
370 |       end
371 | 
372 |       it "generates readable output" do
373 |         expect(html).to convert_to("1. 1. List item")
374 |       end
375 |     end
376 |   end
377 | 


--------------------------------------------------------------------------------
/spec/errors_spec.rb:
--------------------------------------------------------------------------------
 1 | RSpec.describe Upmark::ParseFailed, ".ascii_tree" do
 2 |   it "delegates to a cause object" do
 3 |     cause = double(ascii_tree: double)
 4 |     error = Upmark::ParseFailed.new("oh noes", cause)
 5 |     expect(error.ascii_tree).to be(cause.ascii_tree)
 6 |   end
 7 | 
 8 |   it "returns nil when there is no cause" do
 9 |     error = Upmark::ParseFailed.new("oh noes", nil)
10 |     expect(error.ascii_tree).to be_nil
11 |   end
12 | end
13 | 


--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
 1 | require "parslet/rig/rspec"
 2 | require "rspec"
 3 | require "simplecov"
 4 | # Coverage is started only when the BUILDBOX environment variable is set.
 5 | if ENV["BUILDBOX"]
 6 |   SimpleCov.start do
 7 |     add_filter "spec/"
 8 |     add_filter "vendor/bundle/"
 9 |   end
10 | end
11 | # NOTE(review): "upmark" is required after SimpleCov.start, presumably so the library is tracked - confirm.
12 | require "upmark"
13 | # Suite-wide RSpec settings.
14 | RSpec.configure do |config|
15 |   config.disable_monkey_patching!
16 |   # Examples run in random order.
17 |   config.order = :random
18 |   # Example status is persisted to the '.examples' file.
19 |   config.example_status_persistence_file_path = '.examples'
20 | end
21 | 


--------------------------------------------------------------------------------
/spec/unit/lib/upmark/parser/xml_spec.rb:
--------------------------------------------------------------------------------
  1 | RSpec.describe Upmark::Parser::XML do
  2 |   let(:parser) { Upmark::Parser::XML.new }
  3 | 
  4 |   context "#node" do
  5 |     it 'will parse ""' do
  6 |       expect(parser.node).to parse ""
  7 |     end
  8 |     it 'will parse "messenger bag skateboard"' do
  9 |       expect(parser.node).to parse "messenger bag skateboard"
 10 |     end
 11 |     it 'will parse html br tags' do
 12 |       expect(parser.node).to parse 'One
Two'
 13 |     end
 14 |     it 'will parse "messenger bag skateboard"' do
 15 |       expect(
 16 |         parser.node
 17 |       ).to parse "messenger bag skateboard"
 18 |     end
 19 |     it 'will parse "messenger bag skateboard"' do
 20 |       expect(
 21 |         parser.node
 22 |       ).to parse "messenger bag skateboard"
 23 |     end
 24 |     it 'will parse "messenger
bag
skateboard"' do
 25 |       expect(
 26 |         parser.node
 27 |       ).to parse "messenger
bag
skateboard"
 28 |     end
 29 |     it 'will parse "messenger
\nbag
\nskateboard"' do
 30 |       expect(
 31 |         parser.node
 32 |       ).to parse "messenger
\nbag
\nskateboard"
 33 |     end
 34 |     it 'will parse "messenger bag skateboard"' do
 35 |       expect(
 36 |         parser.node
 37 |       ).to parse "messenger bag skateboard"
 38 |     end
 39 |   end
 40 | 
 41 |   context "#empty_element" do
 42 |     it 'will parse  ' do
 43 |       expect(parser.empty_element).to parse ' '
 44 |     end
 45 |   end
 46 | 
 47 |   context "#element" do
 48 |     it 'will parse ""' do
 49 |       expect(parser.element).to parse ""
 50 |     end
 51 |     it 'will parse "messenger bag skateboard"' do
 52 |       expect(parser.element).to parse "messenger bag skateboard"
 53 |     end
 54 |     it 'will parse "Some
Text"' do
 55 |       expect(parser.element).to parse "Some
Text"
 56 |     end
 57 |     it 'will parse %q{}' do
 58 |       expect(parser.element).to parse %q{}
 59 |     end
 60 |     it 'will not parse ""' do
 61 |       expect(parser.element).to_not parse "
"
 62 |     end
 63 |     it 'will not parse "
messenger bag skateboard"' do
 64 |       expect(parser.element).to_not parse "
messenger bag skateboard"
 65 |     end
 66 |     it 'will not parse "messenger bag skateboard"' do
 67 |       expect(parser.element).to_not parse "messenger bag skateboard"
 68 |     end
 69 |     it 'will not parse "messenger bag skateboard
"' do
 70 |       expect(parser.element).to_not parse "
messenger bag skateboard
"
 71 |     end
 72 |   end
 73 | 
 74 |   context "#text" do
 75 |     it 'will parse "messenger bag skateboard"' do
 76 |       expect(parser.text).to parse "messenger bag skateboard"
 77 |     end
 78 |     it 'will not parse "
messenger bag skateboard"' do
 79 |       expect(parser.text).to_not parse "messenger bag skateboard"
 80 |     end
 81 |     it 'will not parse " "' do
 82 |       expect(parser.text).to_not parse " "
 83 |     end
 84 |     it 'will not parse ""' do
 85 |       expect(parser.text).to_not parse ""
 86 |     end
 87 |   end
 88 | 
 89 |   context "#start_tag" do
 90 |     it 'will parse %q{}' do
 91 |       expect(parser.start_tag).to parse %q{}
 92 |     end
 93 |     it 'will parse %q{}' do
 94 |       expect(parser.start_tag).to parse %q{}
 95 |     end
 96 |     it 'will parse ""' do
 97 |       expect(parser.start_tag).to parse ""
 98 |     end
 99 |     it 'will not parse ""' do
100 |       expect(parser.start_tag).to_not parse ""
101 |     end
102 |     it 'will not parse ""' do
106 |       expect(parser.start_tag).to_not parse "tofu>"
107 |     end
108 |   end
109 | 
110 |   context "#end_tag" do
111 |     it 'will parse ""' do
112 |       expect(parser.end_tag).to parse ""
113 |     end
114 |     it 'will not parse ""' do
115 |       expect(parser.end_tag).to_not parse ""
116 |     end
117 |     it 'will not parse ""' do
121 |       expect(parser.end_tag).to_not parse "/tofu>"
122 |     end
123 |   end
124 | 
125 |   context "#empty_br" do
126 |     it 'will parse html br tags' do
127 |       expect(parser.empty_br).to parse '
'
128 |     end
129 |   end
130 | 
131 |   context "#empty_tag" do
132 |     it 'will parse %q{}' do
133 |       expect(parser.empty_tag).to parse %q{}
134 |     end
135 |     it 'will parse %q{}' do
136 |       expect(parser.empty_tag).to parse %q{}
137 |     end
138 |     it 'will parse %q{}' do
139 |       expect(parser.empty_tag).to parse %q{}
140 |     end
141 |     it 'will not parse ""' do
142 |       expect(parser.empty_tag).to_not parse ""
143 |     end
144 |     it 'will not parse ""' do
145 |       expect(parser.empty_tag).to_not parse ""
146 |     end
147 |     it 'will not parse ""' do
151 |       expect(parser.empty_tag).to_not parse "/tofu>"
152 |     end
153 |   end
154 | 
155 |   context "#name" do
156 |     it 'will parse "p"' do
157 |       expect(parser.name).to parse "p"
158 |     end
159 |     it 'will parse "h1"' do
160 |       expect(parser.name).to parse "h1"
161 |     end
162 |     it 'will not parse "1h"' do
163 |       expect(parser.name).to_not parse "1h"
164 |     end
165 |     it 'will not parse "h 1"' do
166 |       expect(parser.name).to_not parse "h 1"
167 |     end
168 |   end
169 | 
170 |   context "#attribute" do # name=value pairs: value must be quoted with matching quotes; name and value are both required
171 |     it 'will parse %q{art="party organic"}' do
172 |       expect(parser.attribute).to parse %q{art="party organic"}
173 |     end
174 |     it 'will parse %q{art=\'party organic\'}' do
175 |       expect(parser.attribute).to parse %q{art='party organic'}
176 |     end
177 |     it 'will parse %q{art="party\'organic"}' do
178 |       expect(parser.attribute).to parse %q{art="party'organic"}
179 |     end
180 |     it 'will parse %q{art=\'party"organic\'}' do
181 |       expect(parser.attribute).to parse %q{art='party"organic'}
182 |     end
183 |     it 'will not parse "art"' do
184 |       expect(parser.attribute).to_not parse "art"
185 |     end
186 |     it 'will not parse "art="' do
187 |       expect(parser.attribute).to_not parse "art="
188 |     end
189 |     it 'will not parse "art=party"' do
190 |       expect(parser.attribute).to_not parse "art=party"
191 |     end
192 |     it 'will not parse %q{="party organic"}' do
193 |       expect(parser.attribute).to_not parse %q{="party organic"}
194 |     end
195 |     it 'will not parse %q{art="party organic\'}' do
196 |       expect(parser.attribute).to_not parse %q{art="party organic'}
197 |     end
198 |     it 'will not parse %q{art=\'party organic"}' do
199 |       expect(parser.attribute).to_not parse %q{art='party organic"}
200 |     end
201 |   end
202 | 
203 |   context "#parse" do
204 |     RSpec::Matchers.define :convert do |html|
205 |       match do |parser|
206 |         @actual = parser.parse(html)
207 |         @actual == @expected
208 |       end
209 | 
210 |       chain :to do |ast|
211 |         @expected = ast
212 |       end
213 |       attr_reader :expected
214 | 
215 |       failure_message do
216 |         %Q{expected "#{html}" to parse to "#{@expected.inspect}" but was #{@result.inspect}}
217 |       end
218 | 
219 |       diffable
220 |     end
221 | 
222 |     context "single tag" do
223 |       it 'is parsed as a single element' do
224 |         expect(parser).to convert("messenger").to([
225 |           {
226 |             element: {
227 |               start_tag: {name: "p", attributes: []},
228 |               end_tag:   {name: "p"},
229 |               children:  [{text: "messenger"}]
230 |             }
231 |           }
232 |         ])
233 |       end
234 | 
235 |       it 'will ignore empty text tags' do
236 |         expect(parser).to convert(' ').to(
237 |           [
238 |             {
239 |               empty:
240 |                 {
241 |                   start_tag: { name: "p", attributes: [] },
242 |                   end_tag:   { name: "p" },
243 |                 }
244 |             }
245 |           ]
246 |         )
247 |       end
248 |     end
249 | 
250 |     context "empty tag" do
251 |       it 'is parsed an empty_tag element' do
252 |         expect(parser).to convert("
").to([
253 |           {
254 |             element: {
255 |               empty_tag: {name: "br", attributes: []}
256 |             }
257 |           }
258 |         ])
259 |       end
260 |     end
261 | 
262 |     context "single tag with attributes" do
263 |       let(:html) { %q{messenger bag skateboard} }
264 | 
265 |       it 'is parsed an element with an attribute subtree' do
266 |         expect(parser).to convert(html).to([
267 |           {
268 |             element: {
269 |               start_tag: {
270 |                 name: "a",
271 |                 attributes: [
272 |                   {name: "href",  value: "http://helvetica.com/"},
273 |                   {name: "title", value: "art party organic"}
274 |                 ]
275 |               },
276 |               end_tag:  {name: "a"},
277 |               children: [{text: "messenger bag skateboard"}]
278 |             }
279 |           }
280 |         ])
281 |       end
282 |     end
283 | 
284 |     context "multiple inline tags" do
285 |       let(:html) { "messenger
bag
skateboard" }
286 | 
287 |       it 'converts to multiple elements' do
288 |         expect(parser).to convert(html).to([
289 |           {
290 |             element: {
291 |               start_tag: {name: "p", attributes: []},
292 |               end_tag:   {name: "p"},
293 |               children:  [{text: "messenger"}]
294 |             }
295 |           }, {
296 |             element: {
297 |               start_tag: {name: "p", attributes: []},
298 |               end_tag:   {name: "p"},
299 |               children:  [{text: "bag"}]
300 |             }
301 |           }, {
302 |             element: {
303 |               start_tag: {name: "p", attributes: []},
304 |               end_tag:   {name: "p"},
305 |               children:  [{text: "skateboard"}]
306 |             }
307 |           }
308 |         ])
309 |       end
310 |     end
311 | 
312 |     context "multiple tags" do
313 |       let(:html) { "messenger
\nbag
\nskateboard" }
314 | 
315 |       it 'converts to multiple elements' do
316 |         expect(parser).to convert(html).to([
317 |           {
318 |             element: {
319 |               start_tag: {name: "p", attributes: []},
320 |               end_tag:   {name: "p"},
321 |               children:  [{text: "messenger"}]
322 |             }
323 |           }, {
324 |             text: "\n"
325 |           }, {
326 |             element: {
327 |               start_tag: {name: "p", attributes: []},
328 |               end_tag:   {name: "p"},
329 |               children:  [{text: "bag"}]
330 |             }
331 |           }, {
332 |             text: "\n"
333 |           }, {
334 |             element: {
335 |               start_tag: {name: "p", attributes: []},
336 |               end_tag:   {name: "p"},
337 |               children:  [{text: "skateboard"}]
338 |             }
339 |           }
340 |         ])
341 |       end
342 |     end
343 | 
344 |     context "nested tags" do
345 |       let(:html) { "messenger bag skateboard" }
346 | 
347 |       it 'converts to multiple nested elements' do
348 |         expect(parser).to convert(html).to([
349 |           {
350 |             element: {
351 |               start_tag: {name: "p", attributes: []},
352 |               end_tag:   {name: "p"},
353 |               children:  [
354 |                 {
355 |                   text: "messenger "
356 |                 }, {
357 |                   element: {
358 |                     start_tag: {name: "strong", attributes: []},
359 |                     children:  [{text: "bag"}],
360 |                     end_tag:   {name: "strong"}
361 |                   }
362 |                 }, {
363 |                   text: " skateboard"
364 |                 }
365 |               ]
366 |             }
367 |           }
368 |         ])
369 |       end
370 |     end
371 |   end
372 | end
373 | 


--------------------------------------------------------------------------------
/spec/unit/lib/upmark/transform/markdown_spec.rb:
--------------------------------------------------------------------------------
  1 | RSpec.describe Upmark::Transform::Markdown do
  2 |   def transform(ast)
  3 |     Upmark::Transform::Markdown.new.apply(ast)
  4 |   end
  5 | 
  6 |   let(:transformed_ast) { transform(ast) }
  7 | 
  8 |   context "#apply" do
  9 |     context '
' do
 10 |       let(:ast) { [{ element: { name: 'br' }}] }
 11 | 
 12 |       it 'will transform to markdown' do
 13 |         expect(transformed_ast).to eq ["\n"]
 14 |       end
 15 |     end
 16 | 
 17 |     context "" do
 18 |       context "single tag" do
 19 |         let(:ast) do
 20 |           [
 21 |             {
 22 |               element: {
 23 |                 name: "p",
 24 |                 attributes: [],
 25 |                 children: [{text: "messenger bag skateboard"}],
 26 |                 ignore: false
 27 |               }
 28 |             }
 29 |           ]
 30 |         end
 31 | 
 32 |         it 'transforms to markdown' do
 33 |           expect(
 34 |             transformed_ast
 35 |           ).to eq(["messenger bag skateboard\n\n"])
 36 |         end
 37 |       end
 38 | 
 39 |       context "multiple tags" do
 40 |         let(:ast) do
 41 |           [
 42 |             {
 43 |               element: {
 44 |                 name: "p",
 45 |                 attributes: [],
 46 |                 children: [{text: "messenger"}],
 47 |                 ignore: false
 48 |               }
 49 |             }, {
 50 |               element: {
 51 |                 name: "p",
 52 |                 attributes: [],
 53 |                 children: [{text: "bag"}],
 54 |                 ignore: false
 55 |               }
 56 |             }, {
 57 |               element: {
 58 |                 name: "p",
 59 |                 attributes: [],
 60 |                 children: [{text: "skateboard"}],
 61 |                 ignore: false
 62 |               }
 63 |             }
 64 |           ]
 65 |         end
 66 | 
 67 |         it 'transforms to markdown' do
 68 |           expect(
 69 |             transformed_ast
 70 |           ).to eq(["messenger\n\n", "bag\n\n", "skateboard\n\n"])
 71 |         end
 72 |       end
 73 |     end
 74 | 
 75 |     context "" do
 76 |       context "single tag" do
 77 |         let(:ast) do
 78 |           a_tag(
 79 |             href: "http://helvetica.com/",
 80 |             title: "art party organic",
 81 |           )
 82 |         end
 83 | 
 84 |         def a_tag(attributes)
 85 |           [
 86 |             {
 87 |               element: {
 88 |                 name: "a",
 89 |                 attributes: attributes.map do |key, value|
 90 |                   { name: key.to_s, value: value }
 91 |                 end,
 92 |                 children: [{text: "messenger bag skateboard"}],
 93 |                 ignore: false
 94 |               }
 95 |             }
 96 |           ]
 97 |         end
 98 | 
 99 |         it 'transforms to markdown' do
100 |           expect(
101 |             transformed_ast
102 |           ).to eq([%q{[messenger bag skateboard](http://helvetica.com/ "art party organic")}])
103 |         end
104 | 
105 |         it 'transforms mailto to markdown' do
106 |           expect(
107 |             transform a_tag(href: 'mailto:a@example.com', title: 'Some Path')
108 |           ).to eq([%q{[messenger bag skateboard](mailto:a@example.com "Some Path")}])
109 |         end
110 | 
111 |         it 'strips local urls to their text' do
112 |           expect(
113 |             transform a_tag(href: 'file://some/path', title: 'Some Path')
114 |           ).to eq ['messenger bag skateboard']
115 |         end
116 | 
117 |         it 'strips relative urls to their alt text' do
118 |           expect(
119 |             transform a_tag(src: 'some/path', title: 'Some Path')
120 |           ).to eq ['messenger bag skateboard']
121 |         end
122 |       end
123 |     end
124 | 
125 |     context "" do
126 |       context "empty tag" do
127 |         let(:ast) do
128 |           img(
129 |             src:   "http://helvetica.com/image.gif",
130 |             title: "art party organic",
131 |             alt:   "messenger bag skateboard",
132 |           )
133 |         end
134 | 
135 |         def img(attributes)
136 |           [
137 |             {
138 |               element: {
139 |                 name: "img",
140 |                 attributes: attributes.map do |key, value|
141 |                   { name: key.to_s, value: value }
142 |                 end,
143 |                 children: [],
144 |                 ignore: false
145 |               }
146 |             }
147 |           ]
148 |         end
149 | 
150 |         it 'transforms to markdown' do
151 |           expect(
152 |             transformed_ast
153 |           ).to eq([%q{![messenger bag skateboard](http://helvetica.com/image.gif "art party organic")}])
154 |         end
155 | 
156 |         it 'strips file urls to their alt text or title' do
157 |           expect(
158 |             transform img(src: 'file://some/path', alt: 'Some', title: 'Path')
159 |           ).to eq ['Some']
160 |           expect(
161 |             transform img(src: 'file://some/path', title: 'Some Path')
162 |           ).to eq ['Some Path']
163 |         end
164 | 
165 |         it 'strips relative urls to their alt text' do
166 |           expect(
167 |             transform img(src: 'some/path', alt: 'Some', title: 'Path')
168 |           ).to eq ['Some']
169 |           expect(
170 |             transform img(src: 'some/path', title: 'Some Path')
171 |           ).to eq ['Some Path']
172 |         end
173 |       end
174 |     end
175 |   end
176 | end
177 | 


--------------------------------------------------------------------------------
/upmark.gemspec:
--------------------------------------------------------------------------------
 1 | Gem::Specification.new do |s|
 2 |   s.name        = "upmark"
 3 |   s.version     = "1.1.0"
 4 |   s.authors     = ["Josh Bassett", "Gus Gollings", "James Healy"]
 5 |   s.email       = "dev@theconversation.edu.au"
 6 |   s.homepage    = "http://github.com/conversation/upmark"
 7 |   s.summary     = "A HTML to Markdown converter."
 8 |   s.description = "Upmark has the skills to convert your HTML to Markdown."
 9 | 
10 |   s.required_ruby_version = ">= 1.9.3"
11 |   s.rubyforge_project = "upmark"
12 | 
13 |   s.files       =  Dir.glob("{lib,spec}/**/*") + ["Rakefile", "LICENSE.md", "README.md"]
14 |   s.test_files  =  Dir.glob("{spec}/**/*")
15 |   s.executables = ["upmark"]
16 | 
17 |   s.add_development_dependency "rspec", "~> 3.7"
18 |   s.add_development_dependency "rake"
19 |   s.add_development_dependency "simplecov"
20 | 
21 |   s.add_runtime_dependency "parslet", "~> 1.8.2"
22 | end
23 | 


--------------------------------------------------------------------------------
,

,

,

,

,

" do 208 | specify 'converts as #' do 209 | expect(<<-HTML.strip 210 |

messenger bag skateboard

messenger bag skateboard

messenger bag skateboard

messenger bag skateboard

messenger bag skateboard

messenger bag skateboard