├── Gemfile
├── lib
    ├── quiver
    │   ├── cells
    │   │   ├── markdown_cell.rb
    │   │   └── code_cell.rb
    │   ├── notebook.rb
    │   ├── extract_cells_from_markdown.rb
    │   └── note.rb
    ├── parsers
    │   ├── index_page_parser.rb
    │   └── export_page_parser.rb
    └── scraper.rb
├── README.md
├── Gemfile.lock
├── LICENSE
└── export_to_quiver.rb


/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 | 
3 | gem "pandoc-ruby"
4 | gem "httparty"
5 | gem "nokogiri"
6 | gem "pry"
7 | 


--------------------------------------------------------------------------------
/lib/quiver/cells/markdown_cell.rb:
--------------------------------------------------------------------------------
 1 | module Quiver
 2 |   class MarkdownCell
 3 |     attr_accessor :content
 4 | 
 5 |     def initialize(content = '')
 6 |       @content = content
 7 |     end
 8 | 
 9 |     def to_h
10 |       { type: 'markdown', data: content }
11 |     end
12 |   end
13 | end
14 | 


--------------------------------------------------------------------------------
/lib/quiver/cells/code_cell.rb:
--------------------------------------------------------------------------------
 1 | module Quiver
 2 |   class CodeCell
 3 |     attr_accessor :content, :language
 4 | 
 5 |     def initialize(language = 'text', content = '')
 6 |       @content, @language = content, language
 7 |     end
 8 | 
 9 |     def to_h
10 |       { type: 'code', language: language, data: content }
11 |     end
12 |   end
13 | end
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # mediawiki-to-quiver
 2 | Simple scripts that export the pages of a MediaWiki wiki into the Quiver app's notebook format.
 3 | 
 4 | # usage
 5 | 
 6 | ## Install Pandoc
 7 | 
 8 | On Mac:
 9 | ```
10 | brew install pandoc
11 | ```
12 | 
13 | Or check http://pandoc.org/installing.html
14 | 
15 | ## Bundle
16 | 
17 | ```
18 | bundle
19 | ```
20 | 
21 | ## Run
22 | 
23 | ```
24 | ruby export_to_quiver.rb https://website-of-your-mediawiki.com/some-subdir/maybe
25 | ```
26 | 
27 | ## With auth
28 | ```
29 | USER=david PASSWORD=helloworld ruby export_to_quiver.rb https://website-of-your-mediawiki.com/some-subdir/maybe
30 | ```
31 | 
32 | # Thanks to
33 | * https://github.com/prurph/markdown-to-quiver
34 | 


--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
 1 | GEM
 2 |   remote: https://rubygems.org/
 3 |   specs:
 4 |     coderay (1.1.1)
 5 |     httparty (0.13.7)
 6 |       json (~> 1.8)
 7 |       multi_xml (>= 0.5.2)
 8 |     json (1.8.3)
 9 |     method_source (0.8.2)
10 |     mini_portile2 (2.0.0)
11 |     multi_xml (0.5.5)
12 |     nokogiri (1.6.7.2)
13 |       mini_portile2 (~> 2.0.0.rc2)
14 |     pandoc-ruby (2.0.1)
15 |     pry (0.10.3)
16 |       coderay (~> 1.1.0)
17 |       method_source (~> 0.8.1)
18 |       slop (~> 3.4)
19 |     slop (3.6.0)
20 | 
21 | PLATFORMS
22 |   ruby
23 | 
24 | DEPENDENCIES
25 |   httparty
26 |   nokogiri
27 |   pandoc-ruby
28 |   pry
29 | 
30 | BUNDLED WITH
31 |    1.11.2
32 | 


--------------------------------------------------------------------------------
/lib/parsers/index_page_parser.rb:
--------------------------------------------------------------------------------
 1 | # Parses a page index, i.e. /wiki/Special:Allpages
 2 | class IndexPageParser
 3 |   attr :page
 4 | 
 5 |   def initialize(content)
 6 |     @page = Nokogiri::HTML(content)
 7 |   end
 8 | 
 9 |   def page_titles
10 |     page_anchors.map { |anchor| anchor[:title] }
11 |   end
12 | 
13 |   def page_anchors
14 |     page.css("ul.mw-allpages-chunk").css("li").css("a").map do |link|
15 |       {
16 |         title: link.text,
17 |         path: link["href"]
18 |       }
19 |     end
20 |   end
21 | 
22 |   def next_page_path
23 |     if next_page_link = page.css('a:contains("Next page")').first
24 |       next_page_link["href"]
25 |     end
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/lib/quiver/notebook.rb:
--------------------------------------------------------------------------------
 1 | require "fileutils"
 2 | 
 3 | require "quiver/note"
 4 | 
 5 | module Quiver
 6 |  class Notebook
 7 |     attr :notes, :name, :title
 8 | 
 9 |     def initialize(name, title)
10 |       @notes = []
11 |       @name = name
12 |       @title = title
13 |     end
14 | 
15 |     def add_note(note)
16 |       @notes << note
17 |     end
18 | 
19 |     def save_to_directory(base_directory)
20 |       notebook_directory = [base_directory, directory_name].join("/")
21 | 
22 |       FileUtils.mkdir_p(notebook_directory)
23 | 
24 |       File.write(notebook_directory + "/meta.json", meta_json)
25 | 
26 |       notes.each do |note|
27 |         note.save_to_directory(notebook_directory)
28 |       end
29 |     end
30 | 
31 |     private
32 | 
33 |     def directory_name
34 |       name + ".qvnotebook"
35 |     end
36 | 
37 |     def meta_json
38 |       {
39 |         name: title,
40 |         uuid: name
41 |       }.to_json
42 |     end
43 | 
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/lib/parsers/export_page_parser.rb:
--------------------------------------------------------------------------------
 1 | # Parses an export page, i.e. /wiki/Special:Export
 2 | class ExportPageParser
 3 |   attr :content
 4 | 
 5 |   def initialize(content)
 6 |     @content = content
 7 |   end
 8 | 
 9 |   def pages
10 |     content["mediawiki"]["page"].map do |page|
11 |       parse_page(page)
12 |     end
13 |   end
14 | 
15 |   private
16 | 
17 |   def parse_page(page)
18 |     result = {
19 |       title: page["title"],
20 |     }
21 | 
22 |     revisions =
23 |       if page["revision"].is_a?(Array)
24 |         page["revision"].sort_by { |revision| DateTime.parse(revision["timestamp"])}
25 |       else
26 |         [page["revision"]]
27 |       end
28 | 
29 |     created_at = revisions.first["timestamp"]
30 |     updated_at = revisions.last["timestamp"]
31 | 
32 |     result[:created_at] = DateTime.parse(created_at)
33 | 
34 |     if created_at != updated_at
35 |       result[:updated_at] = DateTime.parse(updated_at)
36 |     end
37 | 
38 |     result[:content] = revisions.last["text"]["__content__"]
39 | 
40 |     result
41 |   end
42 | 
43 | end
44 | 


--------------------------------------------------------------------------------
/lib/quiver/extract_cells_from_markdown.rb:
--------------------------------------------------------------------------------
 1 | module Quiver
 2 |   class ExtractCellsFromMarkdown
 3 |     attr :cells, :content
 4 | 
 5 |     def initialize(content)
 6 |       @content = content
 7 | 
 8 |       generate_cells
 9 |     end
10 | 
11 |     private
12 | 
13 |     def generate_cells
14 |       @cells = []
15 | 
16 |       content.split("\n").each do |line|
17 |         set_correct_cell_for_line(line)
18 | 
19 |         current_cell.content += line + "\n"
20 |       end
21 |     end
22 | 
23 |     def set_correct_cell_for_line(line)
24 |       if match = line.match(/```(?<language>\S+)?/)
25 |         if current_cell === CodeCell
26 |           set_new_cell(MarkdownCell.new)
27 |         else
28 |           language = match["language"]
29 | 
30 |           set_new_cell(CodeCell.new(language))
31 |         end
32 |       end
33 |     end
34 | 
35 |     def current_cell
36 |       if @current_cell.nil?
37 |         set_new_cell(MarkdownCell.new)
38 |       end
39 | 
40 |       @current_cell
41 |     end
42 | 
43 |     def set_new_cell(cell)
44 |       @cells << cell
45 |       @current_cell = cell
46 |     end
47 |   end
48 | end
49 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 David Verhasselt
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/lib/quiver/note.rb:
--------------------------------------------------------------------------------
 1 | require "fileutils"
 2 | 
 3 | require "quiver/cells/code_cell"
 4 | require "quiver/cells/markdown_cell"
 5 | 
 6 | module Quiver
 7 |   class Note
 8 |     attr :cells
 9 |     attr_accessor :uuid, :title, :created_at, :updated_at
10 | 
11 |     def initialize
12 |       @cells = []
13 |     end
14 | 
15 |     def add_cell(cell)
16 |       @cells << cell
17 |     end
18 | 
19 |     def save_to_directory(base_directory)
20 |       note_directory = [base_directory, directory_name].join("/")
21 | 
22 |       FileUtils.mkdir_p(note_directory)
23 | 
24 |       File.write(note_directory + "/meta.json", meta_json)
25 |       File.write(note_directory + "/content.json", content_json)
26 |     end
27 | 
28 |     private
29 | 
30 |     def directory_name
31 |       uuid + ".qvnote"
32 |     end
33 | 
34 |     def meta_json
35 |       meta_attrs = {
36 |         created_at: created_at.to_time.to_i,
37 |         updated_at: (updated_at || created_at).to_time.to_i,
38 |         title: title,
39 |         uuid: uuid
40 |       }.to_json
41 |     end
42 | 
43 |     def content_json
44 |       {
45 |         title: title,
46 |         cells: cells.map(&:to_h)
47 |       }.to_json
48 |     end
49 |   end
50 | end
51 | 


--------------------------------------------------------------------------------
/lib/scraper.rb:
--------------------------------------------------------------------------------
 1 | require "parsers/index_page_parser"
 2 | require "parsers/export_page_parser"
 3 | 
 4 | class Scraper
 5 |   ALL_PAGES_PATH = "Special:Allpages?hideredirects=1"
 6 |   EXPORT_PAGE_PATH = "Special:Export"
 7 | 
 8 |   attr :host, :subdir, :options
 9 | 
10 |   def initialize(host, subdir, options = {})
11 |     @host, @subdir, @options = host, subdir, options
12 |   end
13 | 
14 |   def page_titles
15 |     titles = []
16 |     page_index_path = subdir + ALL_PAGES_PATH
17 | 
18 |     while page_index_path
19 |       page = get_page_index(page_index_path)
20 |       titles += page.page_titles
21 | 
22 |       page_index_path = page.next_page_path
23 |     end
24 | 
25 |     titles
26 |   end
27 | 
28 |   def pages(titles)
29 |     export_content = download_export(titles)
30 | 
31 |     ExportPageParser.new(export_content).pages
32 |   end
33 | 
34 | private
35 | 
36 |   def get_page_index(path)
37 |     puts "GET #{host + path}"
38 |     content = HTTParty.get(host + path, @options)
39 | 
40 |     IndexPageParser.new(content)
41 |   end
42 | 
43 |   def download_export(article_names)
44 |     body = {
45 |       pages: article_names.join("\n")
46 |     }
47 | 
48 |     url = host + subdir + EXPORT_PAGE_PATH
49 | 
50 |     puts "\nPOST #{url}"
51 |     HTTParty.post(url, options.merge(body: body))
52 |   end
53 | 
54 | end
55 | 
56 | 


--------------------------------------------------------------------------------
/export_to_quiver.rb:
--------------------------------------------------------------------------------
 1 | require 'rubygems'
 2 | require 'bundler/setup'
 3 | Bundler.require(:default)
 4 | 
 5 | $:.unshift File.dirname(__FILE__) + "/lib"
 6 | 
 7 | require "uri"
 8 | require "scraper"
 9 | require "quiver/notebook"
10 | require "quiver/extract_cells_from_markdown"
11 | 
# Hands MediaWiki markup to PandocRuby and returns what it produces for the
# Markdown output format.
def convert_to_markdown(mediawiki_content)
  formats = { from: :mediawiki, to: :markdown }

  PandocRuby.convert(mediawiki_content, **formats)
end
15 | 
# Export script: downloads every page of the wiki given on the command line,
# converts each page to Markdown and writes the result as one Quiver notebook
# into "markdown-export/".

# SecureRandom generates the note UUIDs below; require it explicitly instead
# of relying on another library having loaded it.
require "securerandom"

if ARGV.count != 1
  puts "Please pass in only the URL to your wiki."
  exit 1
end

uri = URI.parse(ARGV[0])

# Keep an explicitly given non-default port, otherwise requests would go to
# the scheme's default port.
host = "#{uri.scheme}://#{uri.host}"
host += ":#{uri.port}" if uri.port && uri.port != uri.default_port

# Exactly one trailing slash, whether or not the URL was given with one.
subdir = uri.path.chomp("/") + "/"

# NOTE(review): `verify: false` is passed to every HTTParty request;
# presumably this turns off TLS certificate verification -- confirm this is
# intended before using the script against wikis on untrusted networks.
options = {
  verify: false
}

# USER is normally preset by the login shell, so credentials are only sent
# when PASSWORD is supplied as well.
if ENV["USER"] && ENV["PASSWORD"]
  puts "Found auth details for #{ENV["USER"]}"
  options[:basic_auth] = {
    username: ENV["USER"],
    password: ENV["PASSWORD"]
  }
end

scraper = Scraper.new(host, subdir, options)

puts "Downloading page titles..."
titles = scraper.page_titles

puts "Found #{titles.count} titles\n\n"
puts "Downloading page exports..."

notebook = Quiver::Notebook.new("exported", "Exported")

# Pages are exported in batches of 25 titles per request.
titles.each_slice(25) do |batch_titles|
  pages = scraper.pages(batch_titles)

  pages.each do |page|
    print "."
    note = Quiver::Note.new

    note.uuid = SecureRandom.uuid.upcase
    note.title = page[:title]
    note.created_at = page[:created_at]
    note.updated_at = page[:updated_at]

    # A page that cannot be converted is reported and skipped so that one bad
    # page does not abort the whole export.
    begin
      content = convert_to_markdown(page[:content])
    rescue StandardError => e
      puts "Error converting #{page[:title]}: #{e.message}"
      next
    end

    Quiver::ExtractCellsFromMarkdown.new(content).cells.each do |cell|
      note.add_cell(cell)
    end

    notebook.add_note(note)
  end
end

notebook.save_to_directory("markdown-export/")
76 | 


--------------------------------------------------------------------------------