# Repository layout (kept verbatim from the dump header):
#
#   ├── Gemfile
#   ├── lib
#       ├── quiver
#       │   ├── cells
#       │   │   ├── markdown_cell.rb
#       │   │   └── code_cell.rb
#       │   ├── notebook.rb
#       │   ├── extract_cells_from_markdown.rb
#       │   └── note.rb
#       ├── parsers
#       │   ├── index_page_parser.rb
#       │   └── export_page_parser.rb
#       └── scraper.rb
#   ├── README.md
#   ├── Gemfile.lock
#   ├── LICENSE
#   └── export_to_quiver.rb
#
# /Gemfile:
#
#   source "https://rubygems.org"
#
#   gem "pandoc-ruby"
#   gem "httparty"
#   gem "nokogiri"
#   gem "pry"

# /lib/quiver/cells/markdown_cell.rb
module Quiver
  # A note cell that holds Markdown text.
  class MarkdownCell
    # The Markdown source of the cell (String).
    attr_accessor :content

    # @param content [String] initial Markdown text; empty by default
    def initialize(content = '')
      @content = content
    end

    # @return [Hash] the cell as a hash with :type and :data keys
    def to_h
      { type: 'markdown', data: content }
    end
  end
end

# /lib/quiver/cells/code_cell.rb
module Quiver
  # A note cell that holds source code tagged with a language.
  class CodeCell
    # Language recorded when the caller does not name one.
    DEFAULT_LANGUAGE = 'text'

    attr_accessor :content, :language

    # @param language [String, nil] language tag; nil is treated like an
    #   omitted argument so a fence without a language never serialises a
    #   null language
    # @param content [String] initial code text; empty by default
    def initialize(language = DEFAULT_LANGUAGE, content = '')
      @language = language || DEFAULT_LANGUAGE
      @content = content
    end

    # @return [Hash] the cell as a hash with :type, :language and :data keys
    def to_h
      { type: 'code', language: language, data: content }
    end
  end
end

# /README.md (first part, kept verbatim):
#
#   # mediawiki-to-quiver
#   Simple scripts to convert your MediaWiki to Quiver app format
#
#   # usage
#
#   ## Install Pandoc
#
#   On Mac:
#   ```
#   brew install pandoc
#   ```
#
#   Or check http://pandoc.org/installing.html
#
#   ## Bundle
#
#   ```
#   bundle
#   ```
#
#   ## Run
#
#   ```
#   ruby export_to_quiver.rb
https://website-of-your-mediawiki.com/some-subdir/maybe 25 | ``` 26 | 27 | ## With auth 28 | ``` 29 | USER=david PASSWORD=helloworld ruby export_to_quiver.rb https://website-of-your-mediawiki.com/some-subdir/maybe 30 | ``` 31 | 32 | # Thanks to 33 | * https://github.com/prurph/markdown-to-quiver 34 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | coderay (1.1.1) 5 | httparty (0.13.7) 6 | json (~> 1.8) 7 | multi_xml (>= 0.5.2) 8 | json (1.8.3) 9 | method_source (0.8.2) 10 | mini_portile2 (2.0.0) 11 | multi_xml (0.5.5) 12 | nokogiri (1.6.7.2) 13 | mini_portile2 (~> 2.0.0.rc2) 14 | pandoc-ruby (2.0.1) 15 | pry (0.10.3) 16 | coderay (~> 1.1.0) 17 | method_source (~> 0.8.1) 18 | slop (~> 3.4) 19 | slop (3.6.0) 20 | 21 | PLATFORMS 22 | ruby 23 | 24 | DEPENDENCIES 25 | httparty 26 | nokogiri 27 | pandoc-ruby 28 | pry 29 | 30 | BUNDLED WITH 31 | 1.11.2 32 | -------------------------------------------------------------------------------- /lib/parsers/index_page_parser.rb: -------------------------------------------------------------------------------- 1 | # Parses a page index, i.e.
# /wiki/Special:Allpages
#
# Wraps the HTML of one page-index listing and exposes the article links it
# contains plus the link to the following listing page, if any.
class IndexPageParser
  # The parsed Nokogiri HTML document.
  attr_reader :page

  # @param content [String] raw HTML of the index page
  def initialize(content)
    @page = Nokogiri::HTML(content)
  end

  # @return [Array<String>] link texts of every listed page
  def page_titles
    page_anchors.map { |anchor| anchor[:title] }
  end

  # @return [Array<Hash>] one { title:, path: } hash per link found inside
  #   the "ul.mw-allpages-chunk" list items
  def page_anchors
    links = page.css("ul.mw-allpages-chunk").css("li").css("a")

    links.map do |link|
      { title: link.text, path: link["href"] }
    end
  end

  # @return [String, nil] href of the first "Next page" link, or nil when the
  #   listing has no such link
  def next_page_path
    next_page_link = page.css('a:contains("Next page")').first

    next_page_link["href"] if next_page_link
  end
end

# /lib/quiver/notebook.rb
require "fileutils"
require "json" # to_json is not defined on Hash until json is loaded

module Quiver
  # A named collection of notes that is written to disk as a
  # "<name>.qvnotebook" directory containing a meta.json file and one
  # sub-directory per note.
  class Notebook
    attr_reader :notes, :name, :title

    # @param name [String] used as the directory stem and as the uuid in meta.json
    # @param title [String] written as the "name" field in meta.json
    def initialize(name, title)
      @notes = []
      @name = name
      @title = title
    end

    # Appends a note. Notes are duck-typed: anything responding to
    # save_to_directory(directory) can be added.
    def add_note(note)
      @notes << note
    end

    # Creates "<base_directory>/<name>.qvnotebook", writes meta.json into it
    # and asks every note to save itself below that directory.
    #
    # @param base_directory [String] parent directory; a trailing slash is fine
    def save_to_directory(base_directory)
      # File.join avoids the doubled separator that plain string joining
      # produced for a base directory ending in "/".
      notebook_directory = File.join(base_directory, directory_name)

      FileUtils.mkdir_p(notebook_directory)
      File.write(File.join(notebook_directory, "meta.json"), meta_json)

      notes.each { |note| note.save_to_directory(notebook_directory) }
    end

    private

    def directory_name
      name + ".qvnotebook"
    end

    def meta_json
      { name: title, uuid: name }.to_json
    end
  end
end

# /lib/parsers/export_page_parser.rb
# Parses an export page, i.e.
# /wiki/Special:Export
#
# Turns the hash-like structure of a parsed export document into one plain
# Hash per page: { title:, created_at:, updated_at: (optional), content: }.
require "date"

class ExportPageParser
  # The parsed export document; must respond to ["mediawiki"].
  attr_reader :content

  # @param content [#[]] parsed export document
  def initialize(content)
    @content = content
  end

  # @return [Array<Hash>] one hash per exported page; empty when the export
  #   contains no page element
  def pages
    wrap(content["mediawiki"]["page"]).map { |page| parse_page(page) }
  end

  private

  # A repeated XML element arrives as an Array but a lone one arrives as a
  # bare value, so normalise both (and a missing element) to an Array.
  # Kernel#Array is not usable here: it would split a Hash into pairs.
  def wrap(value)
    case value
    when nil then []
    when Array then value
    else [value]
    end
  end

  # Builds the result hash for one page. :updated_at is only present when the
  # newest revision's timestamp differs from the oldest one's.
  def parse_page(page)
    revisions = wrap(page["revision"]).sort_by do |revision|
      DateTime.parse(revision["timestamp"])
    end

    oldest = revisions.first
    newest = revisions.last

    result = {
      title: page["title"],
      created_at: DateTime.parse(oldest["timestamp"])
    }

    if oldest["timestamp"] != newest["timestamp"]
      result[:updated_at] = DateTime.parse(newest["timestamp"])
    end

    result[:content] = revision_text(newest)
    result
  end

  # The text node is a Hash with a "__content__" key when the element carries
  # attributes; otherwise it is used as-is.
  def revision_text(revision)
    text = revision["text"]
    text.is_a?(Hash) ? text["__content__"] : text
  end
end

# /lib/quiver/extract_cells_from_markdown.rb
module Quiver
  # Splits Markdown into an ordered list of cells: text outside fenced blocks
  # goes into MarkdownCell objects, text inside ``` fences into CodeCell
  # objects. The result is available through #cells.
  class ExtractCellsFromMarkdown
    # A line containing three backticks toggles code mode; any non-space
    # characters directly after the backticks are captured as the language.
    FENCE = /```(?<language>\S+)?/

    attr_reader :cells, :content

    # @param content [String] Markdown source to split
    def initialize(content)
      @content = content

      generate_cells
    end

    private

    def generate_cells
      @cells = []
      @current_cell = nil

      content.to_s.split("\n").each do |line|
        # Fence lines only delimit cells; they are not part of any cell's text.
        next if set_correct_cell_for_line(line)

        current_cell.content += line + "\n"
      end
    end

    # Switches cells when +line+ is a fence.
    #
    # @return [Boolean] true when the line was a fence and has been consumed
    def set_correct_cell_for_line(line)
      match = line.match(FENCE)
      return false unless match

      if @current_cell.is_a?(CodeCell)
        # Closing fence: the following text lazily opens a new markdown cell,
        # so a document ending in a code block gets no empty trailing cell.
        @current_cell = nil
      else
        language = match["language"]

        # Without a language, rely on CodeCell's own default instead of nil.
        set_new_cell(language ? CodeCell.new(language) : CodeCell.new)
      end

      true
    end

    # The cell that receives the next line; opens a markdown cell on demand.
    def current_cell
      set_new_cell(MarkdownCell.new) if @current_cell.nil?

      @current_cell
    end

    def set_new_cell(cell)
      @cells << cell
      @current_cell = cell
    end
  end
end

# /LICENSE (kept verbatim):
#
#   The MIT License (MIT)
#
#   Copyright (c) 2016 David Verhasselt
#
#   Permission is hereby granted, free of charge, to any person obtaining a copy
#   of this software and associated documentation files (the "Software"), to deal
#   in the Software without restriction, including without limitation the rights
#   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#   copies of the Software, and to permit persons to whom the Software is
#   furnished to do so, subject to the following conditions:
#
#   The above copyright notice and this permission notice shall be included in all
#   copies or substantial portions of the Software.
#
#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#   SOFTWARE.
# /lib/quiver/note.rb
require "fileutils"
require "json" # to_json is not defined on Hash until json is loaded

require "quiver/cells/code_cell"
require "quiver/cells/markdown_cell"

module Quiver
  # A single note: an ordered list of cells plus identifying metadata. It is
  # written to disk as a "<uuid>.qvnote" directory holding meta.json and
  # content.json.
  class Note
    attr_reader :cells
    attr_accessor :uuid, :title, :created_at, :updated_at

    def initialize
      @cells = []
    end

    # Appends a cell; cells must respond to to_h.
    def add_cell(cell)
      @cells << cell
    end

    # Creates "<base_directory>/<uuid>.qvnote" and writes both JSON files.
    #
    # @param base_directory [String] parent directory of the note directory
    def save_to_directory(base_directory)
      note_directory = File.join(base_directory, directory_name)

      FileUtils.mkdir_p(note_directory)

      File.write(File.join(note_directory, "meta.json"), meta_json)
      File.write(File.join(note_directory, "content.json"), content_json)
    end

    private

    def directory_name
      uuid + ".qvnote"
    end

    # Timestamps are written as integer epoch seconds; updated_at falls back
    # to created_at when the note was never updated.
    def meta_json
      {
        created_at: created_at.to_time.to_i,
        updated_at: (updated_at || created_at).to_time.to_i,
        title: title,
        uuid: uuid
      }.to_json
    end

    def content_json
      {
        title: title,
        cells: cells.map(&:to_h)
      }.to_json
    end
  end
end

# /lib/scraper.rb
require "parsers/index_page_parser"
require "parsers/export_page_parser"

# Downloads the list of page titles from a wiki's all-pages index and fetches
# page exports for given titles over HTTP (via HTTParty).
class Scraper
  ALL_PAGES_PATH = "Special:Allpages?hideredirects=1".freeze
  EXPORT_PAGE_PATH = "Special:Export".freeze

  attr_reader :host, :subdir, :options

  # @param host [String] scheme and authority, e.g. "https://wiki.example.com"
  # @param subdir [String] path prefix of the wiki, ending in "/"
  # @param options [Hash] extra options passed to every HTTParty request
  def initialize(host, subdir, options = {})
    @host = host
    @subdir = subdir
    @options = options
  end

  # Walks the index, following each "next page" path until there is none.
  #
  # @return [Array<String>] all page titles found
  def page_titles
    titles = []
    page_index_path = subdir + ALL_PAGES_PATH

    while page_index_path
      page = get_page_index(page_index_path)
      titles.concat(page.page_titles)

      page_index_path = page.next_page_path
    end

    titles
  end

  # @param titles [Array<String>] titles to export in one request
  # @return [Array<Hash>] parsed pages as produced by ExportPageParser#pages
  def pages(titles)
    export_content = download_export(titles)

    ExportPageParser.new(export_content).pages
  end

  private

  def get_page_index(path)
    url = host + path

    puts "GET #{url}"
    content = HTTParty.get(url, options)

    IndexPageParser.new(content)
  end

  # Posts the newline-separated titles to the export page.
  def download_export(article_names)
    body = { pages: article_names.join("\n") }
    url = host + subdir + EXPORT_PAGE_PATH

    puts "\nPOST #{url}"
    HTTParty.post(url, options.merge(body: body))
  end
end

# /export_to_quiver.rb
require 'rubygems'
require 'bundler/setup'
Bundler.require(:default)

$:.unshift File.dirname(__FILE__) + "/lib"

require "securerandom" # SecureRandom.uuid is used below
require "uri"
require "scraper"
require "quiver/note" # Quiver::Note is instantiated directly below
require "quiver/notebook"
require "quiver/extract_cells_from_markdown"

# Converts MediaWiki markup to Markdown through pandoc.
def convert_to_markdown(mediawiki_content)
  PandocRuby.convert(mediawiki_content, from: :mediawiki, to: :markdown)
end

if ARGV.count != 1
  puts "Please pass in only the URL to your wiki."
  exit 1
end

uri = URI.parse(ARGV[0])

host = "#{uri.scheme}://#{uri.host}"
# Keep an explicit non-default port; without it a wiki served on e.g. :8080
# would be requested on the scheme's default port.
host += ":#{uri.port}" if uri.port && uri.port != uri.default_port
# Normalise to exactly one trailing slash whether or not the URL had one.
subdir = uri.path.chomp("/") + "/"

options = {
  # NOTE(review): this turns off TLS certificate verification for every
  # request. Kept for compatibility with self-signed wikis; consider making
  # it opt-in.
  verify: false
}

# USER is set by most shells to the login name, so only treat it as wiki
# credentials when PASSWORD is supplied as well.
if ENV["USER"] && ENV["PASSWORD"]
  puts "Found auth details for #{ENV["USER"]}"
  options[:basic_auth] = {
    username: ENV["USER"],
    password: ENV["PASSWORD"]
  }
end

scraper = Scraper.new(host, subdir, options)

puts "Downloading page titles..."
titles = scraper.page_titles

puts "Found #{titles.count} titles\n\n"
puts "Downloading page exports..."

notebook = Quiver::Notebook.new("exported", "Exported")

# Export in batches of 25 titles per request.
titles.each_slice(25) do |batch_titles|
  pages = scraper.pages(batch_titles)

  pages.each do |page|
    print "."
    note = Quiver::Note.new

    note.uuid = SecureRandom.uuid.upcase
    note.title = page[:title]
    note.created_at = page[:created_at]
    note.updated_at = page[:updated_at]

    # Best effort: a page pandoc cannot convert is reported and skipped so
    # one bad page does not abort the whole export.
    begin
      content = convert_to_markdown(page[:content])
    rescue
      puts "Error converting #{page[:title]}"
      next
    end

    Quiver::ExtractCellsFromMarkdown.new(content).cells.each do |cell|
      note.add_cell(cell)
    end

    notebook.add_note(note)
  end
end

notebook.save_to_directory("markdown-export/")