├── make-unsong.sh ├── README.md └── unsong_scraper.rb /make-unsong.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ruby unsong_scraper.rb > unsong.html || exit $? 4 | ebook-convert unsong.html unsong.mobi --authors "Scott Alexander" --title "Unsong" --max-toc-links 500 || exit $? 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Unsong Web Serial Scraper for Kindle 2 | 3 | I adapted a [Worm Web Serial scraper](https://github.com/rhelsing/worm_scraper) to make an ebook/Kindle version of Unsong, by Scott Alexander. You can now enjoy Unsong without all of the eye strain! 4 | 5 | ![Unsong Header](http://i.imgur.com/d9LvKMc.png) 6 | 7 | ## Download 8 | 9 | Download the ebook or run the scraper yourself. 10 | 11 | - [Generated .mobi](//jasongross.github.io/unsong_scraper/unsong.mobi) 12 | - [Generated .epub](//jasongross.github.io/unsong_scraper/unsong.epub) 13 | 14 | ## How to run: 15 | 16 | 1. Clone this project 17 | 2. Install dependencies 18 | 19 | ```command 20 | gem install uri 21 | gem install open-uri 22 | gem install nokogiri 23 | gem install parallel 24 | ``` 25 | 26 | 3. Run the script and write its output to an HTML file 27 | 28 | ```command 29 | ruby unsong_scraper.rb > unsong.html 30 | ``` 31 | 32 | 4. 
Convert (requires Calibre CLI) 33 | 34 | ```command 35 | ebook-convert unsong.html unsong.mobi --authors "Scott Alexander" --title "Unsong" --max-toc-links 500 36 | ``` 37 | -------------------------------------------------------------------------------- /unsong_scraper.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | require 'nokogiri' 4 | require 'open-uri' 5 | require 'uri' 6 | require 'parallel' 7 | 8 | toc_page = Nokogiri::HTML(open('http://unsongbook.com/')).css('.pjgm-postcontent') 9 | 10 | chapters = Parallel.map_with_index(toc_page.css('a'), :in_threads => 8) do |link, ind| 11 | index = ind - 1 12 | url = link['href'] 13 | next unless url =~ /\/prologue|\/epilogue|\/book|\/interlude|\/chapter/ 14 | unless url.ascii_only? 15 | url = URI.escape(url) 16 | end 17 | if url.to_s.start_with?("//") 18 | url = "https:" + url 19 | end 20 | doc = Nokogiri::HTML(open(url)) 21 | chapter_title = doc.css('h1.pjgm-posttitle').first 22 | 23 | #modify chapter to have link 24 | chapter_title_plain = chapter_title.content 25 | $stderr.puts chapter_title_plain 26 | chapter_content = doc.css('div.pjgm-postcontent').first #gsub first p 27 | #clean 28 | chapter_content.search('.//div').remove 29 | to_remove = doc.css('div.entry-content p').first #gsub first p 30 | chapter_content = chapter_content.to_s.gsub(to_remove.to_s,"") 31 | #write 32 | {"body" => "

#{chapter_title_plain}

" + chapter_content, 33 | "toc" => "#{chapter_title_plain}
"} 34 | end.select {|chapter| chapter} 35 | 36 | @toc = "

Table of Contents

" 37 | @book_body = "" 38 | 39 | chapters.each do |chapter| 40 | @book_body << chapter["body"] 41 | @toc << chapter["toc"] 42 | end 43 | 44 | $stderr.puts "Writing Book..." 45 | 46 | puts @toc 47 | puts @book_body 48 | --------------------------------------------------------------------------------