├── .gitignore ├── misc ├── rubygems_guides.rb ├── ruby_hacking_guide.rb ├── ecma_262_3rd_edition_jp.rb ├── clojure.rb ├── getting_real_to_json.pl ├── SmoothCoffeeScript_to_json.pl ├── sicp_to_json.pl └── perfect_guide_for_english_learning.pl ├── index.tx ├── ncx.tx ├── opf.tx ├── README.md └── webiblo.pl /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | tmp 3 | -------------------------------------------------------------------------------- /misc/rubygems_guides.rb: -------------------------------------------------------------------------------- 1 | %w(uri json nokogiri curb cgi).each {|g| require g } 2 | 3 | root = 'http://guides.rubygems.org' 4 | book = { 5 | :title => 'RubyGems Guides', 6 | :authors => %w( rubygems ), 7 | :cover_image => 'http://guides.rubygems.org/images/logo.png', 8 | :content_xpath => '//section[@id="chapters"]', 9 | :chapters => [] 10 | } 11 | 12 | def curl(url) 13 | c = Curl::Easy.new(url.to_s) 14 | c.follow_location = true 15 | c.perform 16 | c.body_str 17 | end 18 | 19 | doc = Nokogiri::HTML(curl(root)) 20 | doc.xpath('//section[@id="chapters"]/h2/a').each do |a| 21 | chapter_url = URI(root) + a[:href] 22 | chapter = { 23 | :uri => chapter_url, 24 | :title => a.text, 25 | } 26 | book[:chapters] << chapter 27 | end 28 | 29 | puts JSON.pretty_generate(book) 30 | -------------------------------------------------------------------------------- /misc/ruby_hacking_guide.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | %w(uri json nokogiri curb cgi).each {|g| require g } 3 | 4 | root = 'http://i.loveruby.net/ja/rhg/book/' 5 | book = { 6 | :title => 'Rubyソースコード完全解説', 7 | :authors => [ 'Minero Aoki' ], 8 | :cover_image => 'http://direct.ips.co.jp/directsys/Images/Goods/1/1721B.gif', 9 | :content_xpath => '//body', 10 | :chapters => [] 11 | } 12 | 13 | def curl(url) 14 | c = Curl::Easy.new(url.to_s) 15 | c.follow_location = 
true 16 | c.perform 17 | c.body_str 18 | end 19 | 20 | doc = Nokogiri::HTML(curl(root)) 21 | doc.xpath('//ul/li/a').each do |a| 22 | chapter_url = URI(root) + a[:href] 23 | chapter = { 24 | :uri => chapter_url, 25 | :title => a.text, 26 | } 27 | book[:chapters] << chapter 28 | end 29 | 30 | puts JSON.pretty_generate(book) 31 | -------------------------------------------------------------------------------- /misc/ecma_262_3rd_edition_jp.rb: -------------------------------------------------------------------------------- 1 | %w/uri json nokogiri curb cgi/.each {|g| require g } 2 | 3 | root = 'http://www2u.biglobe.ne.jp/~oz-07ams/prog/ecma262r3/fulltoc.html' 4 | book = { 5 | :title => 'ECMA-262 3rd edition', 6 | :authors => [ 7 | 'TAKI' 8 | ], 9 | :cover_image => 'http://www.ecma-international.org/images/logo_printerf.jpg', 10 | :content_xpath => '//div[@class="section level1"]', 11 | :chapters => [] 12 | } 13 | 14 | def curl(url) 15 | c = Curl::Easy.new(url.to_s) 16 | c.follow_location = true 17 | c.perform 18 | c.body_str 19 | end 20 | 21 | doc = Nokogiri::HTML(curl(root)) 22 | doc.xpath('//body/dl/dt/a').each do |a| 23 | chapter_url = URI(root) + a[:href] 24 | chapter = { 25 | :uri => chapter_url, 26 | :title => a.text, 27 | } 28 | book[:chapters] << chapter 29 | end 30 | 31 | puts book.to_json 32 | -------------------------------------------------------------------------------- /index.tx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | [% title %] 6 | 7 | 8 |

[% title %]

9 | 10 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /misc/clojure.rb: -------------------------------------------------------------------------------- 1 | %w/uri json nokogiri curb cgi/.each {|g| require g } 2 | 3 | root = 'http://clojure.org/' 4 | book = { 5 | :title => 'Clojure', 6 | :authors => [ 7 | 'Rich Hickey' 8 | ], 9 | :cover_image => 'http://clojure.org/space/showimage/clojure-icon.gif', 10 | :content_xpath => '//div[@id="content_view"]', 11 | :exclude_xpath => '//div[@id="toc"]', 12 | :chapters => [] 13 | } 14 | 15 | def curl(url) 16 | c = Curl::Easy.new(url.to_s) 17 | c.follow_location = true 18 | c.perform 19 | c.body_str 20 | end 21 | 22 | def sections(url) 23 | sections = [] 24 | doc = Nokogiri::HTML(curl(url)) 25 | doc.xpath('//div[@id="toc"]//a').each do |a| 26 | if a[:href] =~ /^#/ 27 | sections << { 28 | :uri => url.to_s + a[:href], 29 | :title => a.text 30 | } 31 | end 32 | end 33 | 34 | sections 35 | end 36 | 37 | doc = Nokogiri::HTML(curl(root)) 38 | doc.xpath('//div[@class="WikiCustomNav WikiElement wiki"]//a').each do |a| 39 | next if a[:href] =~ /^http/ 40 | chapter_url = URI(root) + a[:href] 41 | chapter = { 42 | :uri => chapter_url, 43 | :title => a.text, 44 | #:sections => sections(chapter_url) 45 | } 46 | book[:chapters] << chapter 47 | end 48 | 49 | puts book.to_json 50 | -------------------------------------------------------------------------------- /ncx.tx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | [% title %] 5 | 6 | 7 | [% title %] - Table of Contents 8 | 9 | [% num = 1 -%] 10 | 11 | [% FOREACH part IN parts -%] 12 | [% IF part.href -%] 13 | 14 | [% part.title %] 15 | 16 | [% num = num + 1 -%] 17 | [% END -%] 18 | [% FOREACH chapter IN part.chapters -%] 19 | [% IF chapter.href -%] 20 | 21 | [% chapter.title %] 22 | 23 | [% num = num + 1 -%] 24 | [% END -%] 25 | [% FOREACH section IN chapter.sections -%] 26 | 27 | [% 
section.title %] 28 | 29 | [% num = num + 1 -%] 30 | [% END -%] 31 | [% END -%] 32 | [% END -%] 33 | 34 | 35 | -------------------------------------------------------------------------------- /misc/getting_real_to_json.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use JSON::XS; 7 | use LWP::Simple; 8 | use HTML::TreeBuilder::XPath; 9 | use YAML; 10 | 11 | 12 | my $base = 'http://gettingreal.37signals.com'; 13 | my $tree = HTML::TreeBuilder::XPath->new; 14 | $tree->parse(get("$base/toc.php")); 15 | 16 | my $chapters = []; 17 | for my $chapter ( $tree->findnodes('//h2') ) { 18 | my @contents = $chapter->content_list; 19 | my $name = $contents[0]->attr('name'); 20 | next if !$name or $name !~ /^ch\d+/; 21 | my $title = $contents[1]; 22 | $title =~ s/\s+$//; 23 | 24 | my $sections = []; 25 | for my $section ( $chapter->right->findnodes('li/a') ) { 26 | push @$sections, { 27 | uri => $base . '/' . 
$section->attr('href'), 28 | title => $section->as_text, 29 | }; 30 | } 31 | 32 | push @$chapters, { 33 | title => $title, 34 | sections => $sections, 35 | }; 36 | } 37 | 38 | my $json = JSON::XS->new; 39 | $json->indent(1); 40 | 41 | print $json->encode({ 42 | title => 'Getting Real', 43 | authors => ['37signals'], 44 | date => '2012/1/9', 45 | chapters => $chapters, 46 | content_xpath => q{//div[@class="content"]}, 47 | exclude_xpath => q{//div[@class="next"]}, 48 | cover_image => 'http://ec2.images-amazon.com/images/I/31jvYr2h6GL._SS500_.jpg', 49 | }); 50 | 51 | exit; 52 | -------------------------------------------------------------------------------- /misc/SmoothCoffeeScript_to_json.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | use LWP::Simple; 6 | use HTML::TreeBuilder::XPath; 7 | use JSON::XS; 8 | 9 | my $base = 'http://autotelicum.github.com/Smooth-CoffeeScript/SmoothCoffeeScript.html'; 10 | 11 | my $content = get('http://autotelicum.github.com/Smooth-CoffeeScript/SmoothCoffeeScript.html'); 12 | 13 | my $tree = HTML::TreeBuilder::XPath->new; 14 | $tree->no_expand_entities(1); 15 | $tree->parse($content); 16 | $tree->eof; 17 | 18 | my $book = { 19 | title => ($tree->findnodes('//title'))[0]->as_text, 20 | author => ($tree->findnodes('//div[@class="author"]'))[0]->as_text, 21 | cover_image => 'http://autotelicum.github.com/Smooth-CoffeeScript/img/SmoothCoverWithSolutions.jpg', 22 | }; 23 | 24 | my $parts = []; 25 | 26 | for my $a ( $tree->findnodes('//a[@class="Link"]') ) { 27 | my $href = $a->attr('href'); 28 | my $title = $a->as_text; 29 | 30 | next if $href !~ /^#toc/ and $href !~ /^#Index/; 31 | 32 | if ( $href =~ /^#toc-Part/ or $href eq '#Index' ) { 33 | push @$parts, { 34 | title => $title, 35 | uri => $base . 
$href, 36 | chapters => [], 37 | }; 38 | } 39 | elsif ( $href =~ /^#toc-Chapter/ or $href =~ /^#toc-Appendix/ ) { 40 | push @{ $parts->[-1]->{chapters} }, { 41 | title => $title, 42 | uri => $base. $href, 43 | sections => [], 44 | }; 45 | } 46 | elsif ( $href =~ /^#toc-Section/ ) { 47 | push @{ $parts->[-1]->{chapters}->[-1]->{sections} }, { 48 | title => $title, 49 | uri => $base. $href, 50 | subsections => [], 51 | }; 52 | } 53 | elsif ( $href =~ /^#toc-Subsection/ ) { 54 | push @{ $parts->[-1]->{chapters}->[-1]->{sections}->[-1]->{subsections} }, { 55 | title => $title, 56 | uri => $base. $href, 57 | }; 58 | } 59 | } 60 | 61 | $book->{parts} = $parts; 62 | 63 | my $json = JSON::XS->new; 64 | $json->indent(1); 65 | 66 | print $json->encode($book); 67 | -------------------------------------------------------------------------------- /misc/sicp_to_json.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | use LWP::Simple; 6 | use HTML::TreeBuilder::XPath; 7 | use JSON::XS; 8 | 9 | my $base = 'http://mitpress.mit.edu/sicp/full-text/book'; 10 | 11 | my $book = { 12 | title => 'Structure and Interpretation of Computer Programs', 13 | authors => [ 'Harold Abelson', 'Gerald Jay Sussman', 'Julie Sussman' ], 14 | cover_image => 'http://mitpress.mit.edu/sicp/full-text/book/cover.jpg', 15 | exclude_xpath => q{//div[@class="navigation"]}, 16 | }; 17 | 18 | my $chapters = []; 19 | 20 | my $contents = get('http://mitpress.mit.edu/sicp/full-text/book/book-Z-H-4.html'); 21 | 22 | my $tree = HTML::TreeBuilder::XPath->new; 23 | $tree->parse($contents); 24 | $tree->eof; 25 | 26 | for my $a ( $tree->findnodes('//a') ) { 27 | my $name = $a->attr('name'); 28 | next if !$name or $name !~ /\%_toc/ or $name eq '%_toc_start'; 29 | 30 | my $text = $a->as_text; 31 | $text =~ s/\240/ /g; 32 | my $href = "$base/" . 
$a->attr('href'); 33 | 34 | if ( $name =~ /^\%_toc_\%_chap_Temp/ ) { 35 | push @$chapters, { 36 | title => $text, 37 | uri => $href, 38 | }; 39 | next; 40 | } 41 | # entering the chapter 42 | elsif ( $name =~ /^\%_toc_\%_chap_\d$/ ) { 43 | push @$chapters, { 44 | title => $text, 45 | uri => $href, 46 | sections => [], 47 | }; 48 | } 49 | # entering the section 50 | elsif ( $name =~ /\%_toc_\%_sec_\d\.\d$/ ) { 51 | push @{ $chapters->[-1]->{sections} }, { 52 | title => $text, 53 | uri => $href, 54 | subsections => [], 55 | }; 56 | } 57 | # entring the subsection 58 | elsif ( $name =~ /\%_toc_\%_sec_\d\.\d\.\d$/ ) { 59 | push @{ $chapters->[-1]->{sections}->[-1]->{subsections} }, { 60 | title => $text, 61 | uri => $href, 62 | }; 63 | } 64 | } 65 | 66 | $book->{chapters} = $chapters; 67 | 68 | my $json = JSON::XS->new; 69 | $json->indent(1); 70 | 71 | print $json->encode($book); 72 | 73 | -------------------------------------------------------------------------------- /opf.tx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | [% title %] 7 | en-us 8 | [% FOREACH author IN authors -%] 9 | [% author %] 10 | [% END -%] 11 | [% description %] 12 | [% date %] 13 | 14 | 15 | 16 | [% cover_file %] 17 | 18 | 19 | 20 | 21 | 22 | [% num = 1 -%] 23 | [% FOREACH part IN parts -%] 24 | [% IF part.file -%] 25 | 26 | [% num = num + 1 -%] 27 | [% END -%] 28 | [% FOREACH chapter IN part.chapters -%] 29 | [% IF chapter.file -%] 30 | 31 | [% num = num + 1 -%] 32 | [% END -%] 33 | [% FOREACH section IN chapter.sections -%] 34 | 35 | [% num = num + 1 -%] 36 | [% END -%] 37 | [% END -%] 38 | [% END -%] 39 | 40 | 41 | [% num = 1 -%] 42 | [% FOREACH part IN parts -%] 43 | [% FOREACH chapter IN part.chapters -%] 44 | [% FOREACH section IN chapter.sections -%] 45 | 46 | [% num = num + 1 -%] 47 | [% END -%] 48 | [% END -%] 49 | [% END -%] 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Webiblo - web to ebook project 2 | 3 | --------------------------------------- 4 | 5 | ## Overview 6 | 7 | Webiblo is a project to convert web sites to ebooks. 8 | 9 | You can convert the Getting Real web data to Mobipocket format like this: 10 | 11 | $ webiblo.pl http://mizzy.org/webiblo/data/Getting_Real.json 12 | 13 | 14 | --------------------------------------- 15 | 16 | ## JSON data format 17 | 18 | The JSON data format used to convert web data to an ebook looks like this: 19 | 20 | { 21 | "title" : "Structure and Interpretation of Computer Programs", 22 | "authors" : [ 23 | "Harold Abelson", 24 | "Gerald Jay Sussman", 25 | "Julie Sussman" 26 | ], 27 | "cover_image" : "http://mitpress.mit.edu/sicp/full-text/book/cover.jpg", 28 | "content_xpath" : "//div[@class=\"content\"]", # Optional 29 | "exclude_xpath" : "//div[@class=\"navigation\"]", # Optional 30 | "chapters" : [ 31 | { 32 | "title" : "Foreword", 33 | "uri" : "http://mitpress.mit.edu/sicp/full-text/book/book-Z-H-5.html#%_chap_Temp_2" 34 | }, 35 | { 36 | "title" : "1 Building Abstractions with Procedures", 37 | "uri" : "http://mitpress.mit.edu/sicp/full-text/book/book-Z-H-9.html#%_chap_1", 38 | "sections" : [ { 39 | "title" : "1.1 The Elements of Programming", 40 | "uri" : "http://mitpress.mit.edu/sicp/full-text/book/book-Z-H-10.html#%_sec_1.1", 41 | "subsections" : [ 42 | { 43 | "title" : "1.1.1 Expressions", 44 | "uri" : "http://mitpress.mit.edu/sicp/full-text/book/book-Z-H-10.html#%_sec_1.1.1" 45 | } 46 | ] 47 | } ] 48 | } 49 | ] 50 | } 51 | 52 | Here are some examples: 
53 | 54 | * http://mizzy.org/webiblo/data/Getting_Real.json 55 | * http://mizzy.org/webiblo/data/SICP.json 56 | * http://mizzy.org/webiblo/data/SmoothCoffeeScript.json 57 | 58 | --------------------------------------- 59 | 60 | ## Try your own JSON data 61 | 62 | webiblo.pl takes JSON data from STDIN, so you can run webiblo.pl like this: 63 | 64 | $ cat data.json | webiblo.pl 65 | 66 | 67 | ## Share your JSON data 68 | 69 | JSON data are put on [gh-pages branch](https://github.com/mizzy/webiblo/tree/gh-pages) and shared on [GitHub Pages](http://mizzy.org/webiblo/). 70 | 71 | If you create a JSON data for webiblo, please send me pull requests. 72 | 73 | --------------------------------------- 74 | 75 | ## TODO 76 | 77 | * Support formats other than mobipocket. (eg. EPUB3) 78 | * Search JSON catalog from CLI. -------------------------------------------------------------------------------- /misc/perfect_guide_for_english_learning.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | #use utf8; 6 | use Web::Query qw(wq); 7 | use Data::Section::Simple qw(get_data_section); 8 | use JSON::XS qw(encode_json); 9 | use YAML::Syck; 10 | 11 | my $sec_i = 0; 12 | 13 | my $meta = { 14 | title => '英語上達完全マップ', 15 | author => '森沢 洋介', 16 | cover_image => 'http://images-jp.amazon.com/images/P/4860641027.01.LZZZZZZZ.jpg', 17 | content_xpath => '//body/table//tr[count(preceding-sibling::*) = 4 and parent::*]', 18 | exclude_xpath => '//body/table//tr[count(preceding-sibling::*) = 3 and parent::*]', 19 | chapters => [], 20 | }; 21 | 22 | my $data = YAML::Syck::Load do { local $/; }; 23 | 24 | for (my $i = 0; ; $i++) { 25 | my $data = $data->[$i] or last; 26 | my ($title, $uri, $sec) = @{$data}; 27 | my $chapter = {}; 28 | $chapter->{title} = $title; 29 | $chapter->{uri} = $uri if $uri; 30 | $chapter->{sections} = do { 31 | my @secs; 32 | while (my ($title, $uri) = splice @{$sec}, 0, 2) { 33 
| push @secs, { 34 | title => $title, 35 | ($uri ? ('uri', $uri) : ()), 36 | }; 37 | } 38 | \@secs; 39 | }; 40 | push @{$meta->{chapters}}, $chapter; 41 | } 42 | 43 | my $json = JSON::XS->new; 44 | $json->indent(1); 45 | print $json->encode($meta); 46 | 47 | __DATA__ 48 | --- 49 | - 50 | - はじめに 51 | - http://homepage3.nifty.com/mutuno/01_first/01_first.html 52 | - 53 | - 英語のテスト特にTOEICについて 54 | - http://homepage3.nifty.com/mutuno/02_toeic/02_toeic.html 55 | - 56 | - 英語は日本で上達する 57 | - http://homepage3.nifty.com/mutuno/03_japan/03_japan.html 58 | - 59 | - 英語力を解剖する 60 | - http://homepage3.nifty.com/mutuno/04_dissect/04_dissect.html 61 | - 62 | - 英語トレーニング法 63 | - http://homepage3.nifty.com/mutuno/05_training/05_training.html 64 | - 65 | - 音読パッケージ 66 | - HTtp://homepage3.nifty.com/mutuno/05_training/05_training01.html 67 | - 瞬間英作文 68 | - HTtp://homepage3.nifty.com/mutuno/05_training/05_training02.html 69 | - 文法 70 | - HTtp://homepage3.nifty.com/mutuno/05_training/05_training03.html 71 | - 精読 72 | - HTtp://homepage3.nifty.com/mutuno/05_training/05_training04.html 73 | - 多読(速読) 74 | - HTtp://homepage3.nifty.com/mutuno/05_training/05_training05.html 75 | - 語彙増強=ボキャビル 76 | - HTtp://homepage3.nifty.com/mutuno/05_training/05_training06.html 77 | - リスニング 78 | - HTtp://homepage3.nifty.com/mutuno/05_training/05_training07.html 79 | - 会話 80 | - HTtp://homepage3.nifty.com/mutuno/05_training/05_training08.html 81 | - 82 | - トレーニングの進め方 83 | - http://homepage3.nifty.com/mutuno/06_case/06_case.html 84 | - 85 | - 標準ケース 86 | - http://homepage3.nifty.com/mutuno/06_case/06_case01.html 87 | - 実例ケース 88 | - http://homepage3.nifty.com/mutuno/06_case/06_case02.html 89 | - 目的・タイプ別ケース 90 | - http://homepage3.nifty.com/mutuno/06_case/06_case03.html 91 | - 92 | - トレーニングを継続するために 93 | - http://homepage3.nifty.com/mutuno/07_continue/07_continue.html 94 | - 95 | - おすすめ教材集 96 | - http://homepage3.nifty.com/mutuno/08_book/08_book.html 97 | - 98 | - アドバイス集 99 | - 
http://homepage3.nifty.com/mutuno/09_advice/09_advice.html 100 | - 101 | - Q & A 102 | - http://homepage3.nifty.com/mutuno/10_QA/10_QA.html 103 | - 104 | - 教室案内 105 | - http://homepage3.nifty.com/mutuno/11_school/11_school.html 106 | - 107 | - リンク 108 | - http://homepage3.nifty.com/mutuno/12_link/12_link.html 109 | -------------------------------------------------------------------------------- /webiblo.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | use JSON::Syck; 6 | use LWP::Simple; 7 | use URI; 8 | use HTML::TreeBuilder::XPath; 9 | use Text::Xslate; 10 | use Image::Resize; 11 | 12 | my $style = HTML::Element->new('style'); 13 | $style->attr('type', 'text/css'); 14 | $style->push_content(< }); 23 | 24 | mkdir 'tmp' unless -d 'tmp'; 25 | mkdir 'out' unless -d 'out'; 26 | 27 | # Get cover image 28 | if ( $book->{cover_image} ) { 29 | my $uri = URI->new($book->{cover_image}); 30 | my $file = ($uri->path_segments)[-1]; 31 | mirror($uri, "out/$file") unless -f "out/$file"; 32 | $book->{cover_file} = $file; 33 | my $image = Image::Resize->new("out/$file"); 34 | my $gd = $image->resize(600, 800); 35 | open my $out, '>', "out/$file" or die $!; 36 | print $out $gd->jpeg; 37 | close $out; 38 | } 39 | 40 | $book->{parts}->[0]->{chapters} = $book->{chapters} unless $book->{parts}; 41 | 42 | for my $part ( @{ $book->{parts} } ) { 43 | get_content($part); 44 | for my $chapter ( @{ $part->{chapters} } ) { 45 | get_content($chapter); 46 | for my $section ( @{ $chapter->{sections} } ) { 47 | get_content($section); 48 | for my $subsection ( @{ $section->{subsections} } ) { 49 | get_content($subsection); 50 | } 51 | } 52 | } 53 | } 54 | 55 | my $tx = Text::Xslate->new( syntax => 'TTerse' ); 56 | 57 | warn "Writing index.html ...\n"; 58 | open my $out, '>', 'out/index.html' or die $!; 59 | print $out $tx->render('index.tx', $book); 60 | close $out; 61 | 62 | warn "Writing toc.ncx 
...\n"; 63 | open $out, '>', 'out/toc.ncx' or die $!; 64 | print $out $tx->render('ncx.tx', $book); 65 | close $out; 66 | 67 | my $book_title = $book->{title}; 68 | $book_title =~ s/\s/_/g; 69 | 70 | set_startup_page($book); 71 | 72 | warn "Writing ${book_title}.opf ...\n"; 73 | open $out, '>', "out/${book_title}.opf" or die $!; 74 | print $out $tx->render('opf.tx', $book); 75 | close $out; 76 | 77 | warn "Executing kindlegen ...\n"; 78 | `kindlegen out/${book_title}.opf`; 79 | 80 | exit; 81 | 82 | sub get_content { 83 | my $object = shift; 84 | return if !$object->{uri}; 85 | 86 | my $uri = URI->new($object->{uri}); 87 | my $file = ($uri->path_segments)[-1]; 88 | my $fragment = $uri->fragment; 89 | 90 | $file =~ s/\..+/.html/ unless $file =~ /\.html$/; 91 | $file .= '.html' unless $file =~ /\.html$/; # add .html if no extension 92 | $object->{file} = $file; 93 | $object->{href} = $fragment ? "$file#$fragment" : $file; 94 | 95 | return if -f "tmp/$file"; 96 | 97 | warn "Getting $object->{title} ...\n"; 98 | mirror($uri, "tmp/$file"); 99 | 100 | my $tree = HTML::TreeBuilder::XPath->new; 101 | $tree->no_expand_entities(1); 102 | $tree->parse_file("tmp/$file"); 103 | $tree->eof; 104 | 105 | if ( $book->{content_xpath} ) { 106 | my $content = ($tree->findnodes($book->{content_xpath}))[0]->as_XML; 107 | my $meta = join '', map { $_->as_XML } $tree->findnodes('//head/meta'); 108 | $tree = HTML::TreeBuilder::XPath->new; 109 | $tree->no_expand_entities(1); 110 | $tree->parse(<<"HTML"); 111 | 112 | 113 | $meta 114 | 115 | 116 | $content 117 | 118 | 119 | HTML 120 | $tree->eof; 121 | } 122 | 123 | if ( $book->{exclude_xpath} ) { 124 | my @excludes = ($tree->findnodes($book->{exclude_xpath})); 125 | for my $exclude ( @excludes ) { 126 | $exclude->detach; 127 | } 128 | } 129 | 130 | my @links = $tree->findnodes('//link[@rel="stylesheet"]'); 131 | for my $link ( @links ) { 132 | warn "Getting $uri ...\n"; 133 | my $href = $link->attr('href'); 134 | my $base = $uri->as_string; 
135 | $base =~ s{/[^/]+$}{}; 136 | $href = "$base/$href" if $href !~ m!^https?://!; 137 | my $file = (URI->new($href)->path_segments)[-1]; 138 | mirror($href, "out/$file") unless -f "out/$file"; 139 | } 140 | 141 | if ( ! scalar @links ) { 142 | my $head = ($tree->findnodes('/html/head'))[0]; 143 | $head->push_content($style) 144 | }; 145 | 146 | my @images = $tree->findnodes('//img'); 147 | for my $image ( @images ) { 148 | warn "Getting $uri ...\n"; 149 | my $src = $image->attr('src'); 150 | my $base = $uri->as_string; 151 | $base =~ s{/[^/]+$}{}; 152 | $src = "$base/$src" if $src !~ m!^https?://!; 153 | my $file = (URI->new($src)->path_segments)[-1]; 154 | mirror($src, "out/$file") unless -f "out/$file"; 155 | $image->attr('src', $file); 156 | } 157 | 158 | open my $out, '>', "out/$file" or die $!; 159 | print $out $tree->as_XML; 160 | close $out; 161 | } 162 | 163 | # Choose the startup page: walk parts, chapters, sections and subsections depth-first and store the href of the first entry that has one in $book->{startup_page}. Leaves startup_page unset when no entry has an href. 164 | sub set_startup_page { 165 | my $book = shift; 166 | 167 | for my $part ( @{ $book->{parts} } ) { 168 | if ( $part->{href} ) { 169 | $book->{startup_page} = $part->{href}; 170 | return; 171 | } 172 | for my $chapter ( @{ $part->{chapters} } ) { 173 | if ( $chapter->{href} ) { 174 | $book->{startup_page} = $chapter->{href}; 175 | return; 176 | } 177 | # A chapter with no page of its own (only its sections carry a uri) must still have its sections searched, so this loop lives outside the chapter-href check. 178 | for my $section ( @{ $chapter->{sections} } ) { 179 | if ( $section->{href} ) { 180 | $book->{startup_page} = $section->{href}; 181 | return; 182 | } 183 | for my $subsection ( @{ $section->{subsections} } ) { 184 | if ( $subsection->{href} ) { 185 | $book->{startup_page} = $subsection->{href}; 186 | return; 187 | } 188 | } 189 | } 190 | } 191 | } 192 | } 193 | --------------------------------------------------------------------------------