├── .document
├── .gitignore
├── .rspec
├── CHANGELOG.md
├── Gemfile
├── Gemfile.lock
├── LICENSE.txt
├── README.md
├── Rakefile
├── VERSION
├── lib
│   ├── mobi.rb
│   └── mobi
│       ├── header
│       │   ├── exth_header.rb
│       │   ├── mobi_header.rb
│       │   └── palm_doc_header.rb
│       ├── metadata.rb
│       ├── metadata_streams.rb
│       └── stream_slicer.rb
├── mobi.gemspec
└── spec
    ├── fixtures
    │   └── sherlock.mobi
    ├── lib
    │   ├── mobi
    │   │   ├── header
    │   │   │   ├── exth_header_spec.rb
    │   │   │   ├── mobi_header_spec.rb
    │   │   │   └── palm_doc_header_spec.rb
    │   │   ├── metadata_spec.rb
    │   │   └── stream_slicer_spec.rb
    │   └── mobi_spec.rb
    └── spec_helper.rb
/.document:
--------------------------------------------------------------------------------
1 | lib/**/*.rb
2 | bin/*
3 | -
4 | features/**/*.feature
5 | LICENSE.txt
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # rcov generated
2 | coverage
3 |
4 | # rdoc generated
5 | rdoc
6 |
7 | # yard generated
8 | doc
9 | .yardoc
10 |
11 | # bundler
12 | .bundle
13 |
14 | # jeweler generated
15 | pkg
16 |
17 | # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
18 | #
19 | # * Create a file at ~/.gitignore
20 | # * Include files you want ignored
21 | # * Run: git config --global core.excludesfile ~/.gitignore
22 | #
23 | # After doing this, these files will be ignored in all your git projects,
24 | # saving you from having to 'pollute' every project you touch with them
25 | #
26 | # Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember, remove the leading # of the line)
27 | #
28 | # For MacOS:
29 | #
30 | #.DS_Store
31 | #
32 | # For TextMate
33 | #*.tmproj
34 | #tmtags
35 | #
36 | # For emacs:
37 | #*~
38 | #\#*
39 | #.\#*
40 | #
41 | # For vim:
42 | #*.swp
43 |
--------------------------------------------------------------------------------
/.rspec:
--------------------------------------------------------------------------------
1 | --color
2 | --format documentation
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## 1.0.0
4 |
5 | * No implementation changes. Bumping to 1.0 because there have been no issues raised for over a year, and just because I can!
6 | * Update RSpec.
7 | * Update Bundler.
8 |
9 | ## 0.2.0
10 |
11 | * Extract EXTH header data into its own class.
12 | * Parse Mobi header data.
13 | * Parse PalmDOC header data.
14 | * Large internals refactoring, and now with added tests!
15 |
16 | ## 0.1.2
17 |
18 | * Trying to instantiate Mobi::Metadata with an invalid mobi file now raises an InvalidMobi exception
19 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
# Fetch gems over TLS: a plain-HTTP source can be tampered with in transit
# and Bundler warns about it.
source 'https://rubygems.org'

# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem 'rspec',   '~> 3.1.0'
  gem 'rr',      '~> 1.1.0'
  gem 'bundler', '~> 1.7.0'
  gem 'jeweler', '~> 2.0.0'
end
14 |
--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
1 | GEM
2 | remote: http://rubygems.org/
3 | specs:
4 | addressable (2.3.6)
5 | builder (3.2.2)
6 | descendants_tracker (0.0.4)
7 | thread_safe (~> 0.3, >= 0.3.1)
8 | diff-lcs (1.2.5)
9 | faraday (0.9.0)
10 | multipart-post (>= 1.2, < 3)
11 | git (1.2.8)
12 | github_api (0.12.2)
13 | addressable (~> 2.3)
14 | descendants_tracker (~> 0.0.4)
15 | faraday (~> 0.8, < 0.10)
16 | hashie (>= 3.3)
17 | multi_json (>= 1.7.5, < 2.0)
18 | nokogiri (~> 1.6.3)
19 | oauth2
20 | hashie (3.3.1)
21 | highline (1.6.21)
22 | jeweler (2.0.1)
23 | builder
24 | bundler (>= 1.0)
25 | git (>= 1.2.5)
26 | github_api
27 | highline (>= 1.6.15)
28 | nokogiri (>= 1.5.10)
29 | rake
30 | rdoc
31 | json (1.8.1)
32 | jwt (1.0.0)
33 | mini_portile (0.6.1)
34 | multi_json (1.10.1)
35 | multi_xml (0.5.5)
36 | multipart-post (2.0.0)
37 | nokogiri (1.6.4.1)
38 | mini_portile (~> 0.6.0)
39 | oauth2 (1.0.0)
40 | faraday (>= 0.8, < 0.10)
41 | jwt (~> 1.0)
42 | multi_json (~> 1.3)
43 | multi_xml (~> 0.5)
44 | rack (~> 1.2)
45 | rack (1.5.2)
46 | rake (10.3.2)
47 | rdoc (4.1.2)
48 | json (~> 1.4)
49 | rr (1.1.2)
50 | rspec (3.1.0)
51 | rspec-core (~> 3.1.0)
52 | rspec-expectations (~> 3.1.0)
53 | rspec-mocks (~> 3.1.0)
54 | rspec-core (3.1.7)
55 | rspec-support (~> 3.1.0)
56 | rspec-expectations (3.1.2)
57 | diff-lcs (>= 1.2.0, < 2.0)
58 | rspec-support (~> 3.1.0)
59 | rspec-mocks (3.1.3)
60 | rspec-support (~> 3.1.0)
61 | rspec-support (3.1.2)
62 | thread_safe (0.3.4)
63 |
64 | PLATFORMS
65 | ruby
66 |
67 | DEPENDENCIES
68 | bundler (~> 1.7.0)
69 | jeweler (~> 2.0.0)
70 | rr (~> 1.1.0)
71 | rspec (~> 3.1.0)
72 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2011 jkongie
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # mobi
4 |
5 | A Ruby Gem that reads MOBI metadata.
6 |
7 | ## Ruby
8 |
9 | Compatible up to ruby 2.0.0p0.
10 |
11 | I haven't tested beyond that version, but it should work.
12 |
13 | ## Installation
14 |
15 | `gem install mobi`
16 |
17 | ## Usage
18 |
19 | Creating a Mobi::Metadata object
20 |
21 | ```ruby
22 | Mobi::Metadata.new(File.open('/path/to/file.mobi'))
23 | ```
24 |
25 | A handy convenience method to do the exact same thing
26 |
27 | ```ruby
28 | Mobi.metadata File.open('/path/to/file.mobi')
29 | ```
30 |
31 | Getting metadata information is as simple as:
32 |
33 | ```ruby
34 | metadata = Mobi.metadata File.open('/path/to/fellowship_of_the_ring.mobi')
35 | #=> #<Mobi::Metadata:0x...>
36 | metadata.author
37 | #=> "J.R.R. Tolkien"
38 | ```
39 |
40 | Supported metadata options are:
41 |
42 | * asin
43 | * author
44 | * contributor
45 | * description
46 | * imprint
47 | * isbn
48 | * publisher
49 | * published_at
50 | * review
51 | * rights
52 | * source
53 | * subject
54 | * subject_code
55 | * title
56 | * type
57 | * version
58 |
59 | You can also drill down and get the PalmDOC and MOBI header details:
60 |
61 | ```ruby
62 | palm_doc_header = metadata.palm_doc_header
63 | #=> #<Mobi::Header::PalmDocHeader:0x...>
64 | palm_doc_header.raw_compression_type
65 | #=> 2
66 | palm_doc_header.compression_type
67 | #=> "PalmDOC"
68 |
69 | mobi_header = metadata.mobi_header
70 | #=> #<Mobi::Header::MobiHeader:0x...>
71 | mobi_header.raw_mobi_type
72 | #=> 2
73 | mobi_header.mobi_type
74 | #=> "MOBIpocket Book"
75 | ```
76 |
77 | See the source for more methods.
78 |
79 | ## Thanks
80 |
81 | * Calibre open source project. I ripped off the idea of a Stream Slicer and got a better understanding of MOBI files from their code base. Check them out at http://calibre-ebook.com
82 |
83 | ## Contributing to mobi
84 |
85 | * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
86 | * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
87 | * Fork the project
88 | * Start a feature/bugfix branch
89 | * Commit and push until you are happy with your contribution
90 | * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
91 | * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
92 |
93 | ## Copyright
94 |
95 | Copyright (c) 2011 jkongie. See LICENSE.txt for further details.
96 |
97 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
require 'rubygems'
require 'bundler'

# Fail fast, with an actionable hint, when the bundle is incomplete.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

require 'rake'
require 'jeweler'
require 'rspec/core/rake_task'
require 'rdoc/task'

Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "mobi"
  gem.homepage = "http://github.com/jkongie/mobi"
  gem.license = "MIT"
  gem.summary = %Q{A Rubygem that inspects MOBI metadata.}
  gem.description = %Q{Mobi is a Rubygem that allows you to easily read MOBI e-book format metadata.}
  gem.email = "jkongie@gmail.com"
  gem.authors = ["jkongie"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  # gem.add_runtime_dependency 'jabber4r', '> 0.1'
  # gem.add_development_dependency 'rspec', '> 1.2.3'
end
Jeweler::RubygemsDotOrgTasks.new

# `rake spec` runs the RSpec suite under spec/.
RSpec::Core::RakeTask.new(:spec)

# The default task used to depend on a :test task that was never defined,
# so a bare `rake` aborted. Run the specs instead.
task :default => :spec

Rake::RDocTask.new do |rdoc|
  # Strip so a trailing newline in VERSION does not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "mobi #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
40 |
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 1.0.0
--------------------------------------------------------------------------------
/lib/mobi.rb:
--------------------------------------------------------------------------------
1 | require 'mobi/stream_slicer'
2 | require 'mobi/metadata'
3 | require 'mobi/metadata_streams'
4 | require 'mobi/header/palm_doc_header'
5 | require 'mobi/header/mobi_header'
6 | require 'mobi/header/exth_header'
7 |
# Public: Top-level namespace of the gem.
module Mobi
  class << self
    # Public: Convenience wrapper that builds a Mobi::Metadata.
    #
    # file - The object handed unchanged to Mobi::Metadata.new (the README
    #        passes an open File).
    #
    # Returns a Mobi::Metadata.
    def metadata(file)
      Mobi::Metadata.new(file)
    end
  end
end
--------------------------------------------------------------------------------
/lib/mobi/header/exth_header.rb:
--------------------------------------------------------------------------------
module Mobi
  module Header
    # Public: Parses the EXTH header, a list of tagged metadata records.
    #
    # Layout as read by this class (integers are big-endian):
    #
    #   bytes 0-3   identifier, expected to be "EXTH"
    #   bytes 8-11  number of records
    #   byte 12 on  the records: 4-byte type code, 4-byte total length
    #               (including these 8 bytes), then the value
    #
    # Each record type listed in RECORD_TYPES is exposed through a reader of
    # the same name; a reader returns nil when the record is absent.
    class ExthHeader
      RECORD_TYPES = { 100 => :author, 101 => :publisher, 102 => :imprint, 103 => :description, 104 => :isbn, 105 => :subject,
                       106 => :published_at, 107 => :review, 108 => :contributor, 109 => :rights, 110 => :subject_code,
                       111 => :type, 112 => :source, 113 => :asin, 114 => :version }.freeze

      # Marker found in the first four bytes of a real EXTH header.
      IDENTIFIER = 'EXTH'.freeze
      # Offset of the first record, relative to the start of the header.
      RECORDS_OFFSET = 12

      attr_reader(*RECORD_TYPES.values)

      # Initialize the ExthHeader and eagerly read every known record.
      #
      # data - An object answering [](offset, length) with a String, e.g. a
      #        StreamSlicer positioned at the start of the EXTH header.
      #
      # When the data does not start with the EXTH identifier no records are
      # read and every reader returns nil. Without that check the record count
      # and the records would be decoded from unrelated bytes.
      def initialize(data)
        @data = data
        @record_count = exth? ? @data[8, 4].to_s.unpack('N').first.to_i : 0

        define_data_methods
      end

      private

      # Does the data start with the EXTH identifier?
      def exth?
        @data[0, 4] == IDENTIFIER
      end

      # Walks the record list and stores each known record's value in the
      # instance variable backing its reader.
      def define_data_methods
        offset = RECORDS_OFFSET

        @record_count.times do
          record = ExthRecord.new(@data, offset)
          # A truncated record, or one claiming to be shorter than its own
          # 8-byte header, would never advance the cursor: stop parsing.
          break unless record.valid?

          instance_variable_set("@#{record.name}", record.value) if record.name

          offset += record.length
        end
      end

      # Internal: One record of the EXTH header.
      class ExthRecord
        # Bytes taken by the type code and the length field.
        HEADER_SIZE = 8

        attr_reader :code, :name, :length, :value

        # data  - The same object the ExthHeader was built with.
        # start - Offset of the record within data.
        def initialize(data, start)
          header = data[start, HEADER_SIZE]
          complete = header && header.bytesize == HEADER_SIZE

          @code, @length = complete ? header.unpack('NN') : [nil, nil]
          @name = ExthHeader::RECORD_TYPES[@code]
          @value = valid? ? data[start + HEADER_SIZE, @length - HEADER_SIZE] : nil
        end

        # Was a complete header read, with a length covering at least itself?
        def valid?
          !@length.nil? && @length >= HEADER_SIZE
        end
      end

    end
  end
end
--------------------------------------------------------------------------------
/lib/mobi/header/mobi_header.rb:
--------------------------------------------------------------------------------
# Public: Parses the MOBI header, which follows the 16 bytes of the PalmDOC
# header. All offsets used below are relative to the start of the data the
# header is built with, and all integers are read big-endian.
module Mobi
  module Header
    class MobiHeader

      # Raw MOBI type code => human readable name.
      MOBI_TYPES = {
        2   => 'MOBIpocket Book',
        3   => 'PalmDoc Book',
        4   => 'Audio',
        232 => 'MOBIpocket',
        248 => 'KF8',
        257 => 'News',
        258 => 'News Feed',
        259 => 'News_Magazine',
        513 => 'PICS',
        514 => 'WORD',
        515 => 'XLS',
        516 => 'PPT',
        517 => 'TEXT',
        518 => 'HTML'
      }.freeze

      # Raw text encoding code => human readable name.
      TEXT_ENCODINGS = {
        1252  => 'CP1252 (WinLatin1)',
        65001 => 'UTF-8'
      }.freeze

      # Initialize the MobiHeader.
      #
      # data - A StreamSlicer which starts at record 0 of the PalmDOC (any
      #        object answering [](offset, length) with a String works).
      #
      # Returns self.
      def initialize(data)
        @data = data
      end

      # A MOBI identifier.
      #
      # Returns a String, the four bytes at offset 16.
      def identifier
        @identifier ||= @data[16, 4]
      end

      # The length of the MOBI header.
      #
      # Returns an Integer.
      def header_length
        @header_length ||= uint32_at(20)
      end

      # The kind of Mobipocket file as returned from byte code.
      #
      # Returns an Integer.
      def raw_mobi_type
        @raw_mobi_type ||= uint32_at(24)
      end

      # The kind of Mobipocket file.
      #
      # Returns a String.
      # Raises KeyError if the raw type is not listed in MOBI_TYPES.
      def mobi_type
        # dup: callers keep receiving a fresh String, as they did when the
        # table was rebuilt on every call.
        MOBI_TYPES.fetch(raw_mobi_type).dup
      end

      # The text encoding as returned from byte code.
      #
      # Returns an Integer.
      def raw_text_encoding
        @raw_text_encoding ||= uint32_at(28)
      end

      # The text encoding.
      #
      # Returns a String.
      # Raises KeyError if the raw encoding is not listed in TEXT_ENCODINGS.
      def text_encoding
        TEXT_ENCODINGS.fetch(raw_text_encoding).dup
      end

      # The unique ID.
      #
      # Returns an Integer.
      def unique_id
        @unique_id ||= uint32_at(32)
      end

      # The version of the MOBIpocket format used in this file.
      #
      # Returns an Integer.
      def file_version
        @file_version ||= uint32_at(36)
      end

      # The first record number (starting with 0) that is not the book's text.
      #
      # Returns an Integer.
      def first_non_book_index
        @first_non_book_index ||= uint32_at(80)
      end

      # Offset in record 0 (not from start of file) of the full name of the book.
      #
      # Returns an Integer.
      def full_name_offset
        @full_name_offset ||= uint32_at(84)
      end

      # Length in bytes of the full name of the book.
      #
      # Returns an Integer.
      def full_name_length
        @full_name_length ||= uint32_at(88)
      end

      # The raw book locale code. I believe this refers to a LCID code.
      #
      # The low byte is the main language: 09 = English.
      # The next byte is dialect: 08 = British, 04 = US.
      # Thus US English is 1033, UK English is 2057.
      #
      # Returns an Integer.
      def raw_locale_code
        @raw_locale_code ||= uint32_at(92)
      end

      # The minimum MOBIpocket version support needed to read this file.
      #
      # Returns an Integer.
      def minimum_supported_mobipocket_version
        @minimum_supported_mobipocket_version ||= uint32_at(104)
      end

      # The first record number (starting with 0) that contains an image. Image
      # records should be sequential.
      #
      # Returns an Integer.
      def first_image_index_record_number
        @first_image_index_record_number ||= uint32_at(108)
      end

      # The EXTH flag.
      #
      # If bit 6 is set, then there is an EXTH record.
      #
      # Returns an Integer, 1 or 0.
      def exth_flag
        # '@3B8' skips to the last of the four bytes and renders it as a bit
        # string, most significant bit first; index 1 is therefore bit 6.
        @exth_flag ||= @data[128, 4].unpack('@3B8').first[1].to_i
      end

      # Does the book have an EXTH header?
      #
      # Returns true if the book has an EXTH header.
      def exth_header?
        exth_flag == 1
      end

      private

      # Reads the unsigned 32-bit big-endian integer stored at offset.
      #
      # Returns an Integer, or nil when fewer than four bytes were available.
      def uint32_at(offset)
        @data[offset, 4].unpack('N*')[0]
      end

    end
  end
end
--------------------------------------------------------------------------------
/lib/mobi/header/palm_doc_header.rb:
--------------------------------------------------------------------------------
# Public: Parses the PalmDOC header, the first bytes of record 0. All offsets
# used below are relative to the start of the data the header is built with,
# and all integers are read big-endian.
module Mobi
  module Header
    class PalmDocHeader

      # Raw compression code => human readable name.
      COMPRESSION_TYPES = {
        1     => 'None',
        2     => 'PalmDOC',
        17480 => 'HUFF/CDIC'
      }.freeze

      # Raw encryption code => human readable name.
      ENCRYPTION_TYPES = {
        0 => 'None',
        1 => 'Old MOBIpocket',
        2 => 'MOBIpocket'
      }.freeze

      # Initializes the PalmDOC header.
      #
      # data - A StreamSlicer which starts at the PalmDOC header (any object
      #        answering [](offset, length) with a String works).
      #
      # Returns self.
      def initialize(data)
        @data = data
      end

      # The compression type as returned from byte code.
      #
      # Returns an Integer.
      def raw_compression_type
        @raw_compression_type ||= uint16_at(0)
      end

      # The compression type.
      #
      # Returns a String.
      # Raises KeyError if the raw type is not listed in COMPRESSION_TYPES.
      def compression_type
        # dup: callers keep receiving a fresh String, as they did when the
        # table was rebuilt on every call.
        COMPRESSION_TYPES.fetch(raw_compression_type).dup
      end

      # The uncompressed length of the entire text of the book.
      #
      # Returns an Integer.
      def text_length
        @text_length ||= @data[4, 4].unpack('N*')[0]
      end

      # Number of PDB records used for the text of the book.
      #
      # Returns an Integer.
      def record_count
        @record_count ||= uint16_at(8)
      end

      # Maximum size of each record containing text. Note that this always
      # returns 4096.
      #
      # Returns an Integer.
      def record_size
        @record_size ||= uint16_at(10)
      end

      # The encryption type as returned from byte code.
      #
      # Returns an Integer.
      def raw_encryption_type
        @raw_encryption_type ||= uint16_at(12)
      end

      # The encryption type.
      #
      # Returns a String.
      # Raises KeyError if the raw type is not listed in ENCRYPTION_TYPES.
      def encryption_type
        ENCRYPTION_TYPES.fetch(raw_encryption_type).dup
      end

      private

      # Reads the unsigned 16-bit big-endian integer stored at offset.
      #
      # Returns an Integer, or nil when fewer than two bytes were available.
      def uint16_at(offset)
        @data[offset, 2].unpack('n*')[0]
      end

    end
  end
end
--------------------------------------------------------------------------------
/lib/mobi/metadata.rb:
--------------------------------------------------------------------------------
1 | require 'forwardable'
2 |
module Mobi
  # Public: Entry point for reading the metadata of a MOBI file. Builds the
  # PalmDOC, MOBI and EXTH header objects and exposes the book title plus one
  # reader per EXTH record type.
  class Metadata
    extend Forwardable

    # Raised when the supplied file does not carry the BOOKMOBI signature.
    class InvalidMobi < ArgumentError; end

    # Names of the readers delegated to the EXTH header.
    EXTH_RECORDS = %w(author publisher imprint description isbn subject
                      published_at review contributor rights subject_code type
                      source asin version).freeze

    # Raw data stream
    attr_reader :data
    # Individual header classes for your reading pleasure.
    attr_reader :palm_doc_header, :mobi_header, :exth_header

    # Delegate EXTH record types to the EXTH header. Each name is passed
    # exactly once; the previous code listed every name twice per call.
    def_delegators :@exth_header, *EXTH_RECORDS.map(&:to_sym)

    # Initialize the Metadata and parse all headers.
    #
    # file - An open, seekable IO on the MOBI file (it is handed to
    #        StreamSlicer, which seeks and reads it).
    #
    # Raises InvalidMobi if the file does not carry the BOOKMOBI signature.
    def initialize(file)
      @file = file
      @data = StreamSlicer.new(file)

      raise InvalidMobi, "The supplied file is not in a valid mobi format" unless bookmobi?

      @record_zero_stream = MetadataStreams.record_zero_stream(file)
      @palm_doc_header    = Header::PalmDocHeader.new @record_zero_stream
      @mobi_header        = Header::MobiHeader.new @record_zero_stream

      # The EXTH header starts right after the MOBI header, whose length is
      # only known once the MOBI header has been read.
      @exth_stream = MetadataStreams.exth_stream(file, @mobi_header.header_length)
      @exth_header = Header::ExthHeader.new @exth_stream
    end

    # Gets the title of the book.
    #
    # Returns a String.
    def title
      @title ||= @record_zero_stream[@mobi_header.full_name_offset,
                                     @mobi_header.full_name_length]
    end

    # Determines if the file is a valid mobi file.
    #
    # Returns true if the file is a valid MOBI, i.e. the eight bytes at
    # offset 60 read "BOOKMOBI".
    def bookmobi?
      @data[60, 8] == "BOOKMOBI"
    end
  end
end
--------------------------------------------------------------------------------
/lib/mobi/metadata_streams.rb:
--------------------------------------------------------------------------------
module Mobi
  # Public: Builds StreamSlicers positioned on the parts of a MOBI file that
  # hold metadata.
  module MetadataStreams

    # File offset at which the start offset of record 0 is stored.
    RECORD_ZERO_OFFSET_POSITION = 78
    # Distance from there to the stored start offset of the next record,
    # which marks where record 0 stops.
    RECORD_ENTRY_SIZE = 8
    # Bytes taken by the PalmDOC header at the start of record 0.
    PALM_DOC_HEADER_LENGTH = 16

    # Creates a stream starting at the Record 0 in the PalmDOC.
    #
    # file - An open, seekable IO on the MOBI file.
    #
    # Returns a StreamSlicer.
    def self.record_zero_stream(file)
      start, stop = record_zero_endpoints(StreamSlicer.new(file))

      StreamSlicer.new(file, start, stop)
    end

    # Creates a stream starting at the EXTH header in Record 0.
    #
    # file          - An open, seekable IO on the MOBI file.
    # header_length - The length of the MOBI header as an Integer.
    #
    # Returns a StreamSlicer.
    def self.exth_stream(file, header_length)
      record_zero = record_zero_stream(file)
      exth_offset = record_zero.start + PALM_DOC_HEADER_LENGTH + header_length

      StreamSlicer.new(file, exth_offset, record_zero.stop)
    end

    # Internal: Determines the start and end points of Record 0 in the
    # PalmDOC. The start point is returned as the first value in the array,
    # and the end point as the second value.
    #
    # The former bare `private` above this method had no effect on a
    # `def self.` method, so it stays callable exactly as before.
    #
    # data - An object answering [](offset, length) with a String.
    #
    # Returns an Array.
    def self.record_zero_endpoints(data)
      start, = data[RECORD_ZERO_OFFSET_POSITION, 4].unpack('N*')
      # The second argument is a length: ask for the 4 bytes of the integer
      # rather than an end offset, which read 90 bytes.
      stop, = data[RECORD_ZERO_OFFSET_POSITION + RECORD_ENTRY_SIZE, 4].unpack('N*')
      [start, stop]
    end

  end
end
--------------------------------------------------------------------------------
/lib/mobi/stream_slicer.rb:
--------------------------------------------------------------------------------
module Mobi
  # Public: A window onto a seekable IO. Offsets passed to #[] are relative
  # to the window's start, so parsers can work with header-relative offsets.
  class StreamSlicer

    attr_reader :stream
    attr_accessor :start, :stop

    # Initialize the StreamSlicer.
    #
    # stream - An IO-like object answering seek, tell and read.
    # start  - Absolute offset of the window's first byte (default: 0).
    # stop   - Absolute offset at which the window ends (default: the end of
    #          the stream, found by seeking to it).
    def initialize(stream, start = 0, stop = nil)
      @stream = stream
      @start  = start
      @stop   = stop.nil? ? end_of_stream : stop
    end

    # The size of the window in bytes. Computed on demand so it stays
    # correct after start= or stop= has been used.
    #
    # Returns an Integer.
    def length
      @stop - @start
    end

    # Reads bytes relative to the start of the window.
    #
    # offset - Position relative to the window's start.
    # bytes  - Number of bytes to read (default: 1). A negative count reads
    #          the bytes that precede offset.
    #
    # Returns a String, or nil when bytes is 0. Whatever stream.read returns
    # is passed through (nil at end of file for an IO). Reads are not
    # limited to the window's stop.
    def [](offset, bytes = 1)
      return nil if bytes.zero?

      # With a negative count the read starts |bytes| before offset.
      first = bytes < 0 ? offset + bytes : offset

      @stream.seek(@start + first)
      @stream.read(bytes.abs)
    end

    private

    # Seeks to the end of the stream and reports that position.
    def end_of_stream
      @stream.seek(0, IO::SEEK_END)
      @stream.tell
    end
  end
end
45 |
--------------------------------------------------------------------------------
/mobi.gemspec:
--------------------------------------------------------------------------------
1 | # Generated by jeweler
2 | # DO NOT EDIT THIS FILE DIRECTLY
3 | # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4 | # -*- encoding: utf-8 -*-
5 |
# Gem specification for mobi. The file is generated by Jeweler: change
# Jeweler::Tasks in the Rakefile and regenerate rather than editing by hand.
Gem::Specification.new do |s|
  s.name = "mobi"
  s.version = "1.0.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["jkongie"]
  s.date = "2013-10-04"
  s.description = "Mobi is a Rubygem that allows you to easily read MOBI e-book format metadata."
  s.email = "jkongie@gmail.com"
  s.extra_rdoc_files = [
    "LICENSE.txt",
    "README.md"
  ]
  s.files = [
    ".document",
    ".rspec",
    "CHANGELOG.md",
    "Gemfile",
    "Gemfile.lock",
    "LICENSE.txt",
    "README.md",
    "Rakefile",
    "VERSION",
    "lib/mobi.rb",
    "lib/mobi/header/exth_header.rb",
    "lib/mobi/header/mobi_header.rb",
    "lib/mobi/header/palm_doc_header.rb",
    "lib/mobi/metadata.rb",
    "lib/mobi/metadata_streams.rb",
    "lib/mobi/stream_slicer.rb",
    "mobi.gemspec",
    "spec/fixtures/sherlock.mobi",
    "spec/lib/mobi/header/exth_header_spec.rb",
    "spec/lib/mobi/header/mobi_header_spec.rb",
    "spec/lib/mobi/header/palm_doc_header_spec.rb",
    "spec/lib/mobi/metadata_spec.rb",
    "spec/lib/mobi/stream_slicer_spec.rb",
    "spec/lib/mobi_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = "http://github.com/jkongie/mobi"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.23"
  s.summary = "A Rubygem that inspects MOBI metadata."

  # NOTE(review): the gem names below were lost from the %q<...> literals;
  # they are restored in Gemfile order (rspec, rr, bundler, jeweler) —
  # confirm by regenerating with `rake gemspec`.
  if s.respond_to? :specification_version then
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<rspec>, ["~> 2.14.0"])
      s.add_development_dependency(%q<rr>, ["~> 1.0.4"])
      s.add_development_dependency(%q<bundler>, ["~> 1.3.0"])
      s.add_development_dependency(%q<jeweler>, ["~> 1.8.0"])
    else
      s.add_dependency(%q<rspec>, ["~> 2.14.0"])
      s.add_dependency(%q<rr>, ["~> 1.0.4"])
      s.add_dependency(%q<bundler>, ["~> 1.3.0"])
      s.add_dependency(%q<jeweler>, ["~> 1.8.0"])
    end
  else
    s.add_dependency(%q<rspec>, ["~> 2.14.0"])
    s.add_dependency(%q<rr>, ["~> 1.0.4"])
    s.add_dependency(%q<bundler>, ["~> 1.3.0"])
    s.add_dependency(%q<jeweler>, ["~> 1.8.0"])
  end
end
73 |
74 |
--------------------------------------------------------------------------------
/spec/fixtures/sherlock.mobi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkongie/mobi/d584e65a3f3c70379c82a135e42e33a98a54d1fb/spec/fixtures/sherlock.mobi
--------------------------------------------------------------------------------
/spec/lib/mobi/header/exth_header_spec.rb:
--------------------------------------------------------------------------------
require 'spec_helper'

require 'mobi/stream_slicer'
require 'mobi/metadata_streams'
require 'mobi/header/mobi_header'
require 'mobi/header/exth_header'

# Exercises Mobi::Header::ExthHeader against the sherlock.mobi fixture.
describe Mobi::Header::ExthHeader do

  before :all do
    # Binary mode: the fixture holds raw bytes, not text.
    @file = File.open('spec/fixtures/sherlock.mobi', 'rb')

    record_zero_stream = Mobi::MetadataStreams.record_zero_stream(@file)
    mobi_header = Mobi::Header::MobiHeader.new record_zero_stream
    exth_stream = Mobi::MetadataStreams.exth_stream(@file, mobi_header.header_length)

    @header = Mobi::Header::ExthHeader.new exth_stream
  end

  # Release the fixture handle, which used to leak.
  after :all do
    @file.close
  end

  it 'gets the author' do
    expect(@header.author).to eq('Sir Arthur Conan Doyle')
  end

  it 'gets the book subject' do
    expect(@header.subject).to eq('Detective and mystery stories, English')
  end

  it 'gets the book rights' do
    expect(@header.rights).to eq('Public domain in the USA.')
  end

  it 'gets the book source' do
    expect(@header.source).to eq('http://www.gutenberg.org/files/2350/2350-h/2350-h.htm')
  end

end
37 |
--------------------------------------------------------------------------------
/spec/lib/mobi/header/mobi_header_spec.rb:
--------------------------------------------------------------------------------
require 'spec_helper'

require 'mobi/stream_slicer'
require 'mobi/metadata_streams'
require 'mobi/header/mobi_header'

# Exercises Mobi::Header::MobiHeader against the sherlock.mobi fixture.
describe Mobi::Header::MobiHeader do
  before :all do
    # Binary mode: the fixture holds raw bytes, not text.
    @file = File.open('spec/fixtures/sherlock.mobi', 'rb')
    stream = Mobi::MetadataStreams.record_zero_stream(@file)

    @header = Mobi::Header::MobiHeader.new stream
  end

  # The header reads lazily, so the file must stay open until every example
  # has run; close it afterwards instead of leaking the handle.
  after :all do
    @file.close
  end

  it 'gets the identifier' do
    expect(@header.identifier).to eq('MOBI')
  end

  it 'gets the length of the MOBI header' do
    expect(@header.header_length).to eq(232)
  end

  it 'gets the mobi type as an integer' do
    expect(@header.raw_mobi_type).to eq(2)
  end

  it 'gets the mobi type as a string' do
    expect(@header.mobi_type).to eq('MOBIpocket Book')
  end

  it 'gets the raw text encoding' do
    expect(@header.raw_text_encoding).to eq(65001)
  end

  it 'gets the text encoding' do
    expect(@header.text_encoding).to eq('UTF-8')
  end

  it 'gets the unique id' do
    expect(@header.unique_id).to eq(1532466569)
  end

  it 'gets the file version' do
    expect(@header.file_version).to eq(6)
  end

  it 'gets the first non book index' do
    expect(@header.first_non_book_index).to eq(16)
  end

  it 'gets the full name offset' do
    expect(@header.full_name_offset).to eq(688)
  end

  it 'gets the full name length' do
    expect(@header.full_name_length).to eq(12)
  end

  it 'gets the raw locale code' do
    expect(@header.raw_locale_code).to eq(9)
  end

  it 'gets the minimum supported mobipocket version' do
    expect(@header.minimum_supported_mobipocket_version).to eq(6)
  end

  it 'gets the first image index record number' do
    expect(@header.first_image_index_record_number).to eq(19)
  end

  it 'gets the EXTH header flag' do
    expect(@header.exth_flag).to eq(1)
  end

  it 'checks if there an EXTH header exists' do
    expect(@header.exth_header?).to be true
  end

end
80 |
--------------------------------------------------------------------------------
/spec/lib/mobi/header/palm_doc_header_spec.rb:
--------------------------------------------------------------------------------
require 'spec_helper'

require 'mobi/stream_slicer'
require 'mobi/metadata_streams'
require 'mobi/header/palm_doc_header'

# Exercises Mobi::Header::PalmDocHeader against the sherlock.mobi fixture.
describe Mobi::Header::PalmDocHeader do

  before :all do
    # Binary mode: the fixture holds raw bytes, not text.
    @file = File.open('spec/fixtures/sherlock.mobi', 'rb')
    stream = Mobi::MetadataStreams.record_zero_stream(@file)

    @header = Mobi::Header::PalmDocHeader.new stream
  end

  # The header reads lazily, so the file must stay open until every example
  # has run; close it afterwards instead of leaking the handle.
  after :all do
    @file.close
  end

  it 'gets the raw compression_type' do
    expect(@header.raw_compression_type).to eq(2)
  end

  it 'gets the compression type' do
    expect(@header.compression_type).to eq('PalmDOC')
  end

  it 'gets the text length' do
    expect(@header.text_length).to eq(57327)
  end

  it 'gets the record_count' do
    expect(@header.record_count).to eq(14)
  end

  it 'gets the record size' do
    expect(@header.record_size).to eq(4096)
  end

  it 'gets the raw encryption type' do
    expect(@header.raw_encryption_type).to eq(0)
  end

  it 'gets the encryption type' do
    expect(@header.encryption_type).to eq('None')
  end

end
45 |
46 |
--------------------------------------------------------------------------------
/spec/lib/mobi/metadata_spec.rb:
--------------------------------------------------------------------------------
require 'spec_helper'

require 'mobi'

# Exercises Mobi::Metadata against the sherlock.mobi fixture.
describe Mobi::Metadata do
  before :all do
    # Binary mode: the fixture holds raw bytes, not text.
    @file = File.open('spec/fixtures/sherlock.mobi', 'rb')
  end

  # Shared by every example below; close it once they have all run instead
  # of leaking the handle.
  after :all do
    @file.close
  end

  context 'initialization' do
    it 'instantiates a StreamSlicer from the file' do
      metadata = Mobi::Metadata.new(@file)

      expect(metadata.data).to be_instance_of(Mobi::StreamSlicer)
    end

    it 'raises an exception if the book is not a mobi' do
      any_instance_of(Mobi::Metadata) do |m|
        mock(m).bookmobi? { false }
      end

      expect{ Mobi::Metadata.new(@file) }.to raise_exception(Mobi::Metadata::InvalidMobi)
    end

    context 'instantiating headers' do
      before :all do
        @metadata = Mobi::Metadata.new(@file)
      end

      it 'instantiates a palm doc header' do
        expect(@metadata.palm_doc_header).to be_a Mobi::Header::PalmDocHeader
      end

      it 'instantiates a mobi header' do
        expect(@metadata.mobi_header).to be_a Mobi::Header::MobiHeader
      end

      it 'instantiates a exth_header' do
        expect(@metadata.exth_header).to be_a Mobi::Header::ExthHeader
      end
    end
  end

  context 'instance' do
    before :all do
      @metadata = Mobi::Metadata.new(@file)
    end

    it 'gets the the title of the book' do
      expect(@metadata.title).to eq('His Last Bow')
    end

    it 'is a bookmobi' do
      expect(@metadata.bookmobi?).to be true
    end

    it 'defines delgate exth record method names to the exth record' do
      mock.proxy(@metadata.exth_header).author

      expect(@metadata.author).to eq('Sir Arthur Conan Doyle')
    end
  end

end
66 |
--------------------------------------------------------------------------------
/spec/lib/mobi/stream_slicer_spec.rb:
--------------------------------------------------------------------------------
require 'spec_helper'

require 'mobi/stream_slicer'

# Exercises Mobi::StreamSlicer against the sherlock.mobi fixture.
describe Mobi::StreamSlicer do
  # Binary mode: the fixture holds raw bytes, not text.
  let(:file){ File.open('spec/fixtures/sherlock.mobi', 'rb') }

  # A fresh handle is opened for each example; close it rather than leak it.
  after { file.close }

  context "instantiation" do
    it "sets the start point to 0 if no start is provided" do
      ss = Mobi::StreamSlicer.new(file)
      expect(ss.start).to eq(0)
    end

    it "sets the end point to the file end if no stop is provided" do
      ss = Mobi::StreamSlicer.new(file)
      expect(ss.stop).to eq(111449)
    end

    it "sets the start and stop points to the arguments provided" do
      ss = Mobi::StreamSlicer.new(file, 1, 2)
      expect(ss.start).to eq(1)
      expect(ss.stop).to eq(2)
    end

    it "sets the length" do
      ss = Mobi::StreamSlicer.new(file, 1, 10)
      expect(ss.length).to eq(9)
    end

    it "sets the stream to the input file" do
      ss = Mobi::StreamSlicer.new(file)
      expect(ss.stream).to eq(file)
    end
  end

  context "#[]" do

    it "returns 1 byte" do
      ss = Mobi::StreamSlicer.new(file)
      expect(ss[0].length).to eq(1)
    end

    it "starts the search from the offset provided" do
      file.seek(5)
      value = file.read(1)
      ss = Mobi::StreamSlicer.new(file, 5)
      expect(ss[0]).to eq(value)
    end

    it "returns the correct number of bytes" do
      file.seek(5)
      value = file.read(2)
      ss = Mobi::StreamSlicer.new(file, 5)
      expect(ss[0, 2]).to eq(value)
    end

    it "seeks in reverse order" do
      file.seek(8)
      value = file.read(2)
      ss = Mobi::StreamSlicer.new(file, 5)
      expect(ss[5, -2]).to eq(value)
    end
  end
end
65 |
--------------------------------------------------------------------------------
/spec/lib/mobi_spec.rb:
--------------------------------------------------------------------------------
require 'spec_helper'

require 'mobi'

# Exercises the Mobi.metadata convenience method.
describe Mobi do
  it "instantiates a Mobi::Metadata object" do
    # Block form closes the fixture once the example is done; binary mode
    # because the fixture holds raw bytes, not text.
    File.open('spec/fixtures/sherlock.mobi', 'rb') do |file|
      expect(Mobi.metadata(file)).to be_an_instance_of(Mobi::Metadata)
    end
  end
end
11 |
--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
# Use rr as the mocking framework for the whole suite.
RSpec.configure { |config| config.mock_with :rr }
4 |
5 |
--------------------------------------------------------------------------------