├── .document ├── .gitignore ├── .rspec ├── CHANGELOG.md ├── Gemfile ├── Gemfile.lock ├── LICENSE.txt ├── README.md ├── Rakefile ├── VERSION ├── lib ├── mobi.rb └── mobi │ ├── header │ ├── exth_header.rb │ ├── mobi_header.rb │ └── palm_doc_header.rb │ ├── metadata.rb │ ├── metadata_streams.rb │ └── stream_slicer.rb ├── mobi.gemspec └── spec ├── fixtures └── sherlock.mobi ├── lib ├── mobi │ ├── header │ │ ├── exth_header_spec.rb │ │ ├── mobi_header_spec.rb │ │ └── palm_doc_header_spec.rb │ ├── metadata_spec.rb │ └── stream_slicer_spec.rb └── mobi_spec.rb └── spec_helper.rb /.document: -------------------------------------------------------------------------------- 1 | lib/**/*.rb 2 | bin/* 3 | - 4 | features/**/*.feature 5 | LICENSE.txt 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # rcov generated 2 | coverage 3 | 4 | # rdoc generated 5 | rdoc 6 | 7 | # yard generated 8 | doc 9 | .yardoc 10 | 11 | # bundler 12 | .bundle 13 | 14 | # jeweler generated 15 | pkg 16 | 17 | # Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore: 18 | # 19 | # * Create a file at ~/.gitignore 20 | # * Include files you want ignored 21 | # * Run: git config --global core.excludesfile ~/.gitignore 22 | # 23 | # After doing this, these files will be ignored in all your git projects, 24 | # saving you from having to 'pollute' every project you touch with them 25 | # 26 | # Not sure what to needs to be ignored for particular editors/OSes? Here's some ideas to get you started. 
(Remember, remove the leading # of the line) 27 | # 28 | # For MacOS: 29 | # 30 | #.DS_Store 31 | # 32 | # For TextMate 33 | #*.tmproj 34 | #tmtags 35 | # 36 | # For emacs: 37 | #*~ 38 | #\#* 39 | #.\#* 40 | # 41 | # For vim: 42 | #*.swp 43 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format documentation -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 1.0.0 4 | 5 | * No implementation changes. Bumping to 1.0 because there have been no issues raised for over a year, and just because I can! 6 | * Update RSpec. 7 | * Update Bundler. 8 | 9 | ## 0.2.0 10 | 11 | * Extract EXTH header data into its own class. 12 | * Parse Mobi header data. 13 | * Parse PalmDOC header data. 14 | * Large internals refactoring, and now with added tests! 15 | 16 | ## 0.1.2 17 | 18 | * Trying to instantiate Mobi::Metadata with an invalid mobi file now raises an InvalidMobi exception 19 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "http://rubygems.org" 2 | # Add dependencies required to use your gem here. 3 | # Example: 4 | # gem "activesupport", ">= 2.3.5" 5 | 6 | # Add dependencies to develop your gem here. 7 | # Include everything needed to run rake, tests, features, etc. 
8 | group :development do 9 | gem 'rspec', '~> 3.1.0' 10 | gem 'rr', '~> 1.1.0' 11 | gem 'bundler', '~> 1.7.0' 12 | gem 'jeweler', '~> 2.0.0' 13 | end 14 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | addressable (2.3.6) 5 | builder (3.2.2) 6 | descendants_tracker (0.0.4) 7 | thread_safe (~> 0.3, >= 0.3.1) 8 | diff-lcs (1.2.5) 9 | faraday (0.9.0) 10 | multipart-post (>= 1.2, < 3) 11 | git (1.2.8) 12 | github_api (0.12.2) 13 | addressable (~> 2.3) 14 | descendants_tracker (~> 0.0.4) 15 | faraday (~> 0.8, < 0.10) 16 | hashie (>= 3.3) 17 | multi_json (>= 1.7.5, < 2.0) 18 | nokogiri (~> 1.6.3) 19 | oauth2 20 | hashie (3.3.1) 21 | highline (1.6.21) 22 | jeweler (2.0.1) 23 | builder 24 | bundler (>= 1.0) 25 | git (>= 1.2.5) 26 | github_api 27 | highline (>= 1.6.15) 28 | nokogiri (>= 1.5.10) 29 | rake 30 | rdoc 31 | json (1.8.1) 32 | jwt (1.0.0) 33 | mini_portile (0.6.1) 34 | multi_json (1.10.1) 35 | multi_xml (0.5.5) 36 | multipart-post (2.0.0) 37 | nokogiri (1.6.4.1) 38 | mini_portile (~> 0.6.0) 39 | oauth2 (1.0.0) 40 | faraday (>= 0.8, < 0.10) 41 | jwt (~> 1.0) 42 | multi_json (~> 1.3) 43 | multi_xml (~> 0.5) 44 | rack (~> 1.2) 45 | rack (1.5.2) 46 | rake (10.3.2) 47 | rdoc (4.1.2) 48 | json (~> 1.4) 49 | rr (1.1.2) 50 | rspec (3.1.0) 51 | rspec-core (~> 3.1.0) 52 | rspec-expectations (~> 3.1.0) 53 | rspec-mocks (~> 3.1.0) 54 | rspec-core (3.1.7) 55 | rspec-support (~> 3.1.0) 56 | rspec-expectations (3.1.2) 57 | diff-lcs (>= 1.2.0, < 2.0) 58 | rspec-support (~> 3.1.0) 59 | rspec-mocks (3.1.3) 60 | rspec-support (~> 3.1.0) 61 | rspec-support (3.1.2) 62 | thread_safe (0.3.4) 63 | 64 | PLATFORMS 65 | ruby 66 | 67 | DEPENDENCIES 68 | bundler (~> 1.7.0) 69 | jeweler (~> 2.0.0) 70 | rr (~> 1.1.0) 71 | rspec (~> 3.1.0) 72 | 
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 jkongie 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # mobi 4 | 5 | A Ruby Gem that reads MOBI metadata. 6 | 7 | ## Ruby 8 | 9 | Compatible up to ruby 2.0.0p0. 10 | 11 | I haven't tested beyond that version, but it should work. 
12 | 13 | ## Installation 14 | 15 | `gem install mobi` 16 | 17 | ## Usage 18 | 19 | Creating a Mobi::Metadata object 20 | 21 | ```ruby 22 | Mobi::Metadata.new(File.open('/path/to/file.mobi')) 23 | ``` 24 | 25 | A handy convenience method to do the exact same thing 26 | 27 | ```ruby 28 | Mobi.metadata File.open('/path/to/file.mobi') 29 | ``` 30 | 31 | Getting metadata information is as simple as: 32 | 33 | ```ruby 34 | metadata = Mobi.metadata File.open('/path/to/fellowship_of_the_ring.mobi') 35 | #=> # 36 | metadata.author 37 | #=> "J.R.R. Tolkien" 38 | ``` 39 | 40 | Supported metadata options are: 41 | 42 | * asin 43 | * author 44 | * contributor 45 | * description 46 | * imprint 47 | * isbn 48 | * publisher 49 | * published_at 50 | * review 51 | * rights 52 | * source 53 | * subject 54 | * subject_code 55 | * title 56 | * type 57 | * version 58 | 59 | You can also drill down and get the PalmDOC and MOBI header details: 60 | 61 | ```ruby 62 | palm_doc_header = metadata.palm_doc_header 63 | #=> # 64 | palm_doc_header.raw_compression_type 65 | #=> 2 66 | palm_doc_header.compression_type 67 | #=> "PalmDOC" 68 | 69 | mobi_header = metadata.mobi_header 70 | #=> # 71 | mobi_header.raw_mobi_type 72 | #=> 2 73 | mobi_header.mobi_type 74 | #=> "MOBIpocket Book" 75 | ``` 76 | 77 | See the source for more methods. 78 | 79 | ## Thanks 80 | 81 | * Calibre open source project. I ripped off the idea of a Stream Slicer and got a better understanding of MOBI files from their code base. Check them out at http://calibre-ebook.com 82 | 83 | ## Contributing to mobi 84 | 85 | * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet 86 | * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it 87 | * Fork the project 88 | * Start a feature/bugfix branch 89 | * Commit and push until you are happy with your contribution 90 | * Make sure to add tests for it. 
This is important so I don't break it in a future version unintentionally. 91 | * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it. 92 | 93 | ## Copyright 94 | 95 | Copyright (c) 2011 jkongie. See LICENSE.txt for further details. 96 | 97 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'bundler' 3 | begin 4 | Bundler.setup(:default, :development) 5 | rescue Bundler::BundlerError => e 6 | $stderr.puts e.message 7 | $stderr.puts "Run `bundle install` to install missing gems" 8 | exit e.status_code 9 | end 10 | require 'rake' 11 | 12 | require 'jeweler' 13 | Jeweler::Tasks.new do |gem| 14 | # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options 15 | gem.name = "mobi" 16 | gem.homepage = "http://github.com/jkongie/mobi" 17 | gem.license = "MIT" 18 | gem.summary = %Q{A Rubygem that inspects MOBI metadata.} 19 | gem.description = %Q{Mobi is a Rubygem that allows you to easily read MOBI e-book format metadata.} 20 | gem.email = "jkongie@gmail.com" 21 | gem.authors = ["jkongie"] 22 | # Include your dependencies below. Runtime dependencies are required when using your gem, 23 | # and development dependencies are only needed for development (ie running rake tasks, tests, etc) 24 | # gem.add_runtime_dependency 'jabber4r', '> 0.1' 25 | # gem.add_development_dependency 'rspec', '> 1.2.3' 26 | end 27 | Jeweler::RubygemsDotOrgTasks.new 28 | 29 | task :default => :test 30 | 31 | require 'rdoc/task' 32 | Rake::RDocTask.new do |rdoc| 33 | version = File.exist?('VERSION') ? 
# NOTE: this span of the dump held the tail of /Rakefile, /VERSION, /lib/mobi.rb,
# every file under /lib/mobi/ and the head of /mobi.gemspec, all collapsed onto
# a handful of physical lines. The /lib/mobi/ sources are restored to a
# conventional layout below; the two surrounding fragments are kept verbatim,
# each as a single comment line.
#
# File.read('VERSION') : "" 34 | 35 | rdoc.rdoc_dir = 'rdoc' 36 | rdoc.title = "mobi #{version}" 37 | rdoc.rdoc_files.include('README*') 38 | rdoc.rdoc_files.include('lib/**/*.rb') 39 | end 40 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 1.0.0 -------------------------------------------------------------------------------- /lib/mobi.rb: -------------------------------------------------------------------------------- 1 | require 'mobi/stream_slicer' 2 | require 'mobi/metadata' 3 | require 'mobi/metadata_streams' 4 | require 'mobi/header/palm_doc_header' 5 | require 'mobi/header/mobi_header' 6 | require 'mobi/header/exth_header' 7 | 8 | module Mobi 9 | 10 | def self.metadata(file) 11 | Mobi::Metadata.new(file) 12 | end 13 | 14 | end

# -------------------------------------------------------------------------------
# /lib/mobi/header/exth_header.rb:
# -------------------------------------------------------------------------------
module Mobi
  module Header
    # Public: Parses the EXTH header, the optional block of typed metadata
    # records (author, publisher, ISBN, ...) that follows the MOBI header.
    #
    # Every record type listed in RECORD_TYPES is exposed through a reader of
    # the same name. A reader returns nil when the record is absent.
    class ExthHeader
      # Public: EXTH record type codes mapped to the reader they populate.
      RECORD_TYPES = {
        100 => :author,
        101 => :publisher,
        102 => :imprint,
        103 => :description,
        104 => :isbn,
        105 => :subject,
        106 => :published_at,
        107 => :review,
        108 => :contributor,
        109 => :rights,
        110 => :subject_code,
        111 => :type,
        112 => :source,
        113 => :asin,
        114 => :version
      }.freeze

      # Internal: The four bytes that open an EXTH header.
      IDENTIFIER = 'EXTH'

      # Internal: Offset of the first record, i.e. the combined size of the
      # identifier, header length and record count fields (4 bytes each).
      RECORDS_OFFSET = 12

      attr_reader(*RECORD_TYPES.values)

      # Initialize the ExthHeader and read every known record.
      #
      # data - A StreamSlicer (or any object answering [offset, bytes] with a
      #        String) positioned at the first byte of the EXTH header.
      #
      # Returns self.
      def initialize(data)
        @data = data
        @record_count = read_record_count

        parse_records
      end

      private

      # Internal: Reads the number of records the header declares.
      #
      # The identifier is checked first: Metadata builds an ExthHeader for
      # every book, including books that have no EXTH block, and in that case
      # the bytes at this position are unrelated data that must not be
      # interpreted as a record count.
      #
      # Returns an Integer, 0 when no EXTH block is present.
      def read_record_count
        return 0 unless @data[0, 4] == IDENTIFIER

        raw = @data[8, 4]
        return 0 unless raw && raw.bytesize == 4

        raw.unpack('N').first
      end

      # Internal: Walks the records and stores the value of every known type
      # in the instance variable backing its reader. Unknown types are
      # skipped; a truncated or malformed record ends the walk.
      #
      # Returns nothing.
      def parse_records
        start = RECORDS_OFFSET

        @record_count.times do
          record = ExthRecord.new(@data, start)
          break unless record.valid?

          instance_variable_set("@#{record.name}", record.value) if record.name

          start += record.length
        end
      end

      # Internal: A single EXTH record: a 4 byte type code, a 4 byte total
      # length (which includes these 8 header bytes) and the payload.
      class ExthRecord
        # Internal: Size of the type code and length fields.
        HEADER_SIZE = 8

        attr_reader :code, :name, :length, :value

        # Initialize the record.
        #
        # data  - The EXTH data the record is read from.
        # start - The Integer offset of the record within data.
        #
        # Returns self.
        def initialize(data, start)
          header = data[start, HEADER_SIZE]
          return unless header && header.bytesize == HEADER_SIZE

          @code, @length = header.unpack('NN')
          @name = ExthHeader::RECORD_TYPES[@code]
          @value = data[start + HEADER_SIZE, @length - HEADER_SIZE] if valid?
        end

        # Internal: Was a complete record header read, and does the declared
        # length at least cover that header?
        #
        # Returns true or false.
        def valid?
          !@length.nil? && @length >= HEADER_SIZE
        end
      end
    end
  end
end

# -------------------------------------------------------------------------------
# /lib/mobi/header/mobi_header.rb:
# -------------------------------------------------------------------------------
module Mobi
  module Header
    # Public: Parses the MOBI header which follows the 16 bytes of the PalmDOC
    # header. All offsets used here are relative to the start of record 0.
    class MobiHeader
      # Public: Known MOBI type codes and their descriptions.
      MOBI_TYPES = {
        2   => 'MOBIpocket Book',
        3   => 'PalmDoc Book',
        4   => 'Audio',
        232 => 'MOBIpocket',
        248 => 'KF8',
        257 => 'News',
        258 => 'News Feed',
        259 => 'News_Magazine',
        513 => 'PICS',
        514 => 'WORD',
        515 => 'XLS',
        516 => 'PPT',
        517 => 'TEXT',
        518 => 'HTML'
      }.freeze

      # Public: Known text encoding codes and their descriptions.
      TEXT_ENCODINGS = {
        1252  => 'CP1252 (WinLatin1)',
        65001 => 'UTF-8'
      }.freeze

      # Initialize the MobiHeader.
      #
      # data - A StreamSlicer which starts at record 0 of the PalmDOC.
      #
      # Returns self.
      def initialize(data)
        @data = data
      end

      # A MOBI identifier.
      #
      # Returns a String.
      def identifier
        @identifier ||= @data[16, 4]
      end

      # The length of the MOBI header.
      #
      # Returns an Integer.
      def header_length
        @header_length ||= uint32_at(20)
      end

      # The kind of Mobipocket file as returned from byte code.
      #
      # Returns an Integer.
      def raw_mobi_type
        @raw_mobi_type ||= uint32_at(24)
      end

      # The kind of Mobipocket file.
      #
      # Returns a String.
      # Raises KeyError if the type code is not listed in MOBI_TYPES.
      def mobi_type
        MOBI_TYPES.fetch(raw_mobi_type)
      end

      # The text encoding as returned from byte code.
      #
      # Returns an Integer.
      def raw_text_encoding
        @raw_text_encoding ||= uint32_at(28)
      end

      # The text encoding.
      #
      # Returns a String.
      # Raises KeyError if the code is not listed in TEXT_ENCODINGS.
      def text_encoding
        TEXT_ENCODINGS.fetch(raw_text_encoding)
      end

      # The unique ID.
      #
      # Returns an Integer.
      def unique_id
        @unique_id ||= uint32_at(32)
      end

      # The version of the MOBIpocket format used in this file.
      #
      # Returns an Integer.
      def file_version
        @file_version ||= uint32_at(36)
      end

      # The first record number (starting with 0) that is not the book's text.
      #
      # Returns an Integer.
      def first_non_book_index
        @first_non_book_index ||= uint32_at(80)
      end

      # Offset in record 0 (not from start of file) of the full name of the book.
      #
      # Returns an Integer.
      def full_name_offset
        @full_name_offset ||= uint32_at(84)
      end

      # Length in bytes of the full name of the book.
      #
      # Returns an Integer.
      def full_name_length
        @full_name_length ||= uint32_at(88)
      end

      # The raw book locale code. I believe this refers to a LCID code.
      #
      # The low byte is the main language: 09 = English.
      # The next byte is dialect: 08 = British, 04 = US.
      # Thus US English is 1033, UK English is 2057.
      #
      # Returns an Integer.
      def raw_locale_code
        @raw_locale_code ||= uint32_at(92)
      end

      # The minimum MOBIpocket version support needed to read this file.
      #
      # Returns an Integer.
      def minimum_supported_mobipocket_version
        @minimum_supported_mobipocket_version ||= uint32_at(104)
      end

      # The first record number (starting with 0) that contains an image. Image
      # records should be sequential.
      #
      # Returns an Integer.
      def first_image_index_record_number
        @first_image_index_record_number ||= uint32_at(108)
      end

      # The EXTH flag.
      #
      # If bit 6 (0x40) of the flags field is set, then there is an EXTH record.
      #
      # Returns an Integer, 1 or 0.
      def exth_flag
        @exth_flag ||= (uint32_at(128) >> 6) & 1
      end

      # Does the book have an EXTH header?
      #
      # Returns true if the book has an EXTH header.
      def exth_header?
        exth_flag == 1
      end

      private

      # Internal: Reads a big-endian unsigned 32 bit integer.
      #
      # offset - The Integer offset from the start of record 0.
      #
      # Returns an Integer.
      def uint32_at(offset)
        @data[offset, 4].unpack('N').first
      end
    end
  end
end

# -------------------------------------------------------------------------------
# /lib/mobi/header/palm_doc_header.rb:
# -------------------------------------------------------------------------------
module Mobi
  module Header
    # Public: Parses the 16 byte PalmDOC header found at the start of record 0.
    class PalmDocHeader
      # Public: Known compression type codes and their descriptions.
      COMPRESSION_TYPES = {
        1     => 'None',
        2     => 'PalmDOC',
        17480 => 'HUFF/CDIC'
      }.freeze

      # Public: Known encryption type codes and their descriptions.
      ENCRYPTION_TYPES = {
        0 => 'None',
        1 => 'Old MOBIpocket',
        2 => 'MOBIpocket'
      }.freeze

      # Initializes the PalmDOC header.
      #
      # data - A StreamSlicer which starts at the PalmDOC header.
      #
      # Returns self.
      def initialize(data)
        @data = data
      end

      # The compression type as returned from byte code.
      #
      # Returns an Integer.
      def raw_compression_type
        @raw_compression_type ||= uint16_at(0)
      end

      # The compression type.
      #
      # Returns a String.
      # Raises KeyError if the code is not listed in COMPRESSION_TYPES.
      def compression_type
        COMPRESSION_TYPES.fetch(raw_compression_type)
      end

      # The uncompressed length of the entire text of the book.
      #
      # Returns an Integer.
      def text_length
        @text_length ||= @data[4, 4].unpack('N').first
      end

      # Number of PDB records used for the text of the book.
      #
      # Returns an Integer.
      def record_count
        @record_count ||= uint16_at(8)
      end

      # Maximum size of each record containing text. Note that this always
      # returns 4096.
      #
      # Returns an Integer.
      def record_size
        @record_size ||= uint16_at(10)
      end

      # The encryption type as returned from byte code.
      #
      # Returns an Integer.
      def raw_encryption_type
        @raw_encryption_type ||= uint16_at(12)
      end

      # The encryption type.
      #
      # Returns a String.
      # Raises KeyError if the code is not listed in ENCRYPTION_TYPES.
      def encryption_type
        ENCRYPTION_TYPES.fetch(raw_encryption_type)
      end

      private

      # Internal: Reads a big-endian unsigned 16 bit integer.
      #
      # offset - The Integer offset from the start of the PalmDOC header.
      #
      # Returns an Integer.
      def uint16_at(offset)
        @data[offset, 2].unpack('n').first
      end
    end
  end
end

# -------------------------------------------------------------------------------
# /lib/mobi/metadata.rb:
# -------------------------------------------------------------------------------
require 'forwardable'

module Mobi
  # Public: Entry point for reading the metadata of a MOBI book. Builds the
  # PalmDOC, MOBI and EXTH header objects and delegates the EXTH record
  # readers (author, publisher, ...) to the EXTH header.
  class Metadata
    extend Forwardable

    # Public: Raised when the supplied file is not a MOBI book.
    class InvalidMobi < ArgumentError; end

    # Public: Names of the readers delegated to the EXTH header.
    EXTH_RECORDS = %w(author publisher imprint description isbn subject
                      published_at review contributor rights subject_code type
                      source asin version).freeze

    # Raw data stream
    attr_reader :data
    # Individual header classes for your reading pleasure.
    attr_reader :palm_doc_header, :mobi_header, :exth_header

    # Delegate EXTH records types to the EXTH header.
    def_delegators :@exth_header, *EXTH_RECORDS.map(&:to_sym)

    # Initialize the Metadata.
    #
    # file - A seekable, readable IO (e.g. a File) holding the MOBI book.
    #
    # Returns self.
    # Raises InvalidMobi if the file does not carry the BOOKMOBI signature.
    def initialize(file)
      @file = file
      @data = StreamSlicer.new(file)

      raise InvalidMobi, "The supplied file is not in a valid mobi format" unless bookmobi?

      @record_zero_stream = MetadataStreams.record_zero_stream(file)
      @palm_doc_header    = Header::PalmDocHeader.new @record_zero_stream
      @mobi_header        = Header::MobiHeader.new @record_zero_stream

      # Built for every book; ExthHeader yields nil readers when the book has
      # no EXTH block at this position.
      @exth_stream = MetadataStreams.exth_stream(file, @mobi_header.header_length)
      @exth_header = Header::ExthHeader.new @exth_stream
    end

    # Gets the title of the book.
    #
    # Returns a String.
    def title
      @title ||= @record_zero_stream[@mobi_header.full_name_offset,
                                     @mobi_header.full_name_length]
    end

    # Determines if the file is a valid mobi file.
    #
    # Returns true if the file is a valid MOBI.
    def bookmobi?
      @data[60, 8] == "BOOKMOBI"
    end
  end
end

# -------------------------------------------------------------------------------
# /lib/mobi/metadata_streams.rb:
# -------------------------------------------------------------------------------
module Mobi
  # Public: Builds StreamSlicers positioned at the parts of the file that the
  # header classes read.
  module MetadataStreams

    # Internal: Offset of the first entry of the record info list; record 0's
    # start offset is read from here.
    RECORD_INFO_OFFSET = 78

    # Internal: Distance from one record info entry to the next.
    RECORD_INFO_SIZE = 8

    # Internal: Length in bytes of the PalmDOC header that opens record 0.
    PALM_DOC_HEADER_LENGTH = 16

    # Creates a stream starting at the Record 0 in the PalmDOC.
    #
    # Returns a StreamSlicer.
    def self.record_zero_stream(file)
      start, stop = record_zero_endpoints(StreamSlicer.new(file))

      StreamSlicer.new(file, start, stop)
    end

    # Creates a stream starting at the EXTH header in Record 0.
    #
    # file          - The IO holding the MOBI book.
    # header_length - The Integer length of the MOBI header.
    #
    # Returns a StreamSlicer.
    def self.exth_stream(file, header_length)
      record_zero = record_zero_stream(file)
      exth_off    = record_zero.start + PALM_DOC_HEADER_LENGTH + header_length

      StreamSlicer.new(file, exth_off, record_zero.stop)
    end

    # Internal: Determines the start and end points of Record 0 in the
    # PalmDOC. The start point is returned as the first value in the array,
    # and the end point (the start offset of record 1) as the second value.
    #
    # This method is an implementation detail. It remains callable because the
    # bare `private` that used to precede it never applied to `def self.`
    # methods, so it has always been reachable from outside.
    #
    # data - A StreamSlicer over the whole file.
    #
    # Returns an Array.
    def self.record_zero_endpoints(data)
      start, = data[RECORD_INFO_OFFSET, 4].unpack('N')
      # The second argument is a byte count, not an end offset: 4 bytes only.
      stop,  = data[RECORD_INFO_OFFSET + RECORD_INFO_SIZE, 4].unpack('N')

      [start, stop]
    end

  end
end

# -------------------------------------------------------------------------------
# /lib/mobi/stream_slicer.rb:
# -------------------------------------------------------------------------------
module Mobi
  # Public: A window onto a seekable stream. Offsets passed to #[] are
  # relative to the window's start, so header parsers can use the offsets
  # given in the format documentation directly.
  class StreamSlicer

    attr_reader :stream, :length
    attr_accessor :start, :stop

    # Initialize the StreamSlicer.
    #
    # stream - A seekable, readable IO.
    # start  - The Integer offset in the stream where the window begins.
    # stop   - The Integer offset where the window ends. Defaults to the end
    #          of the stream, which is found by seeking there.
    #
    # Returns self.
    def initialize(stream, start = 0, stop = nil)
      @stream = stream
      @start  = start

      if stop.nil?
        stream.seek(0, IO::SEEK_END)
        stop = stream.tell
      end

      @stop   = stop
      @length = stop - start
    end

    # Reads bytes relative to the start of the window.
    #
    # offset - The Integer offset from the start of the window.
    # bytes  - The Integer number of bytes to read. A negative count reads the
    #          bytes that precede offset instead.
    #
    # Returns a String, or nil when bytes is 0 or the stream has no data at
    # that position.
    def [](offset, bytes = 1)
      first = offset
      last  = offset + bytes

      # Reverse if you want to pass in negative bytes
      first, last = last, first if bytes < 0

      size = last - first
      return nil if size <= 0

      @stream.seek(@start + first)
      @stream.read(size)
    end
  end
end

# -------------------------------------------------------------------------------- /mobi.gemspec: -------------------------------------------------------------------------------- 1 | # Generated by jeweler 2 | # DO NOT EDIT THIS FILE DIRECTLY 3 | # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec' 4 | # -*- encoding: utf-8 -*- 5 | 6 | Gem::Specification.new do |s| 7 | s.name = "mobi" 8 | s.version = "1.0.0" 9 | 10 | s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= 11 | s.authors = ["jkongie"] 12 | s.date = "2013-10-04" 13 | s.description = "Mobi is a Rubygem that allows you to easily read MOBI e-book format metadata."
14 | s.email = "jkongie@gmail.com" 15 | s.extra_rdoc_files = [ 16 | "LICENSE.txt", 17 | "README.md" 18 | ] 19 | s.files = [ 20 | ".document", 21 | ".rspec", 22 | "CHANGELOG.md", 23 | "Gemfile", 24 | "Gemfile.lock", 25 | "LICENSE.txt", 26 | "README.md", 27 | "Rakefile", 28 | "VERSION", 29 | "lib/mobi.rb", 30 | "lib/mobi/header/exth_header.rb", 31 | "lib/mobi/header/mobi_header.rb", 32 | "lib/mobi/header/palm_doc_header.rb", 33 | "lib/mobi/metadata.rb", 34 | "lib/mobi/metadata_streams.rb", 35 | "lib/mobi/stream_slicer.rb", 36 | "mobi.gemspec", 37 | "spec/fixtures/sherlock.mobi", 38 | "spec/lib/mobi/header/exth_header_spec.rb", 39 | "spec/lib/mobi/header/mobi_header_spec.rb", 40 | "spec/lib/mobi/header/palm_doc_header_spec.rb", 41 | "spec/lib/mobi/metadata_spec.rb", 42 | "spec/lib/mobi/stream_slicer_spec.rb", 43 | "spec/lib/mobi_spec.rb", 44 | "spec/spec_helper.rb" 45 | ] 46 | s.homepage = "http://github.com/jkongie/mobi" 47 | s.licenses = ["MIT"] 48 | s.require_paths = ["lib"] 49 | s.rubygems_version = "1.8.23" 50 | s.summary = "A Rubygem that inspects MOBI metadata." 51 | 52 | if s.respond_to? 
:specification_version then 53 | s.specification_version = 3 54 | 55 | if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then 56 | s.add_development_dependency(%q, ["~> 2.14.0"]) 57 | s.add_development_dependency(%q, ["~> 1.0.4"]) 58 | s.add_development_dependency(%q, ["~> 1.3.0"]) 59 | s.add_development_dependency(%q, ["~> 1.8.0"]) 60 | else 61 | s.add_dependency(%q, ["~> 2.14.0"]) 62 | s.add_dependency(%q, ["~> 1.0.4"]) 63 | s.add_dependency(%q, ["~> 1.3.0"]) 64 | s.add_dependency(%q, ["~> 1.8.0"]) 65 | end 66 | else 67 | s.add_dependency(%q, ["~> 2.14.0"]) 68 | s.add_dependency(%q, ["~> 1.0.4"]) 69 | s.add_dependency(%q, ["~> 1.3.0"]) 70 | s.add_dependency(%q, ["~> 1.8.0"]) 71 | end 72 | end 73 | 74 | -------------------------------------------------------------------------------- /spec/fixtures/sherlock.mobi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkongie/mobi/d584e65a3f3c70379c82a135e42e33a98a54d1fb/spec/fixtures/sherlock.mobi -------------------------------------------------------------------------------- /spec/lib/mobi/header/exth_header_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | require 'mobi/stream_slicer' 4 | require 'mobi/metadata_streams' 5 | require 'mobi/header/mobi_header' 6 | require 'mobi/header/exth_header' 7 | 8 | describe Mobi::Header::ExthHeader do 9 | 10 | before :all do 11 | file = File.open('spec/fixtures/sherlock.mobi') 12 | 13 | record_zero_stream = Mobi::MetadataStreams.record_zero_stream(file) 14 | mobi_header = Mobi::Header::MobiHeader.new record_zero_stream 15 | exth_stream = Mobi::MetadataStreams.exth_stream(file, mobi_header.header_length) 16 | 17 | @header = Mobi::Header::ExthHeader.new exth_stream 18 | end 19 | 20 | it 'gets the author' do 21 | expect(@header.author).to eq('Sir Arthur Conan Doyle') 22 | end 23 | 24 | it 'gets the book subject' do 25 | 
expect(@header.subject).to eq('Detective and mystery stories, English') 26 | end 27 | 28 | it 'gets the book rights' do 29 | expect(@header.rights).to eq('Public domain in the USA.') 30 | end 31 | 32 | it 'gets the book source' do 33 | expect(@header.source).to eq('http://www.gutenberg.org/files/2350/2350-h/2350-h.htm') 34 | end 35 | 36 | end 37 | -------------------------------------------------------------------------------- /spec/lib/mobi/header/mobi_header_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | require 'mobi/stream_slicer' 4 | require 'mobi/metadata_streams' 5 | require 'mobi/header/mobi_header' 6 | 7 | describe Mobi::Header::MobiHeader do 8 | before :all do 9 | file = File.open('spec/fixtures/sherlock.mobi') 10 | stream = Mobi::MetadataStreams.record_zero_stream(file) 11 | 12 | @header = Mobi::Header::MobiHeader.new stream 13 | end 14 | 15 | it 'gets the identifier' do 16 | expect(@header.identifier).to eq('MOBI') 17 | end 18 | 19 | it 'gets the length of the MOBI header' do 20 | expect(@header.header_length).to eq(232) 21 | end 22 | 23 | it 'gets the mobi type as an integer' do 24 | expect(@header.raw_mobi_type).to eq(2) 25 | end 26 | 27 | it 'gets the mobi type as a string' do 28 | expect(@header.mobi_type).to eq('MOBIpocket Book') 29 | end 30 | 31 | it 'gets the raw text encoding' do 32 | expect(@header.raw_text_encoding).to eq(65001) 33 | end 34 | 35 | it 'gets the text encoding' do 36 | expect(@header.text_encoding).to eq('UTF-8') 37 | end 38 | 39 | it 'gets the unique id' do 40 | expect(@header.unique_id).to eq(1532466569) 41 | end 42 | 43 | it 'gets the file version' do 44 | expect(@header.file_version).to eq(6) 45 | end 46 | 47 | it 'gets the first non book index' do 48 | expect(@header.first_non_book_index).to eq(16) 49 | end 50 | 51 | it 'gets the full name offset' do 52 | expect(@header.full_name_offset).to eq(688) 53 | end 54 | 55 | it 'gets the full name length' 
do 56 | expect(@header.full_name_length).to eq(12) 57 | end 58 | 59 | it 'gets the raw locale code' do 60 | expect(@header.raw_locale_code).to eq(9) 61 | end 62 | 63 | it 'gets the minimum supported mobipocket version' do 64 | expect(@header.minimum_supported_mobipocket_version).to eq(6) 65 | end 66 | 67 | it 'gets the first image index record number' do 68 | expect(@header.first_image_index_record_number).to eq(19) 69 | end 70 | 71 | it 'gets the EXTH header flag' do 72 | expect(@header.exth_flag).to eq(1) 73 | end 74 | 75 | it 'checks if there an EXTH header exists' do 76 | expect(@header.exth_header?).to be true 77 | end 78 | 79 | end 80 | -------------------------------------------------------------------------------- /spec/lib/mobi/header/palm_doc_header_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | require 'mobi/stream_slicer' 4 | require 'mobi/metadata_streams' 5 | require 'mobi/header/palm_doc_header' 6 | 7 | describe Mobi::Header::PalmDocHeader do 8 | 9 | before :all do 10 | file = File.open('spec/fixtures/sherlock.mobi') 11 | stream = Mobi::MetadataStreams.record_zero_stream(file) 12 | 13 | @header = Mobi::Header::PalmDocHeader.new stream 14 | end 15 | 16 | it 'gets the raw compression_type' do 17 | expect(@header.raw_compression_type).to eq(2) 18 | end 19 | 20 | it 'gets the compression type' do 21 | expect(@header.compression_type).to eq('PalmDOC') 22 | end 23 | 24 | it 'gets the text length' do 25 | expect(@header.text_length).to eq(57327) 26 | end 27 | 28 | it 'gets the record_count' do 29 | expect(@header.record_count).to eq(14) 30 | end 31 | 32 | it 'gets the record size' do 33 | expect(@header.record_size).to eq(4096) 34 | end 35 | 36 | it 'gets the raw encryption type' do 37 | expect(@header.raw_encryption_type).to eq(0) 38 | end 39 | 40 | it 'gets the encryption type' do 41 | expect(@header.encryption_type).to eq('None') 42 | end 43 | 44 | end 45 | 46 | 
-------------------------------------------------------------------------------- /spec/lib/mobi/metadata_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | require 'mobi' 4 | 5 | describe Mobi::Metadata do 6 | before :all do 7 | @file = File.open('spec/fixtures/sherlock.mobi') 8 | end 9 | 10 | 11 | context 'initialization' do 12 | it 'instantiates a StreamSlicer from the file' do 13 | metadata = Mobi::Metadata.new(@file) 14 | 15 | expect(metadata.data).to be_instance_of(Mobi::StreamSlicer) 16 | end 17 | 18 | it 'raises an exception if the book is not a mobi' do 19 | any_instance_of(Mobi::Metadata) do |m| 20 | mock(m).bookmobi? { false } 21 | end 22 | 23 | expect{ Mobi::Metadata.new(@file) }.to raise_exception(Mobi::Metadata::InvalidMobi) 24 | end 25 | 26 | context 'instantiating headers' do 27 | before :all do 28 | @metadata = Mobi::Metadata.new(@file) 29 | end 30 | 31 | it 'instantiates a palm doc header' do 32 | expect(@metadata.palm_doc_header).to be_a Mobi::Header::PalmDocHeader 33 | end 34 | 35 | it 'instantiates a mobi header' do 36 | expect(@metadata.mobi_header).to be_a Mobi::Header::MobiHeader 37 | end 38 | 39 | it 'instantiates a exth_header' do 40 | expect(@metadata.exth_header).to be_a Mobi::Header::ExthHeader 41 | end 42 | end 43 | end 44 | 45 | context 'instance' do 46 | before :all do 47 | @metadata = Mobi::Metadata.new(@file) 48 | end 49 | 50 | it 'gets the the title of the book' do 51 | expect(@metadata.title).to eq('His Last Bow') 52 | end 53 | 54 | it 'is a bookmobi' do 55 | expect(@metadata.bookmobi?).to be true 56 | end 57 | 58 | it 'defines delgate exth record method names to the exth record' do 59 | mock.proxy(@metadata.exth_header).author 60 | 61 | expect(@metadata.author).to eq('Sir Arthur Conan Doyle') 62 | end 63 | end 64 | 65 | end 66 | -------------------------------------------------------------------------------- /spec/lib/mobi/stream_slicer_spec.rb: 
-------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | require 'mobi/stream_slicer' 4 | 5 | describe Mobi::StreamSlicer do 6 | let(:file){ File.open('spec/fixtures/sherlock.mobi') } 7 | 8 | context "instantiation" do 9 | it "sets the start point to 0 if no start is provided" do 10 | ss = Mobi::StreamSlicer.new(file) 11 | expect(ss.start).to eq(0) 12 | end 13 | 14 | it "sets the end point to the file end if no stop is provided" do 15 | ss = Mobi::StreamSlicer.new(file) 16 | expect(ss.stop).to eq(111449) 17 | end 18 | 19 | it "sets the start and stop points to the arguments provided" do 20 | ss = Mobi::StreamSlicer.new(file, 1, 2) 21 | expect(ss.start).to eq(1) 22 | expect(ss.stop).to eq(2) 23 | end 24 | 25 | it "sets the length" do 26 | ss = Mobi::StreamSlicer.new(file, 1, 10) 27 | expect(ss.length).to eq(9) 28 | end 29 | 30 | it "sets the stream to the input file" do 31 | ss = Mobi::StreamSlicer.new(file) 32 | expect(ss.stream).to eq(file) 33 | end 34 | end 35 | 36 | context "#[]" do 37 | 38 | it "returns 1 byte" do 39 | ss = Mobi::StreamSlicer.new(file) 40 | expect(ss[0].length).to eq(1) 41 | end 42 | 43 | it "starts the search from the offset provided" do 44 | file.seek(5) 45 | value = file.read(1) 46 | ss = Mobi::StreamSlicer.new(file, 5) 47 | expect(ss[0]).to eq(value) 48 | end 49 | 50 | it "returns the correct number of bytes" do 51 | file.seek(5) 52 | value = file.read(2) 53 | ss = Mobi::StreamSlicer.new(file, 5) 54 | expect(ss[0, 2]).to eq(value) 55 | end 56 | 57 | it "seeks in reverse order" do 58 | file.seek(8) 59 | value = file.read(2) 60 | ss = Mobi::StreamSlicer.new(file, 5) 61 | expect(ss[5, -2]).to eq(value) 62 | end 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /spec/lib/mobi_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | require 'mobi' 4 | 5 | describe Mobi 
do 6 | it "instantiates a Mobi::Metadata object" do 7 | file = File.open('spec/fixtures/sherlock.mobi') 8 | expect(Mobi.metadata(file)).to be_an_instance_of(Mobi::Metadata) 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | RSpec.configure do |config| 2 | config.mock_with :rr 3 | end 4 | 5 | --------------------------------------------------------------------------------