├── .gitignore
├── LICENSE
├── README.md
├── chapter.go
├── cncx.go
├── exth.go
├── fcis.go
├── flis.go
├── header.go
├── idxt.go
├── indx.go
├── mint.go
├── mobi.go
├── pdf.go
├── pdh.go
├── peeker.go
├── ptagx.go
├── reader.go
├── tagx.go
├── util.go
├── writer.go
└── writer_indx.go


/.gitignore:
--------------------------------------------------------------------------------
1 | output.mobi
2 | sample/
3 | tools/
4 | .vscode/
5 | main._o


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017 Vladimir Konovalov. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are
 5 | met:
 6 | 
 7 |    * Redistributions of source code must retain the above copyright
 8 | notice, this list of conditions and the following disclaimer.
 9 |    * Redistributions in binary form must reproduce the above
10 | copyright notice, this list of conditions and the following disclaimer
11 | in the documentation and/or other materials provided with the
12 | distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
18 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Mobi
 2 | Writer/Reader for Mobi format.
 3 | 
 4 | **Note:** All testing was done on `Kindle Previewer` (Windows) and `Kindle Paperwhite (6th Gen)`.
 5 | 
 6 | ## Before You Start
 7 | - This is more or less WIP. Use at your own risk.
 8 | - This package was written for a specific task, thus there are certain limitations, such as:
 9 |     - `img` tags are ignored and not embedded.
10 |     - TOC depth does not go beyond 1. This means that, for now, you can only have chapters and sub-chapters; a sub-chapter cannot have its own sub-chapters.
11 | - HTML formatting is supported, but rendering is dependent on your eBook reader. (For Kindle see [Supported HTML Tags in Book Content](https://kdp.amazon.com/help?topicId=A1JPUWCSD6F59O))
12 | - Cover images should be in JPG (I have not tested GIF, which should be [supported](https://kdp.amazon.com/help?topicId=A1B6GKJ79HC7AN)).
13 | 	- **IMPORTANT**: Images resized using `image/jpeg` package will not display (in Kindle) because [JFIF APP0 marker segment](https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format#JFIF_APP0_marker_segment) is not generated by `image/jpeg` package.
14 | - The table of contents is automatically generated.
15 | 
16 | ## Usage
17 | ### Writer
18 | 
19 | 	m, err := mobi.NewWriter("output.mobi")
20 | 	if err != nil {
21 | 		panic(err)
22 | 	}
23 | 	
24 | 	m.Title("Book Title")
25 | 	m.Compression(mobi.CompressionNone) // LZ77 compression is also possible using  mobi.CompressionPalmDoc
26 | 
27 |     // Add cover image
28 |     m.AddCover("data/cover.jpg", "data/thumbnail.jpg")
29 | 
30 | 	// Meta data
31 | 	m.NewExthRecord(mobi.EXTH_DOCTYPE, "EBOK")
32 | 	m.NewExthRecord(mobi.EXTH_AUTHOR, "Book Author Name")
33 | 	// See exth.go for additional EXTH record IDs
34 | 
35 | 	// Add chapters and subchapters
36 |     ch1 := m.NewChapter("Chapter 1", []byte("Some text here"))
37 |     ch1.AddSubChapter("Chapter 1-1", []byte("Some text here"))
38 |     ch1.AddSubChapter("Chapter 1-2", []byte("Some text here"))
39 | 
40 | 	m.NewChapter("Chapter 2", []byte("Some text here")).AddSubChapter("Chapter 2-1", []byte("Some text here")).AddSubChapter("Chapter 2-2", []byte("Some text here"))
41 | 	m.NewChapter("Chapter 3", []byte("Some text here")).AddSubChapter("Chapter 3-1", []byte("Some text here"))
42 | 	m.NewChapter("Chapter 4", []byte("Some text here")).AddSubChapter("Chapter 4-1", []byte("Some text here"))
43 | 
44 |     // Output MOBI File
45 | 	m.Write()
46 | 
47 | ### Reader
48 | For now, Reader does not give any useful information.


--------------------------------------------------------------------------------
/chapter.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
 3 | import "bytes"
 4 | 
// mobiChapter is one entry of the book: either a top-level chapter created by
// MobiWriter.NewChapter or a sub-chapter created by AddSubChapter.
type mobiChapter struct {
	// Id is taken from the writer's running chapter counter by NewChapter.
	// AddSubChapter leaves it at zero.
	Id           int
	// Parent is the Id of the owning chapter; set by AddSubChapter.
	Parent       int
	// Title is emitted inside <h1>...</h1> by generateHTML.
	Title        string
	// RecordOffset is the output buffer length at the moment generateHTML
	// started writing this chapter.
	RecordOffset int
	// NOTE(review): not written anywhere in this file; presumably the offset
	// of the title label in the index data — confirm against the writer.
	LabelOffset  int
	// Len is the number of bytes generateHTML wrote for this chapter alone
	// (sub-chapters excluded).
	Len          int
	// Html is the chapter body after minimizeHTML.
	Html         []uint8
	// SubChapters holds nested chapters in insertion order.
	SubChapters  []*mobiChapter
}
15 | 
16 | func (w *MobiWriter) NewChapter(title string, text []byte) *mobiChapter {
17 | 	w.chapters = append(w.chapters, mobiChapter{Id: w.chapterCount, Title: title, Html: minimizeHTML(text)})
18 | 	w.chapterCount++
19 | 	return &w.chapters[len(w.chapters)-1]
20 | }
21 | 
22 | func (w *mobiChapter) AddSubChapter(title string, text []byte) *mobiChapter {
23 | 	w.SubChapters = append(w.SubChapters, &mobiChapter{Parent: w.Id, Title: title, Html: minimizeHTML(text)})
24 | 	return w
25 | }
26 | 
27 | func (w *mobiChapter) SubChapterCount() int {
28 | 	return len(w.SubChapters)
29 | }
30 | 
31 | func (w *mobiChapter) generateHTML(out *bytes.Buffer) {
32 | 	//Add check for unsupported HTML tags, characters, clean up HTML
33 | 	w.RecordOffset = out.Len()
34 | 	Len0 := out.Len()
35 | 	//fmt.Printf("Offset: --- %v %v \n", w.Offset, w.Title)
36 | 	out.WriteString("<h1>" + w.Title + "</h1>")
37 | 	out.Write(w.Html)
38 | 	out.WriteString("<mbp:pagebreak/>")
39 | 	w.Len = out.Len() - Len0
40 | 	for i, _ := range w.SubChapters {
41 | 		w.SubChapters[i].generateHTML(out)
42 | 	}
43 | }
44 | 


--------------------------------------------------------------------------------
/cncx.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
// mobiCncx describes a CNCX entry: a length-prefixed string ID followed by
// an entry count.
type mobiCncx struct {
	Len       uint8   `init:"Id"`       // Length of Cncx ID
	Id        []uint8 `format:"string"` // String ID
	NCX_Count uint16  // Number of IndxEntries
	// Pad with zeros to reach a multiple of 4
	/*
		0 - 2: IDLen 	Length of ID
		2 - *: ID

	*/
}
14 | 


--------------------------------------------------------------------------------
/exth.go:
--------------------------------------------------------------------------------
  1 | package mobi
  2 | 
// ExthType is the value kind of an EXTH record. Binary and numeric values
// are read/written big-endian; string values are read/written little-endian.
type ExthType uint32

// Value kinds an EXTH record can carry; mobiExth.Add switches on these to
// decide how to encode the value.
const (
	EXTH_TYPE_NUMERIC ExthType = 0
	EXTH_TYPE_STRING  ExthType = 1
	EXTH_TYPE_BINARY  ExthType = 2
)
 12 | 
 13 | // EXTH record IDs
 14 | const (
 15 | 	EXTH_DRMSERVER       uint32 = 1
 16 | 	EXTH_DRMCOMMERCE            = 2
 17 | 	EXTH_DRMEBOOKBASE           = 3
 18 | 	EXTH_TITLE                  = 99  /**< <dc:title> */
 19 | 	EXTH_AUTHOR                 = 100 /**< <dc:creator> */
 20 | 	EXTH_PUBLISHER              = 101 /**< <dc:publisher> */
 21 | 	EXTH_IMPRINT                = 102 /**< <imprint> */
 22 | 	EXTH_DESCRIPTION            = 103 /**< <dc:description> */
 23 | 	EXTH_ISBN                   = 104 /**< <dc:identifier opf:scheme="ISBN"> */
 24 | 	EXTH_SUBJECT                = 105 // Could appear multiple times /**< <dc:subject> */
 25 | 	EXTH_PUBLISHINGDATE         = 106 /**< <dc:date> */
 26 | 	EXTH_REVIEW                 = 107 /**< <review> */
 27 | 	EXTH_CONTRIBUTOR            = 108 /**< <dc:contributor> */
 28 | 	EXTH_RIGHTS                 = 109 /**< <dc:rights> */
 29 | 	EXTH_SUBJECTCODE            = 110 /**< <dc:subject BASICCode="subjectcode"> */
 30 | 	EXTH_TYPE                   = 111 /**< <dc:type> */
 31 | 	EXTH_SOURCE                 = 112 /**< <dc:source> */
 32 | 	EXTH_ASIN                   = 113 // Kindle Paperwhite labels books with "Personal" if they don't have this record.
 33 | 	EXTH_VERSION                = 114
 34 | 	EXTH_SAMPLE                 = 115 // 0x0001 if the book content is only a sample of the full book
 35 | 	EXTH_STARTREADING           = 116 // Position (4-byte offset) in file at which to open when first opened /**< Start reading */
 36 | 	EXTH_ADULT                  = 117 // Mobipocket Creator adds this if Adult only is checked on its GUI; contents: "yes" /**< <adult> */
 37 | 	EXTH_PRICE                  = 118 // As text, e.g. "4.99" /**< <srp> */
 38 | 	EXTH_CURRENCY               = 119 // As text, e.g. "USD" /**< <srp currency="currency"> */
 39 | 	EXTH_KF8BOUNDARY            = 121
 40 | 	EXTH_FIXEDLAYOUT            = 122 /**< <fixed-layout> */
 41 | 	EXTH_BOOKTYPE               = 123 /**< <book-type> */
 42 | 	EXTH_ORIENTATIONLOCK        = 124 /**< <orientation-lock> */
 43 | 	EXTH_COUNTRESOURCES         = 125
 44 | 	EXTH_ORIGRESOLUTION         = 126 /**< <original-resolution> */
 45 | 	EXTH_ZEROGUTTER             = 127 /**< <zero-gutter> */
 46 | 	EXTH_ZEROMARGIN             = 128 /**< <zero-margin> */
 47 | 	EXTH_KF8COVERURI            = 129
 48 | 	EXTH_RESCOFFSET             = 131
 49 | 	EXTH_REGIONMAGNI            = 132 /**< <region-mag> */
 50 | 
 51 | 	EXTH_DICTNAME     = 200 // As text /**< <DictionaryVeryShortName> */
 52 | 	EXTH_COVEROFFSET  = 201 // Add to first image field in Mobi Header to find PDB record containing the cover image/**< <EmbeddedCover> */
 53 | 	EXTH_THUMBOFFSET  = 202 // Add to first image field in Mobi Header to find PDB record containing the thumbnail cover image
 54 | 	EXTH_HASFAKECOVER = 203
 55 | 	EXTH_CREATORSOFT  = 204 //Known Values: 1=mobigen, 2=Mobipocket Creator, 200=kindlegen (Windows), 201=kindlegen (Linux), 202=kindlegen (Mac).
 56 | 	EXTH_CREATORMAJOR = 205
 57 | 	EXTH_CREATORMINOR = 206
 58 | 	EXTH_CREATORBUILD = 207
 59 | 	EXTH_WATERMARK    = 208
 60 | 	EXTH_TAMPERKEYS   = 209
 61 | 
 62 | 	EXTH_FONTSIGNATURE = 300
 63 | 
 64 | 	EXTH_CLIPPINGLIMIT  = 401 // Integer percentage of the text allowed to be clipped. Usually 10.
 65 | 	EXTH_PUBLISHERLIMIT = 402
 66 | 	EXTH_UNK403         = 403
 67 | 	EXTH_TTSDISABLE     = 404 // 1 - Text to Speech disabled; 0 - Text to Speech enabled
 68 | 	EXTH_UNK405         = 405 // 1 in this field seems to indicate a rental book
 69 | 	EXTH_RENTAL         = 406 // If this field is removed from a rental, the book says it expired in 1969
 70 | 	EXTH_UNK407         = 407
 71 | 	EXTH_UNK450         = 450
 72 | 	EXTH_UNK451         = 451
 73 | 	EXTH_UNK452         = 452
 74 | 	EXTH_UNK453         = 453
 75 | 
 76 | 	EXTH_DOCTYPE         = 501 // PDOC - Personal Doc; EBOK - ebook; EBSP - ebook sample;
 77 | 	EXTH_LASTUPDATE      = 502
 78 | 	EXTH_UPDATEDTITLE    = 503
 79 | 	EXTH_ASIN504         = 504 // ?? ASIN in this record.
 80 | 	EXTH_TITLEFILEAS     = 508
 81 | 	EXTH_CREATORFILEAS   = 517
 82 | 	EXTH_PUBLISHERFILEAS = 522
 83 | 	EXTH_LANGUAGE        = 524 /**< <dc:language> */
 84 | 	EXTH_ALIGNMENT       = 525 // ?? horizontal-lr in this record /**< <primary-writing-mode> */
 85 | 	EXTH_PAGEDIR         = 527
 86 | 	EXTH_OVERRIDEFONTS   = 528 /**< <override-kindle-fonts> */
 87 | 	EXTH_SORCEDESC       = 529
 88 | 	EXTH_DICTLANGIN      = 531
 89 | 	EXTH_DICTLANGOUT     = 532
 90 | 	EXTH_UNK534          = 534
 91 | 	EXTH_CREATORBUILDREV = 535
 92 | )
 93 | 
 94 | // EXTH Tag ID - Name - Type relationship
 95 | var ExthMeta = []mobiExthMeta{
 96 | 	{0, 0, ""},
 97 | 	{EXTH_SAMPLE, EXTH_TYPE_NUMERIC, "Sample"},
 98 | 	{EXTH_STARTREADING, EXTH_TYPE_NUMERIC, "Start offset"},
 99 | 	{EXTH_KF8BOUNDARY, EXTH_TYPE_NUMERIC, "K8 Boundary Offset"},
100 | 	{EXTH_COUNTRESOURCES, EXTH_TYPE_NUMERIC, "K8 Resources Count"}, // of , fonts, images
101 | 	{EXTH_RESCOFFSET, EXTH_TYPE_NUMERIC, "RESC Offset"},
102 | 	{EXTH_COVEROFFSET, EXTH_TYPE_NUMERIC, "Cover Offset"},
103 | 	{EXTH_THUMBOFFSET, EXTH_TYPE_NUMERIC, "Thumbnail Offset"},
104 | 	{EXTH_HASFAKECOVER, EXTH_TYPE_NUMERIC, "Has Fake Cover"},
105 | 	{EXTH_CREATORSOFT, EXTH_TYPE_NUMERIC, "Creator Software"},
106 | 	{EXTH_CREATORMAJOR, EXTH_TYPE_NUMERIC, "Creator Major Version"},
107 | 	{EXTH_CREATORMINOR, EXTH_TYPE_NUMERIC, "Creator Minor Version"},
108 | 	{EXTH_CREATORBUILD, EXTH_TYPE_NUMERIC, "Creator Build Number"},
109 | 	{EXTH_CLIPPINGLIMIT, EXTH_TYPE_NUMERIC, "Clipping Limit"},
110 | 	{EXTH_PUBLISHERLIMIT, EXTH_TYPE_NUMERIC, "Publisher Limit"},
111 | 	{EXTH_TTSDISABLE, EXTH_TYPE_NUMERIC, "Text-to-Speech Disabled"},
112 | 	{EXTH_RENTAL, EXTH_TYPE_NUMERIC, "Rental Indicator"},
113 | 	{EXTH_DRMSERVER, EXTH_TYPE_STRING, "DRM Server ID"},
114 | 	{EXTH_DRMCOMMERCE, EXTH_TYPE_STRING, "DRM Commerce ID"},
115 | 	{EXTH_DRMEBOOKBASE, EXTH_TYPE_STRING, "DRM Ebookbase Book ID"},
116 | 	{EXTH_TITLE, EXTH_TYPE_STRING, "Title"},
117 | 	{EXTH_AUTHOR, EXTH_TYPE_STRING, "Creator"},
118 | 	{EXTH_PUBLISHER, EXTH_TYPE_STRING, "Publisher"},
119 | 	{EXTH_IMPRINT, EXTH_TYPE_STRING, "Imprint"},
120 | 	{EXTH_DESCRIPTION, EXTH_TYPE_STRING, "Description"},
121 | 	{EXTH_ISBN, EXTH_TYPE_STRING, "ISBN"},
122 | 	{EXTH_SUBJECT, EXTH_TYPE_STRING, "Subject"},
123 | 	{EXTH_PUBLISHINGDATE, EXTH_TYPE_STRING, "Published"},
124 | 	{EXTH_REVIEW, EXTH_TYPE_STRING, "Review"},
125 | 	{EXTH_CONTRIBUTOR, EXTH_TYPE_STRING, "Contributor"},
126 | 	{EXTH_RIGHTS, EXTH_TYPE_STRING, "Rights"},
127 | 	{EXTH_SUBJECTCODE, EXTH_TYPE_STRING, "Subject Code"},
128 | 	{EXTH_TYPE, EXTH_TYPE_STRING, "Type"},
129 | 	{EXTH_SOURCE, EXTH_TYPE_STRING, "Source"},
130 | 	{EXTH_ASIN, EXTH_TYPE_STRING, "ASIN"},
131 | 	{EXTH_VERSION, EXTH_TYPE_STRING, "Version Number"},
132 | 	{EXTH_ADULT, EXTH_TYPE_STRING, "Adult"},
133 | 	{EXTH_PRICE, EXTH_TYPE_STRING, "Price"},
134 | 	{EXTH_CURRENCY, EXTH_TYPE_STRING, "Currency"},
135 | 	{EXTH_FIXEDLAYOUT, EXTH_TYPE_STRING, "Fixed Layout"},
136 | 	{EXTH_BOOKTYPE, EXTH_TYPE_STRING, "Book Type"},
137 | 	{EXTH_ORIENTATIONLOCK, EXTH_TYPE_STRING, "Orientation Lock"},
138 | 	{EXTH_ORIGRESOLUTION, EXTH_TYPE_STRING, "Original Resolution"},
139 | 	{EXTH_ZEROGUTTER, EXTH_TYPE_STRING, "Zero Gutter"},
140 | 	{EXTH_ZEROMARGIN, EXTH_TYPE_STRING, "Zero margin"},
141 | 	{EXTH_KF8COVERURI, EXTH_TYPE_STRING, "K8 Masthead/Cover Image"},
142 | 	{EXTH_REGIONMAGNI, EXTH_TYPE_STRING, "Region Magnification"},
143 | 	{EXTH_DICTNAME, EXTH_TYPE_STRING, "Dictionary Short Name"},
144 | 	{EXTH_WATERMARK, EXTH_TYPE_STRING, "Watermark"},
145 | 	{EXTH_DOCTYPE, EXTH_TYPE_STRING, "Document Type"},
146 | 	{EXTH_LASTUPDATE, EXTH_TYPE_STRING, "Last Update Time"},
147 | 	{EXTH_UPDATEDTITLE, EXTH_TYPE_STRING, "Updated Title"},
148 | 	{EXTH_ASIN504, EXTH_TYPE_STRING, "ASIN (504)"},
149 | 	{EXTH_TITLEFILEAS, EXTH_TYPE_STRING, "Title File As"},
150 | 	{EXTH_CREATORFILEAS, EXTH_TYPE_STRING, "Creator File As"},
151 | 	{EXTH_PUBLISHERFILEAS, EXTH_TYPE_STRING, "Publisher File As"},
152 | 	{EXTH_LANGUAGE, EXTH_TYPE_STRING, "Language"},
153 | 	{EXTH_ALIGNMENT, EXTH_TYPE_STRING, "Primary Writing Mode"},
154 | 	{EXTH_PAGEDIR, EXTH_TYPE_STRING, "Page Progression Direction"},
155 | 	{EXTH_OVERRIDEFONTS, EXTH_TYPE_STRING, "Override Kindle Fonts"},
156 | 	{EXTH_SORCEDESC, EXTH_TYPE_STRING, "Original Source description"},
157 | 	{EXTH_DICTLANGIN, EXTH_TYPE_STRING, "Dictionary Input Language"},
158 | 	{EXTH_DICTLANGOUT, EXTH_TYPE_STRING, "Dictionary output Language"},
159 | 	{EXTH_UNK534, EXTH_TYPE_STRING, "Unknown (534)"},
160 | 	{EXTH_CREATORBUILDREV, EXTH_TYPE_STRING, "Kindlegen BuildRev Number"},
161 | 	{EXTH_TAMPERKEYS, EXTH_TYPE_BINARY, "Tamper Proof Keys"},
162 | 	{EXTH_FONTSIGNATURE, EXTH_TYPE_BINARY, "Font Signature"},
163 | 	{EXTH_UNK403, EXTH_TYPE_BINARY, "Unknown (403)"},
164 | 	{EXTH_UNK405, EXTH_TYPE_BINARY, "Unknown (405)"},
165 | 	{EXTH_UNK407, EXTH_TYPE_BINARY, "Unknown (407)"},
166 | 	{EXTH_UNK450, EXTH_TYPE_BINARY, "Unknown (450)"},
167 | 	{EXTH_UNK451, EXTH_TYPE_BINARY, "Unknown (451)"},
168 | 	{EXTH_UNK452, EXTH_TYPE_BINARY, "Unknown (452)"},
169 | 	{EXTH_UNK453, EXTH_TYPE_BINARY, "Unknown (453)"}}
170 | 
// mobiExth is the EXTH header: a fixed 12-byte prefix (identifier, length,
// record count) followed by the records themselves.
type mobiExth struct {
	Identifier   [4]uint8 `format:"string"`
	HeaderLenght uint32   // The length of the EXTH header, including the previous 4 bytes - but not including the final padding.
	RecordCount  uint32   // The number of records in the EXTH header. The rest of the EXTH header consists of repeated EXTH records to the end of the EXTH length.

	Records []mobiExthRecord // RecordCount entries

	// []uint8 - length X, where X is the number of bytes needed to bring the whole EXTH record to a multiple of 4

	// According to the wiki, the padding null bytes are not included in the header length calculation, but from what
	// I see in mobi files, those bytes are included in the total.
}
183 | 
// mobiExthRecord is a single EXTH record: an 8-byte type/length prefix
// followed by the raw value bytes.
type mobiExthRecord struct {
	RecordType   uint32 // EXTH record type. Just a number identifying what's stored in the record
	RecordLength uint32 // Length of the EXTH record, including the 8 bytes of the type and length fields
	Value        []uint8
}
189 | 
// mobiExthMeta ties an EXTH record ID to its value kind and a readable name;
// ExthMeta is the table of these.
// Copied from https://github.com/bfabiszewski/libmobi/blob/f4f75982f0c00b592c418bfcf3f9920600e81573/src/util.c
type mobiExthMeta struct {
	ID   uint32
	Type ExthType
	Name string
}
196 | 
197 | func (w *mobiExth) GetHeaderLenght() int {
198 | 	elen := 12
199 | 
200 | 	for _, k := range w.Records {
201 | 		elen += int(k.RecordLength)
202 | 	}
203 | 
204 | 	Padding := elen % 4
205 | 	elen += Padding
206 | 
207 | 	return elen
208 | }
209 | 
210 | func (e *mobiExth) Add(recType uint32, Value interface{}) *mobiExth {
211 | 	e.RecordCount++
212 | 
213 | 	var MetaType = getExthMetaByTag(recType)
214 | 	var ExthRec mobiExthRecord = mobiExthRecord{RecordType: recType}
215 | 
216 | 	switch MetaType.Type {
217 | 	case EXTH_TYPE_BINARY:
218 | 		ExthRec.Value = Value.([]uint8)
219 | 	case EXTH_TYPE_NUMERIC:
220 | 		var castValue uint32
221 | 		switch Value.(type) {
222 | 		case int:
223 | 			castValue = uint32(Value.(int))
224 | 		case uint16:
225 | 			castValue = uint32(Value.(uint16))
226 | 		case uint32:
227 | 			castValue = uint32(Value.(uint32))
228 | 		case uint64:
229 | 			castValue = uint32(Value.(uint64))
230 | 		case int16:
231 | 			castValue = uint32(Value.(int16))
232 | 		case int32:
233 | 			castValue = uint32(Value.(int32))
234 | 		case int64:
235 | 			castValue = uint32(Value.(int64))
236 | 		default:
237 | 			panic("EXTH_TYPE_NUMERIC type is unsupported")
238 | 		}
239 | 		ExthRec.Value = int32ToBytes(castValue)
240 | 	case EXTH_TYPE_STRING:
241 | 		switch Value.(type) {
242 | 		case []uint8:
243 | 			ExthRec.Value = Value.([]uint8)
244 | 		case string:
245 | 			ExthRec.Value = []uint8(Value.(string))
246 | 		}
247 | 	default:
248 | 		panic("Unknown EXTH meta type")
249 | 	}
250 | 
251 | 	ExthRec.RecordLength = uint32(8 + len(ExthRec.Value))
252 | 	e.Records = append(e.Records, ExthRec)
253 | 	return e
254 | }
255 | 


--------------------------------------------------------------------------------
/fcis.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"encoding/binary"
 6 | )
 7 | 
// mobiFcis is the on-disk layout of the FCIS record. All fields except
// Fixed4 carry fixed values; generateFcis fills them in.
type mobiFcis struct { // RECORD -1
	Identifier uint32 // "FCIS"
	Fixed0     uint32 // fixed value 20
	Fixed1     uint32 // fixed value 16
	Fixed2     uint32 // fixed value 1
	Fixed3     uint32 // fixed value 0
	Fixed4     uint32 // text length (the same value as "text length" in the PalmDoc header)
	Fixed5     uint32 // fixed value 0
	Fixed6     uint32 // fixed value 32
	Fixed7     uint32 // fixed value 8
	Fixed8     uint16 // fixed value 1
	Fixed9     uint16 // fixed value 1
	Fixed10    uint32 // fixed value 0
} // FCISRECORD
22 | 
23 | func (w *MobiWriter) generateFcis() []byte {
24 | 	c := mobiFcis{}
25 | 	c.Identifier = 1178814803 //StringToBytes("FLIS", &c.Identifier)
26 | 	c.Fixed0 = 20
27 | 	c.Fixed1 = 16
28 | 	c.Fixed2 = 1
29 | 	//c.Fixed3
30 | 	c.Fixed4 = w.Pdh.TextLength
31 | 	//c.Fixed5 = 0
32 | 	c.Fixed6 = 32
33 | 	c.Fixed7 = 8
34 | 	c.Fixed8 = 1
35 | 	c.Fixed9 = 1
36 | 	//c.Fixed10 = 0
37 | 
38 | 	buf := new(bytes.Buffer)
39 | 	binary.Write(buf, binary.BigEndian, c)
40 | 	return buf.Bytes()
41 | }
42 | 


--------------------------------------------------------------------------------
/flis.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"encoding/binary"
 6 | )
 7 | 
// mobiFlis is the on-disk layout of the FLIS record. Every field carries a
// fixed value; generateFlis fills them in.
type mobiFlis struct { // RECORD -2
	Identifier uint32 // "FLIS"
	Fixed0     uint32 // fixed value 8
	Fixed1     uint16 // fixed value 65
	Fixed2     uint16 // fixed value 0
	Fixed3     uint32 // fixed value 0
	Fixed4     uint32 // fixed value -1
	Fixed5     uint16 // fixed value 1
	Fixed6     uint16 // fixed value 3
	Fixed7     uint32 // fixed value 3
	Fixed8     uint32 // fixed value 1
	Fixed9     uint32 // fixed value -1
} // FLISRECORD
21 | 
22 | func (w *MobiWriter) generateFlis() []byte {
23 | 	c := mobiFlis{}
24 | 	c.Identifier = 1179404627 //StringToBytes("FLIS", &c.Identifier)
25 | 	c.Fixed0 = 8
26 | 	c.Fixed1 = 65
27 | 	//c.Fixed2
28 | 	//c.Fixed3
29 | 	c.Fixed4 = 4294967295
30 | 	c.Fixed5 = 1
31 | 	c.Fixed6 = 3
32 | 	c.Fixed7 = 3
33 | 	c.Fixed8 = 1
34 | 	c.Fixed9 = 4294967295
35 | 
36 | 	buf := new(bytes.Buffer)
37 | 	binary.Write(buf, binary.BigEndian, c)
38 | 	return buf.Bytes()
39 | }
40 | 


--------------------------------------------------------------------------------
/header.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
 3 | type mobiHeader struct {
 4 | 	Identifier          [4]uint8 `format:"string"` // Must be characters MOBI
 5 | 	HeaderLength        uint32   // The length of the MOBI header, including the previous 4 bytes
 6 | 	MobiType            uint32   // Mobi type enum
 7 | 	TextEncoding        uint32   // 1252 = CP1252 (WinLatin1); 65001 = UTF-8
 8 | 	UniqueID            uint32   // Some kind of unique ID number (random?)
 9 | 	FileVersion         uint32   // Version of the Mobipocket format used in this file. //If FileVersion == 8. Then it's KF8
10 | 	OrthographicIndex   uint32   // Section number of orthographic meta index. 0xFFFFFFFF if index is not available.
11 | 	InflectionIndex     uint32   // Section number of inflection meta index. 0xFFFFFFFF if index is not available.
12 | 	IndexNames          uint32   // 0xFFFFFFFF if index is not available.
13 | 	IndexKeys           uint32   // 0xFFFFFFFF if index is not available.
14 | 	ExtraIndex0         uint32   // Section number of extra 0 meta index. 0xFFFFFFFF if index is not available.
15 | 	ExtraIndex1         uint32   // Section number of extra 1 meta index. 0xFFFFFFFF if index is not available.
16 | 	ExtraIndex2         uint32   // Section number of extra 2 meta index. 0xFFFFFFFF if index is not available.
17 | 	ExtraIndex3         uint32   // Section number of extra 3 meta index. 0xFFFFFFFF if index is not available.
18 | 	ExtraIndex4         uint32   // Section number of extra 4 meta index. 0xFFFFFFFF if index is not available.
19 | 	ExtraIndex5         uint32   // Section number of extra 5 meta index. 0xFFFFFFFF if index is not available.
20 | 	FirstNonBookIndex   uint32   // First record number (starting with 0) that's not the book's text
21 | 	FullNameOffset      uint32   // Offset in record 0 (not from start of file) of the full name of the book
22 | 	FullNameLength      uint32   // Length in bytes of the full name of the book
23 | 	Locale              uint32   // Book locale code. Low byte is main language 09=English, next byte is dialect, 08=British, 04=US. Thus US English is 1033, UK English is 2057.
24 | 	InputLanguage       uint32   //Input language for a dictionary
25 | 	OutputLanguage      uint32   //Output language for a dictionary
26 | 	MinVersion          uint32   //Minimum mobipocket version support needed to read this file.
27 | 	FirstImageIndex     uint32   //First record number (starting with 0) that contains an image. Image records should be sequential.
28 | 	HuffmanRecordOffset uint32   //The record number of the first huffman compression record.
29 | 	HuffmanRecordCount  uint32   //The number of huffman compression records.
30 | 	HuffmanTableOffset  uint32
31 | 	HuffmanTableLength  uint32
32 | 	ExthFlags           uint32   //Bitfield. If bit 6 (0x40) is set, then there's an EXTH record
33 | 	Unknown1            [32]byte //Unknown values
34 | 	DrmOffset           uint32   //Offset to DRM key info in DRMed files. 0xFFFFFFFF if no DRM
35 | 	DrmCount            uint32   //Number of entries in DRM info. 0xFFFFFFFF if no DRM
36 | 	DrmSize             uint32   //Number of bytes in DRM info.
37 | 	DrmFlags            uint32   //Some flags concerning the DRM info.
38 | 	Unknown0            [12]byte //Unknown values
39 | 
40 | 	// If it's KF8
41 | 	// 		FdstRecordIndex uint32
42 | 	// else
43 | 	FirstContentRecordNumber uint16 //Number of first text record. Normally 1.
44 | 	LastContentRecordNumber  uint16 //Number of last image record or number of last text record if it contains no images. Includes Image, DATP, HUFF, DRM.
45 | 	//End else
46 | 
47 | 	Unknown6        uint32 //FdstRecordCount? //Use 0x00000001.
48 | 	FcisRecordIndex uint32
49 | 	FcisRecordCount uint32 //Use 0x00000001. // Always 1
50 | 	FlisRecordIndex uint32
51 | 	FlisRecordCount uint32 //Use 0x00000001. // Always 1
52 | 	Unknown7        uint32
53 | 	Unknown8        uint32
54 | 	SrcsRecordIndex uint32
55 | 	SrcsRecordCount uint32
56 | 	Unknown9        uint32
57 | 	Unknown10       uint32
58 | 
59 | 	// A set of binary flags, some of which indicate extra data at the end of each text block. This only
60 | 	// seems to be valid for Mobipocket format version 5 and 6 (and higher?), when the header length is 228 (0xE4) or 232 (0xE8).
61 | 	// 		bit 1 (0x1): <extra multibyte bytes><size>
62 | 	// 		bit 2 (0x2): <TBS indexing description of this HTML record><size>
63 | 	// 		bit 3 (0x4): <uncrossable breaks><size>
64 | 	// Setting bit 2 (0x2) disables <guide><reference type="start"> functionality.
65 | 	ExtraRecordDataFlags uint32 `format:"bits"`
66 | 	IndxRecodOffset      uint32 //(If not 0xFFFFFFFF) The record number of the first INDX record created from an ncx file.
67 | 
68 | 	//If header lenght is 248 then there's 16 extra bytes.
69 | 
70 | 	/*
71 | 			If KF8
72 | 				FragmentIndex uint32
73 | 				SkeletonIndex uint32
74 | 			Else
75 | 				unknown14 uint32
76 | 				unknown15 uint32
77 | 
78 | 			DatpIndex uint32
79 | 
80 | 			If KF8
81 | 				GuideIndex uint32
82 | 			Else
83 | 				unknown16 uint32
84 | 
85 | 			unknown17 uint32
86 | 		    unknown18 uint32
87 | 		    unknown19 uint32 ?
88 | 		    unknown20 uint32 ?
89 | 	*/
90 | }
91 | 


--------------------------------------------------------------------------------
/idxt.go:
--------------------------------------------------------------------------------
1 | package mobi
2 | 
// mobiIdxt is the IDXT section: a 4-byte identifier followed by a list of
// 16-bit offsets.
type mobiIdxt struct {
	Identifier [4]byte  `format:"string"`
	Offset     []uint16 /* mobiIndx.HeaderLenght + len(mobiTagx.HeaderLenght) */
	//Unk1       uint16   // Pad with zeros to make it a multiple of 4?
}
8 | 


--------------------------------------------------------------------------------
/indx.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
// Values for mobiIndx.Indx_Type.
const (
	INDX_TYPE_NORMAL     uint32 = 0
	INDX_TYPE_INFLECTION uint32 = 2
)
 7 | 
// mobiIndx is the fixed-size INDX record header. The numbers in the field
// comments are byte offsets from the start of the header; the last field
// ends at byte 192.
type mobiIndx struct {
	Identifier         [4]byte `format:"string"`
	HeaderLen          uint32
	Unk0               uint32
	Unk1               uint32 /* 1 when inflection is normal? */
	Indx_Type          uint32 /* 12: 0 - normal, 2 - inflection */
	Idxt_Offset        uint32 /* 20: IDXT offset */
	Idxt_Count         uint32 /* 24: entries count */
	Idxt_Encoding      uint32 /* 28: encoding */
	SetUnk2            uint32 //-1
	Idxt_Entry_Count   uint32 /* 36: total entries count */
	Ordt_Offset        uint32
	Ligt_Offset        uint32
	Ligt_Entries_Count uint32 /* 48: LIGT entries count */
	Cncx_Records_Count uint32 /* 52: CNCX entries count */
	Unk3               [108]byte
	Ordt_Type          uint32 /* 164: ORDT type */
	Ordt_Entries_Count uint32 /* 168: ORDT entries count */
	Ordt1_Offset       uint32 /* 172: ORDT1 offset */
	Ordt2_Offset       uint32 /* 176: ORDT2 offset */
	Tagx_Offset        uint32 /* 180: */
	Unk4               uint32 /* 184: */ /* ? Default index string offset ? */
	Unk5               uint32 /* 188: */ /* ? Default index string length ? */
}
32 | 
// mobiIndxEntry pairs a one-byte entry ID with a 32-bit value.
// NOTE(review): presumably one tag/value pair of an INDX entry — confirm
// against the index writer.
type mobiIndxEntry struct {
	EntryID    uint8
	EntryValue uint32
}
37 | 


--------------------------------------------------------------------------------
/mint.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
 3 | type Mint int
 4 | 
 5 | func (i Mint) UInt16() uint16 {
 6 | 	return uint16(i)
 7 | }
 8 | 
 9 | func (i Mint) UInt32() uint32 {
10 | 	return uint32(i)
11 | }
12 | 
13 | func (i Mint) Int() int {
14 | 	return int(i)
15 | }
16 | 


--------------------------------------------------------------------------------
/mobi.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
 3 | import (
 4 | 	"os"
 5 | 	"reflect"
 6 | )
 7 | 
// Mobi groups the underlying file handle with the individual sections of a
// MOBI file: the Palm database header, record offsets, PalmDoc/MOBI/EXTH
// headers and the index structures.
type Mobi struct {
	file     *os.File
	fileStat os.FileInfo

	Pdf     mobiPDF            // Palm Database Format: http://wiki.mobileread.com/wiki/PDB#Palm_Database_Format
	Offsets []mobiRecordOffset // Offsets for all the records, counted from the beginning of the file.
	Pdh     mobiPDH

	Header mobiHeader
	Exth   mobiExth

	// Index
	Indx  []mobiIndx
	Idxt  mobiIdxt
	Cncx  mobiCncx
	Tagx  mobiTagx
	PTagx []mobiPTagx
}
26 | 
// Fixed sizes used by the format.
// NOTE(review): units are presumably bytes (MOBI_INDX_HEADER_LEN matches the
// 192-byte mobiIndx layout) — confirm for the others.
const (
	MOBI_MAX_RECORD_SIZE    = 4096
	MOBI_PALMDB_HEADER_LEN  = 78
	MOBI_INDX_HEADER_LEN    = 192
	MOBI_PALMDOC_HEADER_LEN = 16
	MOBI_MOBIHEADER_LEN     = 232
)
34 | 
// mobiRecordOffset is one entry of the record list: where a record starts
// in the file plus its attribute byte and ID.
type mobiRecordOffset struct {
	Offset     uint32 // The offset of record {N} from the start of the PDB
	Attributes uint8  // Bit field. The least significant four bits are used to represent the category values.
	Skip       uint8  // UniqueID is supposed to take 3 bytes, but for our intended purposes uint16(UniqueID) should work. Let me know if there are any mobi files with more than 32767 records
	UniqueID   uint16 // The unique ID for this record. Often just a sequential count from 0
}
41 | 
// Magic identifiers (ASCII signatures) used to recognise sections in a file.
const (
	magicMobi     mobiMagicType = "MOBI"
	magicExth     mobiMagicType = "EXTH"
	magicHuff     mobiMagicType = "HUFF"
	magicCdic     mobiMagicType = "CDIC"
	magicFdst     mobiMagicType = "FDST"
	magicIdxt     mobiMagicType = "IDXT"
	magicIndx     mobiMagicType = "INDX"
	magicLigt     mobiMagicType = "LIGT"
	magicOrdt     mobiMagicType = "ORDT"
	magicTagx     mobiMagicType = "TAGX"
	magicFont     mobiMagicType = "FONT"
	magicAudi     mobiMagicType = "AUDI"
	magicVide     mobiMagicType = "VIDE"
	magicResc     mobiMagicType = "RESC"
	magicBoundary mobiMagicType = "BOUNDARY"
)
59 | 
// mobiMagicType is the textual signature that identifies a section.
type mobiMagicType string

// String returns the magic as an ordinary string.
func (m mobiMagicType) String() string {
	return string(m)
}

// WriteTo copies the magic, byte by byte, into the byte array that output
// points to (for example a *[4]byte).
//
// It panics when the array length differs from the length of the magic, and
// reflect itself panics when output is not a pointer to an array of bytes.
func (m mobiMagicType) WriteTo(output interface{}) {
	target := reflect.ValueOf(output).Elem()
	size := target.Type().Len()

	if size != len(m) {
		panic("Magic lenght is larger than target size")
	}

	// Lengths are equal here, so every index is valid for both sides.
	for pos := 0; pos < size; pos++ {
		target.Index(pos).Set(reflect.ValueOf(m[pos]))
	}
}
80 | 
// Text encoding identifiers.
const (
	MOBI_ENC_CP1252 = 1252  // cp-1252 encoding
	MOBI_ENC_UTF8   = 65001 // utf-8 encoding
	MOBI_ENC_UTF16  = 65002 // utf-16 encoding
)


--------------------------------------------------------------------------------
/pdf.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
// mobiPDF is the Palm Database Format header found at the very start of the
// file. It is read with encoding/binary in big-endian order, so field order
// and widths define the on-disk layout.
type mobiPDF struct {
	DatabaseName       [32]byte `format:"string"` // Database name. This name is 0 terminated.
	FileAttributes     uint16
	Version            uint16 // File version
	CreationTime       uint32 `format:"date"` // Timestamp. According to the wiki it is supposed to be in Mac format, but Mobi files seen in practice use Unix. Not sure if it's important.
	ModificationTime   uint32 `format:"date"` // Timestamp
	BackupTime         uint32 `format:"date"` // Timestamp
	ModificationNumber uint32
	AppInfo            uint32
	SortInfo           uint32
	Type               [4]byte `format:"string"` // BOOK
	Creator            [4]byte `format:"string"` // MOBI
	UniqueIDSeed       uint32  // Used internally to identify record
	NextRecordList     uint32  // Only used when in-memory on Palm OS. Always set to zero in stored files.
	RecordsNum         uint16  // Number of records in the file. Records are stored as an array starting with 0. RecordsNum is the total count of records, not the last ID.
}
19 | 


--------------------------------------------------------------------------------
/pdh.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
// mobiPDHCompression identifies how the text records are compressed; it is
// the type of the Compression field of the PalmDoc header.
type mobiPDHCompression uint16

// Compression Enum
const (
	// CompressionNone uint16(1). Text is stored without any compression
	CompressionNone mobiPDHCompression = 1
	// CompressionPalmDoc uint16(2). Text is compressed using simple LZ77 algorithm
	CompressionPalmDoc mobiPDHCompression = 2
	// CompressionHuffCdic uint16(17480). Text is compressed using HuffCdic
	CompressionHuffCdic mobiPDHCompression = 17480
)
14 | 
// mobiPDH is the PalmDoc header at the start of record 0.
// The first number in each field comment is the byte offset within the header.
type mobiPDH struct {
	Compression mobiPDHCompression //0  // 1 == no compression, 2 = PalmDOC compression, 17480 = HUFF/CDIC compression
	Unk1        uint16             //2  // Always zero
	TextLength  uint32             //4  // Uncompressed length of the entire text of the book
	RecordCount uint16             //8  // Number of PDB records used for the text of the book.
	RecordSize  uint16             //10 // Maximum size of each record containing text, always 4096
	Encryption  uint16             //12 // 0 == no encryption, 1 = Old Mobipocket Encryption, 2 = Mobipocket Encryption
	Unk2        uint16             //14 // Usually zero
}
25 | 


--------------------------------------------------------------------------------
/peeker.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
// Peeker is a byte slice with convenience accessors for viewing the same
// bytes as a magic identifier, a string, or raw bytes.
type Peeker []uint8

// Magic returns the bytes converted to a mobiMagicType.
func (p Peeker) Magic() mobiMagicType {
	return mobiMagicType(p)
}

// String returns the bytes converted to a string.
func (p Peeker) String() string {
	return string(p)
}

// Bytes returns the underlying slice itself (no copy is made).
func (p Peeker) Bytes() []uint8 {
	return p
}

// Len returns the number of bytes held.
func (p Peeker) Len() int {
	return len(p)
}
20 | 


--------------------------------------------------------------------------------
/ptagx.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
// Tag identifiers that can appear in index entries.
// Only TagEntry_END is explicitly typed uint8; the others are untyped
// constants.
const (
	TagEntry_END                uint8 = 0
	TagEntry_Pos                      = 1  // NCX | Position offset for the beginning of NCX record (filepos) Ex: Beginning of a chapter
	TagEntry_Len                      = 2  // NCX | Record length. Ex: Chapter length
	TagEntry_NameOffset               = 3  // NCX | Label text offset in CNCX
	TagEntry_DepthLvl                 = 4  // NCX | Depth/Level of CNCX
	TagEntry_KOffs                    = 5  // NCX | kind CNCX offset
	TagEntry_PosFid                   = 6  // NCX | pos:fid
	TagEntry_Parent                   = 21 // NCX | Parent
	TagEntry_Child1                   = 22 // NCX | First child
	TagEntry_ChildN                   = 23 // NCX | Last child
	TagEntry_ImageIndex               = 69
	TagEntry_DescOffset               = 70 // Description offset in cncx
	TagEntry_AuthorOffset             = 71 // Author offset in cncx
	TagEntry_ImageCaptionOffset       = 72 // Image caption offset in cncx
	TagEntry_ImgAttrOffset            = 73 // Image attribution offset in cncx
)
20 | 
// tagEntryMap maps a tag identifier to a human-readable label; it is used
// when printing decoded index entries. TagEntry_END has no label.
var tagEntryMap = map[uint8]string{
	TagEntry_Pos:                "Offset",
	TagEntry_Len:                "Lenght",
	TagEntry_NameOffset:         "Label",
	TagEntry_DepthLvl:           "Depth",
	TagEntry_KOffs:              "Kind",
	TagEntry_PosFid:             "Pos:Fid",
	TagEntry_Parent:             "Parent",
	TagEntry_Child1:             "First Child",
	TagEntry_ChildN:             "Last Child",
	TagEntry_ImageIndex:         "Image Index",
	TagEntry_DescOffset:         "Description",
	TagEntry_AuthorOffset:       "Author",
	TagEntry_ImageCaptionOffset: "Image Caption Offset",
	TagEntry_ImgAttrOffset:      "Image Attr Offset"}
36 | 
// mobiPTagx describes, for one tag present in an index entry, how its values
// are laid out in the entry data.
type mobiPTagx struct {
	// Tag is the tag identifier.
	Tag             uint8
	// Tag_Value_Count is the number of values per occurrence of the tag
	// (taken from the TAGX table).
	Tag_Value_Count uint8
	// Value_Count is the number of occurrences; zero when Value_Bytes is
	// used instead.
	Value_Count     uint32
	// Value_Bytes is the total byte length of the values when the count is
	// not known up front.
	Value_Bytes     uint32
}
43 | 


--------------------------------------------------------------------------------
/reader.go:
--------------------------------------------------------------------------------
  1 | package mobi
  2 | 
  3 | import (
  4 | 	"encoding/binary"
  5 | 	"errors"
  6 | 	"fmt"
  7 | 	"os"
  8 | 	"reflect"
  9 | 	"strconv"
 10 | )
 11 | 
// MobiReader parses an existing MOBI file. The parsed sections are exposed
// through the embedded Mobi value.
type MobiReader struct {
	Mobi
}
 15 | 
 16 | func NewReader(filename string) (out *MobiReader, err error) {
 17 | 	out = &MobiReader{}
 18 | 	out.file, err = os.Open(filename)
 19 | 	if err != nil {
 20 | 		return nil, err
 21 | 	}
 22 | 
 23 | 	out.fileStat, err = out.file.Stat()
 24 | 	if err != nil {
 25 | 		return nil, err
 26 | 	}
 27 | 
 28 | 	return out, out.Parse()
 29 | }
 30 | 
 31 | func (r *MobiReader) Parse() (err error) {
 32 | 	if err = r.parsePdf(); err != nil {
 33 | 		return
 34 | 	}
 35 | 
 36 | 	if err = r.parsePdh(); err != nil {
 37 | 		return
 38 | 	}
 39 | 
 40 | 	// Check if INDX offset is set + attempt to parse INDX
 41 | 	if r.Header.IndxRecodOffset > 0 {
 42 | 		err = r.parseIndexRecord(r.Header.IndxRecodOffset)
 43 | 		if err != nil {
 44 | 			return
 45 | 		}
 46 | 	}
 47 | 
 48 | 	return
 49 | }
 50 | 
 51 | // parseHeader reads Palm Database Format header, and record offsets
 52 | func (r *MobiReader) parsePdf() error {
 53 | 	//First we read PDF Header, this will help us parse subsequential data
 54 | 	//binary.Read will take struct and fill it with data from mobi File
 55 | 	err := binary.Read(r.file, binary.BigEndian, &r.Pdf)
 56 | 	if err != nil {
 57 | 		return err
 58 | 	}
 59 | 
 60 | 	if r.Pdf.RecordsNum < 1 {
 61 | 		return errors.New("Number of records in this file is less than 1.")
 62 | 	}
 63 | 
 64 | 	r.Offsets = make([]mobiRecordOffset, r.Pdf.RecordsNum)
 65 | 	err = binary.Read(r.file, binary.BigEndian, &r.Offsets)
 66 | 	if err != nil {
 67 | 		return err
 68 | 	}
 69 | 
 70 | 	//After the records offsets there's a 2 byte padding
 71 | 	r.file.Seek(2, 1)
 72 | 
 73 | 	return nil
 74 | }
 75 | 
 76 | // parsePdh processes record 0 that contains PalmDoc Header, Mobi Header and Exth meta data
 77 | func (r *MobiReader) parsePdh() error {
 78 | 	// Palm Doc Header
 79 | 	// Now we go onto reading record 0 that contains Palm Doc Header, Mobi Header, Exth Header...
 80 | 	binary.Read(r.file, binary.BigEndian, &r.Pdh)
 81 | 
 82 | 	// Check and see if there's a record encryption
 83 | 	if r.Pdh.Encryption != 0 {
 84 | 		return errors.New("Records are encrypted.")
 85 | 	}
 86 | 
 87 | 	// Mobi Header
 88 | 	// Now it's time to read Mobi Header
 89 | 	if r.MatchMagic(magicMobi) {
 90 | 		binary.Read(r.file, binary.BigEndian, &r.Header)
 91 | 	} else {
 92 | 		return errors.New("Can not find MOBI header. File might be corrupt.")
 93 | 	}
 94 | 
 95 | 	// Current header struct only reads 232 bytes. So if actual header lenght is greater, then we need to skip to Exth.
 96 | 	Skip := int64(r.Header.HeaderLength) - int64(reflect.TypeOf(r.Header).Size())
 97 | 	r.file.Seek(Skip, 1)
 98 | 
 99 | 	// Exth Record
100 | 	// To check whenever there's EXTH record or not, we need to check and see if 6th bit of r.Header.ExthFlags is set.
101 | 	if hasBit(int(r.Header.ExthFlags), 6) {
102 | 		err := r.ExthParse()
103 | 
104 | 		if err != nil {
105 | 			return errors.New("Can not read EXTH record")
106 | 		}
107 | 	}
108 | 
109 | 	return nil
110 | }
111 | 
// parseIndexRecord parses the INDX record stored in PDB record n.
//
// It seeks to the record, checks the INDX magic, appends a new mobiIndx to
// r.Indx and fills it from the file. Depending on the header it then parses
// the TAGX table (plus the CNCX info that follows it), the IDXT offset table
// and, for INDX_TYPE_NORMAL records, every index entry referenced by IDXT.
// For INDX_TYPE_INFLECTION records it recurses into record n+1.
//
// ORDT and LIGT sections are not supported and produce an error.
//
// NOTE(review): several reads/seeks below discard their error, the result of
// the recursive call is ignored, and progress is printed to stdout.
func (r *MobiReader) parseIndexRecord(n uint32) error {
	_, err := r.OffsetToRecord(n)
	if err != nil {
		return err
	}

	// Remember where the record starts; offsets inside the INDX header are
	// relative to this position.
	RecPos, _ := r.file.Seek(0, 1)

	if !r.MatchMagic(magicIndx) {
		return errors.New("Index record not found at specified at given offset")
	}
	//fmt.Printf("Index %s %v\n", r.Peek(4), RecLen)

	//if len(r.Indx) == 0 {
	r.Indx = append(r.Indx, mobiIndx{})
	//}

	// Work on the element just appended.
	idx := &r.Indx[len(r.Indx)-1]

	err = binary.Read(r.file, binary.BigEndian, idx)
	if err != nil {
		return err
	}

	/* Tagx Record Parsing + Last CNCX */
	if idx.Tagx_Offset != 0 {
		_, err = r.file.Seek(RecPos+int64(idx.Tagx_Offset), 0)
		if err != nil {
			return err
		}

		err = r.parseTagx()
		if err != nil {
			return err
		}

		// Last CNCX record follows TAGX
		if idx.Cncx_Records_Count > 0 {
			r.Cncx = mobiCncx{}
			binary.Read(r.file, binary.BigEndian, &r.Cncx.Len)

			r.Cncx.Id = make([]uint8, r.Cncx.Len)
			binary.Read(r.file, binary.LittleEndian, &r.Cncx.Id)
			r.file.Seek(1, 1) //Skip 0x0 termination

			binary.Read(r.file, binary.BigEndian, &r.Cncx.NCX_Count)

			// PrintStruct(r.Cncx)
		}
	}

	/* Ordt Record Parsing */
	if idx.Idxt_Encoding == MOBI_ENC_UTF16 || idx.Ordt_Entries_Count > 0 {
		return errors.New("ORDT parser not implemented")
	}

	/* Ligt Record Parsing */
	if idx.Ligt_Entries_Count > 0 {
		return errors.New("LIGT parser not implemented")
	}

	/* Idxt Record Parsing */
	if idx.Idxt_Count > 0 {
		_, err = r.file.Seek(RecPos+int64(idx.Idxt_Offset), 0)
		if err != nil {
			return err
		}

		err = r.parseIdxt(idx.Idxt_Count)
		if err != nil {
			return err
		}
	}

	//CNCX Data?
	var Count = 0
	if idx.Indx_Type == INDX_TYPE_NORMAL {
		//r.file.Seek(RecPos+int64(idx.HeaderLen), 0)

		var PTagxLen = []uint8{0}
		for i, offset := range r.Idxt.Offset {
			// Each IDXT offset is relative to the start of the record.
			r.file.Seek(RecPos+int64(offset), 0)

			// Read the byte containing the length of a label
			r.file.Read(PTagxLen)

			// Read label
			PTagxLabel := make([]uint8, PTagxLen[0])
			r.file.Read(PTagxLabel)

			// Size of the data that is read for this entry: up to the next
			// entry's offset, or up to the IDXT table for the last entry.
			PTagxLen1 := uint16(idx.Idxt_Offset) - r.Idxt.Offset[i]
			if i+1 < len(r.Idxt.Offset) {
				PTagxLen1 = r.Idxt.Offset[i+1] - r.Idxt.Offset[i]
			}

			PTagxData := make([]uint8, PTagxLen1)
			r.file.Read(PTagxData)
			fmt.Printf("\n------ %v --------\n", i)
			r.parsePtagx(PTagxData)
			Count++
			//fmt.Printf("Len: %v | Label: %s | %v\n", PTagxLen, PTagxLabel, Count)
		}
	}

	// Check next record
	//r.OffsetToRecord(n + 1)

	//
	// Process remaining INDX records
	if idx.Indx_Type == INDX_TYPE_INFLECTION {
		r.parseIndexRecord(n + 1)
	}
	//fmt.Printf("%s", )
	// Read Tagx
	//		if idx.Tagx_Offset > 0 {
	//			err := r.parseTagx()
	//			if err != nil {
	//				return err
	//			}
	//		}

	return nil
}
235 | 
236 | // MatchMagic matches next N bytes (based on lenght of magic word)
237 | func (r *MobiReader) MatchMagic(magic mobiMagicType) bool {
238 | 	if r.Peek(len(magic)).Magic() == magic {
239 | 		return true
240 | 	}
241 | 	return false
242 | }
243 | 
244 | // Peek returns next N bytes without advancing the reader.
245 | func (r *MobiReader) Peek(n int) Peeker {
246 | 	buf := make([]uint8, n)
247 | 	r.file.Read(buf)
248 | 	r.file.Seek(int64(n)*-1, 1)
249 | 	return buf
250 | }
251 | 
252 | // Parse reads/parses Exth meta data records from file
253 | func (r *MobiReader) ExthParse() error {
254 | 	// If next 4 bytes are not EXTH then we have a problem
255 | 	if !r.MatchMagic(magicExth) {
256 | 		return errors.New("Currect reading position does not contain EXTH record")
257 | 	}
258 | 
259 | 	binary.Read(r.file, binary.BigEndian, &r.Exth.Identifier)
260 | 	binary.Read(r.file, binary.BigEndian, &r.Exth.HeaderLenght)
261 | 	binary.Read(r.file, binary.BigEndian, &r.Exth.RecordCount)
262 | 
263 | 	r.Exth.Records = make([]mobiExthRecord, r.Exth.RecordCount)
264 | 	for i, _ := range r.Exth.Records {
265 | 		binary.Read(r.file, binary.BigEndian, &r.Exth.Records[i].RecordType)
266 | 		binary.Read(r.file, binary.BigEndian, &r.Exth.Records[i].RecordLength)
267 | 
268 | 		r.Exth.Records[i].Value = make([]uint8, r.Exth.Records[i].RecordLength-8)
269 | 
270 | 		Tag := getExthMetaByTag(r.Exth.Records[i].RecordType)
271 | 		switch Tag.Type {
272 | 		case EXTH_TYPE_BINARY:
273 | 			binary.Read(r.file, binary.BigEndian, &r.Exth.Records[i].Value)
274 | 			//			fmt.Printf("%v: %v\n", Tag.Name, r.Exth.Records[i].Value)
275 | 		case EXTH_TYPE_STRING:
276 | 			binary.Read(r.file, binary.LittleEndian, &r.Exth.Records[i].Value)
277 | 			//			fmt.Printf("%v: %s\n", Tag.Name, r.Exth.Records[i].Value)
278 | 		case EXTH_TYPE_NUMERIC:
279 | 			binary.Read(r.file, binary.BigEndian, &r.Exth.Records[i].Value)
280 | 			//			fmt.Printf("%v: %d\n", Tag.Name, binary.BigEndian.Uint32(r.Exth.Records[i].Value))
281 | 		}
282 | 	}
283 | 
284 | 	return nil
285 | }
286 | 
287 | // OffsetToRecord sets reading position to record N, returns total record lenght
288 | func (r *MobiReader) OffsetToRecord(nu uint32) (uint32, error) {
289 | 	n := int(nu)
290 | 	if n > int(r.Pdf.RecordsNum)-1 {
291 | 		return 0, errors.New("Record ID requested is greater than total amount of records")
292 | 	}
293 | 
294 | 	RecLen := uint32(0)
295 | 	if n+1 < int(r.Pdf.RecordsNum) {
296 | 		RecLen = r.Offsets[n+1].Offset
297 | 	} else {
298 | 		RecLen = uint32(r.fileStat.Size())
299 | 	}
300 | 
301 | 	_, err := r.file.Seek(int64(r.Offsets[n].Offset), 0)
302 | 
303 | 	return RecLen - r.Offsets[n].Offset, err
304 | }
305 | 
306 | func (r *MobiReader) parseTagx() error {
307 | 	if !r.MatchMagic(magicTagx) {
308 | 		return errors.New("TAGX record not found at given offset.")
309 | 	}
310 | 
311 | 	r.Tagx = mobiTagx{}
312 | 
313 | 	binary.Read(r.file, binary.BigEndian, &r.Tagx.Identifier)
314 | 	binary.Read(r.file, binary.BigEndian, &r.Tagx.HeaderLenght)
315 | 	if r.Tagx.HeaderLenght < 12 {
316 | 		return errors.New("TAGX record too short")
317 | 	}
318 | 	binary.Read(r.file, binary.BigEndian, &r.Tagx.ControlByteCount)
319 | 
320 | 	TagCount := (r.Tagx.HeaderLenght - 12) / 4
321 | 	r.Tagx.Tags = make([]mobiTagxTags, TagCount)
322 | 
323 | 	for i := 0; i < int(TagCount); i++ {
324 | 		err := binary.Read(r.file, binary.BigEndian, &r.Tagx.Tags[i])
325 | 		if err != nil {
326 | 			return err
327 | 		}
328 | 	}
329 | 
330 | 	fmt.Println("TagX called")
331 | 	// PrintStruct(r.Tagx)
332 | 
333 | 	return nil
334 | }
335 | 
336 | func (r *MobiReader) parseIdxt(IdxtCount uint32) error {
337 | 	fmt.Println("parseIdxt called")
338 | 	if !r.MatchMagic(magicIdxt) {
339 | 		return errors.New("IDXT record not found at given offset.")
340 | 	}
341 | 
342 | 	binary.Read(r.file, binary.BigEndian, &r.Idxt.Identifier)
343 | 
344 | 	r.Idxt.Offset = make([]uint16, IdxtCount)
345 | 
346 | 	binary.Read(r.file, binary.BigEndian, &r.Idxt.Offset)
347 | 	//for id, _ := range r.Idxt.Offset {
348 | 	//	binary.Read(r.Buffer, binary.BigEndian, &r.Idxt.Offset[id])
349 | 	//}
350 | 
351 | 	//Skip two bytes? Or skip necessary amount to reach total lenght in multiples of 4?
352 | 	r.file.Seek(2, 1)
353 | 
354 | 	// PrintStruct(r.Idxt)
355 | 	return nil
356 | }
357 | 
// parsePtagx decodes the tag data of a single index entry.
//
// data starts with r.Tagx.ControlByteCount control bytes, followed by the
// variable-width-integer encoded values. The function works in two passes:
//
//  1. For every tag in r.Tagx.Tags it masks the current control byte to find
//     out whether the tag is present and how its values are counted, and
//     collects the result as mobiPTagx items.
//  2. For every collected item it decodes the values with vwiDec.
//
// The decoded values are only printed to stdout; nothing is stored on r.
//
// NOTE(review): slicing is not bounds-checked, so truncated data panics, and
// the function panics explicitly when a byte-counted run decodes to a
// different number of bytes than announced.
func (r *MobiReader) parsePtagx(data []byte) {
	//control_byte_count
	//tagx
	control_bytes := data[:r.Tagx.ControlByteCount]
	data = data[r.Tagx.ControlByteCount:]

	var Ptagx []mobiPTagx //= make([]mobiPTagx, r.Tagx.TagCount())

	for _, x := range r.Tagx.Tags {
		// A tag entry flagged with Control_Byte 0x01 closes the current
		// control byte: move on to the next one.
		if x.Control_Byte == 0x01 {
			control_bytes = control_bytes[1:]
			continue
		}

		value := control_bytes[0] & x.Bitmask
		if value != 0 {
			var value_count uint32
			var value_bytes uint32

			if value == x.Bitmask {
				if setBits[x.Bitmask] > 1 {
					// If all bits of masked value are set and the mask has more
					// than one bit, a variable width value will follow after
					// the control bytes which defines the length of bytes (NOT
					// the value count!) which will contain the corresponding
					// variable width values.
					var consumed uint32
					value_bytes, consumed = vwiDec(data, true)
					//fmt.Printf("\nConsumed %v", consumed)
					data = data[consumed:]
				} else {
					value_count = 1
				}
			} else {
				// Shift the masked value down until the mask's lowest set bit
				// reaches bit 0; what remains is the value count.
				mask := x.Bitmask
				for {
					if mask&1 != 0 {
						//fmt.Printf("Break")
						break
					}
					mask >>= 1
					value >>= 1
				}
				value_count = uint32(value)
			}

			Ptagx = append(Ptagx, mobiPTagx{x.Tag, x.TagNum, value_count, value_bytes})
			//						ptagx[ptagx_count].tag = tagx->tags[i].tag;
			//       ptagx[ptagx_count].tag_value_count = tagx->tags[i].values_count;
			//       ptagx[ptagx_count].value_count = value_count;
			//       ptagx[ptagx_count].value_bytes = value_bytes;

			//fmt.Printf("TAGX %v %v VC:%v VB:%v\n", x.Tag, x.TagNum, value_count, value_bytes)
		}
	}
	fmt.Printf("%+v", Ptagx)
	var IndxEntry []mobiIndxEntry
	for iz, x := range Ptagx {
		var values []uint32

		if x.Value_Count != 0 {
			// Read value_count * values_per_entry variable width values.
			fmt.Printf("\nDec: ")
			for i := 0; i < int(x.Value_Count)*int(x.Tag_Value_Count); i++ {
				byts, consumed := vwiDec(data, true)
				data = data[consumed:]

				values = append(values, byts)
				IndxEntry = append(IndxEntry, mobiIndxEntry{x.Tag, byts})
				fmt.Printf("%v %s: %v ", iz, tagEntryMap[x.Tag], byts)
			}
		} else {
			// Convert value_bytes to variable width values.
			total_consumed := 0
			for {
				if total_consumed < int(x.Value_Bytes) {
					byts, consumed := vwiDec(data, true)
					data = data[consumed:]

					total_consumed += int(consumed)

					values = append(values, byts)
					IndxEntry = append(IndxEntry, mobiIndxEntry{x.Tag, byts})
				} else {
					break
				}
			}
			// The values must end exactly on the announced byte boundary.
			if total_consumed != int(x.Value_Bytes) {
				panic("Error not enough bytes are consumed. Consumed " + strconv.Itoa(total_consumed) + " out of " + strconv.Itoa(int(x.Value_Bytes)))
			}
		}
	}
	fmt.Println("---------------------------")
}
452 | 


--------------------------------------------------------------------------------
/tagx.go:
--------------------------------------------------------------------------------
 1 | package mobi
 2 | 
 3 | var mobiTagxMap = map[uint8]mobiTagxTags{
 4 | 	TagEntry_Pos:        mobiTagxTags{1, 1, 1, 0},
 5 | 	TagEntry_Len:        mobiTagxTags{2, 1, 2, 0},
 6 | 	TagEntry_NameOffset: mobiTagxTags{3, 1, 4, 0},
 7 | 	TagEntry_DepthLvl:   mobiTagxTags{4, 1, 8, 0},
 8 | 	TagEntry_Parent:     mobiTagxTags{21, 1, 16, 0},
 9 | 	TagEntry_Child1:     mobiTagxTags{22, 1, 32, 0},
10 | 	TagEntry_ChildN:     mobiTagxTags{23, 1, 64, 0},
11 | 	TagEntry_PosFid:     mobiTagxTags{6, 2, 128, 0},
12 | 	TagEntry_END:        mobiTagxTags{0, 0, 0, 1}}
13 | 
// mobiTagx is the TAGX section of an index record: a fixed 12-byte header
// followed by a table of 4-byte tag entries.
type mobiTagx struct {
	Identifier       [4]byte `format:"string"`
	// HeaderLenght is the total section length in bytes; the number of tag
	// entries is (HeaderLenght - 12) / 4.
	HeaderLenght     uint32  `init:"Tags" op:"-12 /4"`
	// ControlByteCount is the number of control bytes that prefix each index
	// entry's tag data.
	ControlByteCount uint32
	Tags             []mobiTagxTags
	//[]byte //HeaderLenght - 12 | Multiple of 4

	//The tag table entries are multiple of 4 bytes. The first byte is
	//the tag, the second byte the number of values, the third byte the
	//bit mask and the fourth byte indicates the end of the control byte.
	//If the fourth byte is 0x01, all other bytes of the entry are zero.

	//Unk1 [8]uint8 //Unrelated to Tagx? || Related to CNCX Record? 8 bytes
}
28 | 
// mobiTagxTags is a single 4-byte entry of the TAGX tag table.
type mobiTagxTags struct {
	Tag          uint8 // Tag identifier
	TagNum       uint8 // Number of values
	Bitmask      uint8 // Bitmask applied to the control byte
	Control_Byte uint8 // Set to 1 on the entry that ends a control byte
}
35 | 
// TagCount returns the number of entries in the tag table.
func (r *mobiTagx) TagCount() int {
	return len(r.Tags)
}
39 | 


--------------------------------------------------------------------------------
/util.go:
--------------------------------------------------------------------------------
  1 | package mobi
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"encoding/binary"
  6 | 	"fmt"
  7 | 	"reflect"
  8 | 	"regexp"
  9 | 	"strconv"
 10 | 	"time"
 11 | )
 12 | 
 13 | func printStruct(x interface{}) {
 14 | 	ref := reflect.ValueOf(x)
 15 | 
 16 | 	if ref.Kind() == reflect.Ptr {
 17 | 		ref = ref.Elem()
 18 | 	}
 19 | 
 20 | 	var CurPos uintptr = 0
 21 | 	fmt.Println("---------------------- " + ref.Type().Name() + " ----------------------")
 22 | 	for i := 0; i < ref.NumField(); i++ {
 23 | 		val := ref.Field(i)
 24 | 		typ := ref.Type().Field(i)
 25 | 		//: %-10v , int(CurPos)+int(typ.Type.Size())
 26 | 
 27 | 		var value interface{}
 28 | 		switch typ.Tag.Get("format") {
 29 | 		case "bits":
 30 | 			value = fmt.Sprintf("bit(%b)", val.Interface())
 31 | 		case "string":
 32 | 			value = fmt.Sprintf("%s", val.Interface())
 33 | 		case "hex":
 34 | 			value = fmt.Sprintf("% x", val.Interface())
 35 | 		case "date":
 36 | 			if tim_, err := strconv.ParseInt(val.String(), 10, 64); err != nil {
 37 | 				//BUG(fix): Check Mac/Unix timestamp format
 38 | 				//If the time has the top bit set, it's an unsigned 32-bit number counting from 1st Jan 1904
 39 | 				//If the time has the top bit clear, it's a signed 32-bit number counting from 1st Jan 1970.
 40 | 				value = time.Unix(tim_, 0)
 41 | 			} else {
 42 | 				value = val.Interface()
 43 | 			}
 44 | 		default:
 45 | 			value = val.Interface()
 46 | 		}
 47 | 
 48 | 		//switch val.Kind() {
 49 | 		//case reflect.Slice:
 50 | 		////	for i := 0; i < val.NumField(); i++ {
 51 | 		//		PrintStruct(val.Index(i))
 52 | 		//		//fmt.Println(fmt.Sprintf("%-25v", typ.Name), fmt.Sprintf("%-5v:", CurPos), value)
 53 | 		//CurPos += typ.Type.Size()
 54 | 		//	}
 55 | 		//default:
 56 | 		fmt.Println(fmt.Sprintf("%-25v", typ.Name), fmt.Sprintf("%-5v:", CurPos), value)
 57 | 		CurPos += typ.Type.Size()
 58 | 		//}
 59 | 
 60 | 	}
 61 | }
 62 | 
 63 | func hasBit(n int, pos uint) bool {
 64 | 	val := n & (1 << pos)
 65 | 	return (val > 0)
 66 | }
 67 | 
 68 | func getExthMetaByTag(tag uint32) mobiExthMeta {
 69 | 	for i := 0; i < len(ExthMeta); i++ {
 70 | 		if ExthMeta[i].ID == tag {
 71 | 			return ExthMeta[i]
 72 | 		}
 73 | 	}
 74 | 	return ExthMeta[0]
 75 | }
 76 | 
 77 | var setBits [256]uint8 = [256]uint8{
 78 | 	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
 79 | 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 80 | 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 81 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 82 | 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 83 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 84 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 85 | 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 86 | 	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 87 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 88 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 89 | 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 90 | 	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 91 | 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 92 | 	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 93 | 	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
 94 | }
 95 | 
 96 | // VwiDec decoders variable lenght integer. Returns value and number of bytes consumed
 97 | func vwiDec(src []uint8, forward bool) (uint32, uint32) {
 98 | 	var val uint32 = 0 //val = 0
 99 | 	var byts []uint8   // byts = bytearray()
100 | 
101 | 	if !forward { //if not forward:
102 | 		for i, j := 0, len(src)-1; i < j; i, j = i+1, j-1 { //     src.reverse()
103 | 			src[i], src[j] = src[j], src[i]
104 | 		}
105 | 	}
106 | 	for _, bnum := range src {
107 | 		mask := ^(uint8(1) << 7)
108 | 		byts = append(byts, bnum&mask)
109 | 		if bnum>>7 == 1 {
110 | 			break
111 | 		}
112 | 	}
113 | 
114 | 	if !forward { //if not forward:
115 | 		for i, j := 0, len(byts)-1; i < j; i, j = i+1, j-1 { //     src.reverse()
116 | 			byts[i], byts[j] = byts[j], byts[i]
117 | 		}
118 | 	}
119 | 
120 | 	for _, Byte := range byts {
121 | 		val = val << 7
122 | 		val |= uint32(Byte)
123 | 	}
124 | 
125 | 	return val, uint32(len(byts))
126 | }
127 | 
// vwiEncInt encodes x as a forward variable-width integer: 7 bits per byte,
// most significant group first, with the high bit (0x80) set on the final
// byte only.
//
// x is expected to be non-negative; a negative x never shifts down to zero
// and overruns the 64-byte scratch buffer, which panics.
func vwiEncInt(x int) []uint8 {
	scratch := make([]uint8, 64)

	// Fill the buffer from the back so the groups end up in big-endian order
	// without a separate reversal step.
	pos := len(scratch)
	for {
		pos--
		scratch[pos] = byte(x) & 0x7f
		x >>= 7
		if x == 0 {
			break
		}
	}

	// The least significant group is the last byte; flag it as the terminator.
	scratch[len(scratch)-1] |= 0x80
	return scratch[pos:]
}
145 | 
// Patterns used by minimizeHTML, compiled once instead of on every call.
var (
	htmlSpaceRunRe  = regexp.MustCompile("[ ]+")
	htmlLineBreakRe = regexp.MustCompile("[\t\r\n]")
)

// minimizeHTML returns a copy of x with every run of spaces collapsed to a
// single space and all tabs, carriage returns and newlines removed.
//
// The two steps run in that order, so spaces that become adjacent only after
// a line break is removed are not merged again.
func minimizeHTML(x []byte) []byte {
	// Collapse runs of spaces.
	out := htmlSpaceRunRe.ReplaceAllString(string(x), " ")
	// Drop tabs and line breaks.
	out = htmlLineBreakRe.ReplaceAllString(out, "")
	return []byte(out)
}
153 | 
154 | var mask_to_bit_shifts = map[int]uint8{1: 0, 2: 1, 3: 0, 4: 2, 8: 3, 12: 2, 16: 4, 32: 5, 48: 4, 64: 6, 128: 7, 192: 6}
155 | 
156 | func controlByte(tagx []mobiTagxTags) []byte {
157 | 	var cbs []byte
158 | 	var ans uint8 = 0
159 | 	for _, tags := range tagx {
160 | 		if tags.Control_Byte == 1 {
161 | 			cbs = append(cbs, ans)
162 | 			ans = 0
163 | 			continue
164 | 		}
165 | 		nvals := uint8(1)
166 | 		nentries := nvals / tags.TagNum
167 | 		shifts := mask_to_bit_shifts[int(tags.Bitmask)]
168 | 		ans |= tags.Bitmask & (nentries << shifts)
169 | 	}
170 | 	return cbs
171 | }
172 | 
// stringToBytes copies the bytes of value into the byte array that output
// points to (for example a *[32]byte).
//
// Copying stops at whichever is shorter: the string or the array. Array
// elements past the end of the string are left untouched. reflect panics when
// output is not a pointer to an array of bytes.
func stringToBytes(value string, output interface{}) {
	dst := reflect.ValueOf(output).Elem()

	limit := dst.Type().Len()
	if len(value) < limit {
		limit = len(value)
	}

	for idx := 0; idx < limit; idx++ {
		dst.Index(idx).Set(reflect.ValueOf(value[idx]))
	}
}
183 | 
// titleUnsafeCharRe matches every character that is not an ASCII letter,
// digit or hyphen. Compiled once instead of on every call.
var titleUnsafeCharRe = regexp.MustCompile("[^-A-Za-z0-9]")

// underlineTitle replaces every character of x other than ASCII letters,
// digits and '-' with an underscore, then truncates the result to at most 31
// bytes. After the replacement the string is pure ASCII, so the byte-wise
// truncation cannot split a character.
func underlineTitle(x string) string {
	x = titleUnsafeCharRe.ReplaceAllString(x, "_")
	if len(x) > 31 {
		return x[:31]
	}
	return x
}
191 | 
// palmDocLZ77Pack compresses data with the PalmDoc LZ77 variant and returns
// the compressed bytes.
//
// The last byte of data is read as a length N: the final N+1 bytes (that
// length byte plus the N bytes before it) are treated as a tail that is
// copied to the output unchanged, and only the part before it is compressed.
//
// For each position the encoder tries, in order:
//   - a back-reference (2 bytes) to an earlier occurrence of the next 10..3
//     bytes within the previous 2047 bytes; this is only attempted when more
//     than 10 bytes lie on either side of the position;
//   - a space followed by a byte in 0x40..0x7f, packed into one byte
//     (the byte XOR 0x80);
//   - a literal byte for 0x00 and 0x09..0x7f;
//   - otherwise a length-prefixed run of up to 8 raw bytes.
//
// NOTE(review): data must not be empty and its last byte must not exceed
// len(data)-1, otherwise the slicing at the top panics.
func palmDocLZ77Pack(data []byte) []byte {
	var outB []byte

	// Split off the trailing bytes that must stay uncompressed.
	var tailLen = int(data[len(data)-1])
	var tail = data[(len(data)-1)-tailLen:] /*-multibyte*/
	data = data[:(len(data)-1)-tailLen]     /* -multibyte*/

	var ldata = len(data)

	for i := 0; i < ldata; i++ {
		if i > 10 && (ldata-i) > 10 {
			found := false

			// Bounding the search window to the last 2047 bytes saves time
			// on look up.
			// Todo: custom lookup
			var reset bool
			boundOffset := i - 2047
			if boundOffset < 0 {
				boundOffset = 0
			} else {
				// The window no longer starts at 0, so indices returned by
				// LastIndex need to be rebased onto data.
				reset = true
			}

			// If there's no match for 3 letters then no point looking
			if f := bytes.LastIndex(data[boundOffset:i], data[i:i+3]); f != -1 {
				// Prefer the longest match: try 10 bytes down to 3.
				for chunk_len := 10; chunk_len > 2; chunk_len-- {
					j := bytes.LastIndex(data[boundOffset:i], data[i:i+chunk_len])
					if j != -1 {
						if reset {
							j = i - 2047 + j
							reset = false
						}

						found = true

						// m is the distance back to the match.
						var m int64 = int64(i) - int64(j)

						// 16-bit code: top bit set, distance shifted left by
						// 3 (masked with 0x3ff8), low 3 bits = length - 3.
						var code int64 = 0x8000 + ((m << 3) & 0x3ff8) + (int64(chunk_len) - 3)

						outB = append(outB, byte(code>>8))
						outB = append(outB, byte(code))
						i += chunk_len - 1
						break
					}
				}
			}
			if found {
				continue
			} else {
				//				Try forward
				//				matchLen := 0
				//				for z := 1; z < 10; z++ {
				//					if data[i+z] == data[i] {
				//						matchLen++
				//					} else {
				//						break
				//					}
				//				}
				//				if matchLen > 3 {
				//					//					fmt.Printf("\nLen CHeck: %v = %v", i, matchLen)
				//					var m int64 = 1
				//					var code int64 = 0x8000 + ((m << 3) & 0x3ff8) + (int64(matchLen) - 3)
				//					outB = append(outB, data[i])
				//					outB = append(outB, byte(code>>8))
				//					outB = append(outB, byte(code))
				//					//					fmt.Printf("Code: %x %x", byte(code>>8), byte(code))
				//					i += matchLen
				//					//if(ldata > )
				//					continue
				//				}
			}
		}

		ch := data[i]
		och := byte(ch)

		// A space followed by a byte in 0x40..0x7f is packed into one byte.
		if och == 0x20 && (i+1) < ldata {
			onch := byte(data[i+1])
			if onch >= 0x40 && onch < 0x80 {
				outB = append(outB, onch^0x80)
				i += 1
				continue
			} else {
				outB = append(outB, och)
				continue
			}
		}
		if och == 0 || (och > 8 && och < 0x80) {
			// Bytes in this range are emitted as plain literals.
			outB = append(outB, och)
		} else {
			// Everything else is emitted as a run: a count byte followed by
			// up to 8 raw bytes.
			j := i
			var binseq []uint8

			for {
				if j < ldata && len(binseq) < 8 {
					ch = data[j]
					och = byte(ch)
					if och == 0 || (och > 8 && och < 0x80) {
						break
					}
					binseq = append(binseq, och)
					j += 1
				} else {
					break
				}
			}
			outB = append(outB, byte(len(binseq)))

			for rr := 0; rr < len(binseq); rr++ {
				outB = append(outB, binseq[rr])
			}

			i += len(binseq) - 1
		}
	}
	outB = append(outB, tail...)
	return outB
}
310 | 
// int32ToBytes returns the four-byte big-endian encoding of i.
func int32ToBytes(i uint32) []byte {
	var out bytes.Buffer
	// Encoding a fixed-size integer into an in-memory buffer cannot fail,
	// so the error result is not inspected.
	binary.Write(&out, binary.BigEndian, i)
	return out.Bytes()
}
316 | 


--------------------------------------------------------------------------------
/writer.go:
--------------------------------------------------------------------------------
  1 | package mobi
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"encoding/binary"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"io/ioutil"
  9 | 	"math/rand"
 10 | 	"os"
 11 | 	"time"
 12 | )
 13 | 
 14 | type MobiWriter struct {
 15 | 	file *os.File
 16 | 
 17 | 	timestamp   uint32
 18 | 	title       string
 19 | 	compression mobiPDHCompression
 20 | 
 21 | 	chapterCount int
 22 | 	chapters     []mobiChapter
 23 | 
 24 | 	bookHtml *bytes.Buffer
 25 | 
 26 | 	cncxBuffer      *bytes.Buffer
 27 | 	cncxLabelBuffer *bytes.Buffer
 28 | 
 29 | 	// Text Records
 30 | 	Records [][]uint8
 31 | 
 32 | 	Embedded []EmbeddedData
 33 | 	Mobi
 34 | }
 35 | 
 36 | type EmbType int
 37 | 
 38 | const (
 39 | 	EmbCover EmbType = iota
 40 | 	EmbThumb
 41 | )
 42 | 
 43 | type EmbeddedData struct {
 44 | 	Type EmbType
 45 | 	Data []byte
 46 | }
 47 | 
 48 | func (w *MobiWriter) embed(FileType EmbType, Data []byte) int {
 49 | 	w.Embedded = append(w.Embedded, EmbeddedData{Type: FileType, Data: Data})
 50 | 	return len(w.Embedded) - 1
 51 | }
 52 | 
 53 | func (w *MobiWriter) NewExthRecord(recType ExthType, value interface{}) {
 54 | 	w.Exth.Add(uint32(recType), value)
 55 | }
 56 | 
 57 | func (w *MobiWriter) AddCover(cover, thumbnail string) {
 58 | 	coverData, err := ioutil.ReadFile(cover)
 59 | 	if err != nil {
 60 | 		panic("Can not load file " + cover)
 61 | 	}
 62 | 	thumbnailData, err := ioutil.ReadFile(thumbnail)
 63 | 	if err != nil {
 64 | 		panic("Can not load file " + cover)
 65 | 	}
 66 | 
 67 | 	w.embed(EmbCover, coverData)
 68 | 	w.embed(EmbThumb, thumbnailData)
 69 | }
 70 | 
 71 | // NewWriter initializes a writer. Takes a pointer to file and book title/database name
 72 | func NewWriter(filename string) (writer *MobiWriter, err error) {
 73 | 	writer = &MobiWriter{}
 74 | 	writer.file, err = os.Create(filename)
 75 | 	if err != nil {
 76 | 		return nil, err
 77 | 	}
 78 | 	return
 79 | }
 80 | 
 81 | func (w *MobiWriter) Title(i string) *MobiWriter {
 82 | 	w.title = i
 83 | 	return w
 84 | }
 85 | 
 86 | func (w *MobiWriter) Compression(i mobiPDHCompression) *MobiWriter {
 87 | 	w.compression = i
 88 | 	return w
 89 | }
 90 | 
 91 | // AddRecord adds a new record. Returns Id
 92 | func (w *MobiWriter) AddRecord(data []uint8) Mint {
 93 | 	//	fmt.Printf("Adding record : %s\n", data)
 94 | 	w.Records = append(w.Records, data)
 95 | 	return w.RecordCount() - 1
 96 | }
 97 | 
 98 | func (w *MobiWriter) RecordCount() Mint {
 99 | 	return Mint(len(w.Records))
100 | }
101 | 
102 | func (w *MobiWriter) Write() {
103 | 	// Generate HTML file
104 | 	w.bookHtml = new(bytes.Buffer)
105 | 	w.bookHtml.WriteString("<html><head></head><body>")
106 | 	for i, _ := range w.chapters {
107 | 		w.chapters[i].generateHTML(w.bookHtml)
108 | 	}
109 | 	w.bookHtml.WriteString("</body></html>")
110 | 
111 | 	// Generate MOBI
112 | 	w.generateCNCX() // Generates CNCX
113 | 	w.timestamp = uint32(time.Now().Unix())
114 | 
115 | 	// Generate Records
116 | 	// Record 0 - Reserve [Expand Record size in case Exth is modified by third party readers? 1024*10?]
117 | 	w.AddRecord([]uint8{0})
118 | 
119 | 	// Book Records
120 | 	w.Pdh.TextLength = uint32(w.bookHtml.Len())
121 | 
122 | 	// makeRecord := func(RecN []byte) []byte {
123 | 	// 	rLen := len(RecN)
124 | 	// 	if rLen == 0 {
125 | 	// 		return []byte{}
126 | 	// 	}
127 | 
128 | 	// 	if rLen > MOBI_MAX_RECORD_SIZE {
129 | 	// 		Trail := rLen - MOBI_MAX_RECORD_SIZE
130 | 	// 		RecN = append(RecN, byte(Trail))
131 | 	// 	} else {
132 | 	// 		RecN = append(RecN, 0)
133 | 	// 	}
134 | 
135 | 	// 	if w.compression == CompressionPalmDoc {
136 | 	// 		RecN = palmDocLZ77Pack(RecN)
137 | 	// 	}
138 | 
139 | 	// 	return RecN
140 | 	// }
141 | 
142 | 	makeRecord := func(RecN []byte) []byte {
143 | 		rLen := len(RecN)
144 | 		if rLen == 0 {
145 | 			//TODO: Return error?
146 | 			return []byte{}
147 | 		}
148 | 
149 | 		if w.compression == CompressionPalmDoc {
150 | 			RecN = palmDocLZ77Pack(RecN)
151 | 		} else {
152 | 			RecN = append(RecN, 0)
153 | 		}
154 | 
155 | 		return RecN
156 | 	}
157 | 
158 | 	RecN := bytes.NewBuffer([]byte{})
159 | 	for {
160 | 		rRune, rSize, err := w.bookHtml.ReadRune()
161 | 		if err == io.EOF {
162 | 			w.AddRecord(makeRecord(RecN.Bytes()))
163 | 			RecN = bytes.NewBuffer([]byte{})
164 | 			break
165 | 		}
166 | 
167 | 		//Rune length + 1 padding + record length
168 | 		if rSize+RecN.Len()+1 > MOBI_MAX_RECORD_SIZE {
169 | 			w.AddRecord(makeRecord(RecN.Bytes()))
170 | 			w.bookHtml.UnreadRune()
171 | 			RecN = bytes.NewBuffer([]byte{})
172 | 			continue
173 | 		}
174 | 
175 | 		RecN.WriteRune(rRune)
176 | 
177 | 		// if RecN.Len() >= MOBI_MAX_RECORD_SIZE {
178 | 		// 	w.AddRecord(makeRecord(RecN.Bytes()))
179 | 		// 	RecN = bytes.NewBuffer([]byte{})
180 | 		// }
181 | 	}
182 | 	// for {
183 | 	// 	run, _, err := w.bookHtml.ReadRune()
184 | 	// 	if err == io.EOF {
185 | 	// 		w.AddRecord(makeRecord(RecN.Bytes()))
186 | 	// 		RecN = bytes.NewBuffer([]byte{})
187 | 	// 		break
188 | 	// 	}
189 | 	// 	RecN.WriteRune(run)
190 | 
191 | 	// 	if RecN.Len() >= MOBI_MAX_RECORD_SIZE {
192 | 	// 		w.AddRecord(makeRecord(RecN.Bytes()))
193 | 	// 		RecN = bytes.NewBuffer([]byte{})
194 | 	// 	}
195 | 	// }
196 | 	w.Pdh.RecordCount = w.RecordCount().UInt16() - 1
197 | 
198 | 	// Index0
199 | 	w.AddRecord([]uint8{0, 0})
200 | 	w.Header.FirstNonBookIndex = w.RecordCount().UInt32()
201 | 
202 | 	w.writeINDX_1()
203 | 	w.writeINDX_2()
204 | 
205 | 	// Image
206 | 	//FirstImageIndex : array index
207 | 	//EXTH_COVER - offset from FirstImageIndex
208 | 	if w.EmbeddedCount() > 0 {
209 | 		w.Header.FirstImageIndex = w.RecordCount().UInt32()
210 | 		//		c.Mh.FirstImageIndex = i + 2
211 | 		for i, e := range w.Embedded {
212 | 			w.Records = append(w.Records, e.Data)
213 | 			switch e.Type {
214 | 			case EmbCover:
215 | 				w.Exth.Add(EXTH_KF8COVERURI, fmt.Sprintf("kindle:embed:%04d", i+1))
216 | 				w.Exth.Add(EXTH_COVEROFFSET, i)
217 | 			case EmbThumb:
218 | 				w.Exth.Add(EXTH_THUMBOFFSET, i)
219 | 			}
220 | 		}
221 | 		//		for z := 0; z < w.EmbeddedCount(); z++ {
222 | 
223 | 		//			w.Records = append(w.Records, w.Images[z])
224 | 		//		}
225 | 	} else {
226 | 		w.Header.FirstImageIndex = 4294967295
227 | 	}
228 | 
229 | 	// CNCX Record
230 | 
231 | 	// Resource Record
232 | 	// w.Header.FirstImageIndex = 4294967295
233 | 	// w.Header.FirstNonBookIndex = w.RecordCount().UInt32()
234 | 	w.Header.LastContentRecordNumber = w.RecordCount().UInt16() - 1
235 | 	w.Header.FlisRecordIndex = w.AddRecord(w.generateFlis()).UInt32() // Flis
236 | 	w.Header.FcisRecordIndex = w.AddRecord(w.generateFcis()).UInt32() // Fcis
237 | 	w.AddRecord([]uint8{0xE9, 0x8E, 0x0D, 0x0A})                      // EOF
238 | 
239 | 	//fmt.Printf("%+s\n", w.Records)
240 | 	w.initPDF()
241 | 	w.initPDH()
242 | 	w.initHeader()
243 | 	w.initExth()
244 | 	_, err := w.file.Seek(1, 1)
245 | 	if err != nil {
246 | 		panic(err)
247 | 	}
248 | 	w.file.WriteString(w.title)
249 | 	_, err = w.file.Seek((int64(w.Pdh.RecordCount)*8)+1024*10, 0)
250 | 	if err != nil {
251 | 		panic(err)
252 | 	}
253 | 	for i := 1; i < w.RecordCount().Int(); i++ {
254 | 		_, err := w.file.Write(w.Records[i])
255 | 		if err != nil {
256 | 			panic(err)
257 | 		}
258 | 	}
259 | 
260 | 	w.file.Close()
261 | }
262 | 
263 | func (w *MobiWriter) EmbeddedCount() Mint {
264 | 	return Mint(len(w.Embedded))
265 | }
266 | 
267 | func (w *MobiWriter) generateCNCX() {
268 | 	/*
269 | 		Single  [Off, Len, Label, Depth]
270 | 		Parent: [Off, Len, Label, Depth] + [FirstChild, Last Child]
271 | 		Child:  [Off, Len, Label, Depth] + [Parent]
272 | 
273 | 
274 | 		CNCX Structure
275 | 		0. Header 1
276 | 		1. Header 2 [Has children 3,4,5]
277 | 		2. Header 3 [Has childred 6,7]
278 | 		3. Child 1 of Header 2
279 | 		4. Child 2 of Header 2
280 | 		5. Child 3 of Header 2
281 | 		6. Child 1 of Header 3
282 | 		7. Child 2 of Header 3
283 | 	*/
284 | 	w.cncxLabelBuffer = new(bytes.Buffer)
285 | 	w.cncxBuffer = new(bytes.Buffer)
286 | 	w.chapterCount = 0
287 | 
288 | 	TagxSingle := []mobiTagxTags{
289 | 		mobiTagxMap[TagEntry_Pos],
290 | 		mobiTagxMap[TagEntry_Len],
291 | 		mobiTagxMap[TagEntry_NameOffset],
292 | 		mobiTagxMap[TagEntry_DepthLvl],
293 | 		mobiTagxMap[TagEntry_END]}
294 | 
295 | 	TagxParent := []mobiTagxTags{
296 | 		mobiTagxMap[TagEntry_Pos],
297 | 		mobiTagxMap[TagEntry_Len],
298 | 		mobiTagxMap[TagEntry_NameOffset],
299 | 		mobiTagxMap[TagEntry_DepthLvl],
300 | 		mobiTagxMap[TagEntry_Child1],
301 | 		mobiTagxMap[TagEntry_ChildN],
302 | 		mobiTagxMap[TagEntry_END]}
303 | 
304 | 	TagxChild := []mobiTagxTags{
305 | 		mobiTagxMap[TagEntry_Pos],
306 | 		mobiTagxMap[TagEntry_Len],
307 | 		mobiTagxMap[TagEntry_NameOffset],
308 | 		mobiTagxMap[TagEntry_DepthLvl],
309 | 		mobiTagxMap[TagEntry_Parent],
310 | 		mobiTagxMap[TagEntry_END]}
311 | 
312 | 	var Id = len(w.chapters)
313 | 
314 | 	for _, node := range w.chapters {
315 | 		if node.SubChapterCount() > 0 {
316 | 			ch1 := Id
317 | 			chN := Id + node.SubChapterCount() - 1
318 | 			fmt.Printf("Parent: %v %v %v [CHILDREN: %v %v]\n", Id, node.SubChapterCount(), node.Title, ch1, chN)
319 | 			Id += node.SubChapterCount()
320 | 
321 | 			CNCX_ID := fmt.Sprintf("%03v", Id)
322 | 
323 | 			w.Idxt.Offset = append(w.Idxt.Offset, uint16(MOBI_INDX_HEADER_LEN+w.cncxBuffer.Len()))
324 | 
325 | 			w.cncxBuffer.WriteByte(byte(len(CNCX_ID)))             // Len of ID
326 | 			w.cncxBuffer.WriteString(CNCX_ID)                      // ID
327 | 			w.cncxBuffer.WriteByte(controlByte(TagxParent)[0])     // Controll Byte
328 | 			w.cncxBuffer.Write(vwiEncInt(node.RecordOffset))       // Record offset
329 | 			w.cncxBuffer.Write(vwiEncInt(node.Len))                // Lenght of a record
330 | 			w.cncxBuffer.Write(vwiEncInt(w.cncxLabelBuffer.Len())) // Label Offset // Offset relative to CNXC record
331 | 			w.cncxLabelBuffer.Write(vwiEncInt(len(node.Title)))    // CNCXLabel lenght
332 | 			w.cncxLabelBuffer.WriteString(node.Title)              // CNCXLabel title
333 | 			w.cncxBuffer.Write(vwiEncInt(0))                       // Depth
334 | 			w.cncxBuffer.Write(vwiEncInt(ch1))                     // Child1
335 | 			w.cncxBuffer.Write(vwiEncInt(chN))                     // ChildN
336 | 			w.chapterCount++
337 | 		} else {
338 | 			CNCX_ID := fmt.Sprintf("%03v", w.chapterCount)
339 | 
340 | 			w.Idxt.Offset = append(w.Idxt.Offset, uint16(MOBI_INDX_HEADER_LEN+w.cncxBuffer.Len()))
341 | 			// fmt.Printf("Node: %d  >  %d = %d\n", MOBI_INDX_HEADER_LEN, w.cncxBuffer.Len(), MOBI_INDX_HEADER_LEN+w.cncxBuffer.Len())
342 | 			w.cncxBuffer.WriteByte(byte(len(CNCX_ID)))         // Len of ID
343 | 			w.cncxBuffer.WriteString(CNCX_ID)                  // ID
344 | 			w.cncxBuffer.WriteByte(controlByte(TagxSingle)[0]) // Controll Byte
345 | 			w.cncxBuffer.Write(vwiEncInt(node.RecordOffset))   // Record offset
346 | 			fmt.Printf("Offset: %v\n", node.RecordOffset)
347 | 			w.cncxBuffer.Write(vwiEncInt(node.Len))                // Lenght of a record
348 | 			w.cncxBuffer.Write(vwiEncInt(w.cncxLabelBuffer.Len())) // Label Offset 	// Offset relative to CNXC record
349 | 			w.cncxLabelBuffer.Write(vwiEncInt(len(node.Title)))    // CNCXLabel lenght
350 | 			w.cncxLabelBuffer.WriteString(node.Title)              // CNCXLabel title
351 | 			w.cncxBuffer.Write(vwiEncInt(0))                       // Depth
352 | 			w.chapterCount++
353 | 		}
354 | 
355 | 	}
356 | 	Id = len(w.chapters)
357 | 
358 | 	for i, node := range w.chapters {
359 | 		for _, child := range node.SubChapters {
360 | 			fmt.Printf("Child: %v %v %v\n", Id, i, child.Title)
361 | 			CNCX_ID := fmt.Sprintf("%03v", w.chapterCount)
362 | 			//				fmt.Printf("Node: %v\n", CNCX_ID)
363 | 			w.Idxt.Offset = append(w.Idxt.Offset, uint16(MOBI_INDX_HEADER_LEN+w.cncxBuffer.Len()))
364 | 
365 | 			w.cncxBuffer.WriteByte(byte(len(CNCX_ID)))             // Len of ID
366 | 			w.cncxBuffer.WriteString(CNCX_ID)                      // ID
367 | 			w.cncxBuffer.WriteByte(controlByte(TagxChild)[0])      // Controll Byte
368 | 			w.cncxBuffer.Write(vwiEncInt(child.RecordOffset))      // Record offset
369 | 			w.cncxBuffer.Write(vwiEncInt(child.Len))               // Lenght of a record
370 | 			w.cncxBuffer.Write(vwiEncInt(w.cncxLabelBuffer.Len())) // Label Offset //Offset relative to CNXC record
371 | 			w.cncxLabelBuffer.Write(vwiEncInt(len(child.Title)))   // CNCXLabel lenght
372 | 			w.cncxLabelBuffer.WriteString(child.Title)             // CNCXLabel title
373 | 			w.cncxBuffer.Write(vwiEncInt(1))                       // Depth
374 | 			w.cncxBuffer.Write(vwiEncInt(i))                       // Parent
375 | 			w.chapterCount++
376 | 			Id++
377 | 		}
378 | 	}
379 | 	//	return
380 | 	//	for _, node := range w.Nodes {
381 | 	//		if node.ChildCount() == 0 {
382 | 	//			CNCX_ID := fmt.Sprintf("%03v", w.NodeCount)
383 | 	//			//			fmt.Printf("Node: %v\n", CNCX_ID)
384 | 	//			w.Idxt2.Offset = append(w.Idxt2.Offset, uint16(MOBI_INDX_HEADER_LEN+w.Cncx.Len()))
385 | 
386 | 	//			w.Cncx.WriteByte(byte(len(CNCX_ID)))                 // Len of ID
387 | 	//			w.Cncx.WriteString(CNCX_ID)                          // ID
388 | 	//			w.Cncx.WriteByte(ControlByte(TagxSingle)[0])         // Controll Byte
389 | 	//			w.Cncx.Write(vwiEncInt(node.RecordOffset, true))     // Record offset
390 | 	//			w.Cncx.Write(vwiEncInt(len(node.Html)))        // Lenght of a record
391 | 	//			w.Cncx.Write(vwiEncInt(w.CncxLabels.Len(), true))    // Label Offset 	// Offset relative to CNXC record
392 | 	//			w.CncxLabels.Write(vwiEncInt(len(node.Title), true)) // CNCXLabel lenght
393 | 	//			w.CncxLabels.WriteString(node.Title)                 // CNCXLabel title
394 | 	//			w.Cncx.Write(vwiEncInt(0, true))                     // Depth
395 | 	//			w.NodeCount++
396 | 	//		}
397 | 	//		if node.ChildCount() > 0 {
398 | 	//			CNCX_ID := fmt.Sprintf("%03v", w.NodeCount)
399 | 	//			//			fmt.Printf("Node: %v\n", CNCX_ID)
400 | 	//			w.Idxt2.Offset = append(w.Idxt2.Offset, uint16(MOBI_INDX_HEADER_LEN+w.Cncx.Len()))
401 | 
402 | 	//			// Get Offset relative to IDXT?
403 | 	//			w.Cncx.WriteByte(byte(len(CNCX_ID)))                         // Len of ID
404 | 	//			w.Cncx.WriteString(CNCX_ID)                                  // ID
405 | 	//			w.Cncx.WriteByte(ControlByte(TagxParent)[0])                 // Controll Byte
406 | 	//			w.Cncx.Write(vwiEncInt(node.RecordOffset, true))             // Record offset
407 | 	//			w.Cncx.Write(vwiEncInt(node.Len, true))                      // Lenght of a record
408 | 	//			w.Cncx.Write(vwiEncInt(w.CncxLabels.Len(), true))            // Label Offset // Offset relative to CNXC record
409 | 	//			w.CncxLabels.Write(vwiEncInt(len(node.Title), true))         // CNCXLabel lenght
410 | 	//			w.CncxLabels.WriteString(node.Title)                         // CNCXLabel title
411 | 	//			w.Cncx.Write(vwiEncInt(0, true))                             // Depth
412 | 	//			w.Cncx.Write(vwiEncInt(w.NodeCount+1, true))                 // Child1
413 | 	//			w.Cncx.Write(vwiEncInt(w.NodeCount+node.ChildCount(), true)) // ChildN
414 | 	//			w.NodeCount++
415 | 
416 | 	//			for _, child := range node.Children {
417 | 	//				CNCX_ID := fmt.Sprintf("%03v", w.NodeCount)
418 | 	//				//				fmt.Printf("Node: %v\n", CNCX_ID)
419 | 	//				w.Idxt2.Offset = append(w.Idxt2.Offset, uint16(MOBI_INDX_HEADER_LEN+w.Cncx.Len()))
420 | 
421 | 	//				w.Cncx.WriteByte(byte(len(CNCX_ID)))                  // Len of ID
422 | 	//				w.Cncx.WriteString(CNCX_ID)                           // ID
423 | 	//				w.Cncx.WriteByte(ControlByte(TagxChild)[0])           // Controll Byte
424 | 	//				w.Cncx.Write(vwiEncInt(child.RecordOffset, true))     // Record offset
425 | 	//				w.Cncx.Write(vwiEncInt(child.Len, true))              // Lenght of a record
426 | 	//				w.Cncx.Write(vwiEncInt(w.CncxLabels.Len(), true))     // Label Offset //Offset relative to CNXC record
427 | 	//				w.CncxLabels.Write(vwiEncInt(len(child.Title), true)) // CNCXLabel lenght
428 | 	//				w.CncxLabels.WriteString(child.Title)                 // CNCXLabel title
429 | 	//				w.Cncx.Write(vwiEncInt(1, true))                      // Depth
430 | 	//				w.Cncx.Write(vwiEncInt(child.Parent, true))           // Parent
431 | 	//				w.NodeCount++
432 | 	//			}
433 | 	//		}
434 | 	//	}
435 | }
436 | 
437 | func (w *MobiWriter) initPDF() *MobiWriter {
438 | 	stringToBytes(underlineTitle(w.title), &w.Pdf.DatabaseName) // Set Database Name
439 | 	w.Pdf.CreationTime = w.timestamp                            // Set Time
440 | 	w.Pdf.ModificationTime = w.timestamp                        // Set Time
441 | 	stringToBytes("BOOK", &w.Pdf.Type)                          // Palm Database File Code
442 | 	stringToBytes("MOBI", &w.Pdf.Creator)                       // *
443 | 	w.Pdf.UniqueIDSeed = rand.New(rand.NewSource(9)).Uint32()   // UniqueID
444 | 
445 | 	w.Pdf.RecordsNum = w.RecordCount().UInt16()
446 | 
447 | 	binary.Write(w.file, binary.BigEndian, w.Pdf) // Write
448 | 
449 | 	Oft := uint32((w.Pdf.RecordsNum * 8) + MOBI_PALMDB_HEADER_LEN + 2)
450 | 
451 | 	for i := uint16(0); i < w.Pdf.RecordsNum; i++ {
452 | 
453 | 		binary.Write(w.file, binary.BigEndian, mobiRecordOffset{Offset: Oft, UniqueID: i}) // Write
454 | 		if i == 0 {
455 | 			Oft = (uint32(w.Pdh.RecordCount) * 8) + uint32(1024*10)
456 | 		}
457 | 		if i > 0 {
458 | 			Oft += uint32(len(w.Records[i]))
459 | 		}
460 | 	}
461 | 
462 | 	w.file.Write([]uint8{0, 0})
463 | 
464 | 	return w
465 | }
466 | 
467 | func (w *MobiWriter) initPDH() *MobiWriter {
468 | 	w.Pdh.Compression = w.compression
469 | 	w.Pdh.RecordSize = MOBI_MAX_RECORD_SIZE
470 | 
471 | 	binary.Write(w.file, binary.BigEndian, w.Pdh) // Write
472 | 	return w
473 | }
474 | 
475 | func (w *MobiWriter) initHeader() *MobiWriter {
476 | 	stringToBytes("MOBI", &w.Header.Identifier)
477 | 	w.Header.HeaderLength = 232
478 | 	w.Header.MobiType = 2
479 | 	w.Header.TextEncoding = 65001
480 | 	w.Header.UniqueID = w.Pdf.UniqueIDSeed + 1
481 | 	w.Header.FileVersion = 6
482 | 	w.Header.MinVersion = 6
483 | 	w.Header.OrthographicIndex = 4294967295
484 | 	w.Header.InflectionIndex = 4294967295
485 | 	w.Header.IndexNames = 4294967295
486 | 	w.Header.Locale = 1033
487 | 	w.Header.IndexKeys = 4294967295
488 | 	w.Header.ExtraIndex0 = 4294967295
489 | 	w.Header.ExtraIndex1 = 4294967295
490 | 	w.Header.ExtraIndex2 = 4294967295
491 | 	w.Header.ExtraIndex3 = 4294967295
492 | 	w.Header.ExtraIndex4 = 4294967295
493 | 	w.Header.ExtraIndex5 = 4294967295
494 | 	w.Header.ExthFlags = 80
495 | 	w.Header.DrmOffset = 4294967295
496 | 	w.Header.DrmCount = 4294967295
497 | 	w.Header.FirstContentRecordNumber = 1
498 | 	w.Header.FcisRecordCount = 1
499 | 	w.Header.FlisRecordCount = 1
500 | 
501 | 	w.Header.Unknown7 = 0
502 | 	w.Header.Unknown8 = 0
503 | 
504 | 	w.Header.SrcsRecordIndex = 4294967295
505 | 	w.Header.SrcsRecordCount = 0
506 | 
507 | 	w.Header.Unknown9 = 4294967295
508 | 	w.Header.Unknown10 = 4294967295
509 | 	//w.Header.FirstCompilationDataSectionCount = 4294967295
510 | 	//w.Header.NumberOfCompilationDataSections = 4294967295
511 | 	w.Header.ExtraRecordDataFlags = 1 //1
512 | 
513 | 	w.Header.FullNameLength = uint32(len(w.title))
514 | 	w.Header.FullNameOffset = uint32(MOBI_PALMDOC_HEADER_LEN + MOBI_MOBIHEADER_LEN + w.Exth.GetHeaderLenght() + 1)
515 | 
516 | 	binary.Write(w.file, binary.BigEndian, w.Header) // Write
517 | 	return w
518 | }
519 | 
520 | func (w *MobiWriter) initExth() *MobiWriter {
521 | 	stringToBytes("EXTH", &w.Exth.Identifier)
522 | 	w.Exth.HeaderLenght = 12
523 | 
524 | 	for _, k := range w.Exth.Records {
525 | 		w.Exth.HeaderLenght += k.RecordLength
526 | 	}
527 | 
528 | 	Padding := w.Exth.HeaderLenght % 4
529 | 	w.Exth.HeaderLenght += Padding
530 | 
531 | 	w.Exth.RecordCount = uint32(len(w.Exth.Records))
532 | 
533 | 	binary.Write(w.file, binary.BigEndian, w.Exth.Identifier)
534 | 	binary.Write(w.file, binary.BigEndian, w.Exth.HeaderLenght)
535 | 	binary.Write(w.file, binary.BigEndian, w.Exth.RecordCount)
536 | 
537 | 	for _, k := range w.Exth.Records {
538 | 		binary.Write(w.file, binary.BigEndian, k.RecordType)
539 | 		binary.Write(w.file, binary.BigEndian, k.RecordLength)
540 | 		binary.Write(w.file, binary.BigEndian, k.Value)
541 | 	}
542 | 
543 | 	// Add zeros to reach multiples of 4 for the header
544 | 	for Padding != 0 {
545 | 		w.file.Write([]byte{0})
546 | 		Padding--
547 | 	}
548 | 	return w
549 | }
550 | 


--------------------------------------------------------------------------------
/writer_indx.go:
--------------------------------------------------------------------------------
  1 | package mobi
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"encoding/binary"
  6 | )
  7 | 
  8 | func (w *MobiWriter) chapterIsDeep() bool {
  9 | 	for _, node := range w.chapters {
 10 | 		if node.SubChapterCount() > 0 {
 11 | 			return true
 12 | 		}
 13 | 	}
 14 | 	return false
 15 | }
 16 | 
 17 | func (w *MobiWriter) writeINDX_1() {
 18 | 	buf := new(bytes.Buffer)
 19 | 	// Tagx
 20 | 	tagx := mobiTagx{}
 21 | 	if w.chapterIsDeep() {
 22 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Pos])
 23 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Len])
 24 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_NameOffset])
 25 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_DepthLvl])
 26 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Parent])
 27 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Child1])
 28 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_ChildN])
 29 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_END])
 30 | 	} else {
 31 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Pos])
 32 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Len])
 33 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_NameOffset])
 34 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_DepthLvl])
 35 | 		tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_END])
 36 | 	}
 37 | 
 38 | 	/*************************************/
 39 | 
 40 | 	/*************************************/
 41 | 	magicTagx.WriteTo(&tagx.Identifier)
 42 | 	tagx.ControlByteCount = 1
 43 | 	tagx.HeaderLenght = uint32(tagx.TagCount()*4) + 12
 44 | 
 45 | 	TagX := new(bytes.Buffer)
 46 | 	binary.Write(TagX, binary.BigEndian, tagx.Identifier)
 47 | 	binary.Write(TagX, binary.BigEndian, tagx.HeaderLenght)
 48 | 	binary.Write(TagX, binary.BigEndian, tagx.ControlByteCount)
 49 | 	binary.Write(TagX, binary.BigEndian, tagx.Tags)
 50 | 
 51 | 	// Indx
 52 | 	//	IndxBin := new(bytes.Buffer)
 53 | 	indx := mobiIndx{}
 54 | 	magicIndx.WriteTo(&indx.Identifier)
 55 | 	indx.HeaderLen = MOBI_INDX_HEADER_LEN
 56 | 	indx.Indx_Type = INDX_TYPE_INFLECTION
 57 | 	indx.Idxt_Count = 1
 58 | 	indx.Idxt_Encoding = MOBI_ENC_UTF8
 59 | 	indx.SetUnk2 = 4294967295
 60 | 	indx.Cncx_Records_Count = 1
 61 | 	indx.Idxt_Entry_Count = uint32(w.chapterCount)
 62 | 	indx.Tagx_Offset = MOBI_INDX_HEADER_LEN
 63 | 
 64 | 	//binary.Write(IndxBin, binary.BigEndian, indx)
 65 | 	// Idxt
 66 | 
 67 | 	/************/
 68 | 
 69 | 	IdxtLast := len(w.Idxt.Offset)
 70 | 	Offset := w.Idxt.Offset[IdxtLast-1]
 71 | 	Rec := w.cncxBuffer.Bytes()[Offset-MOBI_INDX_HEADER_LEN:]
 72 | 
 73 | 	Rec = Rec[0 : Rec[0]+1]
 74 | 	RLen := len(Rec)
 75 | 
 76 | 	//w.File.Write(Rec)
 77 | 
 78 | 	Padding := (RLen + 2) % 4
 79 | 
 80 | 	//IDXT_OFFSET, := w.File.Seek(0, 1)
 81 | 
 82 | 	indx.Idxt_Offset = MOBI_INDX_HEADER_LEN + uint32(TagX.Len()) + uint32(RLen+2+Padding) // Offset to Idxt Record
 83 | 	//w.Idxt1.Offset = []uint16{uint16(offset)}
 84 | 	/************/
 85 | 
 86 | 	binary.Write(buf, binary.BigEndian, indx)
 87 | 	buf.Write(TagX.Bytes())
 88 | 	buf.Write(Rec)
 89 | 	binary.Write(buf, binary.BigEndian, uint16(IdxtLast))
 90 | 
 91 | 	for Padding != 0 {
 92 | 		buf.Write([]byte{0})
 93 | 		Padding--
 94 | 	}
 95 | 
 96 | 	buf.WriteString(magicIdxt.String())
 97 | 
 98 | 	binary.Write(buf, binary.BigEndian, uint16(MOBI_INDX_HEADER_LEN+uint32(TagX.Len())))
 99 | 
100 | 	//ioutil.WriteFile("TAGX_TEST", TagX.Bytes(), 0644)
101 | 	//ioutil.WriteFile("INDX_TEST", IndxBin.Bytes(), 0644)
102 | 	buf.Write([]uint8{0, 0})
103 | 	w.Header.IndxRecodOffset = w.AddRecord(buf.Bytes()).UInt32()
104 | }
105 | 
106 | func (w *MobiWriter) writeINDX_2() {
107 | 	buf := new(bytes.Buffer)
108 | 	indx := mobiIndx{}
109 | 	magicIndx.WriteTo(&indx.Identifier)
110 | 	indx.HeaderLen = MOBI_INDX_HEADER_LEN
111 | 	indx.Indx_Type = INDX_TYPE_NORMAL
112 | 	indx.Unk1 = uint32(1)
113 | 	indx.Idxt_Encoding = 4294967295
114 | 	indx.SetUnk2 = 4294967295
115 | 	indx.Idxt_Offset = uint32(MOBI_INDX_HEADER_LEN + w.cncxBuffer.Len())
116 | 	indx.Idxt_Count = uint32(len(w.Idxt.Offset))
117 | 
118 | 	binary.Write(buf, binary.BigEndian, indx)
119 | 	buf.Write(w.cncxBuffer.Bytes())
120 | 
121 | 	buf.WriteString(magicIdxt.String())
122 | 	for _, offset := range w.Idxt.Offset {
123 | 		//Those offsets are not relative INDX record.
124 | 		//So we need to adjust that.
125 | 		binary.Write(buf, binary.BigEndian, offset) //+MOBI_INDX_HEADER_LEN)
126 | 
127 | 	}
128 | 
129 | 	Padding := (len(w.Idxt.Offset) + 4) % 4
130 | 	for Padding != 0 {
131 | 		buf.Write([]byte{0})
132 | 		Padding--
133 | 	}
134 | 
135 | 	w.AddRecord(buf.Bytes())
136 | 	w.AddRecord(w.cncxLabelBuffer.Bytes())
137 | }
138 | 


--------------------------------------------------------------------------------