├── .gitignore ├── LICENSE ├── README.md ├── chapter.go ├── cncx.go ├── exth.go ├── fcis.go ├── flis.go ├── header.go ├── idxt.go ├── indx.go ├── mint.go ├── mobi.go ├── pdf.go ├── pdh.go ├── peeker.go ├── ptagx.go ├── reader.go ├── tagx.go ├── util.go ├── writer.go └── writer_indx.go /.gitignore: -------------------------------------------------------------------------------- 1 | output.mobi 2 | sample/ 3 | tools/ 4 | .vscode/ 5 | main._o -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Vladimir Konovalov. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 18 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mobi 2 | Writer/Reader for Mobi format. 3 | 4 | **Note:** All testing was done on `Kindle Previewer` (Windows) and `Kindle Paperwhite (6th Gen)` 5 | 6 | ## Before You Start 7 | - This is more or less WIP. Use at your own risk. 8 | - This package was written for a specific task, thus there are certain limitations, such as: 9 | - `img` tags are ignored and not embedded. 10 | - TOC depth does not go beyond 1. Meaning for now you can only have chapters and sub-chapters. But a sub-chapter cannot have its own sub-chapters. 11 | - HTML formatting is supported, but rendering is dependent on your eBook reader. (For Kindle see [Supported HTML Tags in Book Content](https://kdp.amazon.com/help?topicId=A1JPUWCSD6F59O)) 12 | - Cover images should be in JPG (I have not tested GIF, which should be [supported](https://kdp.amazon.com/help?topicId=A1B6GKJ79HC7AN)). 13 | - **IMPORTANT**: Images resized using `image/jpeg` package will not display (in Kindle) because [JFIF APP0 marker segment](https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format#JFIF_APP0_marker_segment) is not generated by `image/jpeg` package. 14 | - Table of Contents is automatically generated. 
15 | 16 | ## Usage 17 | ### Writer 18 | 19 | m, err := mobi.NewWriter("output.mobi") 20 | if err != nil { 21 | panic(err) 22 | } 23 | 24 | m.Title("Book Title") 25 | m.Compression(mobi.CompressionNone) // LZ77 compression is also possible using mobi.CompressionPalmDoc 26 | 27 | // Add cover image 28 | m.AddCover("data/cover.jpg", "data/thumbnail.jpg") 29 | 30 | // Meta data 31 | m.NewExthRecord(mobi.EXTH_DOCTYPE, "EBOK") 32 | m.NewExthRecord(mobi.EXTH_AUTHOR, "Book Author Name") 33 | // See exth.go for additional EXTH record IDs 34 | 35 | // Add chapters and subchapters 36 | ch1 := m.NewChapter("Chapter 1", []byte("Some text here")) 37 | ch1.AddSubChapter("Chapter 1-1", []byte("Some text here")) 38 | ch1.AddSubChapter("Chapter 1-2", []byte("Some text here")) 39 | 40 | m.NewChapter("Chapter 2", []byte("Some text here")).AddSubChapter("Chapter 2-1", []byte("Some text here")).AddSubChapter("Chapter 2-2", []byte("Some text here")) 41 | m.NewChapter("Chapter 3", []byte("Some text here")).AddSubChapter("Chapter 3-1", []byte("Some text here")) 42 | m.NewChapter("Chapter 4", []byte("Some text here")).AddSubChapter("Chapter 4-1", []byte("Some text here")) 43 | 44 | // Output MOBI File 45 | m.Write() 46 | 47 | ### Reader 48 | For now, Reader does not give any useful information. 
-------------------------------------------------------------------------------- /chapter.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | import "bytes" 4 | 5 | type mobiChapter struct { 6 | Id int 7 | Parent int 8 | Title string 9 | RecordOffset int 10 | LabelOffset int 11 | Len int 12 | Html []uint8 13 | SubChapters []*mobiChapter 14 | } 15 | 16 | func (w *MobiWriter) NewChapter(title string, text []byte) *mobiChapter { 17 | w.chapters = append(w.chapters, mobiChapter{Id: w.chapterCount, Title: title, Html: minimizeHTML(text)}) 18 | w.chapterCount++ 19 | return &w.chapters[len(w.chapters)-1] 20 | } 21 | 22 | func (w *mobiChapter) AddSubChapter(title string, text []byte) *mobiChapter { 23 | w.SubChapters = append(w.SubChapters, &mobiChapter{Parent: w.Id, Title: title, Html: minimizeHTML(text)}) 24 | return w 25 | } 26 | 27 | func (w *mobiChapter) SubChapterCount() int { 28 | return len(w.SubChapters) 29 | } 30 | 31 | func (w *mobiChapter) generateHTML(out *bytes.Buffer) { 32 | //Add check for unsupported HTML tags, characters, clean up HTML 33 | w.RecordOffset = out.Len() 34 | Len0 := out.Len() 35 | //fmt.Printf("Offset: --- %v %v \n", w.Offset, w.Title) 36 | out.WriteString("

" + w.Title + "

") 37 | out.Write(w.Html) 38 | out.WriteString("") 39 | w.Len = out.Len() - Len0 40 | for i, _ := range w.SubChapters { 41 | w.SubChapters[i].generateHTML(out) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /cncx.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | type mobiCncx struct { 4 | Len uint8 `init:"Id"` //Lenght of Cncx ID 5 | Id []uint8 `format:"string"` //String ID, 6 | NCX_Count uint16 // Number of IndxEntries 7 | // Pad with zeros to reach a multiple of 4 8 | /* 9 | 0 - 2: IDLen Lenght of ID 10 | 2 - *: ID 11 | 12 | */ 13 | } 14 | -------------------------------------------------------------------------------- /exth.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | // Type of EXTH record. If it's Binary/Numberic then read/write 4 | // it using BigEndian, String is read/write using LittleEndian 5 | type ExthType uint32 6 | 7 | const ( 8 | EXTH_TYPE_NUMERIC ExthType = 0 9 | EXTH_TYPE_STRING ExthType = 1 10 | EXTH_TYPE_BINARY ExthType = 2 11 | ) 12 | 13 | // EXTH record IDs 14 | const ( 15 | EXTH_DRMSERVER uint32 = 1 16 | EXTH_DRMCOMMERCE = 2 17 | EXTH_DRMEBOOKBASE = 3 18 | EXTH_TITLE = 99 /**< */ 19 | EXTH_AUTHOR = 100 /**< */ 20 | EXTH_PUBLISHER = 101 /**< */ 21 | EXTH_IMPRINT = 102 /**< */ 22 | EXTH_DESCRIPTION = 103 /**< */ 23 | EXTH_ISBN = 104 /**< */ 24 | EXTH_SUBJECT = 105 // Could appear multiple times /**< */ 25 | EXTH_PUBLISHINGDATE = 106 /**< */ 26 | EXTH_REVIEW = 107 /**< */ 27 | EXTH_CONTRIBUTOR = 108 /**< */ 28 | EXTH_RIGHTS = 109 /**< */ 29 | EXTH_SUBJECTCODE = 110 /**< */ 30 | EXTH_TYPE = 111 /**< */ 31 | EXTH_SOURCE = 112 /**< */ 32 | EXTH_ASIN = 113 // Kindle Paperwhite labels books with "Personal" if they don't have this record. 
33 | EXTH_VERSION = 114 34 | EXTH_SAMPLE = 115 // 0x0001 if the book content is only a sample of the full book 35 | EXTH_STARTREADING = 116 // Position (4-byte offset) in file at which to open when first opened /**< Start reading */ 36 | EXTH_ADULT = 117 // Mobipocket Creator adds this if Adult only is checked on its GUI; contents: "yes" /**< */ 37 | EXTH_PRICE = 118 // As text, e.g. "4.99" /**< */ 38 | EXTH_CURRENCY = 119 // As text, e.g. "USD" /**< */ 39 | EXTH_KF8BOUNDARY = 121 40 | EXTH_FIXEDLAYOUT = 122 /**< */ 41 | EXTH_BOOKTYPE = 123 /**< */ 42 | EXTH_ORIENTATIONLOCK = 124 /**< */ 43 | EXTH_COUNTRESOURCES = 125 44 | EXTH_ORIGRESOLUTION = 126 /**< */ 45 | EXTH_ZEROGUTTER = 127 /**< */ 46 | EXTH_ZEROMARGIN = 128 /**< */ 47 | EXTH_KF8COVERURI = 129 48 | EXTH_RESCOFFSET = 131 49 | EXTH_REGIONMAGNI = 132 /**< */ 50 | 51 | EXTH_DICTNAME = 200 // As text /**< */ 52 | EXTH_COVEROFFSET = 201 // Add to first image field in Mobi Header to find PDB record containing the cover image/**< */ 53 | EXTH_THUMBOFFSET = 202 // Add to first image field in Mobi Header to find PDB record containing the thumbnail cover image 54 | EXTH_HASFAKECOVER = 203 55 | EXTH_CREATORSOFT = 204 //Known Values: 1=mobigen, 2=Mobipocket Creator, 200=kindlegen (Windows), 201=kindlegen (Linux), 202=kindlegen (Mac). 56 | EXTH_CREATORMAJOR = 205 57 | EXTH_CREATORMINOR = 206 58 | EXTH_CREATORBUILD = 207 59 | EXTH_WATERMARK = 208 60 | EXTH_TAMPERKEYS = 209 61 | 62 | EXTH_FONTSIGNATURE = 300 63 | 64 | EXTH_CLIPPINGLIMIT = 401 // Integer percentage of the text allowed to be clipped. Usually 10. 
65 | EXTH_PUBLISHERLIMIT = 402 66 | EXTH_UNK403 = 403 67 | EXTH_TTSDISABLE = 404 // 1 - Text to Speech disabled; 0 - Text to Speech enabled 68 | EXTH_UNK405 = 405 // 1 in this field seems to indicate a rental book 69 | EXTH_RENTAL = 406 // If this field is removed from a rental, the book says it expired in 1969 70 | EXTH_UNK407 = 407 71 | EXTH_UNK450 = 450 72 | EXTH_UNK451 = 451 73 | EXTH_UNK452 = 452 74 | EXTH_UNK453 = 453 75 | 76 | EXTH_DOCTYPE = 501 // PDOC - Personal Doc; EBOK - ebook; EBSP - ebook sample; 77 | EXTH_LASTUPDATE = 502 78 | EXTH_UPDATEDTITLE = 503 79 | EXTH_ASIN504 = 504 // ?? ASIN in this record. 80 | EXTH_TITLEFILEAS = 508 81 | EXTH_CREATORFILEAS = 517 82 | EXTH_PUBLISHERFILEAS = 522 83 | EXTH_LANGUAGE = 524 /**< */ 84 | EXTH_ALIGNMENT = 525 // ?? horizontal-lr in this record /**< */ 85 | EXTH_PAGEDIR = 527 86 | EXTH_OVERRIDEFONTS = 528 /**< */ 87 | EXTH_SORCEDESC = 529 88 | EXTH_DICTLANGIN = 531 89 | EXTH_DICTLANGOUT = 532 90 | EXTH_UNK534 = 534 91 | EXTH_CREATORBUILDREV = 535 92 | ) 93 | 94 | // EXTH Tag ID - Name - Type relationship 95 | var ExthMeta = []mobiExthMeta{ 96 | {0, 0, ""}, 97 | {EXTH_SAMPLE, EXTH_TYPE_NUMERIC, "Sample"}, 98 | {EXTH_STARTREADING, EXTH_TYPE_NUMERIC, "Start offset"}, 99 | {EXTH_KF8BOUNDARY, EXTH_TYPE_NUMERIC, "K8 Boundary Offset"}, 100 | {EXTH_COUNTRESOURCES, EXTH_TYPE_NUMERIC, "K8 Resources Count"}, // of , fonts, images 101 | {EXTH_RESCOFFSET, EXTH_TYPE_NUMERIC, "RESC Offset"}, 102 | {EXTH_COVEROFFSET, EXTH_TYPE_NUMERIC, "Cover Offset"}, 103 | {EXTH_THUMBOFFSET, EXTH_TYPE_NUMERIC, "Thumbnail Offset"}, 104 | {EXTH_HASFAKECOVER, EXTH_TYPE_NUMERIC, "Has Fake Cover"}, 105 | {EXTH_CREATORSOFT, EXTH_TYPE_NUMERIC, "Creator Software"}, 106 | {EXTH_CREATORMAJOR, EXTH_TYPE_NUMERIC, "Creator Major Version"}, 107 | {EXTH_CREATORMINOR, EXTH_TYPE_NUMERIC, "Creator Minor Version"}, 108 | {EXTH_CREATORBUILD, EXTH_TYPE_NUMERIC, "Creator Build Number"}, 109 | {EXTH_CLIPPINGLIMIT, EXTH_TYPE_NUMERIC, "Clipping Limit"}, 110 | 
{EXTH_PUBLISHERLIMIT, EXTH_TYPE_NUMERIC, "Publisher Limit"}, 111 | {EXTH_TTSDISABLE, EXTH_TYPE_NUMERIC, "Text-to-Speech Disabled"}, 112 | {EXTH_RENTAL, EXTH_TYPE_NUMERIC, "Rental Indicator"}, 113 | {EXTH_DRMSERVER, EXTH_TYPE_STRING, "DRM Server ID"}, 114 | {EXTH_DRMCOMMERCE, EXTH_TYPE_STRING, "DRM Commerce ID"}, 115 | {EXTH_DRMEBOOKBASE, EXTH_TYPE_STRING, "DRM Ebookbase Book ID"}, 116 | {EXTH_TITLE, EXTH_TYPE_STRING, "Title"}, 117 | {EXTH_AUTHOR, EXTH_TYPE_STRING, "Creator"}, 118 | {EXTH_PUBLISHER, EXTH_TYPE_STRING, "Publisher"}, 119 | {EXTH_IMPRINT, EXTH_TYPE_STRING, "Imprint"}, 120 | {EXTH_DESCRIPTION, EXTH_TYPE_STRING, "Description"}, 121 | {EXTH_ISBN, EXTH_TYPE_STRING, "ISBN"}, 122 | {EXTH_SUBJECT, EXTH_TYPE_STRING, "Subject"}, 123 | {EXTH_PUBLISHINGDATE, EXTH_TYPE_STRING, "Published"}, 124 | {EXTH_REVIEW, EXTH_TYPE_STRING, "Review"}, 125 | {EXTH_CONTRIBUTOR, EXTH_TYPE_STRING, "Contributor"}, 126 | {EXTH_RIGHTS, EXTH_TYPE_STRING, "Rights"}, 127 | {EXTH_SUBJECTCODE, EXTH_TYPE_STRING, "Subject Code"}, 128 | {EXTH_TYPE, EXTH_TYPE_STRING, "Type"}, 129 | {EXTH_SOURCE, EXTH_TYPE_STRING, "Source"}, 130 | {EXTH_ASIN, EXTH_TYPE_STRING, "ASIN"}, 131 | {EXTH_VERSION, EXTH_TYPE_STRING, "Version Number"}, 132 | {EXTH_ADULT, EXTH_TYPE_STRING, "Adult"}, 133 | {EXTH_PRICE, EXTH_TYPE_STRING, "Price"}, 134 | {EXTH_CURRENCY, EXTH_TYPE_STRING, "Currency"}, 135 | {EXTH_FIXEDLAYOUT, EXTH_TYPE_STRING, "Fixed Layout"}, 136 | {EXTH_BOOKTYPE, EXTH_TYPE_STRING, "Book Type"}, 137 | {EXTH_ORIENTATIONLOCK, EXTH_TYPE_STRING, "Orientation Lock"}, 138 | {EXTH_ORIGRESOLUTION, EXTH_TYPE_STRING, "Original Resolution"}, 139 | {EXTH_ZEROGUTTER, EXTH_TYPE_STRING, "Zero Gutter"}, 140 | {EXTH_ZEROMARGIN, EXTH_TYPE_STRING, "Zero margin"}, 141 | {EXTH_KF8COVERURI, EXTH_TYPE_STRING, "K8 Masthead/Cover Image"}, 142 | {EXTH_REGIONMAGNI, EXTH_TYPE_STRING, "Region Magnification"}, 143 | {EXTH_DICTNAME, EXTH_TYPE_STRING, "Dictionary Short Name"}, 144 | {EXTH_WATERMARK, EXTH_TYPE_STRING, "Watermark"}, 145 | 
{EXTH_DOCTYPE, EXTH_TYPE_STRING, "Document Type"}, 146 | {EXTH_LASTUPDATE, EXTH_TYPE_STRING, "Last Update Time"}, 147 | {EXTH_UPDATEDTITLE, EXTH_TYPE_STRING, "Updated Title"}, 148 | {EXTH_ASIN504, EXTH_TYPE_STRING, "ASIN (504)"}, 149 | {EXTH_TITLEFILEAS, EXTH_TYPE_STRING, "Title File As"}, 150 | {EXTH_CREATORFILEAS, EXTH_TYPE_STRING, "Creator File As"}, 151 | {EXTH_PUBLISHERFILEAS, EXTH_TYPE_STRING, "Publisher File As"}, 152 | {EXTH_LANGUAGE, EXTH_TYPE_STRING, "Language"}, 153 | {EXTH_ALIGNMENT, EXTH_TYPE_STRING, "Primary Writing Mode"}, 154 | {EXTH_PAGEDIR, EXTH_TYPE_STRING, "Page Progression Direction"}, 155 | {EXTH_OVERRIDEFONTS, EXTH_TYPE_STRING, "Override Kindle Fonts"}, 156 | {EXTH_SORCEDESC, EXTH_TYPE_STRING, "Original Source description"}, 157 | {EXTH_DICTLANGIN, EXTH_TYPE_STRING, "Dictionary Input Language"}, 158 | {EXTH_DICTLANGOUT, EXTH_TYPE_STRING, "Dictionary output Language"}, 159 | {EXTH_UNK534, EXTH_TYPE_STRING, "Unknown (534)"}, 160 | {EXTH_CREATORBUILDREV, EXTH_TYPE_STRING, "Kindlegen BuildRev Number"}, 161 | {EXTH_TAMPERKEYS, EXTH_TYPE_BINARY, "Tamper Proof Keys"}, 162 | {EXTH_FONTSIGNATURE, EXTH_TYPE_BINARY, "Font Signature"}, 163 | {EXTH_UNK403, EXTH_TYPE_BINARY, "Unknown (403)"}, 164 | {EXTH_UNK405, EXTH_TYPE_BINARY, "Unknown (405)"}, 165 | {EXTH_UNK407, EXTH_TYPE_BINARY, "Unknown (407)"}, 166 | {EXTH_UNK450, EXTH_TYPE_BINARY, "Unknown (450)"}, 167 | {EXTH_UNK451, EXTH_TYPE_BINARY, "Unknown (451)"}, 168 | {EXTH_UNK452, EXTH_TYPE_BINARY, "Unknown (452)"}, 169 | {EXTH_UNK453, EXTH_TYPE_BINARY, "Unknown (453)"}} 170 | 171 | type mobiExth struct { 172 | Identifier [4]uint8 `format:"string"` 173 | HeaderLenght uint32 // The length of the EXTH header, including the previous 4 bytes - but not including the final padding. 174 | RecordCount uint32 // The number of records in the EXTH header. the rest of the EXTH header consists of repeated EXTH records to the end of the EXTH length. 
175 | 176 | Records []mobiExthRecord // Lenght of RecordCount 177 | 178 | // []uint8 - lenght of X. Where X is the amount of bytes needed to reach multiples of 4 for the whole EXTH record 179 | 180 | // According to Wiki padding null bytes are not included into header lenght calculation, but from what 181 | // I see in mobi files, those bytes are included in total calculation. 182 | } 183 | 184 | type mobiExthRecord struct { 185 | RecordType uint32 // Exth Record type. Just a number identifying what's stored in the record 186 | RecordLength uint32 // Length of EXTH record = L , including the 8 bytes in the type and length fields 187 | Value []uint8 188 | } 189 | 190 | // Copy from https://github.com/bfabiszewski/libmobi/blob/f4f75982f0c00b592c418bfcf3f9920600e81573/src/util.c 191 | type mobiExthMeta struct { 192 | ID uint32 193 | Type ExthType 194 | Name string 195 | } 196 | 197 | func (w *mobiExth) GetHeaderLenght() int { 198 | elen := 12 199 | 200 | for _, k := range w.Records { 201 | elen += int(k.RecordLength) 202 | } 203 | 204 | Padding := elen % 4 205 | elen += Padding 206 | 207 | return elen 208 | } 209 | 210 | func (e *mobiExth) Add(recType uint32, Value interface{}) *mobiExth { 211 | e.RecordCount++ 212 | 213 | var MetaType = getExthMetaByTag(recType) 214 | var ExthRec mobiExthRecord = mobiExthRecord{RecordType: recType} 215 | 216 | switch MetaType.Type { 217 | case EXTH_TYPE_BINARY: 218 | ExthRec.Value = Value.([]uint8) 219 | case EXTH_TYPE_NUMERIC: 220 | var castValue uint32 221 | switch Value.(type) { 222 | case int: 223 | castValue = uint32(Value.(int)) 224 | case uint16: 225 | castValue = uint32(Value.(uint16)) 226 | case uint32: 227 | castValue = uint32(Value.(uint32)) 228 | case uint64: 229 | castValue = uint32(Value.(uint64)) 230 | case int16: 231 | castValue = uint32(Value.(int16)) 232 | case int32: 233 | castValue = uint32(Value.(int32)) 234 | case int64: 235 | castValue = uint32(Value.(int64)) 236 | default: 237 | panic("EXTH_TYPE_NUMERIC type 
is unsupported") 238 | } 239 | ExthRec.Value = int32ToBytes(castValue) 240 | case EXTH_TYPE_STRING: 241 | switch Value.(type) { 242 | case []uint8: 243 | ExthRec.Value = Value.([]uint8) 244 | case string: 245 | ExthRec.Value = []uint8(Value.(string)) 246 | } 247 | default: 248 | panic("Unknown EXTH meta type") 249 | } 250 | 251 | ExthRec.RecordLength = uint32(8 + len(ExthRec.Value)) 252 | e.Records = append(e.Records, ExthRec) 253 | return e 254 | } 255 | -------------------------------------------------------------------------------- /fcis.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | ) 7 | 8 | type mobiFcis struct { // RECORD -1 9 | Identifier uint32 //UINT ID ; 10 | Fixed0 uint32 //UINT // fixed1 ; 11 | Fixed1 uint32 //UINT // fixed2 ; 12 | Fixed2 uint32 //UINT fixed3 ; 13 | Fixed3 uint32 //UINT // fixed4 ; 14 | Fixed4 uint32 //UINT // fixed5 ; 15 | Fixed5 uint32 //UINT fixed6 ; 16 | Fixed6 uint32 //UINT fixed7 ; 17 | Fixed7 uint32 //UINT fixed8 ; 18 | Fixed8 uint16 //USHORT fixed9 ; 19 | Fixed9 uint16 //USHORT fixed10 ; 20 | Fixed10 uint32 //UINT fixed11 ; 21 | } //FCISRECORD;*/ 22 | 23 | func (w *MobiWriter) generateFcis() []byte { 24 | c := mobiFcis{} 25 | c.Identifier = 1178814803 //StringToBytes("FLIS", &c.Identifier) 26 | c.Fixed0 = 20 27 | c.Fixed1 = 16 28 | c.Fixed2 = 1 29 | //c.Fixed3 30 | c.Fixed4 = w.Pdh.TextLength 31 | //c.Fixed5 = 0 32 | c.Fixed6 = 32 33 | c.Fixed7 = 8 34 | c.Fixed8 = 1 35 | c.Fixed9 = 1 36 | //c.Fixed10 = 0 37 | 38 | buf := new(bytes.Buffer) 39 | binary.Write(buf, binary.BigEndian, c) 40 | return buf.Bytes() 41 | } 42 | -------------------------------------------------------------------------------- /flis.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | ) 7 | 8 | type mobiFlis struct { // RECORD -2 9 | Identifier 
uint32 //ID ; 10 | Fixed0 uint32 //UINT fixed1 ; 11 | Fixed1 uint16 //USHORT fixed2 ; 12 | Fixed2 uint16 //USHORT fixed3 ; 13 | Fixed3 uint32 //UINT fixed4 ; 14 | Fixed4 uint32 //UINT fixed5 ; 15 | Fixed5 uint16 //USHORT fixed6 ; 16 | Fixed6 uint16 //USHORT fixed7 ; 17 | Fixed7 uint32 //UINT fixed8 ; 18 | Fixed8 uint32 //UINT fixed9 ; 19 | Fixed9 uint32 //UINT fixed10 ; 20 | } //FLISRECORD; 21 | 22 | func (w *MobiWriter) generateFlis() []byte { 23 | c := mobiFlis{} 24 | c.Identifier = 1179404627 //StringToBytes("FLIS", &c.Identifier) 25 | c.Fixed0 = 8 26 | c.Fixed1 = 65 27 | //c.Fixed2 28 | //c.Fixed3 29 | c.Fixed4 = 4294967295 30 | c.Fixed5 = 1 31 | c.Fixed6 = 3 32 | c.Fixed7 = 3 33 | c.Fixed8 = 1 34 | c.Fixed9 = 4294967295 35 | 36 | buf := new(bytes.Buffer) 37 | binary.Write(buf, binary.BigEndian, c) 38 | return buf.Bytes() 39 | } 40 | -------------------------------------------------------------------------------- /header.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | type mobiHeader struct { 4 | Identifier [4]uint8 `format:"string"` // Must be characters MOBI 5 | HeaderLength uint32 // The length of the MOBI header, including the previous 4 bytes 6 | MobiType uint32 // Mobi type enum 7 | TextEncoding uint32 // 1252 = CP1252 (WinLatin1); 65001 = UTF-8 8 | UniqueID uint32 // Some kind of unique ID number (random?) 9 | FileVersion uint32 // Version of the Mobipocket format used in this file. //If FileVersion == 8. Then it's KF8 10 | OrthographicIndex uint32 // Section number of orthographic meta index. 0xFFFFFFFF if index is not available. 11 | InflectionIndex uint32 // Section number of inflection meta index. 0xFFFFFFFF if index is not available. 12 | IndexNames uint32 // 0xFFFFFFFF if index is not available. 13 | IndexKeys uint32 // 0xFFFFFFFF if index is not available. 14 | ExtraIndex0 uint32 // Section number of extra 0 meta index. 0xFFFFFFFF if index is not available. 
15 | ExtraIndex1 uint32 // Section number of extra 1 meta index. 0xFFFFFFFF if index is not available. 16 | ExtraIndex2 uint32 // Section number of extra 2 meta index. 0xFFFFFFFF if index is not available. 17 | ExtraIndex3 uint32 // Section number of extra 3 meta index. 0xFFFFFFFF if index is not available. 18 | ExtraIndex4 uint32 // Section number of extra 4 meta index. 0xFFFFFFFF if index is not available. 19 | ExtraIndex5 uint32 // Section number of extra 5 meta index. 0xFFFFFFFF if index is not available. 20 | FirstNonBookIndex uint32 // First record number (starting with 0) that's not the book's text 21 | FullNameOffset uint32 // Offset in record 0 (not from start of file) of the full name of the book 22 | FullNameLength uint32 // Length in bytes of the full name of the book 23 | Locale uint32 // Book locale code. Low byte is main language 09=English, next byte is dialect, 08=British, 04=US. Thus US English is 1033, UK English is 2057. 24 | InputLanguage uint32 //Input language for a dictionary 25 | OutputLanguage uint32 //Output language for a dictionary 26 | MinVersion uint32 //Minimum mobipocket version support needed to read this file. 27 | FirstImageIndex uint32 //First record number (starting with 0) that contains an image. Image records should be sequential. 28 | HuffmanRecordOffset uint32 //The record number of the first huffman compression record. 29 | HuffmanRecordCount uint32 //The number of huffman compression records. 30 | HuffmanTableOffset uint32 31 | HuffmanTableLength uint32 32 | ExthFlags uint32 //Bitfield. If bit 6 (0x40) is set, then there's an EXTH record 33 | Unknown1 [32]byte //Unknown values 34 | DrmOffset uint32 //Offset to DRM key info in DRMed files. 0xFFFFFFFF if no DRM 35 | DrmCount uint32 //Number of entries in DRM info. 0xFFFFFFFF if no DRM 36 | DrmSize uint32 //Number of bytes in DRM info. 37 | DrmFlags uint32 //Some flags concerning the DRM info. 
38 | Unknown0 [12]byte //Unknown values 39 | 40 | // If it's KF8 41 | // FdstRecordIndex uint32 42 | // else 43 | FirstContentRecordNumber uint16 //Number of first text record. Normally 1. 44 | LastContentRecordNumber uint16 //Number of last image record or number of last text record if it contains no images. Includes Image, DATP, HUFF, DRM. 45 | //End else 46 | 47 | Unknown6 uint32 //FdstRecordCount? //Use 0x00000001. 48 | FcisRecordIndex uint32 49 | FcisRecordCount uint32 //Use 0x00000001. // Always 1 50 | FlisRecordIndex uint32 51 | FlisRecordCount uint32 //Use 0x00000001. // Always 1 52 | Unknown7 uint32 53 | Unknown8 uint32 54 | SrcsRecordIndex uint32 55 | SrcsRecordCount uint32 56 | Unknown9 uint32 57 | Unknown10 uint32 58 | 59 | // A set of binary flags, some of which indicate extra data at the end of each text block. This only 60 | // seems to be valid for Mobipocket format version 5 and 6 (and higher?), when the header length is 228 (0xE4) or 232 (0xE8). 61 | // bit 1 (0x1): 62 | // bit 2 (0x2): 63 | // bit 3 (0x4): 64 | // Setting bit 2 (0x2) disables functionality. 65 | ExtraRecordDataFlags uint32 `format:"bits"` 66 | IndxRecodOffset uint32 //(If not 0xFFFFFFFF) The record number of the first INDX record created from an ncx file. 67 | 68 | //If header lenght is 248 then there's 16 extra bytes. 69 | 70 | /* 71 | If KF8 72 | FragmentIndex uint32 73 | SkeletonIndex uint32 74 | Else 75 | unknown14 uint32 76 | unknown15 uint32 77 | 78 | DatpIndex uint32 79 | 80 | If KF8 81 | GuideIndex uint32 82 | Else 83 | unknown16 uint32 84 | 85 | unknown17 uint32 86 | unknown18 uint32 87 | unknown19 uint32 ? 88 | unknown20 uint32 ? 
89 | */ 90 | } 91 | -------------------------------------------------------------------------------- /idxt.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | type mobiIdxt struct { 4 | Identifier [4]byte `format:"string"` 5 | Offset []uint16 /* mobiIndx.HeaderLenght + len(mobiTagx.HeaderLenght) */ 6 | //Unk1 uint16 // Pad with zeros to make it multiples of 4? 7 | } 8 | -------------------------------------------------------------------------------- /indx.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | const ( 4 | INDX_TYPE_NORMAL uint32 = 0 5 | INDX_TYPE_INFLECTION uint32 = 2 6 | ) 7 | 8 | type mobiIndx struct { 9 | Identifier [4]byte `format:"string"` 10 | HeaderLen uint32 11 | Unk0 uint32 12 | Unk1 uint32 /* 1 when inflection is normal? */ 13 | Indx_Type uint32 /* 12: 0 - normal, 2 - inflection */ 14 | Idxt_Offset uint32 /* 20: IDXT offset */ 15 | Idxt_Count uint32 /* 24: entries count */ 16 | Idxt_Encoding uint32 /* 28: encoding */ 17 | SetUnk2 uint32 //-1 18 | Idxt_Entry_Count uint32 /* 36: total entries count */ 19 | Ordt_Offset uint32 20 | Ligt_Offset uint32 21 | Ligt_Entries_Count uint32 /* 48: LIGT entries count */ 22 | Cncx_Records_Count uint32 /* 52: CNCX entries count */ 23 | Unk3 [108]byte 24 | Ordt_Type uint32 /* 164: ORDT type */ 25 | Ordt_Entries_Count uint32 /* 168: ORDT entries count */ 26 | Ordt1_Offset uint32 /* 172: ORDT1 offset */ 27 | Ordt2_Offset uint32 /* 176: ORDT2 offset */ 28 | Tagx_Offset uint32 /* 180: */ 29 | Unk4 uint32 /* 184: */ /* ? Default index string offset ? */ 30 | Unk5 uint32 /* 188: */ /* ? Default index string length ? 
// Mint is an int with helper methods for converting it to the integer types
// used elsewhere in the package.
type Mint int

// UInt16 returns the value converted to uint16. The conversion is a plain Go
// integer conversion, so values outside the uint16 range wrap.
func (m Mint) UInt16() uint16 {
	converted := uint16(m)
	return converted
}

// UInt32 returns the value converted to uint32. The conversion is a plain Go
// integer conversion, so values outside the uint32 range wrap.
func (m Mint) UInt32() uint32 {
	converted := uint32(m)
	return converted
}

// Int returns the value as a plain int.
func (m Mint) Int() int {
	converted := int(m)
	return converted
}
// mobiMagicType is the textual identifier ("magic") that marks the start of a
// MOBI structure, such as "MOBI", "EXTH" or "INDX".
type mobiMagicType string

// String returns the magic as a plain string.
func (m mobiMagicType) String() string {
	return string(m)
}

// WriteTo copies the bytes of the magic into output, which must be a non-nil
// pointer to a byte array whose length equals the length of the magic (for
// example *[4]byte for "MOBI"). Arrays of a named byte type are accepted too.
//
// WriteTo panics if output is not such a pointer or if the lengths differ.
// The method has no error return, and a mismatch here is a programming error
// rather than a runtime condition.
func (m mobiMagicType) WriteTo(output interface{}) {
	ptr := reflect.ValueOf(output)
	if ptr.Kind() != reflect.Ptr || ptr.IsNil() {
		panic("magic target must be a non-nil pointer to a byte array")
	}

	out := ptr.Elem()
	if out.Kind() != reflect.Array || out.Type().Elem().Kind() != reflect.Uint8 {
		panic("magic target must be a non-nil pointer to a byte array")
	}
	if out.Len() != len(m) {
		panic("magic length does not match target size")
	}

	// The lengths are equal, so every index is valid for both sides.
	for i := 0; i < len(m); i++ {
		out.Index(i).SetUint(uint64(m[i]))
	}
}
8 | ModificationTime uint32 `format:"date"` //Timestamp 9 | BackupTime uint32 `format:"date"` //Timestamp 10 | ModificationNumber uint32 11 | AppInfo uint32 12 | SortInfo uint32 13 | Type [4]byte `format:"string"` //BOOK 14 | Creator [4]byte `format:"string"` //MOBI 15 | UniqueIDSeed uint32 //Used internally to identify record 16 | NextRecordList uint32 //Only used when in-memory on Palm OS. Always set to zero in stored files. 17 | RecordsNum uint16 //Number of records in the file. Records are stored as array starting with 0. RecordsNum is total count of records, not last ID. 18 | } 19 | -------------------------------------------------------------------------------- /pdh.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | type mobiPDHCompression uint16 4 | 5 | // Compression Enum 6 | const ( 7 | // CompressionNone uint16(1). Text is stored without any compression 8 | CompressionNone mobiPDHCompression = 1 9 | // CompressionPalmDoc uint16(2). Text is compressed using simple LZ77 algorithm 10 | CompressionPalmDoc mobiPDHCompression = 2 11 | // CompressionHuffCdic uint16(17480). Text is compressed using HuffCdic 12 | CompressionHuffCdic mobiPDHCompression = 17480 13 | ) 14 | 15 | //PalmDoc Header 16 | type mobiPDH struct { 17 | Compression mobiPDHCompression //0 // 1 == no compression, 2 = PalmDOC compression, 17480 = HUFF/CDIC compression 18 | Unk1 uint16 //2 // Always zero 19 | TextLength uint32 //4 // Uncompressed length of the entire text of the book 20 | RecordCount uint16 //8 // Number of PDB records used for the text of the book. 
21 | RecordSize uint16 //10 // Maximum size of each record containing text, always 4096 22 | Encryption uint16 //12 // 0 == no encryption, 1 = Old Mobipocket Encryption, 2 = Mobipocket Encryption 23 | Unk2 uint16 //12 // Usually zero 24 | } 25 | -------------------------------------------------------------------------------- /peeker.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | type Peeker []uint8 4 | 5 | func (p Peeker) Magic() mobiMagicType { 6 | return mobiMagicType(p) 7 | } 8 | 9 | func (p Peeker) String() string { 10 | return string(p) 11 | } 12 | 13 | func (p Peeker) Bytes() []uint8 { 14 | return p 15 | } 16 | 17 | func (p Peeker) Len() int { 18 | return len(p) 19 | } 20 | -------------------------------------------------------------------------------- /ptagx.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | const ( 4 | TagEntry_END uint8 = 0 5 | TagEntry_Pos = 1 // NCX | Position offset for the beginning of NCX record (filepos) Ex: Beginning of a chapter 6 | TagEntry_Len = 2 // NCX | Record lenght. 
Ex: Chapter lenght 7 | TagEntry_NameOffset = 3 // NCX | Label text offset in CNCX 8 | TagEntry_DepthLvl = 4 // NCX | Depth/Level of CNCX 9 | TagEntry_KOffs = 5 // NCX | kind CNCX offset 10 | TagEntry_PosFid = 6 // NCX | pos:fid 11 | TagEntry_Parent = 21 // NCX | Parent 12 | TagEntry_Child1 = 22 // NCX | First child 13 | TagEntry_ChildN = 23 // NCX | Last child 14 | TagEntry_ImageIndex = 69 15 | TagEntry_DescOffset = 70 // Description offset in cncx 16 | TagEntry_AuthorOffset = 71 // Author offset in cncx 17 | TagEntry_ImageCaptionOffset = 72 // Image caption offset in cncx 18 | TagEntry_ImgAttrOffset = 73 // Image attribution offset in cncx 19 | ) 20 | 21 | var tagEntryMap = map[uint8]string{ 22 | TagEntry_Pos: "Offset", 23 | TagEntry_Len: "Lenght", 24 | TagEntry_NameOffset: "Label", 25 | TagEntry_DepthLvl: "Depth", 26 | TagEntry_KOffs: "Kind", 27 | TagEntry_PosFid: "Pos:Fid", 28 | TagEntry_Parent: "Parent", 29 | TagEntry_Child1: "First Child", 30 | TagEntry_ChildN: "Last Child", 31 | TagEntry_ImageIndex: "Image Index", 32 | TagEntry_DescOffset: "Description", 33 | TagEntry_AuthorOffset: "Author", 34 | TagEntry_ImageCaptionOffset: "Image Caption Offset", 35 | TagEntry_ImgAttrOffset: "Image Attr Offset"} 36 | 37 | type mobiPTagx struct { 38 | Tag uint8 39 | Tag_Value_Count uint8 40 | Value_Count uint32 41 | Value_Bytes uint32 42 | } 43 | -------------------------------------------------------------------------------- /reader.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "fmt" 7 | "os" 8 | "reflect" 9 | "strconv" 10 | ) 11 | 12 | type MobiReader struct { 13 | Mobi 14 | } 15 | 16 | func NewReader(filename string) (out *MobiReader, err error) { 17 | out = &MobiReader{} 18 | out.file, err = os.Open(filename) 19 | if err != nil { 20 | return nil, err 21 | } 22 | 23 | out.fileStat, err = out.file.Stat() 24 | if err != nil { 25 | return nil, err 26 | } 27 | 28 | 
return out, out.Parse() 29 | } 30 | 31 | func (r *MobiReader) Parse() (err error) { 32 | if err = r.parsePdf(); err != nil { 33 | return 34 | } 35 | 36 | if err = r.parsePdh(); err != nil { 37 | return 38 | } 39 | 40 | // Check if INDX offset is set + attempt to parse INDX 41 | if r.Header.IndxRecodOffset > 0 { 42 | err = r.parseIndexRecord(r.Header.IndxRecodOffset) 43 | if err != nil { 44 | return 45 | } 46 | } 47 | 48 | return 49 | } 50 | 51 | // parseHeader reads Palm Database Format header, and record offsets 52 | func (r *MobiReader) parsePdf() error { 53 | //First we read PDF Header, this will help us parse subsequential data 54 | //binary.Read will take struct and fill it with data from mobi File 55 | err := binary.Read(r.file, binary.BigEndian, &r.Pdf) 56 | if err != nil { 57 | return err 58 | } 59 | 60 | if r.Pdf.RecordsNum < 1 { 61 | return errors.New("Number of records in this file is less than 1.") 62 | } 63 | 64 | r.Offsets = make([]mobiRecordOffset, r.Pdf.RecordsNum) 65 | err = binary.Read(r.file, binary.BigEndian, &r.Offsets) 66 | if err != nil { 67 | return err 68 | } 69 | 70 | //After the records offsets there's a 2 byte padding 71 | r.file.Seek(2, 1) 72 | 73 | return nil 74 | } 75 | 76 | // parsePdh processes record 0 that contains PalmDoc Header, Mobi Header and Exth meta data 77 | func (r *MobiReader) parsePdh() error { 78 | // Palm Doc Header 79 | // Now we go onto reading record 0 that contains Palm Doc Header, Mobi Header, Exth Header... 80 | binary.Read(r.file, binary.BigEndian, &r.Pdh) 81 | 82 | // Check and see if there's a record encryption 83 | if r.Pdh.Encryption != 0 { 84 | return errors.New("Records are encrypted.") 85 | } 86 | 87 | // Mobi Header 88 | // Now it's time to read Mobi Header 89 | if r.MatchMagic(magicMobi) { 90 | binary.Read(r.file, binary.BigEndian, &r.Header) 91 | } else { 92 | return errors.New("Can not find MOBI header. File might be corrupt.") 93 | } 94 | 95 | // Current header struct only reads 232 bytes. 
// parseIndexRecord parses PDB record n as an INDX record.
//
// Steps, in order:
//  1. Seek to record n and remember its start position (RecPos); every offset
//     stored in the INDX header is applied relative to RecPos.
//  2. Require the INDX magic, append a new mobiIndx to r.Indx and fill it with
//     a big-endian binary.Read.
//  3. If a TAGX offset is set, parse the TAGX section and, when the header
//     reports CNCX records, read the CNCX fields that follow it.
//  4. Reject records that need ORDT or LIGT parsing (not implemented).
//  5. If the header reports IDXT entries, parse the IDXT offset table.
//  6. For INDX_TYPE_NORMAL records, walk r.Idxt.Offset and hand each entry's
//     bytes to parsePtagx.
//  7. For INDX_TYPE_INFLECTION records, recurse into record n+1.
//
// The function prints debug output to stdout (directly and via the helpers it
// calls).
//
// NOTE(review): many Seek/Read results below are discarded, and the error of
// the recursive call in step 7 is dropped as well. It is unclear whether the
// latter is deliberate (stopping quietly at the first non-INDX record), so it
// is left unchanged here.
func (r *MobiReader) parseIndexRecord(n uint32) error {
	_, err := r.OffsetToRecord(n)
	if err != nil {
		return err
	}

	// Absolute file position of the record start (whence 1 == current position).
	RecPos, _ := r.file.Seek(0, 1)

	if !r.MatchMagic(magicIndx) {
		return errors.New("Index record not found at specified at given offset")
	}
	//fmt.Printf("Index %s %v\n", r.Peek(4), RecLen)

	// A new entry is appended for every parsed record.
	//if len(r.Indx) == 0 {
	r.Indx = append(r.Indx, mobiIndx{})
	//}

	// idx points into r.Indx; the recursive call at the end may append to
	// r.Indx again, but idx is not used after that call.
	idx := &r.Indx[len(r.Indx)-1]

	err = binary.Read(r.file, binary.BigEndian, idx)
	if err != nil {
		return err
	}

	/* Tagx Record Parsing + Last CNCX */
	if idx.Tagx_Offset != 0 {
		_, err = r.file.Seek(RecPos+int64(idx.Tagx_Offset), 0)
		if err != nil {
			return err
		}

		err = r.parseTagx()
		if err != nil {
			return err
		}

		// Last CNCX record follows TAGX
		// NOTE(review): none of the reads/seeks in this branch are checked.
		if idx.Cncx_Records_Count > 0 {
			r.Cncx = mobiCncx{}
			binary.Read(r.file, binary.BigEndian, &r.Cncx.Len)

			// Id is a byte slice, so the byte order passed here has no effect.
			r.Cncx.Id = make([]uint8, r.Cncx.Len)
			binary.Read(r.file, binary.LittleEndian, &r.Cncx.Id)
			r.file.Seek(1, 1) //Skip 0x0 termination

			binary.Read(r.file, binary.BigEndian, &r.Cncx.NCX_Count)

			// PrintStruct(r.Cncx)
		}
	}

	/* Ordt Record Parsing */
	if idx.Idxt_Encoding == MOBI_ENC_UTF16 || idx.Ordt_Entries_Count > 0 {
		return errors.New("ORDT parser not implemented")
	}

	/* Ligt Record Parsing */
	if idx.Ligt_Entries_Count > 0 {
		return errors.New("LIGT parser not implemented")
	}

	/* Idxt Record Parsing */
	if idx.Idxt_Count > 0 {
		_, err = r.file.Seek(RecPos+int64(idx.Idxt_Offset), 0)
		if err != nil {
			return err
		}

		err = r.parseIdxt(idx.Idxt_Count)
		if err != nil {
			return err
		}
	}

	// Index entries. Count is only incremented; it is not read afterwards.
	// NOTE(review): when Idxt_Count is 0, r.Idxt.Offset still holds whatever a
	// previous record left there.
	var Count = 0
	if idx.Indx_Type == INDX_TYPE_NORMAL {
		//r.file.Seek(RecPos+int64(idx.HeaderLen), 0)

		// One-byte scratch buffer for the label length prefix.
		var PTagxLen = []uint8{0}
		for i, offset := range r.Idxt.Offset {
			r.file.Seek(RecPos+int64(offset), 0)

			// Read the byte containing the length of the label
			r.file.Read(PTagxLen)

			// Read the label (only referenced by the disabled debug print below)
			PTagxLabel := make([]uint8, PTagxLen[0])
			r.file.Read(PTagxLabel)

			// Entry size: distance to the next entry offset, or to the IDXT
			// section for the last entry.
			// NOTE(review): this distance includes the length byte and label
			// already consumed above, so the read below appears to run past
			// the end of the entry by that many bytes - confirm.
			PTagxLen1 := uint16(idx.Idxt_Offset) - r.Idxt.Offset[i]
			if i+1 < len(r.Idxt.Offset) {
				PTagxLen1 = r.Idxt.Offset[i+1] - r.Idxt.Offset[i]
			}

			PTagxData := make([]uint8, PTagxLen1)
			r.file.Read(PTagxData)
			fmt.Printf("\n------ %v --------\n", i)
			r.parsePtagx(PTagxData)
			Count++
			//fmt.Printf("Len: %v | Label: %s | %v\n", PTagxLen, PTagxLabel, Count)
		}
	}

	// Check next record
	//r.OffsetToRecord(n + 1)

	// Process remaining INDX records.
	// The error returned by the recursive call is discarded.
	if idx.Indx_Type == INDX_TYPE_INFLECTION {
		r.parseIndexRecord(n + 1)
	}
	//fmt.Printf("%s", )
	// Read Tagx
	// if idx.Tagx_Offset > 0 {
	// 	err := r.parseTagx()
	// 	if err != nil {
	// 		return err
	// 	}
	// }

	return nil
}
243 | 244 | // Peek returns next N bytes without advancing the reader. 245 | func (r *MobiReader) Peek(n int) Peeker { 246 | buf := make([]uint8, n) 247 | r.file.Read(buf) 248 | r.file.Seek(int64(n)*-1, 1) 249 | return buf 250 | } 251 | 252 | // Parse reads/parses Exth meta data records from file 253 | func (r *MobiReader) ExthParse() error { 254 | // If next 4 bytes are not EXTH then we have a problem 255 | if !r.MatchMagic(magicExth) { 256 | return errors.New("Currect reading position does not contain EXTH record") 257 | } 258 | 259 | binary.Read(r.file, binary.BigEndian, &r.Exth.Identifier) 260 | binary.Read(r.file, binary.BigEndian, &r.Exth.HeaderLenght) 261 | binary.Read(r.file, binary.BigEndian, &r.Exth.RecordCount) 262 | 263 | r.Exth.Records = make([]mobiExthRecord, r.Exth.RecordCount) 264 | for i, _ := range r.Exth.Records { 265 | binary.Read(r.file, binary.BigEndian, &r.Exth.Records[i].RecordType) 266 | binary.Read(r.file, binary.BigEndian, &r.Exth.Records[i].RecordLength) 267 | 268 | r.Exth.Records[i].Value = make([]uint8, r.Exth.Records[i].RecordLength-8) 269 | 270 | Tag := getExthMetaByTag(r.Exth.Records[i].RecordType) 271 | switch Tag.Type { 272 | case EXTH_TYPE_BINARY: 273 | binary.Read(r.file, binary.BigEndian, &r.Exth.Records[i].Value) 274 | // fmt.Printf("%v: %v\n", Tag.Name, r.Exth.Records[i].Value) 275 | case EXTH_TYPE_STRING: 276 | binary.Read(r.file, binary.LittleEndian, &r.Exth.Records[i].Value) 277 | // fmt.Printf("%v: %s\n", Tag.Name, r.Exth.Records[i].Value) 278 | case EXTH_TYPE_NUMERIC: 279 | binary.Read(r.file, binary.BigEndian, &r.Exth.Records[i].Value) 280 | // fmt.Printf("%v: %d\n", Tag.Name, binary.BigEndian.Uint32(r.Exth.Records[i].Value)) 281 | } 282 | } 283 | 284 | return nil 285 | } 286 | 287 | // OffsetToRecord sets reading position to record N, returns total record lenght 288 | func (r *MobiReader) OffsetToRecord(nu uint32) (uint32, error) { 289 | n := int(nu) 290 | if n > int(r.Pdf.RecordsNum)-1 { 291 | return 0, 
errors.New("Record ID requested is greater than total amount of records") 292 | } 293 | 294 | RecLen := uint32(0) 295 | if n+1 < int(r.Pdf.RecordsNum) { 296 | RecLen = r.Offsets[n+1].Offset 297 | } else { 298 | RecLen = uint32(r.fileStat.Size()) 299 | } 300 | 301 | _, err := r.file.Seek(int64(r.Offsets[n].Offset), 0) 302 | 303 | return RecLen - r.Offsets[n].Offset, err 304 | } 305 | 306 | func (r *MobiReader) parseTagx() error { 307 | if !r.MatchMagic(magicTagx) { 308 | return errors.New("TAGX record not found at given offset.") 309 | } 310 | 311 | r.Tagx = mobiTagx{} 312 | 313 | binary.Read(r.file, binary.BigEndian, &r.Tagx.Identifier) 314 | binary.Read(r.file, binary.BigEndian, &r.Tagx.HeaderLenght) 315 | if r.Tagx.HeaderLenght < 12 { 316 | return errors.New("TAGX record too short") 317 | } 318 | binary.Read(r.file, binary.BigEndian, &r.Tagx.ControlByteCount) 319 | 320 | TagCount := (r.Tagx.HeaderLenght - 12) / 4 321 | r.Tagx.Tags = make([]mobiTagxTags, TagCount) 322 | 323 | for i := 0; i < int(TagCount); i++ { 324 | err := binary.Read(r.file, binary.BigEndian, &r.Tagx.Tags[i]) 325 | if err != nil { 326 | return err 327 | } 328 | } 329 | 330 | fmt.Println("TagX called") 331 | // PrintStruct(r.Tagx) 332 | 333 | return nil 334 | } 335 | 336 | func (r *MobiReader) parseIdxt(IdxtCount uint32) error { 337 | fmt.Println("parseIdxt called") 338 | if !r.MatchMagic(magicIdxt) { 339 | return errors.New("IDXT record not found at given offset.") 340 | } 341 | 342 | binary.Read(r.file, binary.BigEndian, &r.Idxt.Identifier) 343 | 344 | r.Idxt.Offset = make([]uint16, IdxtCount) 345 | 346 | binary.Read(r.file, binary.BigEndian, &r.Idxt.Offset) 347 | //for id, _ := range r.Idxt.Offset { 348 | // binary.Read(r.Buffer, binary.BigEndian, &r.Idxt.Offset[id]) 349 | //} 350 | 351 | //Skip two bytes? Or skip necessary amount to reach total lenght in multiples of 4? 
352 | r.file.Seek(2, 1) 353 | 354 | // PrintStruct(r.Idxt) 355 | return nil 356 | } 357 | 358 | func (r *MobiReader) parsePtagx(data []byte) { 359 | //control_byte_count 360 | //tagx 361 | control_bytes := data[:r.Tagx.ControlByteCount] 362 | data = data[r.Tagx.ControlByteCount:] 363 | 364 | var Ptagx []mobiPTagx //= make([]mobiPTagx, r.Tagx.TagCount()) 365 | 366 | for _, x := range r.Tagx.Tags { 367 | if x.Control_Byte == 0x01 { 368 | control_bytes = control_bytes[1:] 369 | continue 370 | } 371 | 372 | value := control_bytes[0] & x.Bitmask 373 | if value != 0 { 374 | var value_count uint32 375 | var value_bytes uint32 376 | 377 | if value == x.Bitmask { 378 | if setBits[x.Bitmask] > 1 { 379 | // If all bits of masked value are set and the mask has more 380 | // than one bit, a variable width value will follow after 381 | // the control bytes which defines the length of bytes (NOT 382 | // the value count!) which will contain the corresponding 383 | // variable width values. 384 | var consumed uint32 385 | value_bytes, consumed = vwiDec(data, true) 386 | //fmt.Printf("\nConsumed %v", consumed) 387 | data = data[consumed:] 388 | } else { 389 | value_count = 1 390 | } 391 | } else { 392 | mask := x.Bitmask 393 | for { 394 | if mask&1 != 0 { 395 | //fmt.Printf("Break") 396 | break 397 | } 398 | mask >>= 1 399 | value >>= 1 400 | } 401 | value_count = uint32(value) 402 | } 403 | 404 | Ptagx = append(Ptagx, mobiPTagx{x.Tag, x.TagNum, value_count, value_bytes}) 405 | // ptagx[ptagx_count].tag = tagx->tags[i].tag; 406 | // ptagx[ptagx_count].tag_value_count = tagx->tags[i].values_count; 407 | // ptagx[ptagx_count].value_count = value_count; 408 | // ptagx[ptagx_count].value_bytes = value_bytes; 409 | 410 | //fmt.Printf("TAGX %v %v VC:%v VB:%v\n", x.Tag, x.TagNum, value_count, value_bytes) 411 | } 412 | } 413 | fmt.Printf("%+v", Ptagx) 414 | var IndxEntry []mobiIndxEntry 415 | for iz, x := range Ptagx { 416 | var values []uint32 417 | 418 | if x.Value_Count != 0 { 419 | // 
Read value_count * values_per_entry variable width values. 420 | fmt.Printf("\nDec: ") 421 | for i := 0; i < int(x.Value_Count)*int(x.Tag_Value_Count); i++ { 422 | byts, consumed := vwiDec(data, true) 423 | data = data[consumed:] 424 | 425 | values = append(values, byts) 426 | IndxEntry = append(IndxEntry, mobiIndxEntry{x.Tag, byts}) 427 | fmt.Printf("%v %s: %v ", iz, tagEntryMap[x.Tag], byts) 428 | } 429 | } else { 430 | // Convert value_bytes to variable width values. 431 | total_consumed := 0 432 | for { 433 | if total_consumed < int(x.Value_Bytes) { 434 | byts, consumed := vwiDec(data, true) 435 | data = data[consumed:] 436 | 437 | total_consumed += int(consumed) 438 | 439 | values = append(values, byts) 440 | IndxEntry = append(IndxEntry, mobiIndxEntry{x.Tag, byts}) 441 | } else { 442 | break 443 | } 444 | } 445 | if total_consumed != int(x.Value_Bytes) { 446 | panic("Error not enough bytes are consumed. Consumed " + strconv.Itoa(total_consumed) + " out of " + strconv.Itoa(int(x.Value_Bytes))) 447 | } 448 | } 449 | } 450 | fmt.Println("---------------------------") 451 | } 452 | -------------------------------------------------------------------------------- /tagx.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | var mobiTagxMap = map[uint8]mobiTagxTags{ 4 | TagEntry_Pos: mobiTagxTags{1, 1, 1, 0}, 5 | TagEntry_Len: mobiTagxTags{2, 1, 2, 0}, 6 | TagEntry_NameOffset: mobiTagxTags{3, 1, 4, 0}, 7 | TagEntry_DepthLvl: mobiTagxTags{4, 1, 8, 0}, 8 | TagEntry_Parent: mobiTagxTags{21, 1, 16, 0}, 9 | TagEntry_Child1: mobiTagxTags{22, 1, 32, 0}, 10 | TagEntry_ChildN: mobiTagxTags{23, 1, 64, 0}, 11 | TagEntry_PosFid: mobiTagxTags{6, 2, 128, 0}, 12 | TagEntry_END: mobiTagxTags{0, 0, 0, 1}} 13 | 14 | type mobiTagx struct { 15 | Identifier [4]byte `format:"string"` 16 | HeaderLenght uint32 `init:"Tags" op:"-12 /4"` 17 | ControlByteCount uint32 18 | Tags []mobiTagxTags 19 | //[]byte //HeaderLenght - 12 | Multiple of 
// mobiTagxTags is one 4-byte entry of the TAGX tag table. The reader fills it
// with binary.Read, so field order and widths must stay as they are.
type mobiTagxTags struct {
	Tag          uint8 // Tag identifier (see the TagEntry_* constants)
	TagNum       uint8 // Number of values stored per occurrence of the tag
	Bitmask      uint8 // Bits of the control byte that belong to this tag
	Control_Byte uint8 // 0x01 marks the end of a control byte; all other bytes of such an entry are zero
}
// hasBit reports whether bit pos (0 = least significant) is set in n.
//
// The masked value is compared against zero rather than tested for being
// positive: for the most significant bit the mask is negative, so a "> 0"
// test would report that bit as never set. Positions beyond the width of int
// yield false.
func hasBit(n int, pos uint) bool {
	return n&(1<<pos) != 0
}
// vwiDec decodes a variable-width integer from src and returns the value and
// the number of bytes consumed.
//
// Each byte contributes its low 7 bits; the byte with the high bit (0x80) set
// is the last byte of the number. With forward == true decoding starts at
// src[0] and moves right. With forward == false it starts at the last byte of
// src and moves left until the byte carrying the high bit, which is then the
// most significant byte of the number.
//
// If no byte carries the high bit, all of src is consumed. An empty src
// yields (0, 0). Bits that do not fit into 32 bits are dropped.
//
// src is never modified (the previous implementation reversed the caller's
// slice in place when decoding backwards).
func vwiDec(src []uint8, forward bool) (uint32, uint32) {
	var val, consumed uint32

	if forward {
		for _, b := range src {
			val = val<<7 | uint32(b&0x7f)
			consumed++
			if b&0x80 != 0 {
				break
			}
		}
		return val, consumed
	}

	// Backward: the last byte of src is the least significant 7-bit group.
	var shift uint
	for i := len(src) - 1; i >= 0; i-- {
		b := src[i]
		val |= uint32(b&0x7f) << shift
		shift += 7
		consumed++
		if b&0x80 != 0 {
			break
		}
	}
	return val, consumed
}
// underlineTitleRe matches every character that is not an ASCII letter, an
// ASCII digit or '-'. It is compiled once instead of on every call.
var underlineTitleRe = regexp.MustCompile("[^-A-Za-z0-9]")

// underlineTitle replaces every character other than ASCII letters, digits
// and '-' with '_' and limits the result to 31 bytes.
//
// After the replacement the string is pure ASCII, so cutting at a byte index
// cannot split a multi-byte character.
func underlineTitle(x string) string {
	x = underlineTitleRe.ReplaceAllString(x, "_")
	if len(x) > 31 {
		return x[:31]
	}
	return x
}
break 248 | // } 249 | // } 250 | // if matchLen > 3 { 251 | // // fmt.Printf("\nLen CHeck: %v = %v", i, matchLen) 252 | // var m int64 = 1 253 | // var code int64 = 0x8000 + ((m << 3) & 0x3ff8) + (int64(matchLen) - 3) 254 | // outB = append(outB, data[i]) 255 | // outB = append(outB, byte(code>>8)) 256 | // outB = append(outB, byte(code)) 257 | // // fmt.Printf("Code: %x %x", byte(code>>8), byte(code)) 258 | // i += matchLen 259 | // //if(ldata > ) 260 | // continue 261 | // } 262 | } 263 | } 264 | 265 | ch := data[i] 266 | och := byte(ch) 267 | 268 | if och == 0x20 && (i+1) < ldata { 269 | onch := byte(data[i+1]) 270 | if onch >= 0x40 && onch < 0x80 { 271 | outB = append(outB, onch^0x80) 272 | i += 1 273 | continue 274 | } else { 275 | outB = append(outB, och) 276 | continue 277 | } 278 | } 279 | if och == 0 || (och > 8 && och < 0x80) { 280 | outB = append(outB, och) 281 | } else { 282 | j := i 283 | var binseq []uint8 284 | 285 | for { 286 | if j < ldata && len(binseq) < 8 { 287 | ch = data[j] 288 | och = byte(ch) 289 | if och == 0 || (och > 8 && och < 0x80) { 290 | break 291 | } 292 | binseq = append(binseq, och) 293 | j += 1 294 | } else { 295 | break 296 | } 297 | } 298 | outB = append(outB, byte(len(binseq))) 299 | 300 | for rr := 0; rr < len(binseq); rr++ { 301 | outB = append(outB, binseq[rr]) 302 | } 303 | 304 | i += len(binseq) - 1 305 | } 306 | } 307 | outB = append(outB, tail...) 
308 | return outB 309 | } 310 | 311 | func int32ToBytes(i uint32) []byte { 312 | buf := new(bytes.Buffer) 313 | binary.Write(buf, binary.BigEndian, i) 314 | return buf.Bytes() 315 | } 316 | -------------------------------------------------------------------------------- /writer.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "math/rand" 10 | "os" 11 | "time" 12 | ) 13 | 14 | type MobiWriter struct { 15 | file *os.File 16 | 17 | timestamp uint32 18 | title string 19 | compression mobiPDHCompression 20 | 21 | chapterCount int 22 | chapters []mobiChapter 23 | 24 | bookHtml *bytes.Buffer 25 | 26 | cncxBuffer *bytes.Buffer 27 | cncxLabelBuffer *bytes.Buffer 28 | 29 | // Text Records 30 | Records [][]uint8 31 | 32 | Embedded []EmbeddedData 33 | Mobi 34 | } 35 | 36 | type EmbType int 37 | 38 | const ( 39 | EmbCover EmbType = iota 40 | EmbThumb 41 | ) 42 | 43 | type EmbeddedData struct { 44 | Type EmbType 45 | Data []byte 46 | } 47 | 48 | func (w *MobiWriter) embed(FileType EmbType, Data []byte) int { 49 | w.Embedded = append(w.Embedded, EmbeddedData{Type: FileType, Data: Data}) 50 | return len(w.Embedded) - 1 51 | } 52 | 53 | func (w *MobiWriter) NewExthRecord(recType ExthType, value interface{}) { 54 | w.Exth.Add(uint32(recType), value) 55 | } 56 | 57 | func (w *MobiWriter) AddCover(cover, thumbnail string) { 58 | coverData, err := ioutil.ReadFile(cover) 59 | if err != nil { 60 | panic("Can not load file " + cover) 61 | } 62 | thumbnailData, err := ioutil.ReadFile(thumbnail) 63 | if err != nil { 64 | panic("Can not load file " + cover) 65 | } 66 | 67 | w.embed(EmbCover, coverData) 68 | w.embed(EmbThumb, thumbnailData) 69 | } 70 | 71 | // NewWriter initializes a writer. 
Takes a pointer to file and book title/database name 72 | func NewWriter(filename string) (writer *MobiWriter, err error) { 73 | writer = &MobiWriter{} 74 | writer.file, err = os.Create(filename) 75 | if err != nil { 76 | return nil, err 77 | } 78 | return 79 | } 80 | 81 | func (w *MobiWriter) Title(i string) *MobiWriter { 82 | w.title = i 83 | return w 84 | } 85 | 86 | func (w *MobiWriter) Compression(i mobiPDHCompression) *MobiWriter { 87 | w.compression = i 88 | return w 89 | } 90 | 91 | // AddRecord adds a new record. Returns Id 92 | func (w *MobiWriter) AddRecord(data []uint8) Mint { 93 | // fmt.Printf("Adding record : %s\n", data) 94 | w.Records = append(w.Records, data) 95 | return w.RecordCount() - 1 96 | } 97 | 98 | func (w *MobiWriter) RecordCount() Mint { 99 | return Mint(len(w.Records)) 100 | } 101 | 102 | func (w *MobiWriter) Write() { 103 | // Generate HTML file 104 | w.bookHtml = new(bytes.Buffer) 105 | w.bookHtml.WriteString("") 106 | for i, _ := range w.chapters { 107 | w.chapters[i].generateHTML(w.bookHtml) 108 | } 109 | w.bookHtml.WriteString("") 110 | 111 | // Generate MOBI 112 | w.generateCNCX() // Generates CNCX 113 | w.timestamp = uint32(time.Now().Unix()) 114 | 115 | // Generate Records 116 | // Record 0 - Reserve [Expand Record size in case Exth is modified by third party readers? 1024*10?] 
117 | w.AddRecord([]uint8{0}) 118 | 119 | // Book Records 120 | w.Pdh.TextLength = uint32(w.bookHtml.Len()) 121 | 122 | // makeRecord := func(RecN []byte) []byte { 123 | // rLen := len(RecN) 124 | // if rLen == 0 { 125 | // return []byte{} 126 | // } 127 | 128 | // if rLen > MOBI_MAX_RECORD_SIZE { 129 | // Trail := rLen - MOBI_MAX_RECORD_SIZE 130 | // RecN = append(RecN, byte(Trail)) 131 | // } else { 132 | // RecN = append(RecN, 0) 133 | // } 134 | 135 | // if w.compression == CompressionPalmDoc { 136 | // RecN = palmDocLZ77Pack(RecN) 137 | // } 138 | 139 | // return RecN 140 | // } 141 | 142 | makeRecord := func(RecN []byte) []byte { 143 | rLen := len(RecN) 144 | if rLen == 0 { 145 | //TODO: Return error? 146 | return []byte{} 147 | } 148 | 149 | if w.compression == CompressionPalmDoc { 150 | RecN = palmDocLZ77Pack(RecN) 151 | } else { 152 | RecN = append(RecN, 0) 153 | } 154 | 155 | return RecN 156 | } 157 | 158 | RecN := bytes.NewBuffer([]byte{}) 159 | for { 160 | rRune, rSize, err := w.bookHtml.ReadRune() 161 | if err == io.EOF { 162 | w.AddRecord(makeRecord(RecN.Bytes())) 163 | RecN = bytes.NewBuffer([]byte{}) 164 | break 165 | } 166 | 167 | //Rune length + 1 padding + record length 168 | if rSize+RecN.Len()+1 > MOBI_MAX_RECORD_SIZE { 169 | w.AddRecord(makeRecord(RecN.Bytes())) 170 | w.bookHtml.UnreadRune() 171 | RecN = bytes.NewBuffer([]byte{}) 172 | continue 173 | } 174 | 175 | RecN.WriteRune(rRune) 176 | 177 | // if RecN.Len() >= MOBI_MAX_RECORD_SIZE { 178 | // w.AddRecord(makeRecord(RecN.Bytes())) 179 | // RecN = bytes.NewBuffer([]byte{}) 180 | // } 181 | } 182 | // for { 183 | // run, _, err := w.bookHtml.ReadRune() 184 | // if err == io.EOF { 185 | // w.AddRecord(makeRecord(RecN.Bytes())) 186 | // RecN = bytes.NewBuffer([]byte{}) 187 | // break 188 | // } 189 | // RecN.WriteRune(run) 190 | 191 | // if RecN.Len() >= MOBI_MAX_RECORD_SIZE { 192 | // w.AddRecord(makeRecord(RecN.Bytes())) 193 | // RecN = bytes.NewBuffer([]byte{}) 194 | // } 195 | // } 196 | 
w.Pdh.RecordCount = w.RecordCount().UInt16() - 1 197 | 198 | // Index0 199 | w.AddRecord([]uint8{0, 0}) 200 | w.Header.FirstNonBookIndex = w.RecordCount().UInt32() 201 | 202 | w.writeINDX_1() 203 | w.writeINDX_2() 204 | 205 | // Image 206 | //FirstImageIndex : array index 207 | //EXTH_COVER - offset from FirstImageIndex 208 | if w.EmbeddedCount() > 0 { 209 | w.Header.FirstImageIndex = w.RecordCount().UInt32() 210 | // c.Mh.FirstImageIndex = i + 2 211 | for i, e := range w.Embedded { 212 | w.Records = append(w.Records, e.Data) 213 | switch e.Type { 214 | case EmbCover: 215 | w.Exth.Add(EXTH_KF8COVERURI, fmt.Sprintf("kindle:embed:%04d", i+1)) 216 | w.Exth.Add(EXTH_COVEROFFSET, i) 217 | case EmbThumb: 218 | w.Exth.Add(EXTH_THUMBOFFSET, i) 219 | } 220 | } 221 | // for z := 0; z < w.EmbeddedCount(); z++ { 222 | 223 | // w.Records = append(w.Records, w.Images[z]) 224 | // } 225 | } else { 226 | w.Header.FirstImageIndex = 4294967295 227 | } 228 | 229 | // CNCX Record 230 | 231 | // Resource Record 232 | // w.Header.FirstImageIndex = 4294967295 233 | // w.Header.FirstNonBookIndex = w.RecordCount().UInt32() 234 | w.Header.LastContentRecordNumber = w.RecordCount().UInt16() - 1 235 | w.Header.FlisRecordIndex = w.AddRecord(w.generateFlis()).UInt32() // Flis 236 | w.Header.FcisRecordIndex = w.AddRecord(w.generateFcis()).UInt32() // Fcis 237 | w.AddRecord([]uint8{0xE9, 0x8E, 0x0D, 0x0A}) // EOF 238 | 239 | //fmt.Printf("%+s\n", w.Records) 240 | w.initPDF() 241 | w.initPDH() 242 | w.initHeader() 243 | w.initExth() 244 | _, err := w.file.Seek(1, 1) 245 | if err != nil { 246 | panic(err) 247 | } 248 | w.file.WriteString(w.title) 249 | _, err = w.file.Seek((int64(w.Pdh.RecordCount)*8)+1024*10, 0) 250 | if err != nil { 251 | panic(err) 252 | } 253 | for i := 1; i < w.RecordCount().Int(); i++ { 254 | _, err := w.file.Write(w.Records[i]) 255 | if err != nil { 256 | panic(err) 257 | } 258 | } 259 | 260 | w.file.Close() 261 | } 262 | 263 | func (w *MobiWriter) EmbeddedCount() Mint { 264 
| return Mint(len(w.Embedded)) 265 | } 266 | 267 | func (w *MobiWriter) generateCNCX() { 268 | /* 269 | Single [Off, Len, Label, Depth] 270 | Parent: [Off, Len, Label, Depth] + [FirstChild, Last Child] 271 | Child: [Off, Len, Label, Depth] + [Parent] 272 | 273 | 274 | CNCX Structure 275 | 0. Header 1 276 | 1. Header 2 [Has children 3,4,5] 277 | 2. Header 3 [Has childred 6,7] 278 | 3. Child 1 of Header 2 279 | 4. Child 2 of Header 2 280 | 5. Child 3 of Header 2 281 | 6. Child 1 of Header 3 282 | 7. Child 2 of Header 3 283 | */ 284 | w.cncxLabelBuffer = new(bytes.Buffer) 285 | w.cncxBuffer = new(bytes.Buffer) 286 | w.chapterCount = 0 287 | 288 | TagxSingle := []mobiTagxTags{ 289 | mobiTagxMap[TagEntry_Pos], 290 | mobiTagxMap[TagEntry_Len], 291 | mobiTagxMap[TagEntry_NameOffset], 292 | mobiTagxMap[TagEntry_DepthLvl], 293 | mobiTagxMap[TagEntry_END]} 294 | 295 | TagxParent := []mobiTagxTags{ 296 | mobiTagxMap[TagEntry_Pos], 297 | mobiTagxMap[TagEntry_Len], 298 | mobiTagxMap[TagEntry_NameOffset], 299 | mobiTagxMap[TagEntry_DepthLvl], 300 | mobiTagxMap[TagEntry_Child1], 301 | mobiTagxMap[TagEntry_ChildN], 302 | mobiTagxMap[TagEntry_END]} 303 | 304 | TagxChild := []mobiTagxTags{ 305 | mobiTagxMap[TagEntry_Pos], 306 | mobiTagxMap[TagEntry_Len], 307 | mobiTagxMap[TagEntry_NameOffset], 308 | mobiTagxMap[TagEntry_DepthLvl], 309 | mobiTagxMap[TagEntry_Parent], 310 | mobiTagxMap[TagEntry_END]} 311 | 312 | var Id = len(w.chapters) 313 | 314 | for _, node := range w.chapters { 315 | if node.SubChapterCount() > 0 { 316 | ch1 := Id 317 | chN := Id + node.SubChapterCount() - 1 318 | fmt.Printf("Parent: %v %v %v [CHILDREN: %v %v]\n", Id, node.SubChapterCount(), node.Title, ch1, chN) 319 | Id += node.SubChapterCount() 320 | 321 | CNCX_ID := fmt.Sprintf("%03v", Id) 322 | 323 | w.Idxt.Offset = append(w.Idxt.Offset, uint16(MOBI_INDX_HEADER_LEN+w.cncxBuffer.Len())) 324 | 325 | w.cncxBuffer.WriteByte(byte(len(CNCX_ID))) // Len of ID 326 | w.cncxBuffer.WriteString(CNCX_ID) // ID 327 | 
w.cncxBuffer.WriteByte(controlByte(TagxParent)[0]) // Controll Byte 328 | w.cncxBuffer.Write(vwiEncInt(node.RecordOffset)) // Record offset 329 | w.cncxBuffer.Write(vwiEncInt(node.Len)) // Lenght of a record 330 | w.cncxBuffer.Write(vwiEncInt(w.cncxLabelBuffer.Len())) // Label Offset // Offset relative to CNXC record 331 | w.cncxLabelBuffer.Write(vwiEncInt(len(node.Title))) // CNCXLabel lenght 332 | w.cncxLabelBuffer.WriteString(node.Title) // CNCXLabel title 333 | w.cncxBuffer.Write(vwiEncInt(0)) // Depth 334 | w.cncxBuffer.Write(vwiEncInt(ch1)) // Child1 335 | w.cncxBuffer.Write(vwiEncInt(chN)) // ChildN 336 | w.chapterCount++ 337 | } else { 338 | CNCX_ID := fmt.Sprintf("%03v", w.chapterCount) 339 | 340 | w.Idxt.Offset = append(w.Idxt.Offset, uint16(MOBI_INDX_HEADER_LEN+w.cncxBuffer.Len())) 341 | // fmt.Printf("Node: %d > %d = %d\n", MOBI_INDX_HEADER_LEN, w.cncxBuffer.Len(), MOBI_INDX_HEADER_LEN+w.cncxBuffer.Len()) 342 | w.cncxBuffer.WriteByte(byte(len(CNCX_ID))) // Len of ID 343 | w.cncxBuffer.WriteString(CNCX_ID) // ID 344 | w.cncxBuffer.WriteByte(controlByte(TagxSingle)[0]) // Controll Byte 345 | w.cncxBuffer.Write(vwiEncInt(node.RecordOffset)) // Record offset 346 | fmt.Printf("Offset: %v\n", node.RecordOffset) 347 | w.cncxBuffer.Write(vwiEncInt(node.Len)) // Lenght of a record 348 | w.cncxBuffer.Write(vwiEncInt(w.cncxLabelBuffer.Len())) // Label Offset // Offset relative to CNXC record 349 | w.cncxLabelBuffer.Write(vwiEncInt(len(node.Title))) // CNCXLabel lenght 350 | w.cncxLabelBuffer.WriteString(node.Title) // CNCXLabel title 351 | w.cncxBuffer.Write(vwiEncInt(0)) // Depth 352 | w.chapterCount++ 353 | } 354 | 355 | } 356 | Id = len(w.chapters) 357 | 358 | for i, node := range w.chapters { 359 | for _, child := range node.SubChapters { 360 | fmt.Printf("Child: %v %v %v\n", Id, i, child.Title) 361 | CNCX_ID := fmt.Sprintf("%03v", w.chapterCount) 362 | // fmt.Printf("Node: %v\n", CNCX_ID) 363 | w.Idxt.Offset = append(w.Idxt.Offset, 
uint16(MOBI_INDX_HEADER_LEN+w.cncxBuffer.Len())) 364 | 365 | w.cncxBuffer.WriteByte(byte(len(CNCX_ID))) // Len of ID 366 | w.cncxBuffer.WriteString(CNCX_ID) // ID 367 | w.cncxBuffer.WriteByte(controlByte(TagxChild)[0]) // Controll Byte 368 | w.cncxBuffer.Write(vwiEncInt(child.RecordOffset)) // Record offset 369 | w.cncxBuffer.Write(vwiEncInt(child.Len)) // Lenght of a record 370 | w.cncxBuffer.Write(vwiEncInt(w.cncxLabelBuffer.Len())) // Label Offset //Offset relative to CNXC record 371 | w.cncxLabelBuffer.Write(vwiEncInt(len(child.Title))) // CNCXLabel lenght 372 | w.cncxLabelBuffer.WriteString(child.Title) // CNCXLabel title 373 | w.cncxBuffer.Write(vwiEncInt(1)) // Depth 374 | w.cncxBuffer.Write(vwiEncInt(i)) // Parent 375 | w.chapterCount++ 376 | Id++ 377 | } 378 | } 379 | // return 380 | // for _, node := range w.Nodes { 381 | // if node.ChildCount() == 0 { 382 | // CNCX_ID := fmt.Sprintf("%03v", w.NodeCount) 383 | // // fmt.Printf("Node: %v\n", CNCX_ID) 384 | // w.Idxt2.Offset = append(w.Idxt2.Offset, uint16(MOBI_INDX_HEADER_LEN+w.Cncx.Len())) 385 | 386 | // w.Cncx.WriteByte(byte(len(CNCX_ID))) // Len of ID 387 | // w.Cncx.WriteString(CNCX_ID) // ID 388 | // w.Cncx.WriteByte(ControlByte(TagxSingle)[0]) // Controll Byte 389 | // w.Cncx.Write(vwiEncInt(node.RecordOffset, true)) // Record offset 390 | // w.Cncx.Write(vwiEncInt(len(node.Html))) // Lenght of a record 391 | // w.Cncx.Write(vwiEncInt(w.CncxLabels.Len(), true)) // Label Offset // Offset relative to CNXC record 392 | // w.CncxLabels.Write(vwiEncInt(len(node.Title), true)) // CNCXLabel lenght 393 | // w.CncxLabels.WriteString(node.Title) // CNCXLabel title 394 | // w.Cncx.Write(vwiEncInt(0, true)) // Depth 395 | // w.NodeCount++ 396 | // } 397 | // if node.ChildCount() > 0 { 398 | // CNCX_ID := fmt.Sprintf("%03v", w.NodeCount) 399 | // // fmt.Printf("Node: %v\n", CNCX_ID) 400 | // w.Idxt2.Offset = append(w.Idxt2.Offset, uint16(MOBI_INDX_HEADER_LEN+w.Cncx.Len())) 401 | 402 | // // Get Offset relative to 
IDXT? 403 | // w.Cncx.WriteByte(byte(len(CNCX_ID))) // Len of ID 404 | // w.Cncx.WriteString(CNCX_ID) // ID 405 | // w.Cncx.WriteByte(ControlByte(TagxParent)[0]) // Controll Byte 406 | // w.Cncx.Write(vwiEncInt(node.RecordOffset, true)) // Record offset 407 | // w.Cncx.Write(vwiEncInt(node.Len, true)) // Lenght of a record 408 | // w.Cncx.Write(vwiEncInt(w.CncxLabels.Len(), true)) // Label Offset // Offset relative to CNXC record 409 | // w.CncxLabels.Write(vwiEncInt(len(node.Title), true)) // CNCXLabel lenght 410 | // w.CncxLabels.WriteString(node.Title) // CNCXLabel title 411 | // w.Cncx.Write(vwiEncInt(0, true)) // Depth 412 | // w.Cncx.Write(vwiEncInt(w.NodeCount+1, true)) // Child1 413 | // w.Cncx.Write(vwiEncInt(w.NodeCount+node.ChildCount(), true)) // ChildN 414 | // w.NodeCount++ 415 | 416 | // for _, child := range node.Children { 417 | // CNCX_ID := fmt.Sprintf("%03v", w.NodeCount) 418 | // // fmt.Printf("Node: %v\n", CNCX_ID) 419 | // w.Idxt2.Offset = append(w.Idxt2.Offset, uint16(MOBI_INDX_HEADER_LEN+w.Cncx.Len())) 420 | 421 | // w.Cncx.WriteByte(byte(len(CNCX_ID))) // Len of ID 422 | // w.Cncx.WriteString(CNCX_ID) // ID 423 | // w.Cncx.WriteByte(ControlByte(TagxChild)[0]) // Controll Byte 424 | // w.Cncx.Write(vwiEncInt(child.RecordOffset, true)) // Record offset 425 | // w.Cncx.Write(vwiEncInt(child.Len, true)) // Lenght of a record 426 | // w.Cncx.Write(vwiEncInt(w.CncxLabels.Len(), true)) // Label Offset //Offset relative to CNXC record 427 | // w.CncxLabels.Write(vwiEncInt(len(child.Title), true)) // CNCXLabel lenght 428 | // w.CncxLabels.WriteString(child.Title) // CNCXLabel title 429 | // w.Cncx.Write(vwiEncInt(1, true)) // Depth 430 | // w.Cncx.Write(vwiEncInt(child.Parent, true)) // Parent 431 | // w.NodeCount++ 432 | // } 433 | // } 434 | // } 435 | } 436 | 437 | func (w *MobiWriter) initPDF() *MobiWriter { 438 | stringToBytes(underlineTitle(w.title), &w.Pdf.DatabaseName) // Set Database Name 439 | w.Pdf.CreationTime = w.timestamp // Set Time 
440 | w.Pdf.ModificationTime = w.timestamp // Set Time 441 | stringToBytes("BOOK", &w.Pdf.Type) // Palm Database File Code 442 | stringToBytes("MOBI", &w.Pdf.Creator) // * 443 | w.Pdf.UniqueIDSeed = rand.New(rand.NewSource(9)).Uint32() // UniqueID 444 | 445 | w.Pdf.RecordsNum = w.RecordCount().UInt16() 446 | 447 | binary.Write(w.file, binary.BigEndian, w.Pdf) // Write 448 | 449 | Oft := uint32((w.Pdf.RecordsNum * 8) + MOBI_PALMDB_HEADER_LEN + 2) 450 | 451 | for i := uint16(0); i < w.Pdf.RecordsNum; i++ { 452 | 453 | binary.Write(w.file, binary.BigEndian, mobiRecordOffset{Offset: Oft, UniqueID: i}) // Write 454 | if i == 0 { 455 | Oft = (uint32(w.Pdh.RecordCount) * 8) + uint32(1024*10) 456 | } 457 | if i > 0 { 458 | Oft += uint32(len(w.Records[i])) 459 | } 460 | } 461 | 462 | w.file.Write([]uint8{0, 0}) 463 | 464 | return w 465 | } 466 | 467 | func (w *MobiWriter) initPDH() *MobiWriter { 468 | w.Pdh.Compression = w.compression 469 | w.Pdh.RecordSize = MOBI_MAX_RECORD_SIZE 470 | 471 | binary.Write(w.file, binary.BigEndian, w.Pdh) // Write 472 | return w 473 | } 474 | 475 | func (w *MobiWriter) initHeader() *MobiWriter { 476 | stringToBytes("MOBI", &w.Header.Identifier) 477 | w.Header.HeaderLength = 232 478 | w.Header.MobiType = 2 479 | w.Header.TextEncoding = 65001 480 | w.Header.UniqueID = w.Pdf.UniqueIDSeed + 1 481 | w.Header.FileVersion = 6 482 | w.Header.MinVersion = 6 483 | w.Header.OrthographicIndex = 4294967295 484 | w.Header.InflectionIndex = 4294967295 485 | w.Header.IndexNames = 4294967295 486 | w.Header.Locale = 1033 487 | w.Header.IndexKeys = 4294967295 488 | w.Header.ExtraIndex0 = 4294967295 489 | w.Header.ExtraIndex1 = 4294967295 490 | w.Header.ExtraIndex2 = 4294967295 491 | w.Header.ExtraIndex3 = 4294967295 492 | w.Header.ExtraIndex4 = 4294967295 493 | w.Header.ExtraIndex5 = 4294967295 494 | w.Header.ExthFlags = 80 495 | w.Header.DrmOffset = 4294967295 496 | w.Header.DrmCount = 4294967295 497 | w.Header.FirstContentRecordNumber = 1 498 | 
w.Header.FcisRecordCount = 1 499 | w.Header.FlisRecordCount = 1 500 | 501 | w.Header.Unknown7 = 0 502 | w.Header.Unknown8 = 0 503 | 504 | w.Header.SrcsRecordIndex = 4294967295 505 | w.Header.SrcsRecordCount = 0 506 | 507 | w.Header.Unknown9 = 4294967295 508 | w.Header.Unknown10 = 4294967295 509 | //w.Header.FirstCompilationDataSectionCount = 4294967295 510 | //w.Header.NumberOfCompilationDataSections = 4294967295 511 | w.Header.ExtraRecordDataFlags = 1 //1 512 | 513 | w.Header.FullNameLength = uint32(len(w.title)) 514 | w.Header.FullNameOffset = uint32(MOBI_PALMDOC_HEADER_LEN + MOBI_MOBIHEADER_LEN + w.Exth.GetHeaderLenght() + 1) 515 | 516 | binary.Write(w.file, binary.BigEndian, w.Header) // Write 517 | return w 518 | } 519 | 520 | func (w *MobiWriter) initExth() *MobiWriter { 521 | stringToBytes("EXTH", &w.Exth.Identifier) 522 | w.Exth.HeaderLenght = 12 523 | 524 | for _, k := range w.Exth.Records { 525 | w.Exth.HeaderLenght += k.RecordLength 526 | } 527 | 528 | Padding := w.Exth.HeaderLenght % 4 529 | w.Exth.HeaderLenght += Padding 530 | 531 | w.Exth.RecordCount = uint32(len(w.Exth.Records)) 532 | 533 | binary.Write(w.file, binary.BigEndian, w.Exth.Identifier) 534 | binary.Write(w.file, binary.BigEndian, w.Exth.HeaderLenght) 535 | binary.Write(w.file, binary.BigEndian, w.Exth.RecordCount) 536 | 537 | for _, k := range w.Exth.Records { 538 | binary.Write(w.file, binary.BigEndian, k.RecordType) 539 | binary.Write(w.file, binary.BigEndian, k.RecordLength) 540 | binary.Write(w.file, binary.BigEndian, k.Value) 541 | } 542 | 543 | // Add zeros to reach multiples of 4 for the header 544 | for Padding != 0 { 545 | w.file.Write([]byte{0}) 546 | Padding-- 547 | } 548 | return w 549 | } 550 | -------------------------------------------------------------------------------- /writer_indx.go: -------------------------------------------------------------------------------- 1 | package mobi 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | ) 7 | 8 | func (w *MobiWriter) 
chapterIsDeep() bool { 9 | for _, node := range w.chapters { 10 | if node.SubChapterCount() > 0 { 11 | return true 12 | } 13 | } 14 | return false 15 | } 16 | 17 | func (w *MobiWriter) writeINDX_1() { 18 | buf := new(bytes.Buffer) 19 | // Tagx 20 | tagx := mobiTagx{} 21 | if w.chapterIsDeep() { 22 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Pos]) 23 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Len]) 24 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_NameOffset]) 25 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_DepthLvl]) 26 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Parent]) 27 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Child1]) 28 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_ChildN]) 29 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_END]) 30 | } else { 31 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Pos]) 32 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_Len]) 33 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_NameOffset]) 34 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_DepthLvl]) 35 | tagx.Tags = append(tagx.Tags, mobiTagxMap[TagEntry_END]) 36 | } 37 | 38 | /*************************************/ 39 | 40 | /*************************************/ 41 | magicTagx.WriteTo(&tagx.Identifier) 42 | tagx.ControlByteCount = 1 43 | tagx.HeaderLenght = uint32(tagx.TagCount()*4) + 12 44 | 45 | TagX := new(bytes.Buffer) 46 | binary.Write(TagX, binary.BigEndian, tagx.Identifier) 47 | binary.Write(TagX, binary.BigEndian, tagx.HeaderLenght) 48 | binary.Write(TagX, binary.BigEndian, tagx.ControlByteCount) 49 | binary.Write(TagX, binary.BigEndian, tagx.Tags) 50 | 51 | // Indx 52 | // IndxBin := new(bytes.Buffer) 53 | indx := mobiIndx{} 54 | magicIndx.WriteTo(&indx.Identifier) 55 | indx.HeaderLen = MOBI_INDX_HEADER_LEN 56 | indx.Indx_Type = INDX_TYPE_INFLECTION 57 | indx.Idxt_Count = 1 58 | indx.Idxt_Encoding = MOBI_ENC_UTF8 59 | indx.SetUnk2 = 4294967295 60 | indx.Cncx_Records_Count = 1 61 | 
indx.Idxt_Entry_Count = uint32(w.chapterCount) 62 | indx.Tagx_Offset = MOBI_INDX_HEADER_LEN 63 | 64 | //binary.Write(IndxBin, binary.BigEndian, indx) 65 | // Idxt 66 | 67 | /************/ 68 | 69 | IdxtLast := len(w.Idxt.Offset) 70 | Offset := w.Idxt.Offset[IdxtLast-1] 71 | Rec := w.cncxBuffer.Bytes()[Offset-MOBI_INDX_HEADER_LEN:] 72 | 73 | Rec = Rec[0 : Rec[0]+1] 74 | RLen := len(Rec) 75 | 76 | //w.File.Write(Rec) 77 | 78 | Padding := (RLen + 2) % 4 79 | 80 | //IDXT_OFFSET, := w.File.Seek(0, 1) 81 | 82 | indx.Idxt_Offset = MOBI_INDX_HEADER_LEN + uint32(TagX.Len()) + uint32(RLen+2+Padding) // Offset to Idxt Record 83 | //w.Idxt1.Offset = []uint16{uint16(offset)} 84 | /************/ 85 | 86 | binary.Write(buf, binary.BigEndian, indx) 87 | buf.Write(TagX.Bytes()) 88 | buf.Write(Rec) 89 | binary.Write(buf, binary.BigEndian, uint16(IdxtLast)) 90 | 91 | for Padding != 0 { 92 | buf.Write([]byte{0}) 93 | Padding-- 94 | } 95 | 96 | buf.WriteString(magicIdxt.String()) 97 | 98 | binary.Write(buf, binary.BigEndian, uint16(MOBI_INDX_HEADER_LEN+uint32(TagX.Len()))) 99 | 100 | //ioutil.WriteFile("TAGX_TEST", TagX.Bytes(), 0644) 101 | //ioutil.WriteFile("INDX_TEST", IndxBin.Bytes(), 0644) 102 | buf.Write([]uint8{0, 0}) 103 | w.Header.IndxRecodOffset = w.AddRecord(buf.Bytes()).UInt32() 104 | } 105 | 106 | func (w *MobiWriter) writeINDX_2() { 107 | buf := new(bytes.Buffer) 108 | indx := mobiIndx{} 109 | magicIndx.WriteTo(&indx.Identifier) 110 | indx.HeaderLen = MOBI_INDX_HEADER_LEN 111 | indx.Indx_Type = INDX_TYPE_NORMAL 112 | indx.Unk1 = uint32(1) 113 | indx.Idxt_Encoding = 4294967295 114 | indx.SetUnk2 = 4294967295 115 | indx.Idxt_Offset = uint32(MOBI_INDX_HEADER_LEN + w.cncxBuffer.Len()) 116 | indx.Idxt_Count = uint32(len(w.Idxt.Offset)) 117 | 118 | binary.Write(buf, binary.BigEndian, indx) 119 | buf.Write(w.cncxBuffer.Bytes()) 120 | 121 | buf.WriteString(magicIdxt.String()) 122 | for _, offset := range w.Idxt.Offset { 123 | //Those offsets are not relative INDX record. 
124 | //So we need to adjust that. 125 | binary.Write(buf, binary.BigEndian, offset) //+MOBI_INDX_HEADER_LEN) 126 | 127 | } 128 | 129 | Padding := (len(w.Idxt.Offset) + 4) % 4 130 | for Padding != 0 { 131 | buf.Write([]byte{0}) 132 | Padding-- 133 | } 134 | 135 | w.AddRecord(buf.Bytes()) 136 | w.AddRecord(w.cncxLabelBuffer.Bytes()) 137 | } 138 | --------------------------------------------------------------------------------