├── .gitignore ├── go.mod ├── input └── Dracula.epub ├── go.sum ├── internal ├── book │ ├── epub.go │ ├── text-book.go │ └── epub-parser.go ├── consts │ └── consts.go ├── debug │ └── debug.go ├── str │ └── str.go ├── file │ └── file.go └── tts │ └── tts.go ├── main.go ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | output/ 3 | .DS_Store -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module epub-tts 2 | 3 | go 1.22.0 4 | 5 | require golang.org/x/text v0.21.0 6 | -------------------------------------------------------------------------------- /input/Dracula.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rafael1mc/epub-tts/HEAD/input/Dracula.epub -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= 2 | golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= 3 | -------------------------------------------------------------------------------- /internal/book/epub.go: -------------------------------------------------------------------------------- 1 | package book 2 | 3 | type EpubSection struct { 4 | ID string `json:"id"` 5 | Title string `json:"title"` 6 | HtmlContent string `json:"htmlString"` 7 | } 8 | 9 | type Epub struct { 10 | Name string 11 | Toc map[string]string 12 | Sections []EpubSection 13 | } 14 | -------------------------------------------------------------------------------- /internal/consts/consts.go: -------------------------------------------------------------------------------- 1 | package consts 2 | 3 | const ( 4 | Perm = 0777 5 | InputFilePath 
= "input/Dracula.epub" 6 | 7 | IsDryRun = false // if true, will generate text files, but not audio files 8 | IsDebug = false // if true, will generate files for section json and html content as well 9 | 10 | SpeakProcessCompletion = true // if true, will say something at the end of the process 11 | ) 12 | 13 | const ( 14 | OutputRootDir = "output" 15 | OutputTxtDir = "txt" 16 | OutputTmpDir = "tmp" 17 | OutputDebugDir = "debug" 18 | ) 19 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "epub-tts/internal/book" 5 | "epub-tts/internal/consts" 6 | "epub-tts/internal/debug" 7 | "epub-tts/internal/file" 8 | "epub-tts/internal/tts" 9 | "fmt" 10 | ) 11 | 12 | func main() { 13 | fmt.Println(" ---== Execution Started ==--- ") 14 | 15 | epub, err := book.ParseEpub(consts.InputFilePath) 16 | if err != nil { 17 | panic(err) 18 | } 19 | 20 | textBook := book.TextBookFromEpub(epub) 21 | 22 | err = file.CreateOutputDirs(textBook.Name) 23 | if err != nil { 24 | panic(err) 25 | } 26 | 27 | err = file.SaveChapters(textBook) 28 | if err != nil { 29 | panic(err) 30 | } 31 | debug.GenerateDebugFiles(epub) 32 | 33 | tts := tts.NewTTS(3, textBook) 34 | tts.Run() 35 | 36 | if consts.SpeakProcessCompletion { 37 | tts.Speak("TTS completed") 38 | } 39 | fmt.Println(" ---== Execution ended ==--- ") 40 | } 41 | -------------------------------------------------------------------------------- /internal/book/text-book.go: -------------------------------------------------------------------------------- 1 | package book 2 | 3 | import ( 4 | "epub-tts/internal/str" 5 | "strings" 6 | ) 7 | 8 | type Chapter struct { 9 | ID string 10 | Name string 11 | Content string 12 | } 13 | 14 | func (c Chapter) NameOrID() string { 15 | if c.Name == "" { 16 | return c.ID 17 | } 18 | return c.Name 19 | } 20 | 21 | type TextBook struct { 22 | Name string 
23 | Chapters []Chapter 24 | } 25 | 26 | func TextBookFromEpub(input Epub) TextBook { 27 | chapters := []Chapter{} 28 | 29 | for _, v := range input.Sections { 30 | name := str.SanitizeString(v.Title) 31 | name = strings.ReplaceAll(name, "\n", "") 32 | chapter := Chapter{ 33 | ID: str.SanitizeString(v.ID), 34 | Name: name, 35 | Content: str.SanitizeString(str.RemoveTags(v.HtmlContent)), 36 | } 37 | chapters = append(chapters, chapter) 38 | } 39 | 40 | return TextBook{ 41 | Name: input.Name, 42 | Chapters: chapters, 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Rafael 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /internal/debug/debug.go: -------------------------------------------------------------------------------- 1 | package debug 2 | 3 | import ( 4 | "encoding/json" 5 | "epub-tts/internal/book" 6 | "epub-tts/internal/consts" 7 | "epub-tts/internal/file" 8 | "errors" 9 | "fmt" 10 | "os" 11 | ) 12 | 13 | func GenerateDebugFiles(epub book.Epub) { 14 | if !consts.IsDebug { 15 | return 16 | } 17 | fmt.Println("Saving debug files") 18 | 19 | err := os.MkdirAll(file.DebugDir(epub.Name), consts.Perm) 20 | if err != nil && !errors.Is(err, os.ErrExist) { 21 | panic(err) 22 | } 23 | 24 | for k, v := range epub.Sections { 25 | // 26 | // JSON 27 | // 28 | jsonContent, err := json.Marshal(v) 29 | if err != nil { 30 | panic(err) 31 | } 32 | 33 | err = os.WriteFile( 34 | file.GetOutputPath(k, file.DebugDir(epub.Name), v.ID, "json"), 35 | jsonContent, 36 | consts.Perm, 37 | ) 38 | if err != nil { 39 | fmt.Println("Failed to save json debug file") 40 | } 41 | 42 | // 43 | // HTML 44 | // 45 | err = os.WriteFile( 46 | file.GetOutputPath(k, file.DebugDir(epub.Name), v.ID, "html"), 47 | []byte(v.HtmlContent), 48 | consts.Perm, 49 | ) 50 | if err != nil { 51 | fmt.Println("Failed to save html debug file") 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # epub-tts 2 | 3 | Convert ePUB into audio files. 4 | 5 | Code will parse the ePUB into sections (which roughly correlates to book chapters) and 'text-to-speech' each section into its own audio file.
Output will be prefixed with a number to maintain order. 6 | 7 | 8 | This is an alpha, proof of concept version. 9 | To me, it's supposed to be a simple alternative for when eyes are tired but the mind is not :) 10 | 11 | 12 |
13 | 14 | # Requirements 15 | - Run on macOS 16 | - ffmpeg installed and available in $PATH 17 | - Golang 18 | 19 | # How to use 20 | 1. Clone this repo 21 | 2. Replace `input/Dracula.epub` with the book you want to convert to audio (keep the file name, or point `InputFilePath` in `internal/consts/consts.go` at your file) 22 | 3. Execute the program (note that it will take quite some time, but you should see _some_ output during execution): 23 | ``` 24 | go run . 25 | ``` 26 | 4. You should see a new `output` folder with each text and audio file. 27 | 28 | # TODO 29 | - [x] Parse ePUB from Golang 30 | - [x] Organize code 31 | - [x] Add worker pools for batch conversion and less CPU strain 32 | - [x] Reduce output audio size 33 | - [x] Extract chapter info 34 | - [ ] Add more sample ePUBs 35 | - [ ] Add automated tests 36 | - [x] Separate output by folder 37 | - [ ] Handle multiple input 38 | - [ ] Organize the code some more 39 | - [ ] Support other languages beyond English 40 | - [ ] Display progress 41 | - [ ] Break down a big section to be TTS concurrently, and merge after whole section is done 42 | - [ ] Add support for Ubuntu TTS 43 | - [ ] Add Web UI to Drag and Drop epub files 44 | - [ ] ? 45 | 46 | ### Dependencies 47 | - macOS `say` command 48 | - Note: The example book in this repo is taken from [Project Gutenberg](https://www.gutenberg.org/about/), with Copyright Status as "Public domain in the USA" 49 |
50 | 51 | # License 52 | Check [LICENSE](https://github.com/rafael1mc/epub-tts/blob/main/LICENSE) file. -------------------------------------------------------------------------------- /internal/str/str.go: -------------------------------------------------------------------------------- 1 | package str 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | "unicode" 7 | 8 | "golang.org/x/text/runes" 9 | "golang.org/x/text/transform" 10 | "golang.org/x/text/unicode/norm" 11 | ) 12 | 13 | func SanitizeString(str string) string { 14 | str = strings.Trim(str, "\r\n\t ") 15 | str = strings.ReplaceAll(str, "\r\n", "\n") 16 | 17 | // make lines with only spaces to be just lines so they can be grouped below 18 | blankLineRegex := regexp.MustCompile(`(?m)^\s*$`) 19 | str = blankLineRegex.ReplaceAllString(str, "\n") 20 | 21 | str = strings.Map(func(r rune) rune { 22 | if unicode.IsPrint(r) || r == '\n' { 23 | return r 24 | } 25 | return -1 26 | }, str) 27 | 28 | // remove excess line breaks 29 | for strings.Contains(str, "\n\n\n") { 30 | str = strings.ReplaceAll(str, "\n\n\n", "\n\n") 31 | } 32 | 33 | return str 34 | } 35 | 36 | // CleanFileName removes invalid characters for filenames 37 | // and also removes accents and special characters. 
38 | func CleanFileName(input string) string { 39 | // Normalize the input string to remove accents 40 | normalized, err := normalize(input) 41 | if err != nil { 42 | // TODO add log 43 | normalized = input 44 | } 45 | 46 | normalized = strings.ReplaceAll(normalized, "—", "_") 47 | normalized = strings.ReplaceAll(normalized, ":", "_") 48 | 49 | // Define a regular expression that allows only alphanumeric characters, dashes, and underscores 50 | re := regexp.MustCompile(`[^a-zA-Z0-9\s\-_\.]`) 51 | 52 | // Remove any character that is not a word character, whitespace, dash, or period 53 | cleaned := re.ReplaceAllString(normalized, "") 54 | 55 | // Optionally replace spaces with underscores or dashes 56 | cleaned = strings.ReplaceAll(cleaned, " ", "_") 57 | cleaned = strings.ReplaceAll(cleaned, `\n`, "") 58 | cleaned = strings.ReplaceAll(cleaned, "\n", "") 59 | 60 | return cleaned 61 | } 62 | 63 | // https://stackoverflow.com/a/65981868 64 | func normalize(s string) (string, error) { 65 | t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) 66 | result, _, err := transform.String(t, s) 67 | if err != nil { 68 | return "", err 69 | } 70 | 71 | return result, nil 72 | } 73 | 74 | func RemoveTags(input string) string { 75 | // Define the regex pattern to match HTML tags 76 | tagRegex := regexp.MustCompile(`<[^>]+>`) 77 | // Replace all occurrences of the tag pattern with an empty string 78 | return tagRegex.ReplaceAllString(input, "") 79 | } 80 | -------------------------------------------------------------------------------- /internal/file/file.go: -------------------------------------------------------------------------------- 1 | package file 2 | 3 | import ( 4 | "epub-tts/internal/book" 5 | "epub-tts/internal/consts" 6 | "epub-tts/internal/str" 7 | "errors" 8 | "fmt" 9 | "os" 10 | "path" 11 | "path/filepath" 12 | "strings" 13 | ) 14 | 15 | func normalizeBookName(bookName string) string { 16 | cleanName := 
strings.ToLower(str.CleanFileName(bookName)) 17 | nameLen := len(cleanName) 18 | if nameLen > 50 { 19 | nameLen = 50 20 | } 21 | 22 | return cleanName[:nameLen] 23 | } 24 | 25 | func rootDir(bookName string) string { 26 | return path.Join( 27 | consts.OutputRootDir, 28 | normalizeBookName(bookName), 29 | ) 30 | } 31 | 32 | func txtDir(bookName string) string { 33 | return path.Join( 34 | rootDir(bookName), 35 | consts.OutputTxtDir, 36 | ) 37 | } 38 | 39 | func TmpDir(bookName string) string { 40 | return path.Join( 41 | rootDir(bookName), 42 | consts.OutputTmpDir, 43 | ) 44 | } 45 | 46 | func DebugDir(bookName string) string { 47 | return path.Join( 48 | rootDir(bookName), 49 | consts.OutputDebugDir, 50 | ) 51 | } 52 | 53 | func CreateOutputDirs(bookName string) error { 54 | var err error 55 | 56 | fmt.Println("Creating tmp dir", TmpDir(bookName)) 57 | err = os.MkdirAll(TmpDir(bookName), consts.Perm) 58 | if err != nil && !errors.Is(err, os.ErrExist) { 59 | return err 60 | } 61 | 62 | err = os.MkdirAll(txtDir(bookName), consts.Perm) 63 | if err != nil && !errors.Is(err, os.ErrExist) { 64 | return err 65 | } 66 | 67 | return nil 68 | } 69 | 70 | func SaveChapters(textBook book.TextBook) error { 71 | fmt.Println("Saving chapter text files.") 72 | for k, v := range textBook.Chapters { 73 | filename := GetTextfileName(k, textBook.Name, v) 74 | err := os.WriteFile( 75 | filename, 76 | []byte(v.Content), 77 | consts.Perm, 78 | ) 79 | if err != nil { 80 | return err 81 | } 82 | } 83 | 84 | return nil 85 | } 86 | 87 | func GetTextfileName(pos int, bookName string, chapter book.Chapter) string { 88 | return GetOutputPath(pos, txtDir(bookName), chapter.NameOrID(), "txt") 89 | } 90 | 91 | func GetTtsAudioFilename(pos int, bookName string, chapter book.Chapter) string { 92 | return GetOutputPath(pos, TmpDir(bookName), chapter.NameOrID(), "aiff") 93 | } 94 | 95 | func GetConvertedAudioFilename(pos int, bookName string, chapter book.Chapter) string { 96 | return 
GetOutputPath(pos, rootDir(bookName), chapter.NameOrID(), "mp3") 97 | } 98 | 99 | func GetOutputPath(pos int, outputFolder string, name string, extension string) string { 100 | filename := fmt.Sprintf("%d-%s.%s", pos, name, extension) 101 | filename = strings.ToLower(filename) 102 | filename = str.CleanFileName(filename) 103 | 104 | filePath := filepath.Join(outputFolder, filename) 105 | 106 | return filePath 107 | } 108 | -------------------------------------------------------------------------------- /internal/tts/tts.go: -------------------------------------------------------------------------------- 1 | package tts 2 | 3 | import ( 4 | "epub-tts/internal/book" 5 | "epub-tts/internal/consts" 6 | "epub-tts/internal/file" 7 | "fmt" 8 | "os" 9 | "os/exec" 10 | ) 11 | 12 | type TTS struct { 13 | workerCount int 14 | 15 | textBook book.TextBook 16 | } 17 | 18 | type job struct { 19 | ID int 20 | BookName string 21 | Chapter book.Chapter 22 | } 23 | 24 | type jobDone struct { 25 | job 26 | Error error 27 | } 28 | 29 | func NewTTS( 30 | workerCount int, 31 | textBook book.TextBook, 32 | ) *TTS { 33 | return &TTS{ 34 | workerCount: workerCount, 35 | textBook: textBook, 36 | } 37 | } 38 | 39 | func (t TTS) Run() { 40 | fmt.Println("Running text-to-speech") 41 | 42 | jobCount := len(t.textBook.Chapters) 43 | jobInputChan := make(chan job, jobCount) 44 | jobDoneChan := make(chan jobDone, jobCount) 45 | 46 | t.launchWorkers(jobInputChan, jobDoneChan) 47 | 48 | for k, v := range t.textBook.Chapters { 49 | jobInputChan <- job{ID: k, BookName: t.textBook.Name, Chapter: v} 50 | } 51 | close(jobInputChan) 52 | 53 | for range jobCount { 54 | jobDone := <-jobDoneChan 55 | if jobDone.Error != nil { 56 | fmt.Println("Failed to process item", jobDone.Chapter.Name, "with error", jobDone.Error) 57 | } 58 | } 59 | 60 | os.RemoveAll(file.TmpDir(t.textBook.Name)) 61 | } 62 | 63 | func (t TTS) Speak(text string) { 64 | cmd := fmt.Sprintf(`say "%s"`, text) 65 | exec.Command("/bin/sh", "-c", 
cmd).Output() 66 | } 67 | 68 | func (t TTS) launchWorkers(jobInputChan <-chan job, jobDoneChan chan<- jobDone) { 69 | fmt.Println("Launching", t.workerCount, "worker(s)") 70 | for k := range t.workerCount { 71 | go t.launchWorker(k, jobInputChan, jobDoneChan) 72 | } 73 | } 74 | 75 | func (t TTS) launchWorker(id int, inputChan <-chan job, doneChan chan<- jobDone) { 76 | // TODO: use worker id and doneChan with error 77 | for i := range inputChan { 78 | if consts.IsDryRun { 79 | doneChan <- jobDone{job: i} 80 | continue 81 | } 82 | 83 | _ = ttsChapter(i.ID, i.BookName, i.Chapter) 84 | audioConvert(i.ID, i.BookName, i.Chapter) 85 | // TODO: maybe already delete the aiff file here, to prevent growing then shriking 86 | // some books generate GBs on aiff 87 | doneChan <- jobDone{job: i} // not sending errors yet 88 | } 89 | } 90 | 91 | func ttsChapter(pos int, bookName string, chapter book.Chapter) string { 92 | audioName := file.GetTtsAudioFilename(pos, bookName, chapter) 93 | 94 | fmt.Println("🎤 Narrating chapter: '" + audioName + "' 🎤") 95 | cmdStr := fmt.Sprintf(`say -f "%s" -o "%s"`, file.GetTextfileName(pos, bookName, chapter), audioName) 96 | out, _ := exec.Command("/bin/sh", "-c", cmdStr).Output() 97 | 98 | return string(out) 99 | } 100 | 101 | func audioConvert(pos int, bookName string, chapter book.Chapter) string { 102 | ttsAudioName := file.GetTtsAudioFilename(pos, bookName, chapter) 103 | convertedAudioName := file.GetConvertedAudioFilename(pos, bookName, chapter) 104 | 105 | fmt.Println("🔄 Converting chapter: '" + ttsAudioName + "' 🔄") 106 | cmdStr := fmt.Sprintf(`ffmpeg -y -i %s %s`, ttsAudioName, convertedAudioName) 107 | out, _ := exec.Command("/bin/sh", "-c", cmdStr).Output() 108 | 109 | fmt.Println("✅ Chapter '" + convertedAudioName + "' converted ✅") 110 | return string(out) 111 | } 112 | -------------------------------------------------------------------------------- /internal/book/epub-parser.go: 
-------------------------------------------------------------------------------- 1 | package book 2 | 3 | import ( 4 | "archive/zip" 5 | "encoding/xml" 6 | "fmt" 7 | "io" 8 | "net/url" 9 | "path/filepath" 10 | "strings" 11 | ) 12 | 13 | // Container structure to parse container.xml 14 | type Container struct { 15 | Rootfiles []Rootfile `xml:"rootfiles>rootfile"` 16 | } 17 | 18 | // Rootfile structure for OPF reference 19 | type Rootfile struct { 20 | FullPath string `xml:"full-path,attr"` 21 | } 22 | 23 | // Package structure to parse OPF file 24 | type Package struct { 25 | Manifest []Item `xml:"manifest>item"` 26 | Spine []Itemref `xml:"spine>itemref"` 27 | } 28 | 29 | // Item structure for manifest items 30 | type Item struct { 31 | ID string `xml:"id,attr"` 32 | Href string `xml:"href,attr"` 33 | } 34 | 35 | // Itemref structure for spine items 36 | type Itemref struct { 37 | IDRef string `xml:"idref,attr"` 38 | } 39 | 40 | // NavPoint represents a navigation point in the EPUB toc.ncx file 41 | type NavPoint struct { 42 | Text string `xml:"navLabel>text"` 43 | // Src string `xml:"content>src,attr"` // this doesn;t work 44 | Src string `xml:"content,attr"` 45 | SubNavPoints []NavPoint `xml:"navPoint"` 46 | } 47 | 48 | func (n *NavPoint) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { 49 | var aux struct { 50 | Text string `xml:"navLabel>text"` 51 | Content struct { 52 | Src string `xml:"src,attr"` 53 | } `xml:"content"` 54 | SubNavPoints []NavPoint `xml:"navPoint"` 55 | } 56 | if err := d.DecodeElement(&aux, &start); err != nil { 57 | return err 58 | } 59 | 60 | n.Text = aux.Text 61 | n.Src = aux.Content.Src 62 | n.SubNavPoints = aux.SubNavPoints 63 | 64 | return nil 65 | } 66 | 67 | // NCX represents the structure of the toc.ncx file 68 | type NCX struct { 69 | Title string `xml:"docTitle>text"` 70 | NavMap []NavPoint `xml:"navMap>navPoint"` 71 | } 72 | 73 | func ParseEpub(epubPath string) (Epub, error) { 74 | // Open the ePUB file as a zip archive 
75 | r, err := zip.OpenReader(epubPath) 76 | if err != nil { 77 | return Epub{}, err 78 | } 79 | defer r.Close() 80 | 81 | // Read the container.xml to locate the OPF file 82 | container, err := readContainer(r) 83 | if err != nil { 84 | return Epub{}, err 85 | } 86 | 87 | // Parse the OPF file 88 | packageData, err := readOPF(r, container.Rootfiles[0].FullPath) 89 | if err != nil { 90 | return Epub{}, err 91 | } 92 | 93 | basePath := extractBasePath(container.Rootfiles[0].FullPath) 94 | tocFileName := findTocFileName(packageData.Manifest) 95 | 96 | ncx, err := parseNCX(r, basePath, tocFileName) 97 | if err != nil { 98 | fmt.Println("Failed to parse ncx file") 99 | } 100 | 101 | // Parse table of contents 102 | tableOfContents, err := extractTableOfContents(ncx) 103 | if err != nil { 104 | fmt.Println("Failed to parse table of contents", err) 105 | } 106 | 107 | book := Epub{ 108 | Name: ncx.Title, 109 | Toc: map[string]string{}, 110 | Sections: []EpubSection{}, 111 | } 112 | 113 | // Get content in order of the spine 114 | for _, spineItem := range packageData.Spine { 115 | manifestItem := findManifestItem(packageData.Manifest, spineItem.IDRef) 116 | if manifestItem != nil { 117 | currFile := filepath.Join(basePath, manifestItem.Href) 118 | content, err := readFileFromZip(r, currFile) 119 | if err != nil { 120 | return Epub{}, err 121 | } 122 | 123 | title := tableOfContents[manifestItem.Href] 124 | if title == "" { 125 | title = tableOfContents[currFile] 126 | } 127 | 128 | book.Sections = append(book.Sections, EpubSection{ 129 | ID: manifestItem.ID, 130 | Title: title, 131 | HtmlContent: string(content), 132 | }) 133 | } 134 | } 135 | 136 | return book, nil 137 | } 138 | 139 | // readContainer reads and parses the container.xml 140 | func readContainer(r *zip.ReadCloser) (*Container, error) { 141 | content, err := readFileFromZip(r, "META-INF/container.xml") 142 | if err != nil { 143 | return nil, err 144 | } 145 | 146 | var container Container 147 | if err := 
xml.Unmarshal(content, &container); err != nil { 148 | return nil, err 149 | } 150 | 151 | return &container, nil 152 | } 153 | 154 | // readOPF reads and parses the OPF file 155 | func readOPF(r *zip.ReadCloser, opfPath string) (*Package, error) { 156 | content, err := readFileFromZip(r, opfPath) 157 | if err != nil { 158 | return nil, err 159 | } 160 | 161 | var packageData Package 162 | if err := xml.Unmarshal(content, &packageData); err != nil { 163 | return nil, err 164 | } 165 | 166 | return &packageData, nil 167 | } 168 | 169 | // readFileFromZip extracts a file's content from the zip archive 170 | func readFileFromZip(r *zip.ReadCloser, name string) ([]byte, error) { 171 | for _, file := range r.File { 172 | if file.Name == name { 173 | rc, err := file.Open() 174 | if err != nil { 175 | return nil, err 176 | } 177 | defer rc.Close() 178 | 179 | return io.ReadAll(rc) 180 | } 181 | } 182 | 183 | return nil, fmt.Errorf("file not found: %s", name) 184 | } 185 | 186 | func extractTableOfContents( 187 | ncx *NCX, 188 | ) (map[string]string, error) { 189 | result := map[string]string{} 190 | addNavPoints(result, ncx.NavMap) 191 | 192 | return result, nil 193 | } 194 | 195 | func addNavPoints(m map[string]string, navPoints []NavPoint) { 196 | for _, v := range navPoints { 197 | parsedSrc, err := url.Parse(v.Src) 198 | if err != nil { 199 | // TODO: log 200 | m[v.Src] = v.Text 201 | continue 202 | } 203 | 204 | parsedSrc.Fragment = "" 205 | parsedSrc.RawQuery = "" 206 | src := parsedSrc.String() 207 | 208 | m[src] = v.Text 209 | 210 | if len(v.SubNavPoints) > 0 { 211 | addNavPoints(m, v.SubNavPoints) 212 | } 213 | } 214 | } 215 | 216 | func parseNCX(r *zip.ReadCloser, basePath string, tocFileName string) (*NCX, error) { 217 | ncxContent, err := readFileFromZip(r, filepath.Join(basePath, tocFileName)) 218 | if err != nil { 219 | return nil, err 220 | } 221 | 222 | // Parse the NCX XML 223 | var ncx NCX 224 | err = xml.Unmarshal(ncxContent, &ncx) 225 | if err != nil { 
226 | return nil, fmt.Errorf("failed to parse XML: %w", err) 227 | } 228 | 229 | return &ncx, nil 230 | } 231 | 232 | // findManifestItem finds a manifest item by ID 233 | func findManifestItem(manifest []Item, id string) *Item { 234 | for _, item := range manifest { 235 | if item.ID == id { 236 | return &item 237 | } 238 | } 239 | 240 | return nil 241 | } 242 | 243 | func extractBasePath(fullPath string) string { 244 | parsedFullPath := strings.Split(fullPath, "/") 245 | var fullPathBase string 246 | if len(parsedFullPath) > 1 { 247 | fullPathBase = strings.Join(parsedFullPath[:len(parsedFullPath)-1], "/") 248 | } 249 | 250 | return fullPathBase 251 | } 252 | 253 | func findTocFileName(manifestItems []Item) string { 254 | for _, v := range manifestItems { 255 | if strings.Contains(v.ID, "ncx") && 256 | strings.Contains(v.Href, "ncx") { 257 | return v.Href 258 | } 259 | } 260 | 261 | return "toc.ncx" // TODO look for any ncx file inside the whole zip 262 | } 263 | --------------------------------------------------------------------------------