├── .gitignore
├── go.mod
├── input
└── Dracula.epub
├── go.sum
├── internal
├── book
│ ├── epub.go
│ ├── text-book.go
│ └── epub-parser.go
├── consts
│ └── consts.go
├── debug
│ └── debug.go
├── str
│ └── str.go
├── file
│ └── file.go
└── tts
│ └── tts.go
├── main.go
├── LICENSE
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | output/
3 | .DS_Store
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module epub-tts
2 |
3 | go 1.22.0
4 |
5 | require golang.org/x/text v0.21.0
6 |
--------------------------------------------------------------------------------
/input/Dracula.epub:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rafael1mc/epub-tts/HEAD/input/Dracula.epub
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
2 | golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
3 |
--------------------------------------------------------------------------------
/internal/book/epub.go:
--------------------------------------------------------------------------------
1 | package book
2 |
// EpubSection holds one spine entry of an ePUB: the manifest item ID, the
// title looked up in the table of contents (may be empty) and the raw markup
// of the referenced file.
type EpubSection struct {
	ID          string `json:"id"`
	Title       string `json:"title"`
	HtmlContent string `json:"htmlString"` // serialized under the key "htmlString"
}
8 |
// Epub is the parsed representation of an ePUB file.
type Epub struct {
	Name     string            // book title taken from the NCX docTitle
	Toc      map[string]string // created empty by ParseEpub and not filled there
	Sections []EpubSection     // sections in spine order
}
14 |
--------------------------------------------------------------------------------
/internal/consts/consts.go:
--------------------------------------------------------------------------------
1 | package consts
2 |
// Run-time configuration of the tool, fixed at compile time.
const (
	Perm          = 0777                 // permission bits passed to os.MkdirAll and os.WriteFile
	InputFilePath = "input/Dracula.epub" // ePUB file parsed by main

	IsDryRun = false // if true, will generate text files, but not audio files
	IsDebug  = false // if true, will generate files for section json and html content as well

	SpeakProcessCompletion = true // if true, will say something at the end of the process
)
12 |
// Directory names used to lay out the generated files.
const (
	OutputRootDir  = "output" // root directory; each book gets its own sub-directory
	OutputTxtDir   = "txt"    // per-book directory for chapter text files
	OutputTmpDir   = "tmp"    // per-book directory for intermediate aiff audio
	OutputDebugDir = "debug"  // per-book directory for debug json/html dumps
)
19 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "epub-tts/internal/book"
5 | "epub-tts/internal/consts"
6 | "epub-tts/internal/debug"
7 | "epub-tts/internal/file"
8 | "epub-tts/internal/tts"
9 | "fmt"
10 | )
11 |
12 | func main() {
13 | fmt.Println(" ---== Execution Started ==--- ")
14 |
15 | epub, err := book.ParseEpub(consts.InputFilePath)
16 | if err != nil {
17 | panic(err)
18 | }
19 |
20 | textBook := book.TextBookFromEpub(epub)
21 |
22 | err = file.CreateOutputDirs(textBook.Name)
23 | if err != nil {
24 | panic(err)
25 | }
26 |
27 | err = file.SaveChapters(textBook)
28 | if err != nil {
29 | panic(err)
30 | }
31 | debug.GenerateDebugFiles(epub)
32 |
33 | tts := tts.NewTTS(3, textBook)
34 | tts.Run()
35 |
36 | if consts.SpeakProcessCompletion {
37 | tts.Speak("TTS completed")
38 | }
39 | fmt.Println(" ---== Execution ended ==--- ")
40 | }
41 |
--------------------------------------------------------------------------------
/internal/book/text-book.go:
--------------------------------------------------------------------------------
1 | package book
2 |
3 | import (
4 | "epub-tts/internal/str"
5 | "strings"
6 | )
7 |
// Chapter is one section of a book reduced to plain text.
type Chapter struct {
	ID      string // sanitized section ID
	Name    string // sanitized section title without line breaks; may be empty
	Content string // section markup with tags removed, then sanitized
}
13 |
14 | func (c Chapter) NameOrID() string {
15 | if c.Name == "" {
16 | return c.ID
17 | }
18 | return c.Name
19 | }
20 |
// TextBook is a book as an ordered list of plain-text chapters.
type TextBook struct {
	Name     string    // book name, copied from Epub.Name
	Chapters []Chapter // one chapter per ePUB section, in the same order
}
25 |
26 | func TextBookFromEpub(input Epub) TextBook {
27 | chapters := []Chapter{}
28 |
29 | for _, v := range input.Sections {
30 | name := str.SanitizeString(v.Title)
31 | name = strings.ReplaceAll(name, "\n", "")
32 | chapter := Chapter{
33 | ID: str.SanitizeString(v.ID),
34 | Name: name,
35 | Content: str.SanitizeString(str.RemoveTags(v.HtmlContent)),
36 | }
37 | chapters = append(chapters, chapter)
38 | }
39 |
40 | return TextBook{
41 | Name: input.Name,
42 | Chapters: chapters,
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Rafael
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/internal/debug/debug.go:
--------------------------------------------------------------------------------
1 | package debug
2 |
3 | import (
4 | "encoding/json"
5 | "epub-tts/internal/book"
6 | "epub-tts/internal/consts"
7 | "epub-tts/internal/file"
8 | "errors"
9 | "fmt"
10 | "os"
11 | )
12 |
13 | func GenerateDebugFiles(epub book.Epub) {
14 | if !consts.IsDebug {
15 | return
16 | }
17 | fmt.Println("Saving debug files")
18 |
19 | err := os.MkdirAll(file.DebugDir(epub.Name), consts.Perm)
20 | if err != nil && !errors.Is(err, os.ErrExist) {
21 | panic(err)
22 | }
23 |
24 | for k, v := range epub.Sections {
25 | //
26 | // JSON
27 | //
28 | jsonContent, err := json.Marshal(v)
29 | if err != nil {
30 | panic(err)
31 | }
32 |
33 | err = os.WriteFile(
34 | file.GetOutputPath(k, file.DebugDir(epub.Name), v.ID, "json"),
35 | jsonContent,
36 | consts.Perm,
37 | )
38 | if err != nil {
39 | fmt.Println("Failed to save json debug file")
40 | }
41 |
42 | //
43 | // HTML
44 | //
45 | err = os.WriteFile(
46 | file.GetOutputPath(k, file.DebugDir(epub.Name), v.ID, "html"),
47 | []byte(v.HtmlContent),
48 | consts.Perm,
49 | )
50 | if err != nil {
51 | fmt.Println("Failed to save html debug file")
52 | }
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # epub-tts
2 |
3 | Convert ePUB into audio files.
4 |
5 | Code will parse the ePUB into sections (which roughly correlate to book chapters) and 'text-to-speech' each section into its own audio file.
Output will be prefixed with a number to maintain order.
6 |
7 |
8 | This is an alpha, proof of concept version.
9 | To me, it's supposed to be a simple alternative for when eyes are tired but the mind is not :)
10 |
11 |
12 |
13 |
14 | # Requirements
15 | - Run on MacOS
16 | - ffmpeg installed and available in $PATH
17 | - Golang
18 |
19 | # How to use
20 | 1. Clone this repo
21 | 2. Replace `input/Dracula.epub` with the book you want to convert to audio (keep the file name, or point `InputFilePath` in `internal/consts/consts.go` at your file)
22 | 3. Execute the program (note that it will take quite some time, but you should see _some_ output during execution):
23 | ```
24 | go run .
25 | ```
26 | 4. You should see a new `output` folder with each text and audio file.
27 |
28 | # TODO
29 | - [x] Parse ePUB from Golang
30 | - [x] Organize code
31 | - [x] Add worker pools for batch conversion and less CPU strain
32 | - [x] Reduce output audio size
33 | - [x] Extract chapter info
34 | - [ ] Add more sample ePUBs
35 | - [ ] Add automated tests
36 | - [x] Separate output by folder
37 | - [ ] Handle multiple input
38 | - [ ] Organize the code some more
39 | - [ ] Support other languages beyond english
40 | - [ ] Display progress
41 | - [ ] Break down a big section to be TTS concurrently, and merge after whole section is done
42 | - [ ] Add support for Ubuntu TTS
43 | - [ ] Add Web UI to Drag and Drop epub files
44 | - [ ] ?
45 |
46 | ### Dependencies
47 | - MacOS `say` command
48 | - Note: The example book in this repo is taken from [Project Gutenberg](https://www.gutenberg.org/about/), with Copyright Status as "Public domain in the USA"
49 |
50 |
51 | # License
52 | Check [LICENSE](https://github.com/rafael1mc/epub-tts/blob/main/LICENSE) file.
--------------------------------------------------------------------------------
/internal/str/str.go:
--------------------------------------------------------------------------------
1 | package str
2 |
3 | import (
4 | "regexp"
5 | "strings"
6 | "unicode"
7 |
8 | "golang.org/x/text/runes"
9 | "golang.org/x/text/transform"
10 | "golang.org/x/text/unicode/norm"
11 | )
12 |
// blankLineRegex matches lines that are empty or contain only whitespace.
// It is compiled once instead of on every SanitizeString call.
var blankLineRegex = regexp.MustCompile(`(?m)^\s*$`)

// SanitizeString normalizes free text for narration and file output.
//
// It trims surrounding whitespace, converts CRLF to LF, drops every
// non-printable rune other than '\n' and collapses runs of blank lines so
// that at most one empty line separates two paragraphs. Input that is empty
// or whitespace-only yields the empty string.
func SanitizeString(str string) string {
	str = strings.Trim(str, "\r\n\t ")
	if str == "" {
		// Without this guard the blank-line pattern matches the empty
		// string and turns it into a lone "\n".
		return ""
	}
	str = strings.ReplaceAll(str, "\r\n", "\n")

	// Turn whitespace-only lines into bare line breaks so they can be
	// grouped by the collapsing loop below.
	str = blankLineRegex.ReplaceAllString(str, "\n")

	str = strings.Map(func(r rune) rune {
		if unicode.IsPrint(r) || r == '\n' {
			return r
		}
		return -1 // drop the rune
	}, str)

	// Remove excess line breaks.
	for strings.Contains(str, "\n\n\n") {
		str = strings.ReplaceAll(str, "\n\n\n", "\n\n")
	}

	return str
}
35 |
36 | // CleanFileName removes invalid characters for filenames
37 | // and also removes accents and special characters.
38 | func CleanFileName(input string) string {
39 | // Normalize the input string to remove accents
40 | normalized, err := normalize(input)
41 | if err != nil {
42 | // TODO add log
43 | normalized = input
44 | }
45 |
46 | normalized = strings.ReplaceAll(normalized, "—", "_")
47 | normalized = strings.ReplaceAll(normalized, ":", "_")
48 |
49 | // Define a regular expression that allows only alphanumeric characters, dashes, and underscores
50 | re := regexp.MustCompile(`[^a-zA-Z0-9\s\-_\.]`)
51 |
52 | // Remove any character that is not a word character, whitespace, dash, or period
53 | cleaned := re.ReplaceAllString(normalized, "")
54 |
55 | // Optionally replace spaces with underscores or dashes
56 | cleaned = strings.ReplaceAll(cleaned, " ", "_")
57 | cleaned = strings.ReplaceAll(cleaned, `\n`, "")
58 | cleaned = strings.ReplaceAll(cleaned, "\n", "")
59 |
60 | return cleaned
61 | }
62 |
63 | // https://stackoverflow.com/a/65981868
64 | func normalize(s string) (string, error) {
65 | t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
66 | result, _, err := transform.String(t, s)
67 | if err != nil {
68 | return "", err
69 | }
70 |
71 | return result, nil
72 | }
73 |
// tagRegex matches an HTML/XML tag: '<', one or more characters other than
// '>', then '>'. It is compiled once instead of on every RemoveTags call.
var tagRegex = regexp.MustCompile(`<[^>]+>`)

// RemoveTags returns input with every HTML tag removed. Text between tags is
// kept unchanged; character entities are not decoded.
func RemoveTags(input string) string {
	return tagRegex.ReplaceAllString(input, "")
}
80 |
--------------------------------------------------------------------------------
/internal/file/file.go:
--------------------------------------------------------------------------------
1 | package file
2 |
3 | import (
4 | "epub-tts/internal/book"
5 | "epub-tts/internal/consts"
6 | "epub-tts/internal/str"
7 | "errors"
8 | "fmt"
9 | "os"
10 | "path"
11 | "path/filepath"
12 | "strings"
13 | )
14 |
15 | func normalizeBookName(bookName string) string {
16 | cleanName := strings.ToLower(str.CleanFileName(bookName))
17 | nameLen := len(cleanName)
18 | if nameLen > 50 {
19 | nameLen = 50
20 | }
21 |
22 | return cleanName[:nameLen]
23 | }
24 |
25 | func rootDir(bookName string) string {
26 | return path.Join(
27 | consts.OutputRootDir,
28 | normalizeBookName(bookName),
29 | )
30 | }
31 |
32 | func txtDir(bookName string) string {
33 | return path.Join(
34 | rootDir(bookName),
35 | consts.OutputTxtDir,
36 | )
37 | }
38 |
39 | func TmpDir(bookName string) string {
40 | return path.Join(
41 | rootDir(bookName),
42 | consts.OutputTmpDir,
43 | )
44 | }
45 |
46 | func DebugDir(bookName string) string {
47 | return path.Join(
48 | rootDir(bookName),
49 | consts.OutputDebugDir,
50 | )
51 | }
52 |
53 | func CreateOutputDirs(bookName string) error {
54 | var err error
55 |
56 | fmt.Println("Creating tmp dir", TmpDir(bookName))
57 | err = os.MkdirAll(TmpDir(bookName), consts.Perm)
58 | if err != nil && !errors.Is(err, os.ErrExist) {
59 | return err
60 | }
61 |
62 | err = os.MkdirAll(txtDir(bookName), consts.Perm)
63 | if err != nil && !errors.Is(err, os.ErrExist) {
64 | return err
65 | }
66 |
67 | return nil
68 | }
69 |
70 | func SaveChapters(textBook book.TextBook) error {
71 | fmt.Println("Saving chapter text files.")
72 | for k, v := range textBook.Chapters {
73 | filename := GetTextfileName(k, textBook.Name, v)
74 | err := os.WriteFile(
75 | filename,
76 | []byte(v.Content),
77 | consts.Perm,
78 | )
79 | if err != nil {
80 | return err
81 | }
82 | }
83 |
84 | return nil
85 | }
86 |
87 | func GetTextfileName(pos int, bookName string, chapter book.Chapter) string {
88 | return GetOutputPath(pos, txtDir(bookName), chapter.NameOrID(), "txt")
89 | }
90 |
91 | func GetTtsAudioFilename(pos int, bookName string, chapter book.Chapter) string {
92 | return GetOutputPath(pos, TmpDir(bookName), chapter.NameOrID(), "aiff")
93 | }
94 |
95 | func GetConvertedAudioFilename(pos int, bookName string, chapter book.Chapter) string {
96 | return GetOutputPath(pos, rootDir(bookName), chapter.NameOrID(), "mp3")
97 | }
98 |
99 | func GetOutputPath(pos int, outputFolder string, name string, extension string) string {
100 | filename := fmt.Sprintf("%d-%s.%s", pos, name, extension)
101 | filename = strings.ToLower(filename)
102 | filename = str.CleanFileName(filename)
103 |
104 | filePath := filepath.Join(outputFolder, filename)
105 |
106 | return filePath
107 | }
108 |
--------------------------------------------------------------------------------
/internal/tts/tts.go:
--------------------------------------------------------------------------------
1 | package tts
2 |
3 | import (
4 | "epub-tts/internal/book"
5 | "epub-tts/internal/consts"
6 | "epub-tts/internal/file"
7 | "fmt"
8 | "os"
9 | "os/exec"
10 | )
11 |
// TTS narrates the chapters of a text book using a pool of workers.
type TTS struct {
	workerCount int // number of worker goroutines started by launchWorkers

	textBook book.TextBook // book whose chapters are narrated
}
17 |
// job is one unit of work handed to a worker: a single chapter to narrate.
type job struct {
	ID       int          // position of the chapter within the book
	BookName string       // name of the book the chapter belongs to
	Chapter  book.Chapter // chapter to narrate
}
23 |
// jobDone reports the completion of a job back to Run.
type jobDone struct {
	job
	Error error // non-nil when processing failed; workers do not set it yet
}
28 |
29 | func NewTTS(
30 | workerCount int,
31 | textBook book.TextBook,
32 | ) *TTS {
33 | return &TTS{
34 | workerCount: workerCount,
35 | textBook: textBook,
36 | }
37 | }
38 |
39 | func (t TTS) Run() {
40 | fmt.Println("Running text-to-speech")
41 |
42 | jobCount := len(t.textBook.Chapters)
43 | jobInputChan := make(chan job, jobCount)
44 | jobDoneChan := make(chan jobDone, jobCount)
45 |
46 | t.launchWorkers(jobInputChan, jobDoneChan)
47 |
48 | for k, v := range t.textBook.Chapters {
49 | jobInputChan <- job{ID: k, BookName: t.textBook.Name, Chapter: v}
50 | }
51 | close(jobInputChan)
52 |
53 | for range jobCount {
54 | jobDone := <-jobDoneChan
55 | if jobDone.Error != nil {
56 | fmt.Println("Failed to process item", jobDone.Chapter.Name, "with error", jobDone.Error)
57 | }
58 | }
59 |
60 | os.RemoveAll(file.TmpDir(t.textBook.Name))
61 | }
62 |
63 | func (t TTS) Speak(text string) {
64 | cmd := fmt.Sprintf(`say "%s"`, text)
65 | exec.Command("/bin/sh", "-c", cmd).Output()
66 | }
67 |
68 | func (t TTS) launchWorkers(jobInputChan <-chan job, jobDoneChan chan<- jobDone) {
69 | fmt.Println("Launching", t.workerCount, "worker(s)")
70 | for k := range t.workerCount {
71 | go t.launchWorker(k, jobInputChan, jobDoneChan)
72 | }
73 | }
74 |
75 | func (t TTS) launchWorker(id int, inputChan <-chan job, doneChan chan<- jobDone) {
76 | // TODO: use worker id and doneChan with error
77 | for i := range inputChan {
78 | if consts.IsDryRun {
79 | doneChan <- jobDone{job: i}
80 | continue
81 | }
82 |
83 | _ = ttsChapter(i.ID, i.BookName, i.Chapter)
84 | audioConvert(i.ID, i.BookName, i.Chapter)
85 | // TODO: maybe already delete the aiff file here, to prevent growing then shriking
86 | // some books generate GBs on aiff
87 | doneChan <- jobDone{job: i} // not sending errors yet
88 | }
89 | }
90 |
91 | func ttsChapter(pos int, bookName string, chapter book.Chapter) string {
92 | audioName := file.GetTtsAudioFilename(pos, bookName, chapter)
93 |
94 | fmt.Println("🎤 Narrating chapter: '" + audioName + "' 🎤")
95 | cmdStr := fmt.Sprintf(`say -f "%s" -o "%s"`, file.GetTextfileName(pos, bookName, chapter), audioName)
96 | out, _ := exec.Command("/bin/sh", "-c", cmdStr).Output()
97 |
98 | return string(out)
99 | }
100 |
101 | func audioConvert(pos int, bookName string, chapter book.Chapter) string {
102 | ttsAudioName := file.GetTtsAudioFilename(pos, bookName, chapter)
103 | convertedAudioName := file.GetConvertedAudioFilename(pos, bookName, chapter)
104 |
105 | fmt.Println("🔄 Converting chapter: '" + ttsAudioName + "' 🔄")
106 | cmdStr := fmt.Sprintf(`ffmpeg -y -i %s %s`, ttsAudioName, convertedAudioName)
107 | out, _ := exec.Command("/bin/sh", "-c", cmdStr).Output()
108 |
109 | fmt.Println("✅ Chapter '" + convertedAudioName + "' converted ✅")
110 | return string(out)
111 | }
112 |
--------------------------------------------------------------------------------
/internal/book/epub-parser.go:
--------------------------------------------------------------------------------
1 | package book
2 |
3 | import (
4 | "archive/zip"
5 | "encoding/xml"
6 | "fmt"
7 | "io"
8 | "net/url"
9 | "path/filepath"
10 | "strings"
11 | )
12 |
// Container mirrors META-INF/container.xml and lists the rootfile entries it
// declares.
type Container struct {
	Rootfiles []Rootfile `xml:"rootfiles>rootfile"`
}
17 |
// Rootfile is one rootfile entry of container.xml. FullPath is its full-path
// attribute, which ParseEpub uses as the location of the OPF file inside the
// archive.
type Rootfile struct {
	FullPath string `xml:"full-path,attr"`
}
22 |
// Package mirrors the parts of the OPF file that are needed: the manifest
// items and the spine item references.
type Package struct {
	Manifest []Item    `xml:"manifest>item"`
	Spine    []Itemref `xml:"spine>itemref"`
}
28 |
// Item is one manifest entry of the OPF file: its id and href attributes.
type Item struct {
	ID   string `xml:"id,attr"`
	Href string `xml:"href,attr"`
}
34 |
// Itemref is one spine entry of the OPF file. IDRef is its idref attribute,
// which is matched against Item.ID to find the manifest entry.
type Itemref struct {
	IDRef string `xml:"idref,attr"`
}
39 |
// NavPoint represents a navigation point in the EPUB toc.ncx file.
// Decoding goes through the custom UnmarshalXML method, which reads Src from
// the src attribute of the nested content element.
type NavPoint struct {
	Text string `xml:"navLabel>text"`
	// Src string `xml:"content>src,attr"` // this doesn't work
	Src          string     `xml:"content,attr"`
	SubNavPoints []NavPoint `xml:"navPoint"`
}
47 |
48 | func (n *NavPoint) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
49 | var aux struct {
50 | Text string `xml:"navLabel>text"`
51 | Content struct {
52 | Src string `xml:"src,attr"`
53 | } `xml:"content"`
54 | SubNavPoints []NavPoint `xml:"navPoint"`
55 | }
56 | if err := d.DecodeElement(&aux, &start); err != nil {
57 | return err
58 | }
59 |
60 | n.Text = aux.Text
61 | n.Src = aux.Content.Src
62 | n.SubNavPoints = aux.SubNavPoints
63 |
64 | return nil
65 | }
66 |
// NCX represents the structure of the toc.ncx file: the document title and
// the top-level navigation points of its navMap.
type NCX struct {
	Title  string     `xml:"docTitle>text"`
	NavMap []NavPoint `xml:"navMap>navPoint"`
}
72 |
73 | func ParseEpub(epubPath string) (Epub, error) {
74 | // Open the ePUB file as a zip archive
75 | r, err := zip.OpenReader(epubPath)
76 | if err != nil {
77 | return Epub{}, err
78 | }
79 | defer r.Close()
80 |
81 | // Read the container.xml to locate the OPF file
82 | container, err := readContainer(r)
83 | if err != nil {
84 | return Epub{}, err
85 | }
86 |
87 | // Parse the OPF file
88 | packageData, err := readOPF(r, container.Rootfiles[0].FullPath)
89 | if err != nil {
90 | return Epub{}, err
91 | }
92 |
93 | basePath := extractBasePath(container.Rootfiles[0].FullPath)
94 | tocFileName := findTocFileName(packageData.Manifest)
95 |
96 | ncx, err := parseNCX(r, basePath, tocFileName)
97 | if err != nil {
98 | fmt.Println("Failed to parse ncx file")
99 | }
100 |
101 | // Parse table of contents
102 | tableOfContents, err := extractTableOfContents(ncx)
103 | if err != nil {
104 | fmt.Println("Failed to parse table of contents", err)
105 | }
106 |
107 | book := Epub{
108 | Name: ncx.Title,
109 | Toc: map[string]string{},
110 | Sections: []EpubSection{},
111 | }
112 |
113 | // Get content in order of the spine
114 | for _, spineItem := range packageData.Spine {
115 | manifestItem := findManifestItem(packageData.Manifest, spineItem.IDRef)
116 | if manifestItem != nil {
117 | currFile := filepath.Join(basePath, manifestItem.Href)
118 | content, err := readFileFromZip(r, currFile)
119 | if err != nil {
120 | return Epub{}, err
121 | }
122 |
123 | title := tableOfContents[manifestItem.Href]
124 | if title == "" {
125 | title = tableOfContents[currFile]
126 | }
127 |
128 | book.Sections = append(book.Sections, EpubSection{
129 | ID: manifestItem.ID,
130 | Title: title,
131 | HtmlContent: string(content),
132 | })
133 | }
134 | }
135 |
136 | return book, nil
137 | }
138 |
139 | // readContainer reads and parses the container.xml
140 | func readContainer(r *zip.ReadCloser) (*Container, error) {
141 | content, err := readFileFromZip(r, "META-INF/container.xml")
142 | if err != nil {
143 | return nil, err
144 | }
145 |
146 | var container Container
147 | if err := xml.Unmarshal(content, &container); err != nil {
148 | return nil, err
149 | }
150 |
151 | return &container, nil
152 | }
153 |
154 | // readOPF reads and parses the OPF file
155 | func readOPF(r *zip.ReadCloser, opfPath string) (*Package, error) {
156 | content, err := readFileFromZip(r, opfPath)
157 | if err != nil {
158 | return nil, err
159 | }
160 |
161 | var packageData Package
162 | if err := xml.Unmarshal(content, &packageData); err != nil {
163 | return nil, err
164 | }
165 |
166 | return &packageData, nil
167 | }
168 |
// readFileFromZip returns the full content of the archive entry whose name is
// exactly name. It returns a "file not found" error when there is no such
// entry.
func readFileFromZip(r *zip.ReadCloser, name string) ([]byte, error) {
	for i := range r.File {
		entry := r.File[i]
		if entry.Name != name {
			continue
		}

		rc, err := entry.Open()
		if err != nil {
			return nil, err
		}
		defer rc.Close()

		return io.ReadAll(rc)
	}

	return nil, fmt.Errorf("file not found: %s", name)
}
185 |
186 | func extractTableOfContents(
187 | ncx *NCX,
188 | ) (map[string]string, error) {
189 | result := map[string]string{}
190 | addNavPoints(result, ncx.NavMap)
191 |
192 | return result, nil
193 | }
194 |
195 | func addNavPoints(m map[string]string, navPoints []NavPoint) {
196 | for _, v := range navPoints {
197 | parsedSrc, err := url.Parse(v.Src)
198 | if err != nil {
199 | // TODO: log
200 | m[v.Src] = v.Text
201 | continue
202 | }
203 |
204 | parsedSrc.Fragment = ""
205 | parsedSrc.RawQuery = ""
206 | src := parsedSrc.String()
207 |
208 | m[src] = v.Text
209 |
210 | if len(v.SubNavPoints) > 0 {
211 | addNavPoints(m, v.SubNavPoints)
212 | }
213 | }
214 | }
215 |
216 | func parseNCX(r *zip.ReadCloser, basePath string, tocFileName string) (*NCX, error) {
217 | ncxContent, err := readFileFromZip(r, filepath.Join(basePath, tocFileName))
218 | if err != nil {
219 | return nil, err
220 | }
221 |
222 | // Parse the NCX XML
223 | var ncx NCX
224 | err = xml.Unmarshal(ncxContent, &ncx)
225 | if err != nil {
226 | return nil, fmt.Errorf("failed to parse XML: %w", err)
227 | }
228 |
229 | return &ncx, nil
230 | }
231 |
232 | // findManifestItem finds a manifest item by ID
233 | func findManifestItem(manifest []Item, id string) *Item {
234 | for _, item := range manifest {
235 | if item.ID == id {
236 | return &item
237 | }
238 | }
239 |
240 | return nil
241 | }
242 |
// extractBasePath returns everything before the last "/" of fullPath, or an
// empty string when fullPath contains no "/".
func extractBasePath(fullPath string) string {
	lastSlash := strings.LastIndex(fullPath, "/")
	if lastSlash < 0 {
		return ""
	}
	return fullPath[:lastSlash]
}
252 |
253 | func findTocFileName(manifestItems []Item) string {
254 | for _, v := range manifestItems {
255 | if strings.Contains(v.ID, "ncx") &&
256 | strings.Contains(v.Href, "ncx") {
257 | return v.Href
258 | }
259 | }
260 |
261 | return "toc.ncx" // TODO look for any ncx file inside the whole zip
262 | }
263 |
--------------------------------------------------------------------------------