├── packages.txt
├── .gitignore
├── public
    ├── 5200.epub
    └── 20250828.png
├── requirements.txt
├── CONTRIBUTING.md
├── utils
    ├── __init__.py
    ├── metadata_utils.py
    ├── audio_utils.py
    └── extract_chapters.py
├── LICENSE.md
├── app.py
├── README.md
├── cli.py
└── pipeline.py


/packages.txt:
--------------------------------------------------------------------------------
1 | ffmpeg


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | venv/
2 | __pycache__
3 | 
4 | # misc
5 | .DS_Store
6 | certificate.pem
7 | 


--------------------------------------------------------------------------------
/public/5200.epub:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adnjoo/kokoro-epub/HEAD/public/5200.epub


--------------------------------------------------------------------------------
/public/20250828.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adnjoo/kokoro-epub/HEAD/public/20250828.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | gradio>=4.44.0
2 | kokoro
3 | torch       # CPU build is fine on Spaces; CUDA will be used if available
4 | ebooklib
5 | beautifulsoup4
6 | soundfile
7 | pydub       # optional, for MP3 merge (needs system ffmpeg)
8 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to kokoro-epub 💻
 2 | 
 3 | Thanks for helping improve **kokoro-epub**!
 4 | 
 5 | ## How to Contribute
 6 | 
 7 | * **Report bugs / request features** → [Open an issue](https://github.com/adnjoo/kokoro-epub/issues)
 8 | * **Code changes** → Fork, branch, and submit a PR
 9 | 
10 | ## Guidelines
11 | 
12 | * Keep PRs small and focused
13 | * Follow existing style & naming
14 | * Be respectful (see [Code of Conduct](https://opensource.guide/code-of-conduct/))
15 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # utils/__init__.py
 2 | """
 3 | Utility package for kokoro-epub.
 4 | 
 5 | This exposes the most common helpers so they can be imported
 6 | directly from `utils` instead of deep submodules.
 7 | """
 8 | 
 9 | from .audio_utils import merge_to_mp3, merge_to_m4b, chapter_duration_ms
10 | from .metadata_utils import write_chapters_metadata
11 | from .extract_chapters import extract_chapters
12 | 
13 | __all__ = [
14 |     "merge_to_mp3",
15 |     "merge_to_m4b",
16 |     "chapter_duration_ms",
17 |     "write_chapters_metadata",
18 |     "extract_chapters",
19 | ]
20 | 


--------------------------------------------------------------------------------
/utils/metadata_utils.py:
--------------------------------------------------------------------------------
 1 | def write_chapters_metadata(chapter_durations, out_txt):
 2 |     """
 3 |     chapter_durations: [(title, duration_ms), ...]
 4 |     """
 5 |     offset = 0
 6 |     with open(out_txt, "w", encoding="utf-8") as f:
 7 |         f.write(";FFMETADATA1\n")
 8 |         for title, dur in chapter_durations:
 9 |             start = offset
10 |             end = offset + dur
11 |             f.write("[CHAPTER]\n")
12 |             f.write("TIMEBASE=1/1000\n")
13 |             f.write(f"START={start}\n")
14 |             f.write(f"END={end}\n")
15 |             f.write(f"title={title}\n\n")
16 |             offset = end
17 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Andrew Njoo
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights   
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      
 9 | copies of the Software, and to permit persons to whom the Software is          
10 | furnished to do so, subject to the following conditions:                        
11 | 
12 | The above copyright notice and this permission notice shall be included in     
13 | all copies or substantial portions of the Software.                            
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN      
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | import gradio as gr
 2 | from pipeline import epub_to_audio, extract_chapters, DEFAULT_VOICE
 3 | 
 4 | with gr.Blocks(title="kokoro-epub — Free EPUB → Audiobook") as demo:
 5 |     gr.Markdown("## Free EPUB → Audiobook (Open Source)")
 6 |     # Input: choosing an EPUB refills the chapter checklist (emptied when the file is removed).
 7 |     epub_in = gr.File(label="EPUB file", file_types=[".epub"])
 8 |     chapter_selector = gr.CheckboxGroup(label="Select chapters to convert", choices=[])
 9 |     epub_in.change(
10 |         fn=lambda f: gr.update(  # choice labels read "<title> (<n> words)"
11 |             choices=[f"{t} ({len(txt.split())} words)" for (t, txt) in extract_chapters(f.name)] if f else []
12 |         ),
13 |         inputs=epub_in,
14 |         outputs=chapter_selector,
15 |     )
16 |     # Synthesis options, grouped in a single row.
17 |     with gr.Row():
18 |         voice = gr.Dropdown(
19 |             label="Voice",
20 |             value=DEFAULT_VOICE,
21 |             choices=["af_heart","af_alloy","af_bella","af_rose","am_michael","am_adam","am_mandarin"],
22 |         )
23 |         speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed")
24 |         format_choice = gr.Radio(label="Output format", choices=["MP3", "M4B"], value="MP3")
25 |     # Outputs: both download slots are created hidden.
26 |     run_btn = gr.Button("Convert")
27 |     audio_out = gr.File(label="Download MP3 (or ZIP)", visible=False)
28 |     m4b_out = gr.File(label="Download M4B (with chapters)", visible=False)
29 |     logs = gr.Textbox(label="Logs", lines=12)
30 |     # Convert: pass the five inputs to epub_to_audio and route its results to the three outputs.
31 |     run_btn.click(
32 |         fn=epub_to_audio,
33 |         inputs=[epub_in, voice, speed, chapter_selector, format_choice],
34 |         outputs=[audio_out, m4b_out, logs],
35 |     )
36 | # Launch the app only when this file is run as a script.
37 | if __name__ == "__main__":
38 |     demo.launch()
39 | 


--------------------------------------------------------------------------------
/utils/audio_utils.py:
--------------------------------------------------------------------------------
 1 | import os, shutil, subprocess
 2 | from pathlib import Path
 3 | from pydub import AudioSegment
 4 | 
 5 | def merge_to_mp3(wav_paths, out_mp3_path, bitrate="64k"):
 6 |     """Merge WAVs into a single MP3 using pydub/ffmpeg."""
 7 |     if shutil.which("ffmpeg") is None:
 8 |         return False
 9 |     combined = AudioSegment.silent(duration=0)
10 |     for w in wav_paths:
11 |         combined += AudioSegment.from_wav(w)
12 |     combined.export(out_mp3_path, format="mp3", bitrate=bitrate)
13 |     return True
14 | 
15 | def merge_to_m4b(wav_paths, out_m4b_path, chapters_txt=None, bitrate="64k"):
16 |     """Merge WAVs into .m4b (AAC) with optional chapters metadata."""
17 |     if shutil.which("ffmpeg") is None:
18 |         return False
19 |     list_file = Path(out_m4b_path).with_suffix(".concat.txt")
20 |     with open(list_file, "w", encoding="utf-8") as f:
21 |         for w in wav_paths:
22 |             f.write(f"file '{Path(w).as_posix()}'\n")
23 |     cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(list_file)]
24 |     if chapters_txt:
25 |         cmd += ["-i", str(chapters_txt), "-map_metadata", "1"]
26 |     cmd += ["-c:a", "aac", "-b:a", bitrate, "-movflags", "faststart", str(out_m4b_path)]
27 |     try:
28 |         result = subprocess.run(cmd, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
29 |         return result.returncode == 0
30 |     except Exception:
31 |         return False
32 | 
33 | def chapter_duration_ms(wav_files):
34 |     """Return total duration in ms for a list of wavs."""
35 |     return sum(len(AudioSegment.from_wav(w)) for w in wav_files)
36 | 


--------------------------------------------------------------------------------
/utils/extract_chapters.py:
--------------------------------------------------------------------------------
 1 | from ebooklib import epub, ITEM_DOCUMENT
 2 | from bs4 import BeautifulSoup
 3 | 
 4 | def extract_chapters(epub_path):
 5 |     """Extract chapter titles and text content from an EPUB file."""
 6 | 
 7 |     book = epub.read_epub(epub_path)
 8 |     chapters = []
 9 |     for idref, _ in book.spine:
10 |         item = book.get_item_with_id(idref)
11 |         if item.get_type() == ITEM_DOCUMENT:
12 |             soup = BeautifulSoup(item.get_content(), "html.parser")
13 |             text = soup.get_text().strip()
14 |             if not text:
15 |                 continue
16 | 
17 |             # find all chapter headings, not just the first
18 |             headings = soup.find_all(["h1", "h2", "h3"])
19 |             if headings:
20 |                 for heading in headings:
21 |                     title = heading.get_text(strip=True)
22 |                     content_parts = []
23 |                     for sib in heading.next_siblings:
24 |                         if getattr(sib, "name", None) in ["h1", "h2", "h3"]:
25 |                             break
26 |                         if hasattr(sib, "get_text"):
27 |                             content_parts.append(sib.get_text(" ", strip=True))
28 |                     chapter_text = " ".join(content_parts).strip()
29 |                     if chapter_text:
30 |                         # skip obvious boilerplate
31 |                         skip_titles = [
32 |                             "the project gutenberg", 
33 |                             "table of contents", 
34 |                             "the full project gutenberg license"
35 |                         ]
36 |                         if any(title.lower().startswith(s) for s in skip_titles):
37 |                             continue
38 | 
39 |                         chapters.append((title, chapter_text))
40 |             else:
41 |                 chapters.append((item.file_name, text))
42 |     return chapters
43 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Kokoro Epub
 3 | emoji: 💻
 4 | colorFrom: red
 5 | colorTo: gray
 6 | sdk: gradio
 7 | sdk_version: 5.44.0
 8 | app_file: app.py
 9 | pinned: false
10 | short_description: epub 2 mp3 / m4b
11 | ---
12 | 
13 | # kokoro-epub
14 | 
15 | > [!IMPORTANT]
16 | **This tool is intended for use with non-DRM, legally acquired eBooks only.** <br>
17 | The authors are not responsible for any misuse of this software or any resulting legal consequences. <br>
18 | 
19 | Converts EPUB to audiobook (MP3 or M4B (chapter markers)) using python.
20 | 
21 | <img src='public/20250828.png' width='400'>
22 | 
23 | ## Quick Start
24 | 
25 | ```bash
26 | # Setup
27 | python3.11 -m venv venv
28 | source venv/bin/activate
29 | pip install -r requirements.txt
30 | 
31 | # System requirement for MP3/M4B merge
32 | sudo apt install ffmpeg -y    # required for pydub exports
33 | 
34 | # Run
35 | python app.py
36 | ```
37 | 
38 | 👉 Try it free on [Hugging Face Space](https://huggingface.co/spaces/adnjoo/kokoro-epub) — or run faster in the cloud at [bookbearai.com](https://bookbearai.com).
39 | 
40 | ### GPU Torch
41 | 
42 | Gutenberg Kafka - The Metamorphosis 25152 words ~ 100 pages ~ 60mb (MP3 2h12m • 64 kbps • 24 kHz • mono)
43 | 
44 | 5060 Ti 16GB - ETA 194s/3' | WPS 130
45 | vs
46 | CPU - 1389s/23' | WPS 18
47 | 
48 | ```bash
49 |  pip install --upgrade torch --index-url https://download.pytorch.org/whl/cu128
50 | ```
51 | 
52 | ### CLI Usage
53 | 
54 | In addition to the Gradio UI, you can also run the tool directly from the command line with `cli.py`:
55 | 
56 | List chapters in an EPUB:
57 | ```bash
58 | python cli.py 5200.epub --list-chapters
59 | ````
60 | 
61 | Convert to M4B, selecting only chapters II and III:
62 | 
63 | ```bash
64 | python cli.py 5200.epub --format M4B --chapters 3,4 --out audiobooks
65 | ```
66 | 
67 | 
68 | ## Sample Output
69 | 
70 | <video src='https://github.com/user-attachments/assets/cd229d05-e59a-4e91-becf-4b3de1859607
71 | ' width=180></video>
72 | 
73 | ## Related Projects
74 | 
75 | If you're exploring other ebook-to-audio solutions, you might also check out:  
76 | - [readest](https://github.com/readest/readest) - modern e-reader with Edge TTS (22 voices)
77 | - [audiblez](https://github.com/santinic/audiblez) — CLI tool for converting text to audiobooks.  
78 | - [ebook2audiobook](https://github.com/DrewThomasson/ebook2audiobook) — Simple Python-based ebook-to-audio converter.  
79 | 
80 | ## License
81 | 
82 | MIT License. See [LICENSE.md](./LICENSE.md).
83 | 
84 | 
85 | ## Contributing
86 | 
87 | 📌 See [CONTRIBUTING.md](./CONTRIBUTING.md) if you'd like to contribute.
88 | 


--------------------------------------------------------------------------------
/cli.py:
--------------------------------------------------------------------------------
 1 | import argparse, sys
 2 | from pathlib import Path
 3 | import shutil
 4 | from pipeline import epub_to_audio, extract_chapters, DEFAULT_VOICE
 5 | 
 6 | def main():
 7 |     parser = argparse.ArgumentParser(description="EPUB → Audiobook CLI")
 8 |     parser.add_argument("epub", help="Path to EPUB file")
 9 |     parser.add_argument("--out", default="output", help="Output directory")
10 |     parser.add_argument("--voice", default=DEFAULT_VOICE, help="Voice (default af_heart)")
11 |     parser.add_argument("--speed", type=float, default=1.0, help="Playback speed multiplier")
12 |     parser.add_argument("--format", choices=["MP3","M4B"], default="MP3", help="Output format")
13 |     parser.add_argument("--list-chapters", action="store_true", help="List chapters and exit")
14 |     parser.add_argument("--chapters", help="Comma-separated chapter titles to convert", default=None)
15 |     args = parser.parse_args()
16 | 
17 |     epub_path = Path(args.epub)
18 |     chapters = extract_chapters(str(epub_path))
19 | 
20 |     # Dry run
21 |     if args.list_chapters:
22 |         print(f"\n📖 {epub_path.name} — {len(chapters)} chapters found:\n")
23 |         for i, (title, text) in enumerate(chapters, 1):
24 |             print(f"{i:02d}. {title} ({len(text.split())} words)")
25 |         sys.exit(0)
26 | 
27 |     # After parsing args and extracting chapters...
28 |     selected = []
29 |     if args.chapters:
30 |         parts = [c.strip() for c in args.chapters.split(",")]
31 |         for p in parts:
32 |             if p.isdigit():
33 |                 idx = int(p) - 1
34 |                 if 0 <= idx < len(chapters):
35 |                     selected.append(f"{chapters[idx][0]} ({len(chapters[idx][1].split())} words)")
36 |             else:
37 |                 selected.append(p)
38 | 
39 |     result_file = None
40 |     last_logs = ""
41 |     for mp3_out, m4b_out, logs in epub_to_audio(
42 |         epub_path.open("rb"),
43 |         args.voice,
44 |         args.speed,
45 |         selected,
46 |         args.format.upper(),
47 |         cli=True,
48 |     ):
49 |         if logs and logs != last_logs:
50 |             diff = logs[len(last_logs):]
51 |             if diff.strip():
52 |                 for line in diff.strip().splitlines():
53 |                     print("   " + line)
54 |             last_logs = logs
55 |         
56 |         if mp3_out or m4b_out:
57 |             tmp_result = Path(mp3_out or m4b_out)
58 |             out_dir = Path(args.out)
59 |             out_dir.mkdir(parents=True, exist_ok=True)
60 | 
61 |             # Rebuild filename in final output dir
62 |             final_path = out_dir / tmp_result.name
63 |             shutil.copy(tmp_result, final_path)
64 |             result_file = final_path
65 | 
66 |     print(f"\n✅ Audiobook ready: {result_file}")
67 | 
68 | if __name__ == "__main__":
69 |     main()
70 | 


--------------------------------------------------------------------------------
/pipeline.py:
--------------------------------------------------------------------------------
  1 | import re, time, tempfile
  2 | from pathlib import Path
  3 | import soundfile as sf
  4 | import torch
  5 | from kokoro import KPipeline
  6 | from gradio import update
  7 | 
  8 | from utils import (
  9 |     extract_chapters,
 10 |     merge_to_mp3,
 11 |     merge_to_m4b,
 12 |     chapter_duration_ms,
 13 |     write_chapters_metadata,
 14 | )
 15 | 
 16 | SPLIT_PATTERN = r"\n{2,}"
 17 | SAMPLE_RATE = 24000
 18 | DEFAULT_LANG = "a"
 19 | DEFAULT_VOICE = "af_heart"
 20 | 
 21 | 
 22 | def epub_to_audio(epub_file, voice, speed, selected_titles, format_choice, progress=None, cli=False):
 23 |     """
 24 |     Core generator that streams progress and yields (mp3_out, m4b_out, logs).
 25 |     Used by both Gradio and CLI.
 26 |     """
 27 |     start_time = time.time()
 28 |     workdir = tempfile.mkdtemp(prefix="kokoro_epub_")
 29 |     wav_dir = Path(workdir) / "wavs"
 30 |     wav_dir.mkdir(parents=True, exist_ok=True)
 31 | 
 32 |     logs = "🔎 Reading EPUB…"
 33 |     yield None, None, logs
 34 | 
 35 |     chapters = extract_chapters(epub_file.name)
 36 |     if not chapters:
 37 |         yield None, None, "❌ No chapters found."
 38 |         return
 39 | 
 40 |     if selected_titles:
 41 |         chapters = [
 42 |             (t, txt)
 43 |             for (t, txt) in chapters
 44 |             if f"{t} ({len(txt.split())} words)" in selected_titles
 45 |         ]
 46 | 
 47 |     # device
 48 |     if torch.cuda.is_available():
 49 |         device = "cuda"
 50 |         logs += f"\n✅ CUDA available: {torch.cuda.get_device_name(0)}"
 51 |     else:
 52 |         device = "cpu"
 53 |         logs += "\n⚠️ CUDA not available, using CPU."
 54 | 
 55 |     logs += f"\n🚀 Initializing Kokoro (device={device})…"
 56 |     yield None, None, logs
 57 | 
 58 |     pipeline = KPipeline(lang_code=DEFAULT_LANG, device=device)
 59 | 
 60 |     wav_paths = []
 61 |     chapter_durations = []
 62 |     part_idx = 0
 63 |     total = len(chapters)
 64 | 
 65 |     for ci, (title, text) in enumerate(chapters):
 66 |         chapter_start = time.time()
 67 |         logs += f"\n🔊 Starting {title} ({ci+1}/{total}) – {len(text.split())} words"
 68 |         yield None, None, logs
 69 | 
 70 |         chapter_wavs = []
 71 |         for _, _, audio in pipeline(
 72 |             text,
 73 |             voice=voice,
 74 |             speed=float(speed),
 75 |             split_pattern=SPLIT_PATTERN,
 76 |         ):
 77 |             safe_title = re.sub(r"[^a-zA-Z0-9]+", "_", title)[:30]
 78 |             wav_path = wav_dir / f"part_{part_idx:05d}_{safe_title}.wav"
 79 |             sf.write(str(wav_path), audio, SAMPLE_RATE)
 80 |             wav_paths.append(str(wav_path))
 81 |             chapter_wavs.append(str(wav_path))
 82 |             part_idx += 1
 83 | 
 84 |         if chapter_wavs:
 85 |             dur_ms = chapter_duration_ms(chapter_wavs)
 86 |             chapter_durations.append((title, dur_ms))
 87 | 
 88 |         logs += f"\n✅ Finished {title} in {time.time() - chapter_start:.2f}s"
 89 |         yield None, None, logs
 90 | 
 91 |     # outputs
 92 |     out_dir = Path(workdir)
 93 |     base_name = Path(epub_file.name).stem
 94 |     mp3_path = out_dir / f"{base_name}_{voice}.mp3"
 95 |     m4b_path = out_dir / f"{base_name}_{voice}.m4b"
 96 |     chapters_txt = out_dir / f"{base_name}_chapters.txt" if chapter_durations else None
 97 | 
 98 |     if chapter_durations:
 99 |         write_chapters_metadata(chapter_durations, chapters_txt)
100 |         logs += f"\n📝 Chapters metadata saved ({chapters_txt.name})."
101 | 
102 |     if format_choice == "MP3":
103 |         if merge_to_mp3(wav_paths, str(mp3_path)):
104 |             logs += f"\n✅ MP3 created ({mp3_path.name})."
105 |             logs += f"\n⏱️ Total time: {time.time() - start_time:.2f}s"
106 |             if cli:
107 |                 yield str(mp3_path), None, logs
108 |             else:
109 |                 yield update(value=str(mp3_path), visible=True), update(visible=False), logs
110 |         else:
111 |             yield None, None, "❌ Failed to merge MP3"
112 | 
113 |     elif format_choice == "M4B":
114 |         if merge_to_m4b(wav_paths, str(m4b_path), chapters_txt):
115 |             logs += f"\n📚 M4B created ({m4b_path.name})."
116 |             logs += f"\n⏱️ Total time: {time.time() - start_time:.2f}s"
117 |             if cli:
118 |                 yield None, str(m4b_path), logs
119 |             else:
120 |                 yield update(visible=False), update(value=str(m4b_path), visible=True), logs
121 |         else:
122 |             yield None, None, "❌ Failed to merge M4B"
123 | 


--------------------------------------------------------------------------------