├── packages.txt
├── .gitignore
├── public
├── 5200.epub
└── 20250828.png
├── requirements.txt
├── CONTRIBUTING.md
├── utils
├── __init__.py
├── metadata_utils.py
├── audio_utils.py
└── extract_chapters.py
├── LICENSE.md
├── app.py
├── README.md
├── cli.py
└── pipeline.py
/packages.txt:
--------------------------------------------------------------------------------
1 | ffmpeg
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | venv/
2 | __pycache__
3 |
4 | # misc
5 | .DS_Store
6 | certificate.pem
7 |
--------------------------------------------------------------------------------
/public/5200.epub:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adnjoo/kokoro-epub/HEAD/public/5200.epub
--------------------------------------------------------------------------------
/public/20250828.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adnjoo/kokoro-epub/HEAD/public/20250828.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | gradio>=4.44.0
2 | kokoro
3 | torch # CPU build is fine on Spaces; CUDA will be used if available
4 | ebooklib
5 | beautifulsoup4
6 | soundfile
7 | pydub # optional, for MP3 merge (needs system ffmpeg)
8 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to kokoro-epub 💻
2 |
3 | Thanks for helping improve **kokoro-epub**!
4 |
5 | ## How to Contribute
6 |
7 | * **Report bugs / request features** → [Open an issue](https://github.com/adnjoo/kokoro-epub/issues)
8 | * **Code changes** → Fork, branch, and submit a PR
9 |
10 | ## Guidelines
11 |
12 | * Keep PRs small and focused
13 | * Follow existing style & naming
14 | * Be respectful (see [Code of Conduct](https://opensource.guide/code-of-conduct/))
15 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # utils/__init__.py
2 | """
3 | Utility package for kokoro-epub.
4 |
5 | This exposes the most common helpers so they can be imported
6 | directly from `utils` instead of deep submodules.
7 | """
8 |
9 | from .audio_utils import merge_to_mp3, merge_to_m4b, chapter_duration_ms
10 | from .metadata_utils import write_chapters_metadata
11 | from .extract_chapters import extract_chapters
12 |
13 | __all__ = [
14 | "merge_to_mp3",
15 | "merge_to_m4b",
16 | "chapter_duration_ms",
17 | "write_chapters_metadata",
18 | "extract_chapters",
19 | ]
20 |
--------------------------------------------------------------------------------
/utils/metadata_utils.py:
--------------------------------------------------------------------------------
# Characters that must be backslash-escaped inside FFMETADATA keys/values.
_FFMETADATA_ESCAPES = str.maketrans({
    "\\": "\\\\",
    "=": "\\=",
    ";": "\\;",
    "#": "\\#",
    "\n": "\\\n",
})


def write_chapters_metadata(chapter_durations, out_txt):
    """
    Write an FFMETADATA1 file describing consecutive chapters.

    chapter_durations: [(title, duration_ms), ...]
    out_txt: path of the metadata file to create (overwritten if present).

    Chapters are laid out back to back starting at 0 ms: each chapter starts
    where the previous one ended. Times are written with TIMEBASE=1/1000,
    i.e. in milliseconds.
    """
    offset = 0
    with open(out_txt, "w", encoding="utf-8") as f:
        f.write(";FFMETADATA1\n")
        for title, dur in chapter_durations:
            start = offset
            end = offset + dur
            # Unescaped '=', ';', '#', '\' or newlines in a title would be
            # parsed as syntax and corrupt the chapter entry.
            safe_title = str(title).translate(_FFMETADATA_ESCAPES)
            f.write("[CHAPTER]\n")
            f.write("TIMEBASE=1/1000\n")
            f.write(f"START={start}\n")
            f.write(f"END={end}\n")
            f.write(f"title={safe_title}\n\n")
            offset = end
17 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Andrew Njoo
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | from pipeline import epub_to_audio, extract_chapters, DEFAULT_VOICE
3 |
# Gradio UI: upload an EPUB, choose chapters, voice, speed and output format,
# then run epub_to_audio and show its streamed log text.
with gr.Blocks(title="kokoro-epub — Free EPUB → Audiobook") as demo:
    gr.Markdown("## Free EPUB → Audiobook (Open Source)")

    epub_in = gr.File(label="EPUB file", file_types=[".epub"])
    chapter_selector = gr.CheckboxGroup(label="Select chapters to convert", choices=[])
    # Refresh the chapter checkboxes whenever the uploaded file changes.
    # Labels use the "<title> (<word count> words)" format, which is the same
    # string epub_to_audio rebuilds when filtering by selected_titles.
    epub_in.change(
        fn=lambda f: gr.update(
            choices=[f"{t} ({len(txt.split())} words)" for (t, txt) in extract_chapters(f.name)] if f else []
        ),
        inputs=epub_in,
        outputs=chapter_selector,
    )

    with gr.Row():
        voice = gr.Dropdown(
            label="Voice",
            value=DEFAULT_VOICE,
            choices=["af_heart","af_alloy","af_bella","af_rose","am_michael","am_adam","am_mandarin"],
        )
        speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed")
        format_choice = gr.Radio(label="Output format", choices=["MP3", "M4B"], value="MP3")

    run_btn = gr.Button("Convert")
    # Both download components start hidden; epub_to_audio yields updates that
    # reveal the one matching the chosen format.
    audio_out = gr.File(label="Download MP3 (or ZIP)", visible=False)
    m4b_out = gr.File(label="Download M4B (with chapters)", visible=False)
    logs = gr.Textbox(label="Logs", lines=12)

    # The order of inputs matches epub_to_audio's positional parameters
    # (epub_file, voice, speed, selected_titles, format_choice).
    run_btn.click(
        fn=epub_to_audio,
        inputs=[epub_in, voice, speed, chapter_selector, format_choice],
        outputs=[audio_out, m4b_out, logs],
    )

if __name__ == "__main__":
    demo.launch()
39 |
--------------------------------------------------------------------------------
/utils/audio_utils.py:
--------------------------------------------------------------------------------
1 | import os, shutil, subprocess
2 | from pathlib import Path
3 | from pydub import AudioSegment
4 |
def merge_to_mp3(wav_paths, out_mp3_path, bitrate="64k"):
    """Concatenate WAV files in order and export them as one MP3.

    Returns False without doing any work when no ``ffmpeg`` executable is on
    PATH; otherwise exports to ``out_mp3_path`` at ``bitrate`` and returns True.
    """
    ffmpeg_found = shutil.which("ffmpeg") is not None
    if not ffmpeg_found:
        return False

    # Start from an empty segment and append each WAV in the given order.
    merged = AudioSegment.silent(duration=0)
    for wav_file in wav_paths:
        segment = AudioSegment.from_wav(wav_file)
        merged = merged + segment

    merged.export(out_mp3_path, format="mp3", bitrate=bitrate)
    return True
14 |
def _concat_list_entry(wav_path):
    """Return the ffmpeg concat-demuxer ``file '...'`` line for one WAV path.

    The path is made absolute because ffmpeg resolves relative entries
    against the list file's directory rather than the current working
    directory. A single quote cannot appear inside a single-quoted string in
    ffmpeg's syntax, so each one is written as ``'\\''`` (close quote, escaped
    quote, reopen quote).
    """
    posix_path = Path(wav_path).resolve().as_posix()
    quoted = posix_path.replace("'", "'\\''")
    return f"file '{quoted}'\n"


def merge_to_m4b(wav_paths, out_m4b_path, chapters_txt=None, bitrate="64k"):
    """Merge WAVs into .m4b (AAC) with optional chapters metadata.

    wav_paths: WAV files to concatenate, in playback order.
    out_m4b_path: destination file; overwritten if it exists (``-y``).
    chapters_txt: optional FFMETADATA file whose metadata is mapped into
        the output.
    bitrate: AAC bitrate passed to ffmpeg's ``-b:a``.

    Returns True when ffmpeg exits with status 0, False when ffmpeg is not
    on PATH, cannot be started, or exits with a non-zero status.
    """
    if shutil.which("ffmpeg") is None:
        return False

    list_file = Path(out_m4b_path).with_suffix(".concat.txt")
    cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(list_file)]
    if chapters_txt:
        cmd += ["-i", str(chapters_txt), "-map_metadata", "1"]
    cmd += ["-c:a", "aac", "-b:a", bitrate, "-movflags", "faststart", str(out_m4b_path)]

    try:
        with open(list_file, "w", encoding="utf-8") as f:
            f.writelines(_concat_list_entry(w) for w in wav_paths)
        try:
            # Output is discarded; only the exit status is used.
            result = subprocess.run(
                cmd,
                check=False,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception:
            # Deliberate best-effort: any failure to launch ffmpeg is
            # reported to the caller as an unsuccessful merge.
            return False
        return result.returncode == 0
    finally:
        # The list file is scratch input for ffmpeg only; do not leave it
        # next to the audiobook.
        list_file.unlink(missing_ok=True)
32 |
def chapter_duration_ms(wav_files):
    """Add up the lengths of the given WAV files, in milliseconds.

    Each file is loaded with pydub and measured with ``len()``; an empty
    input gives 0.
    """
    total_ms = 0
    for wav_file in wav_files:
        total_ms += len(AudioSegment.from_wav(wav_file))
    return total_ms
36 |
--------------------------------------------------------------------------------
/utils/extract_chapters.py:
--------------------------------------------------------------------------------
1 | from ebooklib import epub, ITEM_DOCUMENT
2 | from bs4 import BeautifulSoup
3 |
# Tags treated as chapter headings.
_HEADING_TAGS = ("h1", "h2", "h3")

# Lower-cased title prefixes that mark obvious boilerplate sections.
_BOILERPLATE_PREFIXES = (
    "the project gutenberg",
    "table of contents",
    "the full project gutenberg license",
)


def extract_chapters(epub_path):
    """Extract chapter titles and text content from an EPUB file.

    Spine documents are visited in reading order. Within a document every
    h1/h2/h3 heading starts a chapter whose text is gathered from the
    heading's following siblings up to the next heading. A document without
    any heading becomes a single chapter titled with its file name.
    Documents with no text, chapters with no text, and chapters whose title
    starts with a known boilerplate prefix are skipped.

    Returns a list of ``(title, text)`` tuples.
    """
    book = epub.read_epub(epub_path)
    chapters = []
    for idref, _ in book.spine:
        item = book.get_item_with_id(idref)
        # A spine entry may reference an id that is missing from the
        # manifest; skip it instead of failing on None.
        if item is None or item.get_type() != ITEM_DOCUMENT:
            continue

        soup = BeautifulSoup(item.get_content(), "html.parser")
        text = soup.get_text().strip()
        if not text:
            continue

        # find all chapter headings, not just the first
        headings = soup.find_all(list(_HEADING_TAGS))
        if not headings:
            chapters.append((item.file_name, text))
            continue

        for heading in headings:
            title = heading.get_text(strip=True)
            # skip obvious boilerplate before collecting its text
            if title.lower().startswith(_BOILERPLATE_PREFIXES):
                continue

            content_parts = []
            for sib in heading.next_siblings:
                if getattr(sib, "name", None) in _HEADING_TAGS:
                    break
                if hasattr(sib, "get_text"):
                    content_parts.append(sib.get_text(" ", strip=True))

            chapter_text = " ".join(content_parts).strip()
            if chapter_text:
                chapters.append((title, chapter_text))
    return chapters
43 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Kokoro Epub
3 | emoji: 💻
4 | colorFrom: red
5 | colorTo: gray
6 | sdk: gradio
7 | sdk_version: 5.44.0
8 | app_file: app.py
9 | pinned: false
10 | short_description: epub 2 mp3 / m4b
11 | ---
12 |
13 | # kokoro-epub
14 |
15 | > [!IMPORTANT]
16 | > **This tool is intended for use with non-DRM, legally acquired eBooks only.**
17 | > The authors are not responsible for any misuse of this software or any resulting legal consequences.
18 |
19 | Converts EPUB files to audiobooks (MP3, or M4B with chapter markers) using Python.
20 |
21 |
22 |
23 | ## Quick Start
24 |
25 | ```bash
26 | # Setup
27 | python3.11 -m venv venv
28 | source venv/bin/activate
29 | pip install -r requirements.txt
30 |
31 | # System requirement for MP3/M4B merge
32 | sudo apt install ffmpeg -y # required for pydub exports
33 |
34 | # Run
35 | python app.py
36 | ```
37 |
38 | 👉 Try it free on [Hugging Face Space](https://huggingface.co/spaces/adnjoo/kokoro-epub) — or run faster in the cloud at [bookbearai.com](https://bookbearai.com).
39 |
40 | ### GPU Torch
41 |
42 | Gutenberg Kafka - The Metamorphosis 25152 words ~ 100 pages ~ 60mb (MP3 2h12m • 64 kbps • 24 kHz • mono)
43 |
44 | 5060 Ti 16GB - ETA 194s/3' | WPS 130
45 | vs
46 | CPU - 1389s/23' | WPS 18
47 |
48 | ```bash
49 | pip install --upgrade torch --index-url https://download.pytorch.org/whl/cu128
50 | ```
51 |
52 | ### CLI Usage
53 |
54 | In addition to the Gradio UI, you can also run the tool directly from the command line with `cli.py`:
55 |
56 | List chapters in an EPUB:
57 | ```bash
58 | python cli.py 5200.epub --list-chapters
59 | ```
60 |
61 | Convert to M4B, selecting only chapters II and III:
62 |
63 | ```bash
64 | python cli.py 5200.epub --format M4B --chapters 3,4 --out audiobooks
65 | ```
66 |
67 |
68 | ## Sample Output
69 |
70 |
72 |
73 | ## Related Projects
74 |
75 | If you're exploring other ebook-to-audio solutions, you might also check out:
76 | - [readest](https://github.com/readest/readest) - modern e-reader with Edge TTS (22 voices)
77 | - [audiblez](https://github.com/santinic/audiblez) — CLI tool for converting text to audiobooks.
78 | - [ebook2audiobook](https://github.com/DrewThomasson/ebook2audiobook) — Simple Python-based ebook-to-audio converter.
79 |
80 | ## License
81 |
82 | MIT License. See [LICENSE.md](./LICENSE.md).
83 |
84 |
85 | ## Contributing
86 |
87 | 📌 See [CONTRIBUTING.md](./CONTRIBUTING.md) if you'd like to contribute.
88 |
--------------------------------------------------------------------------------
/cli.py:
--------------------------------------------------------------------------------
1 | import argparse, sys
2 | from pathlib import Path
3 | import shutil
4 | from pipeline import epub_to_audio, extract_chapters, DEFAULT_VOICE
5 |
def _chapter_label(title, text):
    """Return the ``"<title> (<n> words)"`` label that epub_to_audio filters on."""
    return f"{title} ({len(text.split())} words)"


def _select_chapter_labels(chapters, spec):
    """Resolve a ``--chapters`` value into chapter labels.

    chapters: [(title, text), ...] as returned by extract_chapters.
    spec: comma-separated entries; each entry is a 1-based chapter number,
        a chapter title, or a full "<title> (<n> words)" label. Title and
        label matching ignores case.

    Returns ``(labels, unknown)``: the de-duplicated labels in the order
    requested, and the entries that matched no chapter.
    """
    labels = []
    unknown = []
    for token in (part.strip() for part in spec.split(",")):
        if not token:
            continue
        if token.isdecimal():
            idx = int(token) - 1
            matches = [chapters[idx]] if 0 <= idx < len(chapters) else []
        else:
            wanted = token.casefold()
            matches = [
                (title, text)
                for title, text in chapters
                if wanted in (title.casefold(), _chapter_label(title, text).casefold())
            ]
        if not matches:
            unknown.append(token)
        for title, text in matches:
            label = _chapter_label(title, text)
            if label not in labels:
                labels.append(label)
    return labels, unknown


def _unseen_log_text(previous, current):
    """Return the part of *current* that has not been printed yet.

    The log normally only grows, so the new text is the suffix after
    *previous*. If the log was replaced instead, the whole new text is
    returned so the message is not lost.
    """
    if current.startswith(previous):
        return current[len(previous):]
    return current


def main():
    """Command-line entry point: list an EPUB's chapters or convert it to audio.

    Exits with status 2 on invalid arguments (missing file, unknown
    chapters) and status 1 when the conversion produced no audiobook.
    """
    parser = argparse.ArgumentParser(description="EPUB → Audiobook CLI")
    parser.add_argument("epub", help="Path to EPUB file")
    parser.add_argument("--out", default="output", help="Output directory")
    parser.add_argument("--voice", default=DEFAULT_VOICE, help="Voice (default af_heart)")
    parser.add_argument("--speed", type=float, default=1.0, help="Playback speed multiplier")
    parser.add_argument("--format", choices=["MP3","M4B"], default="MP3", help="Output format")
    parser.add_argument("--list-chapters", action="store_true", help="List chapters and exit")
    parser.add_argument(
        "--chapters",
        help="Comma-separated chapter numbers (as shown by --list-chapters) or titles to convert",
        default=None,
    )
    args = parser.parse_args()

    epub_path = Path(args.epub)
    if not epub_path.is_file():
        parser.error(f"EPUB file not found: {epub_path}")
    chapters = extract_chapters(str(epub_path))

    # Dry run
    if args.list_chapters:
        print(f"\n📖 {epub_path.name} — {len(chapters)} chapters found:\n")
        for i, (title, text) in enumerate(chapters, 1):
            print(f"{i:02d}. {title} ({len(text.split())} words)")
        sys.exit(0)

    # An empty selection means "convert everything" to the pipeline, so a
    # --chapters value that resolves to nothing must be rejected here.
    selected = []
    if args.chapters:
        selected, unknown = _select_chapter_labels(chapters, args.chapters)
        if unknown:
            parser.error("unknown chapter(s): " + ", ".join(unknown))
        if not selected:
            parser.error("--chapters did not select any chapter")

    result_file = None
    last_logs = ""
    # epub_to_audio only reads the ``name`` attribute of the object it is
    # given; the handle is closed as soon as the conversion finishes.
    with epub_path.open("rb") as epub_file:
        for mp3_out, m4b_out, logs in epub_to_audio(
            epub_file,
            args.voice,
            args.speed,
            selected,
            args.format.upper(),
            cli=True,
        ):
            if logs and logs != last_logs:
                for line in _unseen_log_text(last_logs, logs).strip().splitlines():
                    print(" " + line)
                last_logs = logs

            if mp3_out or m4b_out:
                tmp_result = Path(mp3_out or m4b_out)
                out_dir = Path(args.out)
                out_dir.mkdir(parents=True, exist_ok=True)

                # Rebuild filename in final output dir
                final_path = out_dir / tmp_result.name
                shutil.copy(tmp_result, final_path)
                result_file = final_path

    if result_file is None:
        print("\n❌ Conversion failed; no audiobook was produced.", file=sys.stderr)
        sys.exit(1)

    print(f"\n✅ Audiobook ready: {result_file}")

if __name__ == "__main__":
    main()
70 |
--------------------------------------------------------------------------------
/pipeline.py:
--------------------------------------------------------------------------------
1 | import re, time, tempfile
2 | from pathlib import Path
3 | import soundfile as sf
4 | import torch
5 | from kokoro import KPipeline
6 | from gradio import update
7 |
8 | from utils import (
9 | extract_chapters,
10 | merge_to_mp3,
11 | merge_to_m4b,
12 | chapter_duration_ms,
13 | write_chapters_metadata,
14 | )
15 |
SPLIT_PATTERN = r"\n{2,}"   # two or more newlines; passed to KPipeline as split_pattern
SAMPLE_RATE = 24000         # sample rate used when writing each WAV segment
DEFAULT_LANG = "a"          # lang_code given to KPipeline
DEFAULT_VOICE = "af_heart"


def epub_to_audio(epub_file, voice, speed, selected_titles, format_choice, progress=None, cli=False):
    """
    Core generator that streams progress and yields (mp3_out, m4b_out, logs).
    Used by both Gradio and CLI.

    epub_file: a path (str / os.PathLike) or an object whose ``name``
        attribute is the path of the EPUB.
    voice, speed: forwarded to the Kokoro pipeline (speed is cast to float).
    selected_titles: labels of the form "<title> (<n> words)"; when empty or
        None every extracted chapter is converted.
    format_choice: "MP3" or "M4B" (case-insensitive).
    progress: accepted for interface compatibility; not used here.
    cli: when True the final tuple carries plain path strings, otherwise
        gradio ``update`` objects that reveal the matching download widget.

    Every intermediate yield is ``(None, None, logs)`` where ``logs`` is the
    accumulated log text. On failure the last yield is also
    ``(None, None, logs)`` with a line starting with "❌".
    """
    import os
    import shutil

    # Cheap validation first, before a temp dir is created or the model loads.
    if epub_file is None:
        yield None, None, "❌ No EPUB file provided."
        return
    if isinstance(epub_file, (str, os.PathLike)):
        epub_path = os.fspath(epub_file)
    else:
        epub_path = epub_file.name

    fmt = str(format_choice).upper()
    if fmt not in ("MP3", "M4B"):
        yield None, None, f"❌ Unsupported output format: {format_choice}"
        return

    start_time = time.time()
    workdir = tempfile.mkdtemp(prefix="kokoro_epub_")
    wav_dir = Path(workdir) / "wavs"
    wav_dir.mkdir(parents=True, exist_ok=True)

    produced = False
    try:
        logs = "🔎 Reading EPUB…"
        yield None, None, logs

        chapters = extract_chapters(epub_path)
        if not chapters:
            yield None, None, "❌ No chapters found."
            return

        if selected_titles:
            wanted = set(selected_titles)
            chapters = [
                (t, txt)
                for (t, txt) in chapters
                if f"{t} ({len(txt.split())} words)" in wanted
            ]
            if not chapters:
                logs += "\n❌ None of the selected chapters were found."
                yield None, None, logs
                return

        # device
        if torch.cuda.is_available():
            device = "cuda"
            logs += f"\n✅ CUDA available: {torch.cuda.get_device_name(0)}"
        else:
            device = "cpu"
            logs += "\n⚠️ CUDA not available, using CPU."

        logs += f"\n🚀 Initializing Kokoro (device={device})…"
        yield None, None, logs

        pipeline = KPipeline(lang_code=DEFAULT_LANG, device=device)

        wav_paths = []
        chapter_durations = []
        part_idx = 0
        total = len(chapters)

        for ci, (title, text) in enumerate(chapters):
            chapter_start = time.time()
            logs += f"\n🔊 Starting {title} ({ci+1}/{total}) – {len(text.split())} words"
            yield None, None, logs

            # File-name-safe form of the title; the same for every segment.
            safe_title = re.sub(r"[^a-zA-Z0-9]+", "_", title)[:30]
            chapter_wavs = []
            for _, _, audio in pipeline(
                text,
                voice=voice,
                speed=float(speed),
                split_pattern=SPLIT_PATTERN,
            ):
                # Defensive: skip a segment if the pipeline returned no audio.
                if audio is None:
                    continue
                wav_path = wav_dir / f"part_{part_idx:05d}_{safe_title}.wav"
                sf.write(str(wav_path), audio, SAMPLE_RATE)
                wav_paths.append(str(wav_path))
                chapter_wavs.append(str(wav_path))
                part_idx += 1

            if chapter_wavs:
                dur_ms = chapter_duration_ms(chapter_wavs)
                chapter_durations.append((title, dur_ms))

            logs += f"\n✅ Finished {title} in {time.time() - chapter_start:.2f}s"
            yield None, None, logs

        if not wav_paths:
            logs += "\n❌ No audio was generated."
            yield None, None, logs
            return

        # outputs
        out_dir = Path(workdir)
        base_name = Path(epub_path).stem
        mp3_path = out_dir / f"{base_name}_{voice}.mp3"
        m4b_path = out_dir / f"{base_name}_{voice}.m4b"
        chapters_txt = out_dir / f"{base_name}_chapters.txt" if chapter_durations else None

        if chapter_durations:
            write_chapters_metadata(chapter_durations, chapters_txt)
            logs += f"\n📝 Chapters metadata saved ({chapters_txt.name})."

        if fmt == "MP3":
            if merge_to_mp3(wav_paths, str(mp3_path)):
                logs += f"\n✅ MP3 created ({mp3_path.name})."
                logs += f"\n⏱️ Total time: {time.time() - start_time:.2f}s"
                # Set before yielding so cleanup keeps the result even if
                # the consumer stops iterating right after this item.
                produced = True
                if cli:
                    yield str(mp3_path), None, logs
                else:
                    yield update(value=str(mp3_path), visible=True), update(visible=False), logs
            else:
                # Append so the earlier log lines are not lost.
                logs += "\n❌ Failed to merge MP3"
                yield None, None, logs

        else:  # fmt == "M4B"
            if merge_to_m4b(wav_paths, str(m4b_path), chapters_txt):
                logs += f"\n📚 M4B created ({m4b_path.name})."
                logs += f"\n⏱️ Total time: {time.time() - start_time:.2f}s"
                produced = True
                if cli:
                    yield None, str(m4b_path), logs
                else:
                    yield update(visible=False), update(value=str(m4b_path), visible=True), logs
            else:
                logs += "\n❌ Failed to merge M4B"
                yield None, None, logs
    finally:
        # The per-segment WAVs are only merge inputs. Remove them once the
        # merge is done, or remove the whole work dir when nothing usable
        # was produced.
        shutil.rmtree(wav_dir if produced else workdir, ignore_errors=True)
123 |
--------------------------------------------------------------------------------