├── packages.txt ├── .gitignore ├── public ├── 5200.epub └── 20250828.png ├── requirements.txt ├── CONTRIBUTING.md ├── utils ├── __init__.py ├── metadata_utils.py ├── audio_utils.py └── extract_chapters.py ├── LICENSE.md ├── app.py ├── README.md ├── cli.py └── pipeline.py /packages.txt: -------------------------------------------------------------------------------- 1 | ffmpeg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | __pycache__ 3 | 4 | # misc 5 | .DS_Store 6 | certificate.pem 7 | -------------------------------------------------------------------------------- /public/5200.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adnjoo/kokoro-epub/HEAD/public/5200.epub -------------------------------------------------------------------------------- /public/20250828.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adnjoo/kokoro-epub/HEAD/public/20250828.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | gradio>=4.44.0 2 | kokoro 3 | torch # CPU build is fine on Spaces; CUDA will be used if available 4 | ebooklib 5 | beautifulsoup4 6 | soundfile 7 | pydub # optional, for MP3 merge (needs system ffmpeg) 8 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to kokoro-epub 💻 2 | 3 | Thanks for helping improve **kokoro-epub**! 
# FFMETADATA treats '=', ';', '#', '\' and newline as syntax; inside a value
# each of them must be preceded by a backslash.
_FFMETADATA_ESCAPES = str.maketrans({ch: "\\" + ch for ch in "=;#\\\n"})


def _escape_ffmetadata(value):
    """Return *value* as text that is safe to use as an FFMETADATA value."""
    return str(value).translate(_FFMETADATA_ESCAPES)


def write_chapters_metadata(chapter_durations, out_txt):
    """Write an FFmpeg ``;FFMETADATA1`` file describing back-to-back chapters.

    The first chapter starts at 0 ms and every following chapter starts where
    the previous one ended. Timestamps use a 1/1000 timebase (milliseconds).

    Args:
        chapter_durations: Iterable of ``(title, duration_ms)`` pairs in
            playback order.
        out_txt: Path of the metadata file to create; an existing file is
            overwritten.
    """
    offset = 0
    with open(out_txt, "w", encoding="utf-8") as f:
        f.write(";FFMETADATA1\n")
        for title, dur in chapter_durations:
            end = offset + dur
            f.write(
                "[CHAPTER]\n"
                "TIMEBASE=1/1000\n"
                f"START={offset}\n"
                f"END={end}\n"
                # Unescaped special characters in a title would be parsed as
                # metadata syntax (comment, new key, ...) and corrupt the file.
                f"title={_escape_ffmetadata(title)}\n\n"
            )
            offset = end
import gradio as gr
from pipeline import epub_to_audio, extract_chapters, DEFAULT_VOICE

# Voices offered in the "Voice" dropdown.
VOICE_CHOICES = [
    "af_heart",
    "af_alloy",
    "af_bella",
    "af_rose",
    "am_michael",
    "am_adam",
    "am_mandarin",
]


def _chapter_choices(uploaded):
    """Build the chapter checklist for the uploaded EPUB.

    Each entry is labelled "<title> (<n> words)". With no file uploaded the
    checklist is emptied.
    """
    if not uploaded:
        return gr.update(choices=[])
    labels = []
    for chapter_title, chapter_text in extract_chapters(uploaded.name):
        word_count = len(chapter_text.split())
        labels.append(f"{chapter_title} ({word_count} words)")
    return gr.update(choices=labels)


with gr.Blocks(title="kokoro-epub — Free EPUB → Audiobook") as demo:
    gr.Markdown("## Free EPUB → Audiobook (Open Source)")

    epub_in = gr.File(label="EPUB file", file_types=[".epub"])
    chapter_selector = gr.CheckboxGroup(label="Select chapters to convert", choices=[])
    # Refresh the checklist every time the uploaded file changes.
    epub_in.change(fn=_chapter_choices, inputs=epub_in, outputs=chapter_selector)

    with gr.Row():
        voice = gr.Dropdown(label="Voice", value=DEFAULT_VOICE, choices=VOICE_CHOICES)
        speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed")
        format_choice = gr.Radio(label="Output format", choices=["MP3", "M4B"], value="MP3")

    run_btn = gr.Button("Convert")
    # Both download widgets start hidden; epub_to_audio's final update reveals
    # the one matching the chosen format.
    audio_out = gr.File(label="Download MP3 (or ZIP)", visible=False)
    m4b_out = gr.File(label="Download M4B (with chapters)", visible=False)
    logs = gr.Textbox(label="Logs", lines=12)

    conversion_inputs = [epub_in, voice, speed, chapter_selector, format_choice]
    conversion_outputs = [audio_out, m4b_out, logs]
    run_btn.click(fn=epub_to_audio, inputs=conversion_inputs, outputs=conversion_outputs)

if __name__ == "__main__":
    demo.launch()
def merge_to_m4b(wav_paths, out_m4b_path, chapters_txt=None, bitrate="64k"):
    """Merge WAV files into a single AAC ``.m4b`` using ffmpeg's concat demuxer.

    Args:
        wav_paths: WAV file paths, in playback order.
        out_m4b_path: Destination ``.m4b`` path (overwritten if present).
        chapters_txt: Optional FFMETADATA file; when given, its metadata
            (including chapters) is mapped onto the output.
        bitrate: AAC bitrate passed to ffmpeg's ``-b:a``.

    Returns:
        True if ffmpeg exited with status 0. False if there is nothing to
        merge, ffmpeg is not on PATH, or ffmpeg could not be run or failed.
    """
    wav_paths = list(wav_paths)
    if not wav_paths or shutil.which("ffmpeg") is None:
        return False

    list_file = Path(out_m4b_path).with_suffix(".concat.txt")
    try:
        with open(list_file, "w", encoding="utf-8") as f:
            for w in wav_paths:
                # Absolute paths: the concat demuxer resolves relative entries
                # against the list file's directory, not the working directory.
                entry = Path(w).resolve().as_posix()
                # Inside single quotes ffmpeg takes everything literally, so a
                # quote in the path has to close, escape, and reopen the string.
                entry = entry.replace("'", "'\\''")
                f.write(f"file '{entry}'\n")

        cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(list_file)]
        if chapters_txt:
            cmd += ["-i", str(chapters_txt), "-map_metadata", "1"]
        cmd += ["-c:a", "aac", "-b:a", bitrate, "-movflags", "faststart", str(out_m4b_path)]

        try:
            # ffmpeg's output is not used, so discard it instead of buffering it.
            result = subprocess.run(
                cmd,
                check=False,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except (OSError, ValueError, subprocess.SubprocessError):
            return False
        return result.returncode == 0
    finally:
        # The list file is only scaffolding for this one ffmpeg invocation.
        list_file.unlink(missing_ok=True)
headings, not just the first 18 | headings = soup.find_all(["h1", "h2", "h3"]) 19 | if headings: 20 | for heading in headings: 21 | title = heading.get_text(strip=True) 22 | content_parts = [] 23 | for sib in heading.next_siblings: 24 | if getattr(sib, "name", None) in ["h1", "h2", "h3"]: 25 | break 26 | if hasattr(sib, "get_text"): 27 | content_parts.append(sib.get_text(" ", strip=True)) 28 | chapter_text = " ".join(content_parts).strip() 29 | if chapter_text: 30 | # skip obvious boilerplate 31 | skip_titles = [ 32 | "the project gutenberg", 33 | "table of contents", 34 | "the full project gutenberg license" 35 | ] 36 | if any(title.lower().startswith(s) for s in skip_titles): 37 | continue 38 | 39 | chapters.append((title, chapter_text)) 40 | else: 41 | chapters.append((item.file_name, text)) 42 | return chapters 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Kokoro Epub 3 | emoji: 💻 4 | colorFrom: red 5 | colorTo: gray 6 | sdk: gradio 7 | sdk_version: 5.44.0 8 | app_file: app.py 9 | pinned: false 10 | short_description: epub 2 mp3 / m4b 11 | --- 12 | 13 | # kokoro-epub 14 | 15 | > [!IMPORTANT] 16 | **This tool is intended for use with non-DRM, legally acquired eBooks only.**
17 | The authors are not responsible for any misuse of this software or any resulting legal consequences.
18 | 19 | Converts an EPUB into an audiobook (MP3, or M4B with chapter markers) using Python. 20 | 21 | 22 | 23 | ## Quick Start 24 | 25 | ```bash 26 | # Setup 27 | python3.11 -m venv venv 28 | source venv/bin/activate 29 | pip install -r requirements.txt 30 | 31 | # System requirement for MP3/M4B merge 32 | sudo apt install ffmpeg -y # required for pydub exports 33 | 34 | # Run 35 | python app.py 36 | ``` 37 | 38 | 👉 Try it free on [Hugging Face Space](https://huggingface.co/spaces/adnjoo/kokoro-epub) — or run faster in the cloud at [bookbearai.com](https://bookbearai.com). 39 | 40 | ### GPU Torch 41 | 42 | Gutenberg Kafka - The Metamorphosis 25152 words ~ 100 pages ~ 60mb (MP3 2h12m • 64 kbps • 24 kHz • mono) 43 | 44 | 5060 Ti 16GB - ETA 194s/3' | WPS 130 45 | vs 46 | CPU - 1389s/23' | WPS 18 47 | 48 | ```bash 49 | pip install --upgrade torch --index-url https://download.pytorch.org/whl/cu128 50 | ``` 51 | 52 | ### CLI Usage 53 | 54 | In addition to the Gradio UI, you can also run the tool directly from the command line with `cli.py`: 55 | 56 | List chapters in an EPUB: 57 | ```bash 58 | python cli.py 5200.epub --list-chapters 59 | ``` 60 | 61 | Convert to M4B, selecting only chapters II and III (`--chapters` takes the 1-based numbers printed by `--list-chapters`): 62 | 63 | ```bash 64 | python cli.py 5200.epub --format M4B --chapters 3,4 --out audiobooks 65 | ``` 66 | 67 | 68 | ## Sample Output 69 | 70 | 72 | 73 | ## Related Projects 74 | 75 | If you're exploring other ebook-to-audio solutions, you might also check out: 76 | - [readest](https://github.com/readest/readest) - modern e-reader with Edge TTS (22 voices) 77 | - [audiblez](https://github.com/santinic/audiblez) — CLI tool for converting text to audiobooks. 78 | - [ebook2audiobook](https://github.com/DrewThomasson/ebook2audiobook) — Simple Python-based ebook-to-audio converter. 79 | 80 | ## License 81 | 82 | MIT License. See [LICENSE.md](./LICENSE.md). 83 | 84 | 85 | ## Contributing 86 | 87 | 📌 See [CONTRIBUTING.md](./CONTRIBUTING.md) if you'd like to contribute. 
def _chapter_label(title, text):
    """Return the "<title> (<n> words)" label used to identify a chapter."""
    return f"{title} ({len(text.split())} words)"


def _resolve_selection(spec, chapters):
    """Translate a ``--chapters`` value into chapter labels.

    Each comma-separated token may be a 1-based chapter number (as printed by
    ``--list-chapters``), a bare chapter title, or a full
    "<title> (<n> words)" label.

    Returns:
        ``(labels, unmatched)``: the labels of the chapters that were found
        and the tokens that matched nothing.
    """
    all_labels = [_chapter_label(title, text) for title, text in chapters]
    labels, unmatched = [], []
    for token in (part.strip() for part in spec.split(",")):
        if not token:
            continue
        if token.isdigit() and 0 < int(token) <= len(chapters):
            labels.append(all_labels[int(token) - 1])
            continue
        hits = [
            label
            for (title, _), label in zip(chapters, all_labels)
            if token in (title, label)
        ]
        if hits:
            labels.extend(hits)
        else:
            unmatched.append(token)
    return labels, unmatched


def main():
    """Command-line entry point: list an EPUB's chapters or convert it to audio.

    Exits with status 0 after ``--list-chapters``, status 2 on bad arguments
    (argparse convention), and status 1 when the conversion produces no file.
    """
    parser = argparse.ArgumentParser(description="EPUB → Audiobook CLI")
    parser.add_argument("epub", help="Path to EPUB file")
    parser.add_argument("--out", default="output", help="Output directory")
    parser.add_argument("--voice", default=DEFAULT_VOICE, help="Voice (default af_heart)")
    parser.add_argument("--speed", type=float, default=1.0, help="Playback speed multiplier")
    parser.add_argument("--format", choices=["MP3", "M4B"], default="MP3", help="Output format")
    parser.add_argument("--list-chapters", action="store_true", help="List chapters and exit")
    parser.add_argument(
        "--chapters",
        default=None,
        help="Comma-separated chapter numbers (from --list-chapters) or titles to convert",
    )
    args = parser.parse_args()

    epub_path = Path(args.epub)
    if not epub_path.is_file():
        parser.error(f"EPUB file not found: {epub_path}")
    chapters = extract_chapters(str(epub_path))

    # Dry run
    if args.list_chapters:
        print(f"\n📖 {epub_path.name} — {len(chapters)} chapters found:\n")
        for i, (title, text) in enumerate(chapters, 1):
            print(f"{i:02d}. {_chapter_label(title, text)}")
        sys.exit(0)

    selected = []
    if args.chapters:
        selected, unmatched = _resolve_selection(args.chapters, chapters)
        for token in unmatched:
            print(f"⚠️ Ignoring unknown chapter: {token}", file=sys.stderr)
        if not selected:
            # An empty selection means "convert everything" to the pipeline,
            # which is never what an explicit --chapters request intends.
            parser.error("--chapters matched no chapter; run with --list-chapters to see the options")

    result_file = None
    last_logs = ""
    # The pipeline only reads `.name` from this handle; the `with` closes it
    # once the conversion generator is exhausted.
    with epub_path.open("rb") as epub_file:
        for mp3_out, m4b_out, logs in epub_to_audio(
            epub_file,
            args.voice,
            args.speed,
            selected,
            args.format.upper(),
            cli=True,
        ):
            if logs and logs != last_logs:
                # Progress normally arrives as one growing log, so print only
                # the new tail. Failure paths yield a standalone message that
                # must be printed whole instead of sliced.
                fresh = logs[len(last_logs):] if logs.startswith(last_logs) else logs
                for line in fresh.strip().splitlines():
                    print(" " + line)
                last_logs = logs

            if mp3_out or m4b_out:
                tmp_result = Path(mp3_out or m4b_out)
                out_dir = Path(args.out)
                out_dir.mkdir(parents=True, exist_ok=True)

                # Rebuild filename in final output dir
                final_path = out_dir / tmp_result.name
                shutil.copy(tmp_result, final_path)
                result_file = final_path

    if result_file is None:
        print("\n❌ Conversion failed: no audiobook was produced.", file=sys.stderr)
        sys.exit(1)

    print(f"\n✅ Audiobook ready: {result_file}")
26 | """ 27 | start_time = time.time() 28 | workdir = tempfile.mkdtemp(prefix="kokoro_epub_") 29 | wav_dir = Path(workdir) / "wavs" 30 | wav_dir.mkdir(parents=True, exist_ok=True) 31 | 32 | logs = "🔎 Reading EPUB…" 33 | yield None, None, logs 34 | 35 | chapters = extract_chapters(epub_file.name) 36 | if not chapters: 37 | yield None, None, "❌ No chapters found." 38 | return 39 | 40 | if selected_titles: 41 | chapters = [ 42 | (t, txt) 43 | for (t, txt) in chapters 44 | if f"{t} ({len(txt.split())} words)" in selected_titles 45 | ] 46 | 47 | # device 48 | if torch.cuda.is_available(): 49 | device = "cuda" 50 | logs += f"\n✅ CUDA available: {torch.cuda.get_device_name(0)}" 51 | else: 52 | device = "cpu" 53 | logs += "\n⚠️ CUDA not available, using CPU." 54 | 55 | logs += f"\n🚀 Initializing Kokoro (device={device})…" 56 | yield None, None, logs 57 | 58 | pipeline = KPipeline(lang_code=DEFAULT_LANG, device=device) 59 | 60 | wav_paths = [] 61 | chapter_durations = [] 62 | part_idx = 0 63 | total = len(chapters) 64 | 65 | for ci, (title, text) in enumerate(chapters): 66 | chapter_start = time.time() 67 | logs += f"\n🔊 Starting {title} ({ci+1}/{total}) – {len(text.split())} words" 68 | yield None, None, logs 69 | 70 | chapter_wavs = [] 71 | for _, _, audio in pipeline( 72 | text, 73 | voice=voice, 74 | speed=float(speed), 75 | split_pattern=SPLIT_PATTERN, 76 | ): 77 | safe_title = re.sub(r"[^a-zA-Z0-9]+", "_", title)[:30] 78 | wav_path = wav_dir / f"part_{part_idx:05d}_{safe_title}.wav" 79 | sf.write(str(wav_path), audio, SAMPLE_RATE) 80 | wav_paths.append(str(wav_path)) 81 | chapter_wavs.append(str(wav_path)) 82 | part_idx += 1 83 | 84 | if chapter_wavs: 85 | dur_ms = chapter_duration_ms(chapter_wavs) 86 | chapter_durations.append((title, dur_ms)) 87 | 88 | logs += f"\n✅ Finished {title} in {time.time() - chapter_start:.2f}s" 89 | yield None, None, logs 90 | 91 | # outputs 92 | out_dir = Path(workdir) 93 | base_name = Path(epub_file.name).stem 94 | mp3_path = out_dir / 
f"{base_name}_{voice}.mp3" 95 | m4b_path = out_dir / f"{base_name}_{voice}.m4b" 96 | chapters_txt = out_dir / f"{base_name}_chapters.txt" if chapter_durations else None 97 | 98 | if chapter_durations: 99 | write_chapters_metadata(chapter_durations, chapters_txt) 100 | logs += f"\n📝 Chapters metadata saved ({chapters_txt.name})." 101 | 102 | if format_choice == "MP3": 103 | if merge_to_mp3(wav_paths, str(mp3_path)): 104 | logs += f"\n✅ MP3 created ({mp3_path.name})." 105 | logs += f"\n⏱️ Total time: {time.time() - start_time:.2f}s" 106 | if cli: 107 | yield str(mp3_path), None, logs 108 | else: 109 | yield update(value=str(mp3_path), visible=True), update(visible=False), logs 110 | else: 111 | yield None, None, "❌ Failed to merge MP3" 112 | 113 | elif format_choice == "M4B": 114 | if merge_to_m4b(wav_paths, str(m4b_path), chapters_txt): 115 | logs += f"\n📚 M4B created ({m4b_path.name})." 116 | logs += f"\n⏱️ Total time: {time.time() - start_time:.2f}s" 117 | if cli: 118 | yield None, str(m4b_path), logs 119 | else: 120 | yield update(visible=False), update(value=str(m4b_path), visible=True), logs 121 | else: 122 | yield None, None, "❌ Failed to merge M4B" 123 | --------------------------------------------------------------------------------