├── .DS_Store ├── .gitignore ├── LICENSE ├── README.md ├── make.py ├── requirements.txt └── test_books ├── .DS_Store ├── lemo.epub └── lemo_translated.epub /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengzzzzz/translate-epub-book-by-openai/ee8491f902cf7af6d8d88cc0e1dd891c4f3654ae/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 zengzzzzz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # translate epub books by openai api 2 | Translate epub books using AI. The translation is not perfect, but it is very interesting. PRs are welcome. 3 | 4 | ![image](https://raw.githubusercontent.com/zengzzzzz/zengzzzzz-img/main/tranlate_epub_book_by_openai/readme_pic.jpg) 5 | 6 | ## ready 7 | 8 | 1. OpenAI API key 9 | 2. epub books 10 | 3. python3.8+ 11 | 12 | ## how to use 13 | 14 | 1. pip install -r requirements.txt 15 | 2. run this command: `python3 make.py --book_name ${book_name} --openai_key ${openai_key}` 16 | 17 | ## attention 18 | 19 | 1. The OpenAI API key is not free; you can apply for one on the [official website](https://platform.openai.com/) 20 | 21 | # thanks 22 | 23 | This project is based on yihong's project, and I changed it for my own use. Batch translation is faster and cheaper, and is suitable for those who do not need perfect formatting. Unused functions have been removed. It is now geared more toward programmers than toward general users. 
# --- surrounding repository-dump text from the original lines, kept verbatim ---
# 24 |
# 25 | - @[yihong](https://github.com/yihong0618)
# 26 |
# --------------------------------------------------------------------------------
# /make.py:
# --------------------------------------------------------------------------------
"""Translate the paragraphs of an EPUB book to Chinese with the OpenAI chat API.

Usage::

    python3 make.py --book_name ${book_name} --openai_key ${openai_key}

The translated book is written next to the input as ``<name>_translated.epub``.
"""
import argparse
import ast
import os
import time

import ebooklib
import openai
from bs4 import BeautifulSoup as bs
from ebooklib import epub
from rich import print


class ChatGPT:
    """Send text to the OpenAI chat-completion endpoint and parse the reply."""

    MODEL = "gpt-3.5-turbo"
    # openai has a rate limit for the api (Limit: 20 / min), so pause after
    # every successful request.
    REQUEST_INTERVAL = 3
    # Pause before the single retry that follows a failed request.
    RETRY_DELAY = 60

    def __init__(self, key):
        """Store the OpenAI API key used for every request."""
        self.key = key

    @staticmethod
    def _build_prompt(text):
        """Return the user prompt asking for a Chinese translation of *text*."""
        # English prompt here to save tokens.
        return (
            f"Please help me to translate `{text}` to Chinese, please return "
            "only translated content not include the origin text, maintain "
            "the same formatting as the original textual list individual "
            "elements "
        )

    def _request_translation(self, text):
        """Perform one chat-completion request and return the raw reply text."""
        completion = openai.ChatCompletion.create(
            model=self.MODEL,
            messages=[{"role": "user", "content": self._build_prompt(text)}],
        )
        return completion["choices"][0].get("message").get("content")

    @staticmethod
    def _parse_reply(reply):
        """Turn a raw reply into a Python object when it is a literal.

        The reply typically looks like ``"\\n\\n['...', '...']"``; the elements
        of that list are wanted, not the surrounding newlines. When the reply
        is not a valid Python literal the stripped string is returned
        unchanged, which does not influence the result.
        """
        reply = reply.strip("\n")
        try:
            return ast.literal_eval(reply)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Exactly the exceptions literal_eval documents for malformed input.
            return reply

    def translate(self, text):
        """Translate *text* to Chinese.

        Args:
            text: The content to translate; it is interpolated into the prompt
                with ``str()`` semantics (the caller in this file passes a list
                of paragraph strings).

        Returns:
            The parsed reply: whatever ``ast.literal_eval`` yields for the
            reply (usually a list of strings), or the stripped reply string
            when it is not a Python literal.

        Raises:
            Exception: Whatever the second attempt raises. The first failure
                is printed and retried once after ``RETRY_DELAY`` seconds.
        """
        print(text)
        openai.api_key = self.key
        try:
            t_text = self._parse_reply(self._request_translation(text))
            time.sleep(self.REQUEST_INTERVAL)
        except Exception as e:
            # Deliberate catch-all: any failure (rate limit, network error,
            # malformed response) gets one delayed retry.
            print(str(e), f"will sleep {self.RETRY_DELAY} seconds")
            time.sleep(self.RETRY_DELAY)
            t_text = self._parse_reply(self._request_translation(text))
        print(t_text)
        return t_text


class BEPUB:
    """Build a bilingual copy of an EPUB by appending translations to paragraphs."""

    def __init__(self, epub_name, key, batch_size):
        """Load the book and prepare the translator.

        Args:
            epub_name: Path of the EPUB file to translate.
            key: OpenAI API key.
            batch_size: Number of paragraphs sent per translation request.
        """
        self.epub_name = epub_name
        self.translate_model = ChatGPT(key)
        self.origin_book = epub.read_epub(self.epub_name)
        self.batch_size = batch_size

    @staticmethod
    def _merge_translation(translated):
        """Collapse a translation result into a single string.

        A plain string is returned as-is; joining it element-wise would put a
        space between every character.
        """
        if isinstance(translated, str):
            return translated
        if isinstance(translated, (list, tuple)):
            return " ".join(map(str, translated))
        return str(translated)

    def _translate_batch(self, batch, pairwise=False):
        """Translate *batch* (a list of ``<p>`` tags) and write the result back.

        With ``pairwise=True`` each paragraph receives its own translation,
        but only when the reply is a sequence with exactly one element per
        paragraph. In every other case the whole translation is appended to
        the last paragraph of the batch, to avoid depending on the reply
        being in the same order as the request.
        """
        translated = self.translate_model.translate([p.text for p in batch])
        if (
            pairwise
            and isinstance(translated, (list, tuple))
            and len(translated) == len(batch)
        ):
            for paragraph, piece in zip(batch, translated):
                paragraph.string = paragraph.text + str(piece)
            return
        last = batch[-1]
        last.string = last.text + self._merge_translation(translated)

    def _translate_document(self, item):
        """Translate every non-empty, non-numeric paragraph of one document item."""
        soup = bs(item.content, "html.parser")
        batch = []
        for p in soup.find_all("p"):
            if not p.text or p.text.isdigit():
                continue
            batch.append(p)
            if len(batch) == self.batch_size:
                self._translate_batch(batch)
                batch = []
        # Process any remaining paragraphs in the last, incomplete batch
        # before the document is serialised.
        if batch:
            self._translate_batch(batch, pairwise=True)
        item.content = soup.prettify().encode()

    def _output_name(self):
        """Return the output path: the input path with ``_translated`` before the extension."""
        # splitext only strips the final extension, so directories or book
        # names containing dots (``./books/my.book.epub``) stay intact.
        stem = os.path.splitext(self.epub_name)[0]
        return f"{stem}_translated.epub"

    def translate_book(self):
        """Translate all document items and write ``<name>_translated.epub``."""
        new_book = epub.EpubBook()
        new_book.metadata = self.origin_book.metadata
        new_book.spine = self.origin_book.spine
        new_book.toc = self.origin_book.toc
        for item in self.origin_book.get_items():
            # ITEM_DOCUMENT is the named constant for the value 9 that marks
            # (X)HTML content documents.
            if item.get_type() == ebooklib.ITEM_DOCUMENT:
                self._translate_document(item)
            # Every item (images, styles, ...) is carried over to the new book.
            new_book.add_item(item)
        epub.write_epub(self._output_name(), new_book, {})


def main():
    """Parse the command line and translate the requested book."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--book_name",
        dest="book_name",
        type=str,
        # Without a book name there is nothing to do; fail in argparse rather
        # than with an AttributeError on ``None.endswith``.
        required=True,
        help="your epub book name",
    )
    parser.add_argument(
        "--openai_key",
        dest="openai_key",
        type=str,
        default="",
        help="openai api key",
    )
    parser.add_argument(
        "--batch_size",
        dest="batch_size",
        type=int,
        default=2,
        choices=[2, 3, 4, 5],
        help="the batch size paragraph for translation , max is 5",
    )
    options = parser.parse_args()
    if not options.openai_key:
        parser.error("Need openai API key, please google how to")
    if not options.book_name.endswith(".epub"):
        parser.error("please use epub file")
    book = BEPUB(options.book_name, options.openai_key, options.batch_size)
    book.translate_book()


if __name__ == "__main__":
    main()

# --- surrounding repository-dump text from the original lines, kept verbatim ---
# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# 1 | bs4 ==0.0.1
# 2 | openai ==0.27.0
# 3 | ebooklib ==0.18
# 4 | rich ==13.3.1
# 5 |
# 6 |
# --------------------------------------------------------------------------------
# /test_books/.DS_Store:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/zengzzzzz/translate-epub-book-by-openai/ee8491f902cf7af6d8d88cc0e1dd891c4f3654ae/test_books/.DS_Store
# --------------------------------------------------------------------------------
# /test_books/lemo.epub:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/zengzzzzz/translate-epub-book-by-openai/ee8491f902cf7af6d8d88cc0e1dd891c4f3654ae/test_books/lemo.epub
# --------------------------------------------------------------------------------
# /test_books/lemo_translated.epub:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/zengzzzzz/translate-epub-book-by-openai/ee8491f902cf7af6d8d88cc0e1dd891c4f3654ae/test_books/lemo_translated.epub
--------------------------------------------------------------------------------