├── .gitignore ├── README.md ├── demo ├── example.en.duck.md ├── example.en.md ├── example.zh-CN.md └── example.zh-TW.md ├── main.py └── translate.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPT-Translator 2 | 3 | Translate markdown files using OpenAI's GPT-4 model. 4 | 5 | # Usage 6 | 7 | 1) First, clone the repo 8 | 9 | ```bash 10 | git clone https://github.com/daqing/gpt-translator.git 11 | ``` 12 | 13 | 2) Set `OPENAI_API_KEY` environment variable 14 | 15 | ```bash 16 | export OPENAI_API_KEY='sk-zPByWXXXXXXXXXXXXXXXXXXXXXXX' 17 | ``` 18 | 19 | 3) Run `main.py` 20 | 21 | ```bash 22 | python main.py --base-lang en --target-lang zh-CN --input ./demo/example.en.md --output ./example.zh-CN.md 23 | ``` 24 | 25 | This will translate `./demo/example.en.md` into simplified chinese and save to `./example.zh-CN.md`. 26 | 27 | Remember to set the `OPENAI_API_KEY` environment variable in order to use the GPT-4 model. 28 | -------------------------------------------------------------------------------- /demo/example.en.duck.md: -------------------------------------------------------------------------------- 1 | # Hello, Big Yellow Duck 2 | 3 | The big yellow duck is a form of art. -------------------------------------------------------------------------------- /demo/example.en.md: -------------------------------------------------------------------------------- 1 | # Welcome 2 | 3 | Hello, **GPT** Translator! 4 | -------------------------------------------------------------------------------- /demo/example.zh-CN.md: -------------------------------------------------------------------------------- 1 | # 你好,大黄鸭 2 | 3 | 大黄鸭是一种艺术。 4 | -------------------------------------------------------------------------------- /demo/example.zh-TW.md: -------------------------------------------------------------------------------- 1 | # 歡迎 2 | 3 | 你好,**GPT** 翻譯器! -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import os 3 | from translate import translate_full 4 | import argparse 5 | 6 | lang_dict = { 7 | "zh-CN": "chinese_simplified", 8 | "zh-TW": "chinese_traditional", 9 | "ru": "russian", 10 | "de": "german", 11 | "es": "spanish", 12 | "fr": "french", 13 | "ja": "japanese", 14 | "pt": "portuguese", 15 | "vi": "vietnamese", 16 | "ar": "arabic", 17 | "en": "english", 18 | } 19 | 20 | def translate_file(input_path, output_path, base_lang, target_lang, client): 21 | with open(input_path, "r") as f: 22 | file_content = f.read() 23 | 24 | print(f"Translating file {input_path} to {target_lang}...") 25 | translated_text = translate_full(file_content, base_lang, target_lang, client) 26 | 27 | with open(output_path, "w") as f: 28 | f.write(translated_text) 29 | 30 | def main(): 31 | # Check if OPENAI_API_KEY is set in the environment 32 | if "OPENAI_API_KEY" not in os.environ: 33 | print("Please set the OPENAI_API_KEY environment variable.") 34 | exit(1) 35 | 36 | client = openai.OpenAI() 37 | 38 | # Create the parser 39 | parser = argparse.ArgumentParser(description="Translate markdown files using OpenAI's GPT-4 model.") 40 | 41 | # Add the arguments 42 | parser.add_argument('--base-lang', metavar='base_lang', default="en", type=str, help='the base language to translate from') 43 | parser.add_argument('--target-lang', metavar='target_lang', type=str, help='the target language to translate to') 44 | 45 | parser.add_argument('--input', metavar='input file', type=str, help='path to the input file') 46 | parser.add_argument('--output', metavar='output file', type=str, help='path to the output file') 47 | 48 | # Parse the arguments 49 | args = parser.parse_args() 50 | 51 | if args.target_lang is None: 52 | print("Please specify the target language to translate to") 53 | exit(1) 54 | 55 | if args.base_lang not in lang_dict: 56 | print(f"Base language {args.base_lang} not supported. Supported languages: {', '.join(lang_dict.keys())}") 57 | exit(1) 58 | 59 | if args.target_lang not in lang_dict: 60 | print(f"Target language {args.target_lang} not supported. Supported languages: {', '.join(lang_dict.keys())}") 61 | exit(1) 62 | 63 | if args.input is None: 64 | print("Please specify the path to the input file.") 65 | exit(1) 66 | 67 | if args.output is None: 68 | print("Please specify the path to the output file.") 69 | exit(1) 70 | 71 | translate_file(args.input, args.output, lang_dict[args.base_lang], lang_dict[args.target_lang], client) 72 | 73 | # Run the main function 74 | main() 75 | -------------------------------------------------------------------------------- /translate.py: -------------------------------------------------------------------------------- 1 | split_string = "\n\n" 2 | format = "markdown" 3 | 4 | def translate_full(full_text, input_lang, target_lang, client): 5 | system_prompt = f"You are a translation tool. You receive a text snippet from a file in the following format:\n{format}\n\n. The file is also written in the language:\n{input_lang}\n\n. As a translation tool, you will solely return the same string in {target_lang} without losing or amending the original formatting. Your translations are accurate, aiming not to deviate from the original structure, content, writing style and tone." 6 | code_prompt = "Make sure don't translate code blocks in markdown format, and don't translate image paths in :src field, and do translate the alt field from img tag" 7 | 8 | messages = [ 9 | {"role": "system", "content": system_prompt}, 10 | {"role": "system", "content": code_prompt}, 11 | {"role": "user", "content": full_text} 12 | ] 13 | 14 | completion = client.chat.completions.create( 15 | model="gpt-4o", 16 | messages=messages 17 | ) 18 | 19 | return completion.choices[0].message.content 20 | --------------------------------------------------------------------------------