├── .dockerignore ├── .gitignore ├── All_Translation.py ├── Bing_translation.py ├── Deepl_Translation.py ├── Dockerfile ├── EbookTranslator ├── EbookTranslator │ ├── All_Translation.py │ ├── Deepl_Translation.py │ ├── LLMS_translation.py │ ├── YouDao_translation.py │ ├── __init__.py │ ├── cli.py │ ├── convert2pdf.py │ ├── load_config.py │ └── main_function.py ├── LICENSE ├── README.md ├── requirements.txt └── setup.py ├── LICENSE ├── LLMS_translation.py ├── OldMain.py ├── README.md ├── README_CN.md ├── README_JA.md ├── README_KO.md ├── README_TW.md ├── Subset_Font.py ├── YouDao_translation.py ├── app.py ├── build.py ├── config.json ├── convert2pdf.py ├── demo.mp4 ├── demo.pdf ├── demo_zh.pdf ├── docker-compose.yml ├── download_model.py ├── get_new_blocks.py ├── icon.ico ├── index.html ├── languagedetect.py ├── load_config.py ├── main.py ├── merge_pdf.py ├── pdf_thumbnail.py ├── pdfviewer.html ├── pdfviewer2.html ├── recent.json ├── requirements.txt ├── static ├── 1.js ├── 2.js ├── 3.js ├── 4.js ├── Figure_1.png ├── Line-model-demo.pdf ├── Line-model-demo_zh.pdf ├── PolyglotPDF.png ├── colorspace_issue_sample.pdf ├── demo.gif ├── demo.mp4 ├── i18n.js ├── main.css ├── merged_pdf │ └── 2403.20127v1_auto_zh.pdf ├── original │ ├── 2403.20127v1.pdf │ ├── 2501.05450v1.pdf │ └── demo.pdf ├── page1.png ├── page2.jpeg ├── page3.png ├── page4.png ├── setup.css ├── setup.js ├── target │ ├── 2403.20127v1_zh.pdf │ └── 2501.05450v1_zh.pdf └── thumbnail │ ├── ...txt │ ├── 2403.20127v1.png │ ├── 2501.05450v1.png │ ├── 2g2.png │ ├── 32g2.png │ ├── High-precision real-time autonomous driving targetdetection based on YOLOv8.png │ ├── g2.png │ ├── g55.png │ ├── g6.png │ ├── gl1.png │ ├── line.png │ ├── m2.png │ └── zz1.png ├── temp └── fonts │ └── zh_subset.ttf └── update_recent.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # 忽略 Git 相关文件 2 | .git 3 | .gitignore 4 | 5 | # 忽略 Python 缓存和编译文件 6 | __pycache__/ 7 | **/__pycache__/ 8 | 
def retry_on_error(max_retries=2, delay=1):
    """Build a decorator that retries a failing callable before giving up.

    The decorator works for plain functions and for coroutine functions; the
    matching wrapper is selected when the decorator is applied.

    Args:
        max_retries: Number of additional attempts made after a failure.
        delay: Seconds to wait between two attempts.

    Returns:
        A decorator. The decorated callable returns whatever the wrapped
        callable returns, or ``None`` once every attempt has raised.
    """
    def report_failure(error, failures):
        """Print the failure and tell the caller whether to try again."""
        if failures <= max_retries:
            print(f"Error occurred: {str(error)}")
            print(f"Retrying... (Attempt {failures} of {max_retries})")
            return True
        print(f"Max retries reached. Skipping... Final error: {str(error)}")
        return False

    def decorator(func):
        @wraps(func)
        def wrapper_sync(*args, **kwargs):
            failures = 0
            while failures <= max_retries:
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    failures += 1
                    if not report_failure(e, failures):
                        return None
                    time.sleep(delay)
            return None

        # wraps() keeps __name__/__doc__ of the coroutine function, exactly as
        # the synchronous wrapper already does.
        @wraps(func)
        async def wrapper_async(*args, **kwargs):
            failures = 0
            while failures <= max_retries:
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    failures += 1
                    if not report_failure(e, failures):
                        return None
                    # Non-blocking wait so the event loop stays responsive.
                    await asyncio.sleep(delay)
            return None

        return wrapper_async if asyncio.iscoroutinefunction(func) else wrapper_sync
    return decorator
target_language 101 | self.original_lang = original_language 102 | self.translation_type = translation_type 103 | # 确保队列处理器已启动 104 | ensure_queue_processor() 105 | 106 | def run_async(self, coro): 107 | # 创建结果容器 108 | result_holder = {'result': None} 109 | 110 | # 将协程包装为任务并放入队列 111 | translation_queue.put((self._run_coro_with_semaphore, [coro], {}, result_holder)) 112 | 113 | # 等待任务完成 114 | translation_queue.join() 115 | 116 | # 返回结果 117 | return result_holder['result'] 118 | 119 | async def _run_coro_with_semaphore(self, coro): 120 | # 使用信号量确保串行执行 121 | async with translation_semaphore: 122 | return await coro 123 | 124 | def translation(self): 125 | print('翻译api', self.translation_type) 126 | if self.translation_type == 'deepl': 127 | translated_list = self.deepl_translation() 128 | elif self.translation_type == 'youdao': 129 | translated_list = self.youdao_translation() 130 | elif self.translation_type == 'bing': 131 | translated_list = self.bing_translation() 132 | elif self.translation_type == 'openai': 133 | translated_list = self.run_async(self.openai_translation()) 134 | elif self.translation_type == 'deepseek': 135 | translated_list = self.run_async(self.deepseek_translation()) 136 | elif self.translation_type == 'Doubao': 137 | translated_list = self.run_async(self.Doubao_translation()) 138 | elif self.translation_type == 'Qwen': 139 | translated_list = self.run_async(self.Qwen_translation()) 140 | elif self.translation_type == 'Grok': 141 | translated_list = self.run_async(self.Grok_translation()) 142 | elif self.translation_type == 'ThirdParty': 143 | translated_list = self.run_async(self.ThirdParty_translation()) 144 | elif self.translation_type == 'GLM': 145 | translated_list = self.run_async(self.GLM_translation()) 146 | else: 147 | translated_list = self.deepl_translation() 148 | 149 | return translated_list 150 | 151 | @retry_on_error() 152 | def deepl_translation(self): 153 | translated_texts = dt.translate( 154 | texts=self.original_text, 155 | 
original_lang=self.original_lang, 156 | target_lang=self.target_language 157 | ) 158 | return translated_texts 159 | 160 | @retry_on_error() 161 | def youdao_translation(self): 162 | translated_texts = yt.translate( 163 | texts=self.original_text, 164 | original_lang=self.original_lang, 165 | target_lang=self.target_language 166 | ) 167 | return translated_texts 168 | 169 | @retry_on_error() 170 | def bing_translation(self): 171 | try: 172 | translated_texts = bt.translate( 173 | texts=self.original_text, 174 | original_lang=self.original_lang, 175 | target_lang=self.target_language 176 | ) 177 | print(f"Bing translation completed: {len(translated_texts)} texts processed") 178 | return translated_texts 179 | except Exception as e: 180 | print(f"Error in Bing translation: {e}") 181 | return [""] * len(self.original_text) 182 | 183 | @retry_on_error() 184 | async def openai_translation(self): 185 | translator = lt.Openai_translation() 186 | translated_texts = await translator.translate( 187 | texts=self.original_text, 188 | original_lang=self.original_lang, 189 | target_lang=self.target_language 190 | ) 191 | return translated_texts 192 | 193 | @retry_on_error() 194 | async def deepseek_translation(self): 195 | translator = lt.Deepseek_translation() 196 | translated_texts = await translator.translate( 197 | texts=self.original_text, 198 | original_lang=self.original_lang, 199 | target_lang=self.target_language 200 | ) 201 | return translated_texts 202 | 203 | @retry_on_error() 204 | async def Doubao_translation(self): 205 | translator = lt.Doubao_translation() 206 | translated_texts = await translator.translate( 207 | texts=self.original_text, 208 | original_lang=self.original_lang, 209 | target_lang=self.target_language 210 | ) 211 | return translated_texts 212 | 213 | @retry_on_error() 214 | async def Qwen_translation(self): 215 | translator = lt.Qwen_translation() 216 | translated_texts = await translator.translate( 217 | texts=self.original_text, 218 | 
def split_text_to_fit_token_limit(text, encoder, index_text, max_length=280):
    """Split ``text`` into consecutive pieces of at most ``max_length`` tokens.

    Pieces are cut in front of a boundary token (sentence punctuation or a
    whitespace-only token) whenever one lies within the limit; otherwise the
    piece is cut exactly at the limit. No token is ever dropped, so decoding
    the pieces in order covers the whole token sequence.

    Args:
        text: The string to split.
        encoder: Object offering ``encode(str)`` (returns a token list) and
            ``decode(token_list)`` (returns a str).
        index_text: Caller-supplied index, copied unchanged into every piece.
        max_length: Maximum number of tokens per piece.

    Returns:
        A list of ``(piece_text, token_count, index_text)`` tuples in order.
        Text that already fits is returned as a single, unmodified piece.

    Raises:
        ValueError: If the text needs splitting and ``max_length`` is < 1.
    """
    tokens = encoder.encode(text)
    total = len(tokens)
    if total <= max_length:
        return [(text, total, index_text)]
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # ASCII and full-width sentence enders.
    sentence_marks = {'.', '?', '!', '。', '?', '!'}

    def is_boundary(token):
        piece = encoder.decode([token])
        stripped = piece.strip()
        # A whitespace-only token strips to '' and must be tested explicitly;
        # comparing the stripped text with ' ' can never succeed.
        return stripped in sentence_marks or (piece != '' and stripped == '')

    boundaries = [i for i, token in enumerate(tokens) if is_boundary(token)]

    parts = []
    start = 0
    cursor = 0  # next entry of `boundaries` that has not been examined yet
    while total - start > max_length:
        limit = start + max_length
        cut = limit  # hard cut when no boundary is usable
        while cursor < len(boundaries) and boundaries[cursor] <= limit:
            # A boundary sitting exactly at `start` would produce an empty
            # piece, so only later ones qualify; the last one seen wins.
            if boundaries[cursor] > start:
                cut = boundaries[cursor]
            cursor += 1
        chunk = tokens[start:cut]
        parts.append((encoder.decode(chunk), len(chunk), index_text))
        start = cut

    # Whatever remains fits within the limit and is always kept.
    tail = tokens[start:]
    parts.append((encoder.decode(tail), len(tail), index_text))
    return parts
| # 335 | # def batch_translate(processed_texts, split_points,original_language,target_language): 336 | # translated_texts = [] 337 | # index_mapping = {} 338 | # 339 | # start_index = 0 340 | # 341 | # for split_point in split_points: 342 | # batch = processed_texts[start_index:split_point + 1] 343 | # batch_texts = [text for text, _, _ in batch] 344 | # translated_batch = translate(texts=batch_texts,original_language=original_language,target_language=target_language) 345 | # 346 | # for translated_text, (_, _, int_value) in zip(translated_batch, batch): 347 | # if int_value in index_mapping: 348 | # translated_texts[index_mapping[int_value]] += " " + translated_text 349 | # else: 350 | # index_mapping[int_value] = len(translated_texts) 351 | # translated_texts.append(translated_text) 352 | # 353 | # start_index = split_point + 1 354 | # 355 | # return translated_texts 356 | # 357 | -------------------------------------------------------------------------------- /Bing_translation.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import time 4 | import threading 5 | import asyncio 6 | import aiohttp 7 | from concurrent.futures import ThreadPoolExecutor 8 | 9 | def translate(texts, original_lang, target_lang): 10 | """ 11 | 使用Bing翻译API翻译文本列表 - 高性能实现 12 | 13 | Args: 14 | texts: 要翻译的文本列表 15 | original_lang: 源语言代码 16 | target_lang: 目标语言代码 17 | 18 | Returns: 19 | 翻译后的文本列表 20 | """ 21 | # 确保输入文本为列表格式 22 | if isinstance(texts, str): 23 | texts = [texts] 24 | 25 | # 如果文本量小,使用简单的并发线程池 26 | if len(texts) <= 20: 27 | return translate_with_threadpool(texts, original_lang, target_lang) 28 | 29 | # 对于大量文本,使用异步IO处理 30 | return translate_with_asyncio(texts, original_lang, target_lang) 31 | 32 | 33 | def translate_with_threadpool(texts, original_lang, target_lang, max_workers=5): 34 | """使用线程池并发翻译小批量文本""" 35 | translator = BingTranslator(lang_in=original_lang, lang_out=target_lang) 36 | translated_texts = [""] 
def split_text_intelligently(text, max_length=1000):
    """Split ``text`` into chunks of at most ``max_length`` characters.

    Each chunk ends at the best break point found inside its window, tried
    in this order: line break, sentence end, comma/semicolon, plain space.
    A break point is only accepted when it lies far enough into the window
    (50%, 50%, 70% and 80% of ``max_length`` respectively) so chunks do not
    become needlessly short. Without any usable break point the chunk is cut
    at exactly ``max_length`` characters.

    Args:
        text: The string to split.
        max_length: Maximum number of characters per chunk.

    Returns:
        The chunks in order; joining them reproduces ``text``. Text that
        already fits is returned as a one-element list.

    Raises:
        ValueError: If the text needs splitting and ``max_length`` is < 1.
    """
    if len(text) <= max_length:
        return [text]
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # (separators, minimum fraction of the window the break must lie beyond)
    tiers = (
        (('\n',), 0.5),
        (('. ', '。', '?', '!', '? ', '! '), 0.5),
        ((', ', ',', '; ', ';'), 0.7),
    )

    def find_break(start, end):
        """Return the index at which the chunk starting at ``start`` ends."""
        for separators, fraction in tiers:
            floor = start + max_length * fraction
            for sep in separators:
                pos = text.rfind(sep, start, end)
                if pos != -1 and pos > floor:
                    return pos + len(sep)
        # str.rfind only accepts integer bounds, so the 80% mark is truncated
        # to an int before being used as the search start.
        pos = text.rfind(' ', start + int(max_length * 0.8), end)
        return pos + 1 if pos != -1 else end

    parts = []
    start = 0
    length = len(text)
    while length - start > max_length:
        end = find_break(start, start + max_length)
        parts.append(text[start:end])
        start = end
    parts.append(text[start:])
    return parts
re.findall(r"\"ig\":\"(.*?)\"", response.text)[0] 165 | iid = re.findall(r"data-iid=\"(.*?)\"", response.text)[-1] 166 | key, token = re.findall( 167 | r"params_AbusePreventionHelper\s=\s\[(.*?),\"(.*?)\",", response.text 168 | )[0] 169 | 170 | # 更新缓存 171 | result = (url, ig, iid, key, token) 172 | with self._cache_lock: 173 | BingTranslator._sid_cache = result 174 | BingTranslator._sid_timestamp = current_time 175 | 176 | return result 177 | 178 | def do_translate(self, text): 179 | """执行翻译""" 180 | if not text or not text.strip(): 181 | return "" 182 | 183 | # 如果文本超过1000字符,分段翻译 184 | if len(text) > 1000: 185 | parts = split_text_intelligently(text) 186 | translated_parts = [] 187 | 188 | for part in parts: 189 | url, ig, iid, key, token = self.find_sid() 190 | response = self.session.post( 191 | f"{url}ttranslatev3?IG={ig}&IID={iid}", 192 | data={ 193 | "fromLang": self.lang_in, 194 | "to": self.lang_out, 195 | "text": part[:1000], # 确保不超过1000 196 | "token": token, 197 | "key": key, 198 | }, 199 | headers=self.headers, 200 | ) 201 | response.raise_for_status() 202 | translated_parts.append(response.json()[0]["translations"][0]["text"]) 203 | 204 | return ''.join(translated_parts) 205 | 206 | url, ig, iid, key, token = self.find_sid() 207 | response = self.session.post( 208 | f"{url}ttranslatev3?IG={ig}&IID={iid}", 209 | data={ 210 | "fromLang": self.lang_in, 211 | "to": self.lang_out, 212 | "text": text, 213 | "token": token, 214 | "key": key, 215 | }, 216 | headers=self.headers, 217 | ) 218 | response.raise_for_status() 219 | return response.json()[0]["translations"][0]["text"] 220 | 221 | 222 | class AsyncBingTranslator: 223 | """异步Bing翻译器实现""" 224 | lang_map = {"zh": "zh-Hans"} 225 | 226 | # 会话参数缓存 227 | _sid_cache = None 228 | _sid_timestamp = 0 229 | _sid_cache_ttl = 300 # 5分钟缓存有效期 230 | 231 | def __init__(self, lang_in, lang_out): 232 | self.lang_in = self.lang_map.get(lang_in, lang_in) 233 | self.lang_out = self.lang_map.get(lang_out, lang_out) 234 | 235 | 
if self.lang_in == "auto": 236 | self.lang_in = "auto-detect" 237 | 238 | self.headers = { 239 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0", 240 | } 241 | self.endpoint = "https://www.bing.com/translator" 242 | 243 | async def find_sid(self, session): 244 | """异步获取会话参数,带缓存""" 245 | current_time = time.time() 246 | 247 | # 检查缓存是否有效 248 | if (AsyncBingTranslator._sid_cache is not None and 249 | (current_time - AsyncBingTranslator._sid_timestamp) < AsyncBingTranslator._sid_cache_ttl): 250 | return AsyncBingTranslator._sid_cache 251 | 252 | # 缓存无效,异步获取新参数 253 | async with session.get(self.endpoint, headers=self.headers) as response: 254 | if response.status != 200: 255 | raise Exception(f"获取会话参数失败: HTTP {response.status}") 256 | 257 | text = await response.text() 258 | url = str(response.url)[:-10] 259 | ig = re.findall(r"\"ig\":\"(.*?)\"", text)[0] 260 | iid = re.findall(r"data-iid=\"(.*?)\"", text)[-1] 261 | key, token = re.findall( 262 | r"params_AbusePreventionHelper\s=\s\[(.*?),\"(.*?)\",", text 263 | )[0] 264 | 265 | # 更新缓存 266 | result = (url, ig, iid, key, token) 267 | AsyncBingTranslator._sid_cache = result 268 | AsyncBingTranslator._sid_timestamp = current_time 269 | 270 | return result 271 | 272 | async def translate_text(self, session, text): 273 | """翻译单个文本""" 274 | if not text or not text.strip(): 275 | return "" 276 | 277 | # 如果文本超过1000字符,分段翻译 278 | if len(text) > 1000: 279 | parts = split_text_intelligently(text) 280 | translated_parts = [] 281 | 282 | # 非递归异步处理每个文本块 283 | for part in parts: 284 | url, ig, iid, key, token = await self.find_sid(session) 285 | 286 | async with session.post( 287 | f"{url}ttranslatev3?IG={ig}&IID={iid}", 288 | data={ 289 | "fromLang": self.lang_in, 290 | "to": self.lang_out, 291 | "text": part[:1000], # 确保不超过1000 292 | "token": token, 293 | "key": key, 294 | }, 295 | headers=self.headers, 296 | ) as response: 297 | if 
response.status == 200: 298 | result = await response.json() 299 | translated_parts.append(result[0]["translations"][0]["text"]) 300 | else: 301 | print(f"翻译请求失败: HTTP {response.status}") 302 | translated_parts.append("") 303 | 304 | return ''.join(translated_parts) 305 | 306 | try: 307 | url, ig, iid, key, token = await self.find_sid(session) 308 | response = await session.post( 309 | f"{url}ttranslatev3?IG={ig}&IID={iid}", 310 | data={ 311 | "fromLang": self.lang_in, 312 | "to": self.lang_out, 313 | "text": text, 314 | "token": token, 315 | "key": key, 316 | }, 317 | headers=self.headers, 318 | ) 319 | if response.status == 200: 320 | result = await response.json() 321 | return result[0]["translations"][0]["text"] 322 | else: 323 | print(f"翻译请求失败: HTTP {response.status}") 324 | return "" 325 | except Exception as e: 326 | print(f"翻译过程中发生错误: {e}") 327 | print(f"原文: {text}") 328 | return "" 329 | 330 | async def translate_batch(self, texts, batch_size=10, max_concurrent=5): 331 | """批量翻译文本,控制并发数量和请求批次""" 332 | async with aiohttp.ClientSession() as session: 333 | results = [""] * len(texts) 334 | semaphore = asyncio.Semaphore(max_concurrent) 335 | 336 | async def translate_with_limit(index, text): 337 | retry_count = 0 338 | max_retries = 10 339 | backoff_time = 1.0 # 初始重试等待时间 340 | 341 | while retry_count < max_retries: 342 | try: 343 | async with semaphore: 344 | # 每批次间隔较小的延迟 345 | if index > 0 and index % batch_size == 0: 346 | await asyncio.sleep(0.1) 347 | 348 | 349 | translated = await self.translate_text(session, text) 350 | if translated: # 如果翻译成功 351 | results[index] = translated 352 | if retry_count > 0: # 如果是重试成功的 353 | print(f"第{index}个文本重试成功!") 354 | return 355 | except Exception as e: 356 | print(f"第{index}个文本翻译失败 (尝试 {retry_count+1}/{max_retries}): {e}") 357 | print(f"原文: {text}") 358 | 359 | # 如果到这里,说明需要重试 360 | retry_count += 1 361 | if retry_count < max_retries: 362 | print(f"将在{backoff_time}秒后重试...") 363 | await asyncio.sleep(backoff_time) 364 | 
backoff_time *= 2 # 指数退避策略 365 | else: 366 | print(f"已达到最大重试次数,翻译失败") 367 | results[index] = "" 368 | 369 | # 创建所有任务 370 | tasks = [ 371 | asyncio.create_task(translate_with_limit(i, text)) 372 | for i, text in enumerate(texts) 373 | ] 374 | 375 | # 等待所有任务完成 376 | await asyncio.gather(*tasks) 377 | return results 378 | 379 | 380 | # 测试代码 381 | if __name__ == "__main__": 382 | test_texts = ["Hello, world!", "How are you today?", "Python is amazing", "I love programming"] 383 | results = translate(test_texts, "en", "zh") 384 | 385 | for original, translated in zip(test_texts, results): 386 | print(f"Original: {original}") 387 | print(f"Translated: {translated}") 388 | print("-" * 30) -------------------------------------------------------------------------------- /Deepl_Translation.py: -------------------------------------------------------------------------------- 1 | import deepl 2 | import load_config 3 | def translate(texts,original_lang,target_lang): 4 | 5 | # 你的 DeepL 授权密钥 6 | 7 | 8 | # 获取指定服务的认证信息 9 | 10 | 11 | config = load_config.load_config() 12 | 13 | auth_key = config['translation_services']['deepl']['auth_key'] 14 | # print(auth_key) 15 | 16 | translator = deepl.Translator(auth_key) 17 | 18 | # 要翻译的文本列表 19 | 20 | 21 | # 翻译文本列表,目标语言设置为中文 22 | print(original_lang,target_lang) 23 | if original_lang == 'auto': 24 | results = translator.translate_text(texts, target_lang=target_lang) 25 | else: 26 | results = translator.translate_text(texts, source_lang=original_lang, target_lang=target_lang) 27 | 28 | 29 | # 初始化一个空列表来收集翻译结果 30 | translated_texts = [] 31 | 32 | # 遍历翻译结果,将它们添加到列表中 33 | for result in results: 34 | translated_texts.append(result.text) 35 | return translated_texts 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | # 1. 使用官方 Python 3.9 的精简版镜像作为基础 3 | FROM python:3.9-slim 4 | 5 | # 2. 
如果你需要一些系统库支持,可在此处安装 6 | # 比如安装 gcc、libssl-dev 等 (仅举例) 7 | # RUN apt-get update && apt-get install -y --no-install-recommends \ 8 | # gcc \ 9 | # libssl-dev \ 10 | # && rm -rf /var/lib/apt/lists/* 11 | 12 | # 3. 设置工作目录 13 | WORKDIR /app 14 | 15 | # 4. 将 requirements.txt 复制到容器内 16 | COPY requirements.txt /app/ 17 | 18 | # 5. 安装 Python 依赖包 19 | RUN pip install --no-cache-dir -r requirements.txt 20 | 21 | # 6. 复制项目源代码到容器内 22 | COPY . /app 23 | 24 | # 7. 暴露端口 12226(如你的项目需要此端口) 25 | EXPOSE 12226 26 | 27 | # 8. 容器启动时,默认执行 Python 脚本 28 | CMD ["python", "app.py"] 29 | -------------------------------------------------------------------------------- /EbookTranslator/EbookTranslator/All_Translation.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | from .import Deepl_Translation as dt 4 | from .import YouDao_translation as yt 5 | from .import LLMS_translation as lt 6 | import asyncio 7 | 8 | loop = asyncio.new_event_loop() 9 | asyncio.set_event_loop(loop) 10 | # # 11 | # Get the encoder of a specific model, assume gpt3.5, tiktoken is extremely fast, 12 | # and the error of this statistical token method is small and can be ignored 13 | 14 | 15 | class Online_translation: 16 | def __init__(self, original_language, target_language, translation_type, texts_to_process=[]): 17 | self.model_name = f"opus-mt-{original_language}-{target_language}" 18 | self.original_text = texts_to_process 19 | self.target_language = target_language 20 | self.original_lang = original_language 21 | self.translation_type = translation_type 22 | 23 | def run_async(self, coro): 24 | # 往往只要 run_until_complete(),不手动 close() 即可 25 | return loop.run_until_complete(coro) 26 | 27 | def translation(self): 28 | print('translation api',self.translation_type) 29 | if self.translation_type == 'deepl': 30 | translated_list = self.deepl_translation() 31 | elif self.translation_type == 'youdao': 32 | translated_list = self.youdao_translation() 33 | elif 
self.translation_type == 'bing': 34 | # 使用同步包装器运行异步函数 35 | translated_list = self.run_async(self.bing_translation()) 36 | elif self.translation_type == 'openai': 37 | # 使用同步包装器运行异步函数 38 | translated_list = self.run_async(self.openai_translation()) 39 | elif self.translation_type == 'deepseek': 40 | # 使用同步包装器运行异步函数 41 | translated_list = self.run_async(self.deepseek_translation()) 42 | elif self.translation_type == 'Doubao': 43 | # 使用同步包装器运行异步函数 44 | translated_list = self.run_async(self.Doubao_translation()) 45 | elif self.translation_type == 'Qwen': 46 | # 使用同步包装器运行异步函数 47 | translated_list = self.run_async(self.Qwen_translation()) 48 | elif self.translation_type == 'Grok': 49 | # 使用同步包装器运行异步函数 50 | translated_list = self.run_async(self.Grok_translation()) 51 | elif self.translation_type == 'ThirdParty': 52 | # 使用同步包装器运行异步函数 53 | translated_list = self.run_async(self.ThirdParty_translation()) 54 | elif self.translation_type == 'GLM': 55 | # 使用同步包装器运行异步函数 56 | translated_list = self.run_async(self.GLM_translation()) 57 | else: 58 | translated_list = self.deepl_translation() 59 | 60 | return translated_list 61 | 62 | def deepl_translation(self): 63 | 64 | translated_texts = dt.translate(texts=self.original_text,original_lang=self.original_lang,target_lang=self.target_language) 65 | 66 | return translated_texts 67 | 68 | 69 | def youdao_translation(self): 70 | 71 | translated_texts = yt.translate(texts=self.original_text,original_lang=self.original_lang,target_lang=self.target_language) 72 | 73 | return translated_texts 74 | 75 | 76 | 77 | async def openai_translation(self): 78 | translator = lt.Openai_translation() 79 | translated_texts = await translator.translate( 80 | texts=self.original_text, 81 | original_lang=self.original_lang, 82 | target_lang=self.target_language 83 | ) 84 | return translated_texts 85 | 86 | async def deepseek_translation(self): 87 | translator = lt.Deepseek_translation() 88 | translated_texts = await translator.translate( 89 | 
texts=self.original_text, 90 | original_lang=self.original_lang, 91 | target_lang=self.target_language 92 | ) 93 | return translated_texts 94 | async def Doubao_translation(self): 95 | translator = lt.Doubao_translation() 96 | translated_texts = await translator.translate( 97 | texts=self.original_text, 98 | original_lang=self.original_lang, 99 | target_lang=self.target_language 100 | ) 101 | return translated_texts 102 | async def Qwen_translation(self): 103 | translator = lt.Qwen_translation() 104 | translated_texts = await translator.translate( 105 | texts=self.original_text, 106 | original_lang=self.original_lang, 107 | target_lang=self.target_language 108 | ) 109 | return translated_texts 110 | async def Grok_translation(self): 111 | translator = lt.Grok_translation() 112 | try: 113 | translated_texts = await translator.translate( 114 | texts=self.original_text, 115 | original_lang=self.original_lang, 116 | target_lang=self.target_language 117 | ) 118 | print(f"Grok translation completed: {len(translated_texts)} texts processed") 119 | return translated_texts 120 | except Exception as e: 121 | print(f"Error in Grok translation: {e}") 122 | return [""] * len(self.original_text) 123 | 124 | async def ThirdParty_translation(self): 125 | translator = lt.ThirdParty_translation() 126 | try: 127 | translated_texts = await translator.translate( 128 | texts=self.original_text, 129 | original_lang=self.original_lang, 130 | target_lang=self.target_language 131 | ) 132 | print(f"ThirdParty translation completed: {len(translated_texts)} texts processed") 133 | return translated_texts 134 | except Exception as e: 135 | print(f"Error in ThirdParty translation: {e}") 136 | return [""] * len(self.original_text) 137 | 138 | async def GLM_translation(self): 139 | translator = lt.GLM_translation() 140 | try: 141 | translated_texts = await translator.translate( 142 | texts=self.original_text, 143 | original_lang=self.original_lang, 144 | target_lang=self.target_language 145 | ) 
146 | print(f"GLM translation completed: {len(translated_texts)} texts processed") 147 | return translated_texts 148 | except Exception as e: 149 | print(f"Error in GLM translation: {e}") 150 | return [""] * len(self.original_text) 151 | 152 | async def bing_translation(self): 153 | translator = lt.Bing_translation() 154 | try: 155 | translated_texts = await translator.translate( 156 | texts=self.original_text, 157 | original_lang=self.original_lang, 158 | target_lang=self.target_language 159 | ) 160 | print(f"Bing translation completed: {len(translated_texts)} texts processed") 161 | return translated_texts 162 | except Exception as e: 163 | print(f"Error in Bing translation: {e}") 164 | return [""] * len(self.original_text) 165 | 166 | 167 | t = time.time() 168 | def split_text_to_fit_token_limit(text, encoder, index_text, max_length=280): 169 | tokens = encoder.encode(text) 170 | if len(tokens) <= max_length: 171 | return [(text, len(tokens), index_text)] # Return text along with its token count and original index 返回文本及其标记计数和原始索引 172 | 173 | # Pre-calculate possible split points (spaces, periods, etc.) 
174 | split_points = [i for i, token in enumerate(tokens) if encoder.decode([token]).strip() in [' ', '.', '?', '!','!','?','。']] 175 | parts = [] 176 | last_split = 0 177 | for i, point in enumerate(split_points + [len(tokens)]): # Ensure the last segment is included 178 | if point - last_split > max_length: 179 | part_tokens = tokens[last_split:split_points[i - 1]] 180 | parts.append((encoder.decode(part_tokens), len(part_tokens), index_text)) 181 | last_split = split_points[i - 1] 182 | elif i == len(split_points): # Handle the last part 183 | part_tokens = tokens[last_split:] 184 | parts.append((encoder.decode(part_tokens), len(part_tokens), index_text)) 185 | 186 | return parts 187 | 188 | def process_texts(texts, encoder): 189 | processed_texts = [] 190 | for i, text in enumerate(texts): 191 | sub_texts = split_text_to_fit_token_limit(text, encoder, i) 192 | processed_texts.extend(sub_texts) 193 | return processed_texts 194 | 195 | 196 | 197 | def calculate_split_points(processed_texts, max_tokens=425): 198 | split_points = [] # 存储划分点的索引 199 | current_tokens = 0 # 当前累积的token数 200 | 201 | for i in range(len(processed_texts) - 1): # 遍历到倒数第二个元素 202 | current_tokens = processed_texts[i][1] 203 | next_tokens = processed_texts[i + 1][1] 204 | 205 | # 如果当前元素和下一个元素的token数之和超过了限制 206 | if current_tokens + next_tokens > max_tokens: 207 | split_points.append(i) # 当前元素作为一个划分点 208 | # 注意:这里不需要重置 current_tokens,因为每次循环都是新的一对元素 209 | 210 | # 最后一个元素总是一个划分点,因为它后面没有元素与之相邻 211 | split_points.append(len(processed_texts) - 1) 212 | 213 | return split_points 214 | 215 | 216 | def translate(texts,original_language,target_language): 217 | # 这里仅返回相同的文本列表作为示例,实际中应返回翻译后的文本 218 | from transformers import pipeline, AutoTokenizer 219 | 220 | model_name = f"./opus-mt-{original_language}-{target_language}" # 请替换为实际路径 221 | # 创建翻译管道,指定本地模型路径 222 | pipe = pipeline("translation", model=model_name) 223 | # 获取tokenizer,指定本地模型路径 224 | tokenizer = AutoTokenizer.from_pretrained(model_name) 225 | 226 
| result = pipe(texts) 227 | 228 | 229 | # 提取值并组合成新的列表 230 | result_values = [d['translation_text'] for d in result] 231 | 232 | return result_values 233 | 234 | 235 | 236 | def batch_translate(processed_texts, split_points,original_language,target_language): 237 | translated_texts = [] # 存储翻译后的文本的列表 238 | index_mapping = {} # 存储每个int_value对应在translated_texts中的索引 239 | 240 | start_index = 0 # 当前批次的起始索引 241 | 242 | # 遍历划分点,按批次翻译文本 243 | for split_point in split_points: 244 | # 提取当前批次的文本(不包括划分点的下一个元素) 245 | batch = processed_texts[start_index:split_point + 1] 246 | batch_texts = [text for text, _, _ in batch] 247 | # 翻译函数 248 | translated_batch = translate(texts=batch_texts,original_language=original_language,target_language=target_language) 249 | 250 | # 遍历当前批次的翻译结果 251 | for translated_text, (_, _, int_value) in zip(translated_batch, batch): 252 | if int_value in index_mapping: 253 | # 如果键已存在,将新的翻译文本与原有的值拼接 254 | translated_texts[index_mapping[int_value]] += " " + translated_text 255 | else: 256 | # 如果键不存在,直接添加到列表,并记录其索引 257 | index_mapping[int_value] = len(translated_texts) 258 | translated_texts.append(translated_text) 259 | 260 | # 更新下一批次的起始索引 261 | start_index = split_point + 1 262 | 263 | return translated_texts 264 | 265 | -------------------------------------------------------------------------------- /EbookTranslator/EbookTranslator/Deepl_Translation.py: -------------------------------------------------------------------------------- 1 | import deepl 2 | from .import load_config 3 | def translate(texts,original_lang,target_lang): 4 | 5 | # 你的 DeepL 授权密钥 6 | 7 | 8 | # 获取指定服务的认证信息 9 | 10 | 11 | config = load_config.load_config() 12 | 13 | auth_key = config['translation_services']['deepl']['auth_key'] 14 | # print(auth_key) 15 | 16 | translator = deepl.Translator(auth_key) 17 | 18 | # 要翻译的文本列表 19 | 20 | 21 | # 翻译文本列表,目标语言设置为中文 22 | print(original_lang,target_lang) 23 | if original_lang == 'auto': 24 | results = translator.translate_text(texts, 
target_lang=target_lang) 25 | else: 26 | results = translator.translate_text(texts, source_lang=original_lang, target_lang=target_lang) 27 | 28 | 29 | # 初始化一个空列表来收集翻译结果 30 | translated_texts = [] 31 | 32 | # 遍历翻译结果,将它们添加到列表中 33 | for result in results: 34 | translated_texts.append(result.text) 35 | return translated_texts 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /EbookTranslator/EbookTranslator/YouDao_translation.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import requests 3 | import hashlib 4 | import time 5 | import json 6 | 7 | 8 | def translate(texts,original_lang, target_lang): 9 | """ 10 | 有道翻译API接口 11 | 12 | 参数: 13 | texts: list, 要翻译的文本列表 14 | target_lang: str, 目标语言代码 15 | credentials: dict, 包含 app_key 和 app_secret 的字典 16 | 17 | 返回: 18 | list: 翻译后的文本列表 19 | """ 20 | YOUDAO_URL = 'https://openapi.youdao.com/v2/api' 21 | 22 | with open("config.json", 'r', encoding='utf-8') as f: 23 | config = json.load(f) 24 | 25 | # 获取指定服务的认证信息 26 | if target_lang == 'zh': 27 | target_lang='zh-CHS' 28 | service_name = "youdao" 29 | credentials = config['translation_services'].get(service_name) 30 | if not credentials: 31 | raise ValueError(f"Translation service '{service_name}' not found in config") 32 | 33 | 34 | def encrypt(sign_str): 35 | hash_algorithm = hashlib.sha256() 36 | hash_algorithm.update(sign_str.encode('utf-8')) 37 | return hash_algorithm.hexdigest() 38 | 39 | def truncate(q): 40 | if q is None: 41 | return None 42 | size = len(q) 43 | return q if size <= 20 else q[0:10] + str(size) + q[size - 10:size] 44 | 45 | def do_request(data): 46 | headers = {'Content-Type': 'application/x-www-form-urlencoded'} 47 | return requests.post(YOUDAO_URL, data=data, headers=headers) 48 | 49 | try: 50 | # 确保输入文本为列表格式 51 | if isinstance(texts, str): 52 | texts = [texts] 53 | 54 | print(type(texts)) 55 | 56 | # 准备请求数据 57 | data = { 58 | 'from': original_lang, 59 
| 'to': target_lang, 60 | 'signType': 'v3', 61 | 'curtime': str(int(time.time())), 62 | 'appKey': credentials['app_key'], 63 | 'q': texts, 64 | 'salt': str(uuid.uuid1()), 65 | 'vocabId': "您的用户词表ID" 66 | } 67 | 68 | # 生成签名 69 | sign_str = (credentials['app_key'] + 70 | truncate(''.join(texts)) + 71 | data['salt'] + 72 | data['curtime'] + 73 | credentials['app_secret']) 74 | data['sign'] = encrypt(sign_str) 75 | 76 | # 发送请求 77 | response = do_request(data) 78 | response_data = json.loads(response.content.decode("utf-8")) 79 | 80 | # 提取翻译结果 81 | translations = [result["translation"] for result in response_data["translateResults"]] 82 | print(translations) 83 | return translations 84 | 85 | except Exception as e: 86 | print(f"翻译出错: {str(e)}") 87 | return None 88 | # 使用示例: 89 | if __name__ == '__main__': 90 | # 认证信息 91 | 92 | 93 | # 要翻译的文本 94 | texts = ["hello", '待输入的文字"2', "待输入的文字3"] 95 | original_lang = 'auto' 96 | 97 | # 目标语言 98 | target_lang = 'zh' 99 | 100 | # 调用翻译 101 | results = translate(texts,original_lang='auto', target_lang=target_lang) 102 | print(results,'ggg') 103 | 104 | if results: 105 | for original, translated in zip(texts, results): 106 | print(f"原文: {original}") 107 | print(f"译文: {translated}\n") 108 | 109 | -------------------------------------------------------------------------------- /EbookTranslator/EbookTranslator/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | EbookTranslator - 世界上性能最高的电子书保留布局翻译库 3 | The world's highest performing e-book retention layout translation library 4 | """ 5 | 6 | __version__ = '0.1.0' 7 | 8 | from .main_function import main_function 9 | 10 | # 导出主要类和函数 11 | __all__ = ['main_function'] 12 | -------------------------------------------------------------------------------- /EbookTranslator/EbookTranslator/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | EbookTranslator的命令行界面 4 | """ 5 | 
6 | import argparse 7 | import sys 8 | import os 9 | from pathlib import Path 10 | from .main_function import main_function 11 | 12 | 13 | def main(): 14 | """命令行入口点""" 15 | parser = argparse.ArgumentParser(description='翻译PDF文档') 16 | parser.add_argument('pdf_path', type=str, help='PDF文件路径') 17 | parser.add_argument('-o', '--original', default='auto', help='原始语言 (默认: auto)') 18 | parser.add_argument('-t', '--target', default='zh', help='目标语言 (默认: zh)') 19 | parser.add_argument('-b', '--begin', type=int, default=1, help='开始页码 (默认: 1)') 20 | parser.add_argument('-e', '--end', type=int, default=None, help='结束页码 (默认: 最后一页)') 21 | parser.add_argument('-c', '--config', type=str, default=None, help='配置文件路径') 22 | parser.add_argument('-d', '--dpi', type=int, default=72, help='OCR模式的DPI (默认: 72)') 23 | 24 | args = parser.parse_args() 25 | 26 | # 检查PDF文件是否存在 27 | print('路径',args.pdf_path) 28 | 29 | if not os.path.exists(args.pdf_path): 30 | print(f"错误: 找不到文件 '{args.pdf_path}'") 31 | sys.exit(1) 32 | 33 | try: 34 | # 运行主函数 35 | translator = main_function( 36 | pdf_path=args.pdf_path, 37 | original_language=args.original, 38 | target_language=args.target, 39 | bn=args.begin, 40 | en=args.end, 41 | config_path=args.config, 42 | DPI=args.dpi 43 | ) 44 | translator.main() 45 | print(f"翻译完成! 
输出文件保存在 target 目录") 46 | except Exception as e: 47 | print(f"翻译过程中发生错误: {e}") 48 | sys.exit(1) 49 | 50 | 51 | if __name__ == '__main__': 52 | main() 53 | -------------------------------------------------------------------------------- /EbookTranslator/EbookTranslator/convert2pdf.py: -------------------------------------------------------------------------------- 1 | import fitz 2 | import os 3 | 4 | 5 | def convert_to_pdf(input_file, output_file=None): 6 | """ 7 | 将支持的文档格式转换为 PDF,支持跨平台路径处理 8 | 9 | Args: 10 | input_file (str): 输入文件的完整路径 11 | output_file (str, optional): 输出PDF文件的完整路径。如果为None,则使用输入文件名+.pdf 12 | 13 | Returns: 14 | bool: 转换是否成功 15 | """ 16 | try: 17 | # 规范化路径,处理不同平台的路径分隔符 18 | input_file = os.path.normpath(input_file) 19 | 20 | if not os.path.exists(input_file): 21 | print(f"错误:输入文件 '{input_file}' 不存在") 22 | return False 23 | 24 | # 如果未指定输出文件,则基于输入文件生成输出路径 25 | if output_file is None: 26 | # 获取文件名和目录 27 | file_dir = os.path.dirname(input_file) 28 | file_name = os.path.basename(input_file) 29 | name_without_ext = os.path.splitext(file_name)[0] 30 | 31 | # 在同一目录下创建同名PDF文件 32 | output_file = os.path.join(file_dir, f"{name_without_ext}.pdf") 33 | 34 | # 确保输出目录存在 35 | output_dir = os.path.dirname(output_file) 36 | if output_dir and not os.path.exists(output_dir): 37 | os.makedirs(output_dir, exist_ok=True) 38 | 39 | print(f"正在处理文件: {input_file}") 40 | print(f"输出文件将保存为: {output_file}") 41 | 42 | # 1. 先用 fitz.open 打开文档(EPUB、XPS、FB2 等格式) 43 | doc = fitz.open(input_file) 44 | print(f"文档页数: {len(doc)}") 45 | 46 | # 2. 调用 convert_to_pdf() 得到 PDF 格式字节流 47 | pdf_bytes = doc.convert_to_pdf() 48 | 49 | # 3. 再以 "pdf" 格式打开这段字节流 50 | pdf_doc = fitz.open("pdf", pdf_bytes) 51 | 52 | # 4. 
保存为真正的 PDF 文件 53 | pdf_doc.save(output_file) 54 | 55 | # 关闭文档 56 | pdf_doc.close() 57 | doc.close() 58 | 59 | # 检查输出文件是否成功创建 60 | if os.path.exists(output_file): 61 | print(f"转换成功!PDF文件已保存为: {output_file}") 62 | return True 63 | else: 64 | print("转换似乎完成,但输出文件未找到") 65 | return False 66 | 67 | except fitz.FileDataError as e: 68 | print(f"文件格式错误或文件损坏:{str(e)}") 69 | except PermissionError as e: 70 | print(f"权限错误:无法访问或写入文件 - {str(e)}") 71 | except Exception as e: 72 | print(f"转换失败,错误类型: {type(e).__name__}") 73 | print(f"错误详情: {str(e)}") 74 | # 在调试模式下打印完整的堆栈跟踪 75 | import traceback 76 | traceback.print_exc() 77 | 78 | return False 79 | # 使用示例 80 | if __name__ == "__main__": 81 | # 单个文件转换示例 82 | input_file = "666 (1).epub" 83 | 84 | # 验证文件扩展名 85 | if not input_file.lower().endswith(('.xps', '.epub', '.fb2', '.cbz', '.mobi')): 86 | print(f"不支持的文件格式。支持的格式包括: XPS, EPUB, FB2, CBZ, MOBI") 87 | else: 88 | convert_to_pdf(input_file) 89 | 90 | # 批量转换示例 91 | # input_directory = "documents" 92 | # batch_convert_to_pdf(input_directory) 93 | -------------------------------------------------------------------------------- /EbookTranslator/EbookTranslator/load_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import requests 4 | from pathlib import Path 5 | from typing import Optional, Dict 6 | 7 | 8 | def get_working_dir() -> Path: 9 | """ 10 | 获取工作目录 11 | 返回当前工作目录(即命令行执行目录或调用脚本所在目录) 12 | """ 13 | return Path.cwd() 14 | 15 | 16 | # 定义应用数据目录 17 | WORKING_DIR = get_working_dir() 18 | APP_DATA_DIR = WORKING_DIR # 示例:将 APP_DATA_DIR 定义为工作目录 19 | print(f"Working directory: {WORKING_DIR}") 20 | 21 | 22 | def resolve_path(path: str) -> Path: 23 | """ 24 | 解析路径,支持绝对路径、相对路径和文件名。 25 | 26 | Args: 27 | path (str): 输入路径,可以是绝对路径、相对路径或文件名。 28 | 29 | Returns: 30 | Path: 解析后的完整路径。 31 | """ 32 | # 如果 path 是绝对路径,直接返回 33 | if Path(path).is_absolute(): 34 | return Path(path) 35 | 36 | # 如果 path 是相对路径或文件名,与 APP_DATA_DIR 拼接 37 | 
return APP_DATA_DIR / path 38 | 39 | 40 | def load_config(config_path: Optional[str] = None) -> Optional[Dict]: 41 | """ 42 | 加载主配置文件,优先使用传入的 config_path 路径。 43 | 如果未传入或路径无效,则尝试使用 APP_DATA_DIR 中的文件。 44 | 如果 APP_DATA_DIR 中也没有 config.json,则从指定 URL 下载。 45 | 46 | Args: 47 | config_path (Optional[str]): 配置文件路径,可以是绝对路径、相对路径或文件名。 48 | 49 | Returns: 50 | Dict: 配置数据,如果加载失败则返回 None。 51 | """ 52 | try: 53 | # 如果传入了 config_path 参数,优先使用 54 | if config_path: 55 | config_path = resolve_path(config_path) # 解析路径 56 | if config_path.exists(): 57 | with config_path.open("r", encoding="utf-8") as f: 58 | return json.load(f) 59 | else: 60 | print(f"Specified config path does not exist: {config_path}") 61 | 62 | # 如果没有传入 config_path 或路径无效,则使用 APP_DATA_DIR 中的 config.json 63 | app_config_path = APP_DATA_DIR / "config.json" 64 | if app_config_path.exists(): 65 | with app_config_path.open("r", encoding="utf-8") as f: 66 | return json.load(f) 67 | else: 68 | # 如果 APP_DATA_DIR 中没有,则尝试从指定 URL 下载 config.json 69 | url = "https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/refs/heads/main/config.json" 70 | response = requests.get(url, timeout=20) 71 | if response.status_code == 200: 72 | # 将下载的内容保存到 APP_DATA_DIR 73 | APP_DATA_DIR.mkdir(parents=True, exist_ok=True) # 确保 APP_DATA_DIR 存在 74 | print( 75 | f"config.json file not found, downloading config.json from: {url}" 76 | ) 77 | with app_config_path.open("w", encoding="utf-8") as f: 78 | f.write(response.text) 79 | return response.json() 80 | else: 81 | print(f"Failed to download config.json, HTTP status code: {response.status_code}") 82 | return None 83 | except Exception as e: 84 | print(f"Error loading config: {str(e)}") 85 | return None 86 | 87 | 88 | def get_file_path(filename: str) -> Path: 89 | """ 90 | 获取配置文件的完整路径,优先使用 APP_DATA_DIR 中的文件。 91 | 92 | Args: 93 | filename (str): 配置文件名。 94 | 95 | Returns: 96 | Path: 配置文件的完整路径。 97 | """ 98 | # 首先检查 APP_DATA_DIR 中是否有该文件 99 | app_data_file = APP_DATA_DIR / filename 100 | if 
app_data_file.exists(): 101 | return app_data_file 102 | 103 | # 如果 APP_DATA_DIR 中没有,则使用当前脚本所在目录的文件 104 | return Path(__file__).parent / filename 105 | -------------------------------------------------------------------------------- /EbookTranslator/README.md: -------------------------------------------------------------------------------- 1 | English | [简体中文](https://github.com/CBIhalsen/PolyglotPDF/blob/main/README_CN.md) | [繁體中文](https://github.com/CBIhalsen/PolyglotPDF/blob/main/README_TW.md) | [日本語](https://github.com/CBIhalsen/PolyglotPDF/blob/main/README_JA.md) | [한국어](https://github.com/CBIhalsen/PolyglotPDF/blob/main/README_KO.md) 2 | # PolyglotPDF 3 | 4 | [![Python](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/) 5 | [![PDF](https://img.shields.io/badge/pdf-documentation-brightgreen.svg)](https://example.com) 6 | [![LaTeX](https://img.shields.io/badge/latex-typesetting-orange.svg)](https://www.latex-project.org/) 7 | [![Translation](https://img.shields.io/badge/translation-supported-yellow.svg)](https://example.com) 8 | [![Math](https://img.shields.io/badge/math-formulas-red.svg)](https://example.com) 9 | [![PyMuPDF](https://img.shields.io/badge/PyMuPDF-1.24.0-blue.svg)](https://pymupdf.readthedocs.io/) 10 | 11 | 12 | ## Demo 13 | 14 | 15 | ## Speed comparison 16 | 17 | 18 | ### [🎬 Watch Full Video](https://github.com/CBIhalsen/PolyglotPDF/blob/main/demo.mp4) 19 | LLMs have been added as the translation API of choice; Doubao, Qwen, DeepSeek V3, and GPT-4o-mini are recommended. The color space error can be resolved by filling the white areas in PDF files. The old text-to-text translation API has been removed. 20 | 21 | In addition, we are considering adding an arXiv search function and rendering arXiv papers after LaTeX translation. 22 | 23 | ### Pages show 24 |
25 | 26 | 27 |
28 |
29 | 30 | 31 |
32 | 33 | 34 | # Chinese LLM API Application 35 | 36 | ## Doubao & Deepseek 37 | Apply through the Volcengine platform: 38 | - Application URL: [Volcengine-Doubao](https://www.volcengine.com/product/doubao/) 39 | - Available Models: Doubao, Deepseek series models 40 | 41 | ## Tongyi Qwen 42 | Apply through the Alibaba Cloud platform: 43 | - Application URL: [Alibaba Cloud-Tongyi Qwen](https://cn.aliyun.com/product/tongyi?from_alibabacloud=&utm_content=se_1019997984) 44 | - Available Models: Qwen-Max, Qwen-Plus series models 45 | 46 | 47 | ## Overview 48 | PolyglotPDF (EbookTranslator) is an advanced PDF processing tool that employs specialized techniques for ultra-fast text, table, and formula recognition in PDF documents, typically completing processing within 1 second. It features OCR capabilities and layout-preserving translation, with full document translations usually completed within 10 seconds (speed may vary depending on the translation API provider). 49 | 50 | ## Features 51 | - **Ultra-Fast Recognition**: Processes text, tables, and formulas in PDFs within ~1 second 52 | - **Layout-Preserving Translation**: Maintains original document formatting while translating content 53 | - **OCR Support**: Handles scanned documents efficiently 54 | - **Text-based PDF**: No GPU required 55 | - **Quick Translation**: Complete PDF translation in approximately 10 seconds 56 | - **Flexible API Integration**: Compatible with various translation service providers 57 | - **Web-based Comparison Interface**: Side-by-side comparison of original and translated documents 58 | - **Enhanced OCR Capabilities**: Improved accuracy in text recognition and processing 59 | - **Support for offline translation**: Uses a smaller translation model 60 | 61 | ## Installation and Setup 62 | 63 | 64 | 65 | ### There are several ways to use it. 
One is to install the library, 66 | 67 | ```bash 68 | pip install EbookTranslator 69 | ``` 70 | 71 | 72 | 73 | Basic usage: 74 | 75 | ```bash 76 | EbookTranslator your_file.pdf 77 | ``` 78 | 79 | Usage with parameters: 80 | 81 | ```bash 82 | EbookTranslator your_file.pdf -o en -t zh -b 1 -e 10 -c /path/to/config.json -d 300 83 | ``` 84 | 85 | #### Using in Python Code 86 | 87 | ```python 88 | from EbookTranslator import main_function 89 | 90 | translator = main_function( 91 | pdf_path="your_file.pdf", 92 | original_language="en", 93 | target_language="zh", 94 | bn=1, 95 | en=10, 96 | config_path="/path/to/config.json", 97 | DPI=300 98 | ) 99 | translator.main() 100 | ``` 101 | 102 | ## Parameter Description 103 | 104 | | Parameter | Command Line Option | Description | Default Value | 105 | |-----------|---------------------|-------------|---------------| 106 | | `pdf_path` | Positional argument | PDF file path | Required | 107 | | `original_language` | `-o, --original` | Source language | `auto` | 108 | | `target_language` | `-t, --target` | Target language | `zh` | 109 | | `bn` | `-b, --begin` | Starting page number | `1` | 110 | | `en` | `-e, --end` | Ending page number | Last page of the document | 111 | | `config_path` | `-c, --config` | Configuration file path | `config.json` in the current working directory | 112 | | `DPI` | `-d, --dpi` | DPI for OCR mode | `72` | 113 | 114 | #### Configuration File 115 | 116 | The configuration file is a JSON file, by default located at `config.json` in the current working directory. If it doesn't exist, the program downloads the default `config.json` from the PolyglotPDF GitHub repository and saves it to the current working directory. 
117 | 118 | #### Configuration File Example 119 | 120 | ```json 121 | { 122 | "count": 4, 123 | "PPC": 20, 124 | "translation_services": { 125 | "Doubao": { 126 | "auth_key": "", 127 | "model_name": "" 128 | }, 129 | "Qwen": { 130 | "auth_key": "", 131 | "model_name": "qwen-plus" 132 | }, 133 | "deepl": { 134 | "auth_key": "" 135 | }, 136 | "deepseek": { 137 | "auth_key": "", 138 | "model_name": "ep-20250218224909-gps4n" 139 | }, 140 | "openai": { 141 | "auth_key": "", 142 | "model_name": "gpt-4o-mini" 143 | }, 144 | "youdao": { 145 | "app_key": "", 146 | "app_secret": "" 147 | } 148 | }, 149 | "ocr_services": { 150 | "tesseract": { 151 | "path": "C:\\Program Files\\Tesseract-OCR\\tesseract.exe" 152 | } 153 | }, 154 | "default_services": { 155 | "ocr_model": false, 156 | "line_model": false, 157 | "Enable_translation": true, 158 | "Translation_api": "openai" 159 | } 160 | } 161 | ``` 162 | 163 | #### Configuration Options 164 | 165 | - `translation_service`: Translation service provider (e.g., "google", "deepl", "baidu") 166 | - `api_key`: Translation API key (if required) 167 | - `translation_mode`: Translation mode, "online" or "offline" 168 | - `ocr_enabled`: Whether to enable OCR recognition 169 | - `tesseract_path`: Path to Tesseract OCR engine (if not in system PATH) 170 | - `output_dir`: Output directory 171 | - `language_codes`: Language code mapping 172 | - `font_mapping`: Fonts corresponding to different languages 173 | 174 | 175 | #### Output 176 | 177 | Translated PDF files will be saved in the directory specified by `output_dir` (default is the `target` folder in the current working directory). 178 | 179 | 180 | 181 | 182 | ## License 183 | 184 | MIT 185 | 186 | ## Use method for friendly UI interface 187 | 188 | 1. Clone the repository: 189 | ```bash 190 | git clone https://github.com/CBIhalsen/PolyglotPDF.git 191 | cd PolyglotPDF 192 | ``` 193 | 194 | 2. Install required packages: 195 | ```bash 196 | pip install -r requirements.txt 197 | ``` 198 | 3. 
Configure your API key in config.json. The alicloud translation API is not recommended. 199 | 200 | 4. Run the application: 201 | ```bash 202 | python app.py 203 | ``` 204 | 205 | 5. Access the web interface: 206 | Open your browser and navigate to `http://127.0.0.1:8000` 207 | 208 | ## Requirements 209 | - Python 3.8+ 210 | - deepl==1.17.0 211 | - Flask==2.0.1 212 | - Flask-Cors==5.0.0 213 | - langdetect==1.0.9 214 | - Pillow==10.2.0 215 | - PyMuPDF==1.24.0 216 | - pytesseract==0.3.10 217 | - requests==2.31.0 218 | - tiktoken==0.6.0 219 | - Werkzeug==2.0.1 220 | 221 | ## Acknowledgments 222 | This project leverages PyMuPDF's capabilities for efficient PDF processing and layout preservation. 223 | 224 | ## Upcoming Improvements 225 | - PDF chat functionality 226 | - Academic PDF search integration 227 | - Optimization for even faster processing speeds 228 | 229 | ### Known Issues 230 | - **Issue Description**: Error during text re-editing: `code=4: only Gray, RGB, and CMYK colorspaces supported` 231 | - **Symptom**: Unsupported color space encountered during text block editing 232 | - **Current Workaround**: Skip text blocks with unsupported color spaces 233 | - **Proposed Solution**: Switch to OCR mode for entire pages containing unsupported color spaces 234 | - **Example**: [View PDF sample with unsupported color spaces](https://github.com/CBIhalsen/PolyglotPDF/blob/main/static/colorspace_issue_sample.pdf) 235 | 236 | 237 | ### Font Optimization 238 | Current font configuration in the `start` function of `main.py`: 239 | ```python 240 | # Current configuration 241 | css=f"* {{font-family:{get_font_by_language(self.target_language)};font-size:auto;color: #111111 ;font-weight:normal;}}" 242 | ``` 243 | 244 | You can optimize font display through the following methods: 245 | 246 | 1. 
**Modify Default Font Configuration** 247 | ```python 248 | # Custom font styles 249 | css=f"""* {{ 250 | font-family: {get_font_by_language(self.target_language)}; 251 | font-size: auto; 252 | color: #111111; 253 | font-weight: normal; 254 | letter-spacing: 0.5px; # Adjust letter spacing 255 | line-height: 1.5; # Adjust line height 256 | }}""" 257 | ``` 258 | 259 | 2. **Embed Custom Fonts** 260 | You can embed custom fonts by following these steps: 261 | - Place font files (.ttf, .otf) in the project's `fonts` directory 262 | - Use `@font-face` to declare custom fonts in CSS 263 | ```python 264 | css=f""" 265 | @font-face {{ 266 | font-family: 'CustomFont'; 267 | src: url('fonts/your-font.ttf') format('truetype'); 268 | }} 269 | * {{ 270 | font-family: 'CustomFont', {get_font_by_language(self.target_language)}; 271 | font-size: auto; 272 | font-weight: normal; 273 | }} 274 | """ 275 | ``` 276 | 277 | ### Basic Principles 278 | This project follows similar basic principles as Adobe Acrobat DC's PDF editing, using PyMuPDF for text block recognition and manipulation: 279 | 280 | - **Core Process**: 281 | ```python 282 | # Get text blocks from the page 283 | blocks = page.get_text("dict")["blocks"] 284 | 285 | # Process each text block 286 | for block in blocks: 287 | if block.get("type") == 0: # text block 288 | bbox = block["bbox"] # get text block boundary 289 | text = "" 290 | font_info = None 291 | # Collect text and font information 292 | for line in block["lines"]: 293 | for span in line["spans"]: 294 | text += span["text"] + " " 295 | ``` 296 | This approach directly processes PDF text blocks, maintaining the original layout while achieving efficient text extraction and modification. 
297 | 298 | - **Technical Choices**: 299 | - Utilizes PyMuPDF for PDF parsing and editing 300 | - Focuses on text processing 301 | - Avoids complex operations like AI formula recognition, table processing, or page restructuring 302 | 303 | - **Why Avoid Complex Processing**: 304 | - AI recognition of formulas, tables, and PDF restructuring faces severe performance bottlenecks 305 | - Complex AI processing leads to high computational costs 306 | - Significantly increased processing time (potentially tens of seconds or more) 307 | - Difficult to deploy at scale with low costs in production environments 308 | - Not suitable for online services requiring quick response times 309 | 310 | - **Project Scope**: 311 | - This project only serves to demonstrate the correct approach for layout-preserved PDF translation and AI-assisted PDF reading. Converting PDF files to markdown format for large language models to read, in my opinion, is not a wise approach. 312 | - Aims for optimal performance-to-cost ratio 313 | 314 | - **Performance**: 315 | - PolyglotPDF API response time: ~1 second per page 316 | - Low computational resource requirements, suitable for large-scale deployment 317 | - High cost-effectiveness for commercial applications 318 | 319 | - **Contact author**: 320 | QQ: 1421243966 321 | email: 1421243966@qq.com 322 | 323 | Related questions answered and discussed: 324 | 325 | QQ group: 326 | 1031477425 327 | 328 | 329 | 330 | -------------------------------------------------------------------------------- /EbookTranslator/requirements.txt: -------------------------------------------------------------------------------- 1 | deepl==1.17.0 2 | Flask 3 | flask-cors 4 | Pillow==10.2.0 5 | PyMuPDF==1.24.0 6 | pytesseract==0.3.10 7 | requests==2.31.0 8 | Werkzeug==2.0.1 9 | aiohttp 10 | -------------------------------------------------------------------------------- /EbookTranslator/setup.py: -------------------------------------------------------------------------------- 1 | 
from setuptools import setup, find_packages 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name="EbookTranslator", 8 | version="0.3.3", 9 | author="Chen", 10 | author_email="1421243966@qq.com", 11 | description="The world's highest performing e-book retention layout translation library", 12 | long_description=long_description, # 添加这一行 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/1421243966/EbookTranslator", # 更新为您的实际GitHub仓库 15 | packages=find_packages(), 16 | classifiers=[ 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3.6", 19 | "Programming Language :: Python :: 3.7", 20 | "Programming Language :: Python :: 3.8", 21 | "Programming Language :: Python :: 3.9", 22 | "Programming Language :: Python :: 3.10", 23 | "License :: OSI Approved :: MIT License", 24 | "Operating System :: OS Independent", 25 | "Development Status :: 4 - Beta", 26 | "Intended Audience :: Developers", 27 | "Intended Audience :: Education", 28 | "Intended Audience :: Science/Research", 29 | "Topic :: Text Processing :: Linguistic", 30 | "Topic :: Utilities", 31 | ], 32 | python_requires=">=3.6", 33 | install_requires=[ 34 | "pymupdf>=1.18.0", 35 | "Pillow>=8.0.0", 36 | "pytesseract>=0.3.0", 37 | "deepl>=1.17.0", 38 | "requests>=2.25.0", 39 | "Werkzeug>=2.0.0", 40 | "aiohttp>=3.7.4", 41 | ], 42 | entry_points={ 43 | "console_scripts": [ 44 | "EbookTranslator=EbookTranslator.cli:main", 45 | ], 46 | }, 47 | include_package_data=True, 48 | keywords=["ebook", "translation", "pdf", "ocr", "nlp", "language"], 49 | project_urls={ 50 | "Bug Reports": "https://github.com/1421243966/EbookTranslator/issues", 51 | "Source": "https://github.com/1421243966/EbookTranslator", 52 | "Documentation": "https://github.com/1421243966/EbookTranslator#readme", 53 | }, 54 | ) 55 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | python包在2.2版本之前预计不会更新,2.2版本预估采取解析最底层span获取更多信息的布局逻辑解决,预估解决:行内公式错误判断为公式块,错误将粗体文本进行分段的bug,以及insert_html方法重复嵌入字体文件导致处理页数较大pdf时浪费计算资源极其卡顿。 目前效果,对于基于文本的pdf,polyglotpdf的解析方式依旧是最优解。 ocr和布局分析并不总是完美。(考虑处理文本上下标问题,大部分pdf文件中上标下标文本通过指定坐标和字体大小实现伪上下标,考虑替换为真正的上下标文字对应的Unicode编码,但并不完美),对于报告型表格文档,polyglotpdf效果相当完美(当然表格中的复杂矢量数学公式依旧无法正确处理)。 2 | 寻求意见的改进方法,对于复杂的颜色布局文本或者粗体掺杂常规字体文本,提出以下方法,对于流内容我们可以解析为html格式如下: 3 | 4 |

ABSTRACT:

5 |

6 | The swine industry annually suffers significant economic losses caused by porcine reproductive and respiratory syndrome virus (PRRSV). Because the available commercial vaccines have limited protective efficacy against epidemic PRRSV, there is an urgent need for innovative solutions. Nanoparticle vaccines induce robust immune responses and have become a promising direction in vaccine development. In this study, we designed and produced a self-assembling nanoparticle vaccine derived from thermophilic archaeal ferritin to combat epidemic PRRSV. First, multiple T cell epitopes targeting viral structural proteins were identified by IFN-γ screening after PRRSV infection. Three different self-assembled nanoparticles with epitopes targeting viral GP3, GP4, and GP5. 7 |

8 | 9 | 这种解析内容只能由llms翻译,翻译结果如下: 10 | ```html 11 |

摘要:

12 |

13 | 猪产业每年因猪繁殖与呼吸综合征病毒(PRRSV)造成显著的经济损失。由于现有的商业疫苗对流行性PRRSV的保护效果有限,迫切需要创新的解决方案。纳米粒子疫苗能够引发强烈的免疫反应,已成为疫苗开发的一个有前景的方向。在本研究中,我们设计并生产了一种源自嗜热古细菌铁蛋白的自组装纳米粒子疫苗,以对抗流行性PRRSV。首先,通过PRRSV感染后的IFN-γ筛选,识别出针对病毒结构蛋白的多个T细胞表位。三种不同的自组装纳米粒子携带针对病毒GP3、GP4和GP5的表位。 14 |

15 | ``` 16 | 甚至包括粗体: 17 | ```html 18 |

摘要:

19 |

20 | 猪产业每年因猪繁殖与呼吸综合征病毒(PRRSV)造成显著的经济损失。由于现有的商业疫苗对流行性PRRSV的保护效果有限,迫切需要创新的解决方案。纳米粒子疫苗能够引发强烈的免疫反应,已成为疫苗开发的一个有前景的方向。在本研究中,我们设计并生产了一种源自嗜热古细菌铁蛋白的自组装纳米粒子疫苗,以对抗流行性PRRSV。首先,通过PRRSV感染后的IFN-γ筛选,识别出针对病毒结构蛋白的多个T细胞表位。三种不同的自组装纳米粒子携带针对病毒GP3、GP4和GP5的表位。 21 |

22 | ``` 23 | 这种方法会无限接近于完美的处理,目前考虑将此方法作为强化功能选用 24 | 25 | English | [简体中文](/README_CN.md) | [繁體中文](README_TW.md) | [日本語](README_JA.md) | [한국어](README_KO.md) 26 | # PolyglotPDF 27 | 28 | [![Python](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/) 29 | [![PDF](https://img.shields.io/badge/pdf-documentation-brightgreen.svg)](https://example.com) 30 | [![LaTeX](https://img.shields.io/badge/latex-typesetting-orange.svg)](https://www.latex-project.org/) 31 | [![Translation](https://img.shields.io/badge/translation-supported-yellow.svg)](https://example.com) 32 | [![Math](https://img.shields.io/badge/math-formulas-red.svg)](https://example.com) 33 | [![PyMuPDF](https://img.shields.io/badge/PyMuPDF-1.24.0-blue.svg)](https://pymupdf.readthedocs.io/) 34 | 35 | 36 | ## Demo 37 | 38 | 39 | ### [🎬 Watch Full Video](https://github.com/CBIhalsen/PolyglotPDF/blob/main/demo.mp4) 40 | LLMs have been added as a translation API option; Doubao, Qwen, DeepSeek V3, and gpt-4o-mini are recommended. The color space error can be resolved by filling the white areas in PDF files. The old text-to-text translation API has been removed. 41 | 42 | In addition, we are considering adding an arXiv search function and rendering arXiv papers after LaTeX translation. 43 | 44 | ### Pages show 45 |
46 | 47 | 48 |
49 |
50 | 51 | 52 |
53 | 54 | 55 | # Chinese LLM API Application 56 | 57 | ## Doubao & Deepseek 58 | Apply through Volcengine platform: 59 | - Application URL: [Volcengine-Doubao](https://www.volcengine.com/product/doubao/) 60 | - Available Models: Doubao, Deepseek series models 61 | 62 | ## Tongyi Qwen 63 | Apply through Alibaba Cloud platform: 64 | - Application URL: [Alibaba Cloud-Tongyi Qwen](https://cn.aliyun.com/product/tongyi?from_alibabacloud=&utm_content=se_1019997984) 65 | - Available Models: Qwen-Max, Qwen-Plus series models 66 | 67 | 68 | ## Overview 69 | PolyglotPDF is an advanced PDF processing tool that employs specialized techniques for ultra-fast text, table, and formula recognition in PDF documents, typically completing processing within 1 second. It features OCR capabilities and layout-preserving translation, with full document translations usually completed within 10 seconds (speed may vary depending on the translation API provider). 70 | 71 | 72 | ## Features 73 | - **Ultra-Fast Recognition**: Processes text, tables, and formulas in PDFs within ~1 second 74 | - **Layout-Preserving Translation**: Maintains original document formatting while translating content 75 | - **OCR Support**: Handles scanned documents efficiently 76 | - **Text-based PDF**:No GPU required 77 | - **Quick Translation**: Complete PDF translation in approximately 10 seconds 78 | - **Flexible API Integration**: Compatible with various translation service providers 79 | - **Web-based Comparison Interface**: Side-by-side comparison of original and translated documents 80 | - **Enhanced OCR Capabilities**: Improved accuracy in text recognition and processing 81 | - **Support for offline translation**: Use smaller translation model 82 | 83 | ## Installation and Usage 84 | 85 |
86 | Standard Installation 87 | 88 | 1. Clone the repository: 89 | ```bash 90 | git clone https://github.com/CBIhalsen/PolyglotPDF.git 91 | cd PolyglotPDF 92 | ``` 93 | 94 | 2. Install required packages: 95 | ```bash 96 | pip install -r requirements.txt 97 | ``` 98 | 3. Configure your API key in config.json. The alicloud translation API is not recommended. 99 | 100 | 4. Run the application: 101 | ```bash 102 | python app.py 103 | ``` 104 | 105 | 5. Access the web interface: 106 | Open your browser and navigate to `http://127.0.0.1:8000` 107 |
108 | 109 |
110 | Docker Installation 111 | 112 | ## Quick Start Without Persistence 113 | 114 | If you want to quickly test PolyglotPDF without setting up persistent directories: 115 | 116 | ```bash 117 | # Pull the image first 118 | docker pull 2207397265/polyglotpdf:latest 119 | 120 | # Run container without mounting volumes (data will be lost when container is removed) 121 | docker run -d -p 12226:12226 --name polyglotpdf 2207397265/polyglotpdf:latest 122 | ``` 123 | 124 | This is the fastest way to try PolyglotPDF, but all uploaded PDFs and configuration changes will be lost when the container stops. 125 | 126 | ## Installation with Persistent Storage 127 | 128 | ```bash 129 | # Create necessary directories 130 | mkdir -p config fonts static/original static/target static/merged_pdf 131 | 132 | # Create config file 133 | nano config/config.json # or use any text editor 134 | # Copy configuration template from the project into this file 135 | # Make sure to fill in your API keys and other configuration details 136 | 137 | # Set permissions 138 | chmod -R 755 config fonts static 139 | ``` 140 | 141 | ### Quick Start 142 | 143 | Use the following commands to pull and run the PolyglotPDF Docker image: 144 | 145 | ```bash 146 | # Pull image 147 | docker pull 2207397265/polyglotpdf:latest 148 | 149 | # Run container 150 | docker run -d -p 12226:12226 --name polyglotpdf \ 151 | -v ./config/config.json:/app/config.json \ 152 | -v ./fonts:/app/fonts \ 153 | -v ./static/original:/app/static/original \ 154 | -v ./static/target:/app/static/target \ 155 | -v ./static/merged_pdf:/app/static/merged_pdf \ 156 | 2207397265/polyglotpdf:latest 157 | ``` 158 | 159 | ### Access the Application 160 | 161 | After the container starts, open in your browser: 162 | ``` 163 | http://localhost:12226 164 | ``` 165 | 166 | ### Using Docker Compose 167 | 168 | Create a `docker-compose.yml` file: 169 | 170 | ```yaml 171 | version: '3' 172 | services: 173 | polyglotpdf: 174 | image: 
2207397265/polyglotpdf:latest 175 | ports: 176 | - "12226:12226" 177 | volumes: 178 | - ./config/config.json:/app/config.json # Configuration file 179 | - ./fonts:/app/fonts # Font files 180 | - ./static/original:/app/static/original # Original PDFs 181 | - ./static/target:/app/static/target # Translated PDFs 182 | - ./static/merged_pdf:/app/static/merged_pdf # Merged PDFs 183 | restart: unless-stopped 184 | ``` 185 | 186 | Then run: 187 | 188 | ```bash 189 | docker-compose up -d 190 | ``` 191 | 192 | ### Common Docker Commands 193 | 194 | ```bash 195 | # Stop container 196 | docker stop polyglotpdf 197 | 198 | # Restart container 199 | docker restart polyglotpdf 200 | 201 | # View logs 202 | docker logs polyglotpdf 203 | ``` 204 |
205 | 206 | ## Requirements 207 | - Python 3.8+ 208 | - deepl==1.17.0 209 | - Flask==2.0.1 210 | - Flask-Cors==5.0.0 211 | - langdetect==1.0.9 212 | - Pillow==10.2.0 213 | - PyMuPDF==1.24.0 214 | - pytesseract==0.3.10 215 | - requests==2.31.0 216 | - tiktoken==0.6.0 217 | - Werkzeug==2.0.1 218 | 219 | ## Acknowledgments 220 | This project leverages PyMuPDF's capabilities for efficient PDF processing and layout preservation. 221 | 222 | ## Upcoming Improvements 223 | - PDF chat functionality 224 | - Academic PDF search integration 225 | - Optimization for even faster processing speeds 226 | 227 | ### Known Issues 228 | - **Issue Description**: Error during text re-editing: `code=4: only Gray, RGB, and CMYK colorspaces supported` 229 | - **Symptom**: Unsupported color space encountered during text block editing 230 | - **Current Workaround**: Skip text blocks with unsupported color spaces 231 | - **Proposed Solution**: Switch to OCR mode for entire pages containing unsupported color spaces 232 | - **Example**: [View PDF sample with unsupported color spaces](https://github.com/CBIhalsen/PolyglotPDF/blob/main/static/colorspace_issue_sample.pdf) 233 | 234 | ### TODO 235 | - □ **Custom Terminology Database**: Support custom terminology databases with prompts for domain-specific professional translation 236 | - □ **AI Reflow Feature**: Convert double-column PDFs to single-column HTML blog format for easier reading on mobile devices 237 | - □ **Multi-format Export**: Export translation results to PDF, HTML, Markdown and other formats 238 | - □ **Multi-device Synchronization**: Read translations on mobile after processing on desktop 239 | - □ **Enhanced Merge Logic**: Improve the current merge logic by disabling font name detection and enabling horizontal, vertical, x, y range overlap merging 240 | 241 | ### Font Optimization 242 | Current font configuration in the `start` function of `main.py`: 243 | ```python 244 | # Current configuration 245 | css=f"* 
{{font-family:{get_font_by_language(self.target_language)};font-size:auto;color: #111111 ;font-weight:normal;}}" 246 | ``` 247 | 248 | You can optimize font display through the following methods: 249 | 250 | 1. **Modify Default Font Configuration** 251 | ```python 252 | # Custom font styles 253 | css=f"""* {{ 254 | font-family: {get_font_by_language(self.target_language)}; 255 | font-size: auto; 256 | color: #111111; 257 | font-weight: normal; 258 | letter-spacing: 0.5px; /* Adjust letter spacing */ 259 | line-height: 1.5; /* Adjust line height */ 260 | }}""" 261 | ``` 262 | 263 | 2. **Embed Custom Fonts** 264 | You can embed custom fonts by following these steps: 265 | - Place font files (.ttf, .otf) in the project's `fonts` directory 266 | - Use `@font-face` to declare custom fonts in CSS 267 | ```python 268 | css=f""" 269 | @font-face {{ 270 | font-family: 'CustomFont'; 271 | src: url('fonts/your-font.ttf') format('truetype'); 272 | }} 273 | * {{ 274 | font-family: 'CustomFont', {get_font_by_language(self.target_language)}; 275 | font-size: auto; 276 | font-weight: normal; 277 | }} 278 | """ 279 | ``` 280 | 281 | ### Basic Principles 282 | This project follows basic principles similar to Adobe Acrobat DC's PDF editing, using PyMuPDF for text block recognition and manipulation: 283 | 284 | - **Core Process**: 285 | ```python 286 | # Get text blocks from the page 287 | blocks = page.get_text("dict")["blocks"] 288 | 289 | # Process each text block 290 | for block in blocks: 291 | if block.get("type") == 0: # text block 292 | bbox = block["bbox"] # get text block boundary 293 | text = "" 294 | font_info = None 295 | # Collect text and font information 296 | for line in block["lines"]: 297 | for span in line["spans"]: 298 | text += span["text"] + " " 299 | ``` 300 | This approach directly processes PDF text blocks, maintaining the original layout while achieving efficient text extraction and modification. 
301 | 302 | - **Technical Choices**: 303 | - Utilizes PyMuPDF for PDF parsing and editing 304 | - Focuses on text processing 305 | - Avoids complex operations like AI formula recognition, table processing, or page restructuring 306 | 307 | - **Why Avoid Complex Processing**: 308 | - AI recognition of formulas, tables, and PDF restructuring faces severe performance bottlenecks 309 | - Complex AI processing leads to high computational costs 310 | - Significantly increased processing time (potentially tens of seconds or more) 311 | - Difficult to deploy at scale with low costs in production environments 312 | - Not suitable for online services requiring quick response times 313 | 314 | - **Project Scope**: 315 | - This project only serves to demonstrate the correct approach for layout-preserved PDF translation and AI-assisted PDF reading. Converting PDF files to markdown format for large language models to read, in my opinion, is not a wise approach. 316 | - Aims for optimal performance-to-cost ratio 317 | 318 | - **Performance**: 319 | - PolyglotPDF API response time: ~1 second per page 320 | - Low computational resource requirements, suitable for scale deployment 321 | - High cost-effectiveness for commercial applications 322 | 323 | - * Contact author: 324 | QQ: 1421243966 325 | email: 1421243966@qq.com 326 | 327 | Related questions answered and discussed: 328 | 329 | QQ group: 330 | 1031477425 331 | 332 | -------------------------------------------------------------------------------- /README_CN.md: -------------------------------------------------------------------------------- 1 | 注: 对于pdf这种棘手的文件处理,对于文字版pdf的最优解:参考开源项目mupdf重构block识别算法只需要达到Adobe Acrobat Dc精度即可,不要舍近求远使用ocr扫描文字版pdf。 使用ai模型去理解pdf布局未来成本绝对会高于使用gpt4o mini这类价格! 
对于pdf中识别出的公式,要么不处理,要么通过字体文件名称和对应unicode值进行映射。 ocr扫描文字版pdf相当愚蠢。 2 | # PolyglotPDF 3 | 4 | [![Python](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/) 5 | [![PDF](https://img.shields.io/badge/pdf-documentation-brightgreen.svg)](https://example.com) 6 | [![LaTeX](https://img.shields.io/badge/latex-typesetting-orange.svg)](https://www.latex-project.org/) 7 | [![Translation](https://img.shields.io/badge/translation-supported-yellow.svg)](https://example.com) 8 | [![Math](https://img.shields.io/badge/math-formulas-red.svg)](https://example.com) 9 | [![PyMuPDF](https://img.shields.io/badge/PyMuPDF-1.24.0-blue.svg)](https://pymupdf.readthedocs.io/) 10 | 11 | ## Demo 12 | 13 | 14 | ### [🎬 Watch Full Video](https://github.com/CBIhalsen/PolyglotPDF/blob/main/demo.mp4) 15 | 已经加入llms作为翻译api的选择,建议选择:Doubao ,Qwen ,deepseek v3 ,gpt4-o-mini。色彩空间错误可以通过填充PDF文件中的白色区域来解决。 古老text to text翻译api已删除 16 | 17 | 另外,考虑添加arxiv搜索功能及对arxiv论文进行latex翻译后渲染。 18 | 19 | ### 页面展示 20 |
21 | 22 | 23 |
24 |
25 | 26 | 27 |
28 | 29 | # 国内大语言模型API申请 30 | 31 | ## Doubao & Deepseek 32 | 通过火山引擎平台申请: 33 | - 申请地址: [火山引擎-豆包](https://www.volcengine.com/product/doubao/) 34 | - 支持模型: 豆包(Doubao)、Deepseek系列模型 35 | 36 | ## 通义千问(Qwen) 37 | 通过阿里云平台申请: 38 | - 申请地址: [阿里云-通义千问](https://cn.aliyun.com/product/tongyi?from_alibabacloud=&utm_content=se_1019997984) 39 | - 支持模型: Qwen-Max、Qwen-Plus等系列模型 40 | 41 | 42 | ## 概述 43 | PolyglotPDF 是一款先进的 PDF 处理工具,采用特殊技术实现对 PDF 文档中的文字、表格和公式的超快速识别,通常仅需 1 秒即可完成处理。它支持 OCR 功能和完美保留版面的翻译功能,整篇文档的翻译通常可在 10 秒内完成(具体速度取决于翻译 API 服务商)。 44 | 45 | ## 主要特点 46 | - **超快识别**:在约 1 秒内完成对 PDF 中文字、表格和公式的处理 47 | - **保留版面翻译**:翻译过程中完整保持原文档的排版格式 48 | - **OCR 支持**:高效处理扫描版文档 49 | - **基于文本的 PDF**:不需要GPU 50 | - **快速翻译**:约 10 秒内完成整个 PDF 的翻译 51 | - **灵活的 API 集成**:可对接各种翻译服务提供商 52 | - **网页对比界面**:支持原文与译文的并排对比 53 | - **增强的 OCR 功能**:提供更准确的文本识别和处理能力 54 | - **支持离线翻译**:使用较小翻译模型 55 | 56 | ## 安装和设置 57 | 58 |
59 | 标准安装 60 | 61 | 1. 克隆仓库: 62 | ```bash 63 | git clone https://github.com/CBIhalsen/PolyglotPDF.git 64 | cd PolyglotPDF 65 | ``` 66 | 67 | 2. 安装依赖包: 68 | ```bash 69 | pip install -r requirements.txt 70 | ``` 71 | 3. 在config.json内配置API密钥,不建议使用alicloud翻译API。 72 | 73 | 4. 运行应用: 74 | ```bash 75 | python app.py 76 | ``` 77 | 78 | 5. 访问网页界面: 79 | 在浏览器中打开 `http://127.0.0.1:8000` 80 |
81 | 82 |
83 | Docker 安装 84 | 85 | ## 无持久化快速启动 86 | 87 | 如果您想快速测试PolyglotPDF而不设置持久化目录: 88 | 89 | ```bash 90 | # 先拉取镜像 91 | docker pull 2207397265/polyglotpdf:latest 92 | 93 | # 不挂载卷的容器运行(容器删除后数据将丢失) 94 | docker run -d -p 12226:12226 --name polyglotpdf 2207397265/polyglotpdf:latest 95 | ``` 96 | 97 | 这是尝试PolyglotPDF最快的方式,但容器停止后,所有上传的PDF和配置更改都会丢失。 98 | 99 | ## 持久化存储安装 100 | 101 | ```bash 102 | # 创建必要目录 103 | mkdir -p config fonts static/original static/target static/merged_pdf 104 | 105 | # 创建配置文件 106 | nano config/config.json # 或使用任何文本编辑器 107 | # 复制项目中的配置模板到该文件 108 | # 注意填写您的API密钥等配置信息 109 | 110 | # 设置权限 111 | chmod -R 755 config fonts static 112 | ``` 113 | 114 | ## 快速启动 115 | 116 | 使用以下命令拉取并运行 PolyglotPDF Docker 镜像: 117 | 118 | ```bash 119 | # 拉取镜像 120 | docker pull 2207397265/polyglotpdf:latest 121 | 122 | # 运行容器 123 | docker run -d -p 12226:12226 --name polyglotpdf \ 124 | -v ./config/config.json:/app/config.json \ 125 | -v ./fonts:/app/fonts \ 126 | -v ./static/original:/app/static/original \ 127 | -v ./static/target:/app/static/target \ 128 | -v ./static/merged_pdf:/app/static/merged_pdf \ 129 | 2207397265/polyglotpdf:latest 130 | ``` 131 | 132 | ## 访问应用 133 | 134 | 容器启动后,在浏览器中打开: 135 | ``` 136 | http://localhost:12226 137 | ``` 138 | 139 | ## 使用 Docker Compose 140 | 141 | 创建 `docker-compose.yml` 文件: 142 | 143 | ```yaml 144 | version: '3' 145 | services: 146 | polyglotpdf: 147 | image: 2207397265/polyglotpdf:latest 148 | ports: 149 | - "12226:12226" 150 | volumes: 151 | - ./config/config.json:/app/config.json # 配置文件 152 | - ./fonts:/app/fonts # 字体文件 153 | - ./static/original:/app/static/original # 原始PDF 154 | - ./static/target:/app/static/target # 翻译后PDF 155 | - ./static/merged_pdf:/app/static/merged_pdf # 合并PDF 156 | restart: unless-stopped 157 | ``` 158 | 159 | 然后运行: 160 | 161 | ```bash 162 | docker-compose up -d 163 | ``` 164 | ## 常用 Docker 命令 165 | 166 | ```bash 167 | # 停止容器 168 | docker stop polyglotpdf 169 | 170 | # 重启容器 171 | docker restart polyglotpdf 172 | 173 
| # 查看日志 174 | docker logs polyglotpdf 175 | ``` 176 |
177 | 178 | 179 | 180 | ## 环境要求 181 | - Python 3.8+ 182 | - deepl==1.17.0 183 | - Flask==2.0.1 184 | - Flask-Cors==5.0.0 185 | - langdetect==1.0.9 186 | - Pillow==10.2.0 187 | - PyMuPDF==1.24.0 188 | - pytesseract==0.3.10 189 | - requests==2.31.0 190 | - tiktoken==0.6.0 191 | - Werkzeug==2.0.1 192 | 193 | ## 致谢 194 | 本项目得益于 PyMuPDF 强大的 PDF 处理和版面保持功能。 195 | 196 | ## 即将推出的改进 197 | - PDF 聊天功能 198 | - 学术 PDF 搜索集成 199 | - 进一步提升处理速度 200 | 201 | ### 待修复问题 202 | - **问题描述**:应用重编辑时发生错误: `code=4: only Gray, RGB, and CMYK colorspaces supported` 203 | - **现象**:文本块应用编辑时遇到不支持的色彩空间 204 | - **当前解决方案**:遇到不支持的色彩空间时跳过该文本块 205 | - **待解决思路**:对于包含不支持色彩空间的页面,整页切换至OCR模式处理 206 | - **复现示例**:[查看不支持色彩空间的PDF样例](https://github.com/CBIhalsen/PolyglotPDF/blob/main/static/colorspace_issue_sample.pdf) 207 | 208 | 209 | ### TODO 210 | - □ **自定义术语库**:支持自定义术语库,设置prompt进行领域专业翻译 211 | - □ **AI重排功能**:把双栏的PDF转换成HTML博客的单栏线性阅读格式,便于移动端阅读 212 | - □ **多格式导出**:翻译结果可以导出为PDF、HTML、Markdown等格式 213 | - □ **多端同步**:电脑上翻译完,手机上也能看 214 | - □ **增强合并逻辑**:现版本默认合并逻辑把检测字体名字全部关闭,加上水平、垂直、x、y范围重叠全部合并 215 | 216 | 217 | ### 字体优化 218 | 当前在 `main.py` 的 `start` 函数中,文本插入使用了默认字体配置: 219 | ```python 220 | # 当前配置 221 | css=f"* {{font-family:{get_font_by_language(self.target_language)};font-size:auto;color: #111111 ;font-weight:normal;}}" 222 | ``` 223 | 224 | 你可以通过以下方式优化字体显示: 225 | 226 | 1. **修改默认字体配置** 227 | ```python 228 | # 自定义字体样式 229 | css=f"""* {{ 230 | font-family: {get_font_by_language(self.target_language)}; 231 | font-size: auto; 232 | color: #111111; 233 | font-weight: normal; 234 | letter-spacing: 0.5px; # 调整字间距 235 | line-height: 1.5; # 调整行高 236 | }}""" 237 | ``` 238 | 239 | 2. 
**嵌入自定义字体** 240 | 你可以通过以下步骤嵌入自定义字体: 241 | - 将字体文件(如.ttf,.otf)放置在项目的 `fonts` 目录下 242 | - 在CSS中使用 `@font-face` 声明自定义字体 243 | ```python 244 | css=f""" 245 | @font-face {{ 246 | font-family: 'CustomFont'; 247 | src: url('fonts/your-font.ttf') format('truetype'); 248 | }} 249 | * {{ 250 | font-family: 'CustomFont', {get_font_by_language(self.target_language)}; 251 | font-size: auto; 252 | font-weight: normal; 253 | }} 254 | """ 255 | ``` 256 | 257 | ### 基本原理 258 | 本项目采用与 Adobe Acrobat DC 编辑 PDF 类似的基本原理,基于 PyMuPDF 识别和处理 PDF 文本块: 259 | 260 | - **核心处理流程**: 261 | ```python 262 | # 获取页面中的文本块 263 | blocks = page.get_text("dict")["blocks"] 264 | 265 | # 遍历处理每个文本块 266 | for block in blocks: 267 | if block.get("type") == 0: # 文本块 268 | bbox = block["bbox"] # 获取文本块边界框 269 | text = "" 270 | font_info = None 271 | # 收集文本和字体信息 272 | for line in block["lines"]: 273 | for span in line["spans"]: 274 | text += span["text"] + " " 275 | ``` 276 | 这种方式直接处理 PDF 文本块,保持原有布局不变,实现高效的文本提取和修改。 277 | 278 | - **技术选择**: 279 | - 使用 PyMuPDF 进行 PDF 解析和编辑 280 | - 专注于文本处理,避免复杂化问题 281 | - 不进行 AI 识别公式、表格或页面重组等复杂操作 282 | 283 | - **为什么避免复杂处理**: 284 | - AI 识别公式、表格和重组 PDF 页面的方式存在严重的性能瓶颈 285 | - 复杂的 AI 处理导致计算成本高昂 286 | - 处理时间显著增加(可能需要数十秒甚至更长) 287 | - 难以在生产环境中大规模低成本部署 288 | - 不适合需要快速响应的在线服务 289 | 290 | - **项目定位**: 291 | - 主要用于保留布局的 PDF 文件翻译 292 | - 为 AI 辅助阅读 PDF 提供高效实现方式 293 | - 追求最佳性能价格比 294 | 295 | - **性能表现**: 296 | - PolyglotPDF API 服务响应时间:约 1 秒/页 297 | - 低计算资源消耗,适合规模化部署 298 | - 成本效益高,适合商业应用 299 | 300 | -------------------------------------------------------------------------------- /README_JA.md: -------------------------------------------------------------------------------- 1 | # PolyglotPDF 2 | 3 | [![Python](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/) 4 | [![PDF](https://img.shields.io/badge/pdf-documentation-brightgreen.svg)](https://example.com) 5 | [![LaTeX](https://img.shields.io/badge/latex-typesetting-orange.svg)](https://www.latex-project.org/) 6 | 
[![Translation](https://img.shields.io/badge/translation-supported-yellow.svg)](https://example.com) 7 | [![Math](https://img.shields.io/badge/math-formulas-red.svg)](https://example.com) 8 | [![PyMuPDF](https://img.shields.io/badge/PyMuPDF-1.24.0-blue.svg)](https://pymupdf.readthedocs.io/) 9 | 10 | ## デモ 11 | 12 | 13 | ### [🎬 フルビデオを見る](https://github.com/CBIhalsen/PolyglotPDF/blob/main/demo.mp4) 14 | 翻訳APIの選択肢としてLLMsが追加されました。推奨モデル:Doubao、Qwen、deepseek v3、gpt4-o-miniです。カラースペースエラーはPDFファイルの白色領域を埋めることで解決できます。古いtext to text翻訳APIは削除されました。 15 | 16 | また、arXiv検索機能とarXiv論文のLaTeX翻訳後のレンダリングの追加を検討中です。 17 | 18 | ### ページ表示 19 |
20 | 21 | 22 |
23 |
24 | 25 | 26 |
27 | 28 | # 中国の大規模言語モデルAPIの申請 29 | 30 | ## Doubao & Deepseek 31 | 火山エンジンプラットフォームから申請: 32 | - 申請先: [火山エンジン-Doubao](https://www.volcengine.com/product/doubao/) 33 | - 対応モデル: Doubao、Deepseekシリーズモデル 34 | 35 | ## 通義千問(Qwen) 36 | アリババクラウドプラットフォームから申請: 37 | - 申請先: [アリババクラウド-通義千問](https://cn.aliyun.com/product/tongyi?from_alibabacloud=&utm_content=se_1019997984) 38 | - 対応モデル: Qwen-Max、Qwen-Plusなどのシリーズモデル 39 | 40 | ## 概要 41 | PolyglotPDFは、特殊技術を用いてPDF文書内のテキスト、表、数式を超高速で認識する先進的なPDF処理ツールです。通常1秒以内で処理を完了し、OCR機能と完全なレイアウト保持翻訳機能をサポートしています。文書全体の翻訳は通常10秒以内で完了します(翻訳APIプロバイダーによって速度は異なります)。 42 | 43 | ## 主な特徴 44 | - **超高速認識**:約1秒でPDF内のテキスト、表、数式の処理を完了 45 | - **レイアウト保持翻訳**:翻訳時に原文書の書式を完全に保持 46 | - **OCRサポート**:スキャン版文書の効率的な処理 47 | - **テキストベースPDF**:GPUは不要 48 | - **高速翻訳**:約10秒でPDF全体の翻訳を完了 49 | - **柔軟なAPI統合**:各種翻訳サービスプロバイダーと連携可能 50 | - **Webベース比較インターフェース**:原文と訳文の並列比較をサポート 51 | - **強化されたOCR機能**:より正確なテキスト認識と処理能力 52 | - **オフライン翻訳対応**:小規模翻訳モデルの使用 53 | 54 | ## インストールとセットアップ 55 | 56 |
57 | 標準インストール 58 | 59 | 1. リポジトリのクローン: 60 | ```bash 61 | git clone https://github.com/CBIhalsen/PolyglotPDF.git 62 | cd PolyglotPDF 63 | ``` 64 | 65 | 2. 依存パッケージのインストール: 66 | ```bash 67 | pip install -r requirements.txt 68 | ``` 69 | 70 | 3. config.json内でAPIキーを設定。alicloud翻訳APIの使用は推奨されません。 71 | 72 | 4. アプリケーションの実行: 73 | ```bash 74 | python app.py 75 | ``` 76 | 77 | 5. Webインターフェースへのアクセス: 78 | ブラウザで `http://127.0.0.1:8000` を開く 79 |
80 | 81 |
82 | Docker 使用方法 83 | 84 | ## 永続化なしの簡易起動 85 | 86 | 永続化ディレクトリを設定せずにPolyglotPDFをすぐにテストしたい場合: 87 | 88 | ```bash 89 | # まずイメージをプル 90 | docker pull 2207397265/polyglotpdf:latest 91 | 92 | # ボリュームをマウントせずにコンテナを実行(コンテナ削除後にデータは失われます) 93 | docker run -d -p 12226:12226 --name polyglotpdf 2207397265/polyglotpdf:latest 94 | ``` 95 | 96 | これはPolyglotPDFを試す最速の方法ですが、コンテナ停止後はアップロードしたPDFと設定変更がすべて失われます。 97 | 98 | ## 永続化ストレージでのインストール 99 | 100 | ```bash 101 | # 必要なディレクトリを作成 102 | mkdir -p config fonts static/original static/target static/merged_pdf 103 | 104 | # 設定ファイルを作成 105 | nano config/config.json # または任意のテキストエディタを使用 106 | # プロジェクトの設定テンプレートをこのファイルにコピー 107 | # APIキーなどの設定情報を入力してください 108 | 109 | # 権限を設定 110 | chmod -R 755 config fonts static 111 | ``` 112 | 113 | ## クイックスタート 114 | 115 | 以下のコマンドでPolyglotPDF Dockerイメージをプルして実行: 116 | 117 | ```bash 118 | # イメージをプル 119 | docker pull 2207397265/polyglotpdf:latest 120 | 121 | # コンテナを実行 122 | docker run -d -p 12226:12226 --name polyglotpdf \ 123 | -v ./config/config.json:/app/config.json \ 124 | -v ./fonts:/app/fonts \ 125 | -v ./static/original:/app/static/original \ 126 | -v ./static/target:/app/static/target \ 127 | -v ./static/merged_pdf:/app/static/merged_pdf \ 128 | 2207397265/polyglotpdf:latest 129 | ``` 130 | 131 | ## アプリケーションへのアクセス 132 | 133 | コンテナ起動後、ブラウザで開く: 134 | ``` 135 | http://localhost:12226 136 | ``` 137 | 138 | ## Docker Composeの使用 139 | 140 | `docker-compose.yml`ファイルを作成: 141 | 142 | ```yaml 143 | version: '3' 144 | services: 145 | polyglotpdf: 146 | image: 2207397265/polyglotpdf:latest 147 | ports: 148 | - "12226:12226" 149 | volumes: 150 | - ./config/config.json:/app/config.json # 設定ファイル 151 | - ./fonts:/app/fonts # フォントファイル 152 | - ./static/original:/app/static/original # 原本PDF 153 | - ./static/target:/app/static/target # 翻訳後PDF 154 | - ./static/merged_pdf:/app/static/merged_pdf # 結合PDF 155 | restart: unless-stopped 156 | ``` 157 | 158 | そして実行: 159 | 160 | ```bash 161 | docker-compose up -d 162 | ``` 163 | 164 | ## 
よく使うDockerコマンド 165 | 166 | ```bash 167 | # コンテナを停止 168 | docker stop polyglotpdf 169 | 170 | # コンテナを再起動 171 | docker restart polyglotpdf 172 | 173 | # ログの確認 174 | docker logs polyglotpdf 175 | ``` 176 |
177 | 178 | ## 環境要件 179 | - Python 3.8+ 180 | - deepl==1.17.0 181 | - Flask==2.0.1 182 | - Flask-Cors==5.0.0 183 | - langdetect==1.0.9 184 | - Pillow==10.2.0 185 | - PyMuPDF==1.24.0 186 | - pytesseract==0.3.10 187 | - requests==2.31.0 188 | - tiktoken==0.6.0 189 | - Werkzeug==2.0.1 190 | 191 | ## 謝辞 192 | 本プロジェクトはPyMuPDFの強力なPDF処理とレイアウト保持機能の恩恵を受けています。 193 | 194 | ## 今後の改善予定 195 | - PDFチャット機能 196 | - 学術PDF検索の統合 197 | - 処理速度のさらなる向上 198 | 199 | ### 修正待ちの問題 200 | - **問題の説明**:アプリケーション再編集時のエラー: `code=4: only Gray, RGB, and CMYK colorspaces supported` 201 | - **現象**:テキストブロックの編集時に非対応のカラースペースが発生 202 | - **現在の解決策**:非対応のカラースペースを含むテキストブロックをスキップ 203 | - **解決へのアプローチ**:非対応のカラースペースを含むページ全体をOCRモードで処理 204 | - **再現サンプル**:[非対応カラースペースのPDFサンプルを見る](https://github.com/CBIhalsen/PolyglotPDF/blob/main/static/colorspace_issue_sample.pdf) 205 | 206 | ### TODO 207 | - □ **カスタム用語集**: カスタム用語集をサポートし、特定分野の専門的な翻訳のためのプロンプト設定 208 | - □ **AI再配置機能**: 二段組みPDFをHTMLブログの一列リニア読書形式に変換し、モバイル端末での読書を容易にする 209 | - □ **複数形式エクスポート**: 翻訳結果をPDF、HTML、Markdown等の形式にエクスポート可能 210 | - □ **マルチデバイス同期**: コンピュータで翻訳完了後、スマートフォンでも閲覧可能 211 | - □ **強化されたマージロジック**: 現バージョンのデフォルトマージロジックではフォント名検出を完全に無効にし、水平・垂直・x・y範囲の重複をすべてマージする 212 | 213 | ### フォントの最適化 214 | 現在、`main.py`の`start`関数では、デフォルトのフォント設定でテキストを挿入しています: 215 | ```python 216 | # 現在の設定 217 | css=f"* {{font-family:{get_font_by_language(self.target_language)};font-size:auto;color: #111111 ;font-weight:normal;}}" 218 | ``` 219 | 220 | フォント表示は以下の方法で最適化できます: 221 | 222 | 1. **デフォルトフォント設定の変更** 223 | ```python 224 | # カスタムフォントスタイル 225 | css=f"""* {{ 226 | font-family: {get_font_by_language(self.target_language)}; 227 | font-size: auto; 228 | color: #111111; 229 | font-weight: normal; 230 | letter-spacing: 0.5px; # 文字間隔の調整 231 | line-height: 1.5; # 行の高さの調整 232 | }}""" 233 | ``` 234 | 235 | 2. 
**カスタムフォントの埋め込み** 236 | 以下の手順でカスタムフォントを埋め込むことができます: 237 | - フォントファイル(.ttf、.otfなど)をプロジェクトの`fonts`ディレクトリに配置 238 | - CSSで`@font-face`を使用してカスタムフォントを宣言 239 | ```python 240 | css=f""" 241 | @font-face {{ 242 | font-family: 'CustomFont'; 243 | src: url('fonts/your-font.ttf') format('truetype'); 244 | }} 245 | * {{ 246 | font-family: 'CustomFont', {get_font_by_language(self.target_language)}; 247 | font-size: auto; 248 | font-weight: normal; 249 | }} 250 | """ 251 | ``` 252 | 253 | ### 基本原理 254 | 本プロジェクトはAdobe Acrobat DCのPDF編集と同様の基本原理を採用し、PyMuPDFを使用してPDFテキストブロックを認識・処理します: 255 | 256 | - **コア処理フロー**: 257 | ```python 258 | # ページからテキストブロックを取得 259 | blocks = page.get_text("dict")["blocks"] 260 | 261 | # 各テキストブロックを処理 262 | for block in blocks: 263 | if block.get("type") == 0: # テキストブロック 264 | bbox = block["bbox"] # テキストブロックの境界ボックスを取得 265 | text = "" 266 | font_info = None 267 | # テキストとフォント情報の収集 268 | for line in block["lines"]: 269 | for span in line["spans"]: 270 | text += span["text"] + " " 271 | ``` 272 | この方法でPDFテキストブロックを直接処理し、元のレイアウトを保持したまま、効率的なテキストの抽出と修正を実現します。 273 | 274 | - **技術選択**: 275 | - PyMuPDFを使用してPDFの解析と編集を行う 276 | - テキスト処理に特化し、問題の複雑化を避ける 277 | - 数式、表、ページ再構成などの複雑なAI認識は行わない 278 | 279 | - **複雑な処理を避ける理由**: 280 | - 数式、表、PDFページ再構成のAI認識には深刻なパフォーマンスのボトルネックが存在 281 | - 複雑なAI処理は計算コストが高額 282 | - 処理時間が大幅に増加(数十秒以上かかる可能性) 283 | - 本番環境での大規模な低コスト展開が困難 284 | - オンラインサービスの迅速なレスポンスに不適 285 | 286 | - **プロジェクトの位置づけ**: 287 | - レイアウトを保持したPDFファイルの翻訳が主目的 288 | - PDFのAI支援読書に効率的な実装方法を提供 289 | - 最適なパフォーマンスとコスト比を追求 290 | 291 | - **パフォーマンス**: 292 | - PolyglotPDF APIサービスのレスポンス時間:約1秒/ページ 293 | - 低計算リソース消費で、スケーラブルな展開が可能 294 | - コスト効率が高く、商用利用に適している 295 | -------------------------------------------------------------------------------- /README_KO.md: -------------------------------------------------------------------------------- 1 | # PolyglotPDF 2 | 3 | [![Python](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/) 4 | 
[![PDF](https://img.shields.io/badge/pdf-documentation-brightgreen.svg)](https://example.com) 5 | [![LaTeX](https://img.shields.io/badge/latex-typesetting-orange.svg)](https://www.latex-project.org/) 6 | [![Translation](https://img.shields.io/badge/translation-supported-yellow.svg)](https://example.com) 7 | [![Math](https://img.shields.io/badge/math-formulas-red.svg)](https://example.com) 8 | [![PyMuPDF](https://img.shields.io/badge/PyMuPDF-1.24.0-blue.svg)](https://pymupdf.readthedocs.io/) 9 | 10 | ## 데모 11 | 12 | 13 | ### [🎬 전체 영상 보기](https://github.com/CBIhalsen/PolyglotPDF/blob/main/demo.mp4) 14 | 번역 API 선택지로 LLMs가 추가되었습니다. 권장 모델: Doubao, Qwen, deepseek v3, gpt4-o-mini입니다. 색상 공간 오류는 PDF 파일의 흰색 영역을 채우는 것으로 해결할 수 있습니다. 기존 text to text 번역 API는 삭제되었습니다. 15 | 16 | 또한, arXiv 검색 기능과 arXiv 논문의 LaTeX 번역 후 렌더링 추가를 고려 중입니다. 17 | 18 | ### 페이지 표시 19 |
20 | 21 | 22 |
23 |
24 | 25 | 26 |
27 | 28 | # 중국 대규모 언어 모델 API 신청 29 | 30 | ## Doubao & Deepseek 31 | 화산 엔진 플랫폼을 통한 신청: 32 | - 신청 주소: [화산 엔진-Doubao](https://www.volcengine.com/product/doubao/) 33 | - 지원 모델: Doubao, Deepseek 시리즈 모델 34 | 35 | ## 통의천문(Qwen) 36 | 알리바바 클라우드 플랫폼을 통한 신청: 37 | - 신청 주소: [알리바바 클라우드-통의천문](https://cn.aliyun.com/product/tongyi?from_alibabacloud=&utm_content=se_1019997984) 38 | - 지원 모델: Qwen-Max, Qwen-Plus 등 시리즈 모델 39 | 40 | ## 개요 41 | PolyglotPDF는 특수 기술을 사용하여 PDF 문서 내의 텍스트, 표, 수식을 초고속으로 인식하는 선진적인 PDF 처리 도구입니다. 보통 1초 이내에 처리를 완료하며, OCR 기능과 완벽한 레이아웃 유지 번역 기능을 지원합니다. 문서 전체의 번역은 보통 10초 이내에 완료됩니다(번역 API 제공업체에 따라 속도가 다릅니다). 42 | 43 | ## 주요 특징 44 | - **초고속 인식**: 약 1초 내에 PDF 내의 텍스트, 표, 수식 처리 완료 45 | - **레이아웃 유지 번역**: 번역 시 원문서의 서식을 완벽하게 유지 46 | - **OCR 지원**: 스캔 버전 문서의 효율적인 처리 47 | - **텍스트 기반 PDF**: GPU 불필요 48 | - **고속 번역**: 약 10초 내에 PDF 전체 번역 완료 49 | - **유연한 API 통합**: 각종 번역 서비스 제공업체와 연동 가능 50 | - **웹 기반 비교 인터페이스**: 원문과 번역문의 병렬 비교 지원 51 | - **강화된 OCR 기능**: 더 정확한 텍스트 인식과 처리 능력 52 | - **오프라인 번역 지원**: 소규모 번역 모델 사용 53 | 54 | ## 설치 및 설정 55 | 56 |
57 | 표준 설치 58 | 59 | 1. 저장소 클론: 60 | ```bash 61 | git clone https://github.com/CBIhalsen/Polyglotpdf.git 62 | cd Polyglotpdf 63 | ``` 64 | 65 | 2. 의존성 패키지 설치: 66 | ```bash 67 | pip install -r requirements.txt 68 | ``` 69 | 70 | 3. config.json에서 API 키 설정. alicloud 번역 API 사용은 권장되지 않습니다. 71 | 72 | 4. 애플리케이션 실행: 73 | ```bash 74 | python app.py 75 | ``` 76 | 77 | 5. 웹 인터페이스 접속: 78 | 브라우저에서 `http://127.0.0.1:8000` 열기 79 |
80 | 81 |
82 | Docker 사용 방법 83 | 84 | ## 비지속성 빠른 시작 85 | 86 | 영구 디렉토리 설정 없이 PolyglotPDF를 빠르게 테스트하려면: 87 | 88 | ```bash 89 | # 먼저 이미지 가져오기 90 | docker pull 2207397265/polyglotpdf:latest 91 | 92 | # 볼륨 마운트 없이 컨테이너 실행(컨테이너 삭제 시 데이터 손실) 93 | docker run -d -p 12226:12226 --name polyglotpdf 2207397265/polyglotpdf:latest 94 | ``` 95 | 96 | 이것은 PolyglotPDF를 시도하는 가장 빠른 방법이지만, 컨테이너가 중지되면 업로드된 모든 PDF와 구성 변경 사항이 손실됩니다. 97 | 98 | ## 영구 저장소 설치 99 | 100 | ```bash 101 | # 필요한 디렉토리 생성 102 | mkdir -p config fonts static/original static/target static/merged_pdf 103 | 104 | # 설정 파일 생성 105 | nano config/config.json # 또는 원하는 텍스트 편집기 사용 106 | # 프로젝트의 설정 템플릿을 이 파일에 복사 107 | # API 키 등의 설정 정보를 입력하세요 108 | 109 | # 권한 설정 110 | chmod -R 755 config fonts static 111 | ``` 112 | 113 | ## 빠른 시작 114 | 115 | 다음 명령을 사용하여 PolyglotPDF Docker 이미지를 가져와 실행: 116 | 117 | ```bash 118 | # 이미지 가져오기 119 | docker pull 2207397265/polyglotpdf:latest 120 | 121 | # 컨테이너 실행 122 | docker run -d -p 12226:12226 --name polyglotpdf \ 123 | -v ./config/config.json:/app/config.json \ 124 | -v ./fonts:/app/fonts \ 125 | -v ./static/original:/app/static/original \ 126 | -v ./static/target:/app/static/target \ 127 | -v ./static/merged_pdf:/app/static/merged_pdf \ 128 | 2207397265/polyglotpdf:latest 129 | ``` 130 | 131 | ## 애플리케이션 접속 132 | 133 | 컨테이너가 시작된 후, 브라우저에서 열기: 134 | ``` 135 | http://localhost:12226 136 | ``` 137 | 138 | ## Docker Compose 사용 139 | 140 | `docker-compose.yml` 파일 생성: 141 | 142 | ```yaml 143 | version: '3' 144 | services: 145 | polyglotpdf: 146 | image: 2207397265/polyglotpdf:latest 147 | ports: 148 | - "12226:12226" 149 | volumes: 150 | - ./config.json:/app/config.json # 설정 파일 151 | - ./fonts:/app/fonts # 폰트 파일 152 | - ./static/original:/app/static/original # 원본 PDF 153 | - ./static/target:/app/static/target # 번역된 PDF 154 | - ./static/merged_pdf:/app/static/merged_pdf # 병합된 PDF 155 | restart: unless-stopped 156 | ``` 157 | 158 | 그리고 실행: 159 | 160 | ```bash 161 | docker-compose up -d 162 | ``` 163 | 164 | ## 자주 사용하는 
Docker 명령어 165 | 166 | ```bash 167 | # 컨테이너 중지 168 | docker stop polyglotpdf 169 | 170 | # 컨테이너 재시작 171 | docker restart polyglotpdf 172 | 173 | # 로그 확인 174 | docker logs polyglotpdf 175 | ``` 176 |
177 | 178 | ## 환경 요구사항 179 | - Python 3.8+ 180 | - deepl==1.17.0 181 | - Flask==2.0.1 182 | - Flask-Cors==5.0.0 183 | - langdetect==1.0.9 184 | - Pillow==10.2.0 185 | - PyMuPDF==1.24.0 186 | - pytesseract==0.3.10 187 | - requests==2.31.0 188 | - tiktoken==0.6.0 189 | - Werkzeug==2.0.1 190 | 191 | ## 감사의 말 192 | 본 프로젝트는 PyMuPDF의 강력한 PDF 처리와 레이아웃 유지 기능의 혜택을 받았습니다. 193 | 194 | ## 향후 개선 예정 195 | - PDF 채팅 기능 196 | - 학술 PDF 검색 통합 197 | - 처리 속도 추가 향상 198 | 199 | ### 수정 대기 중인 문제 200 | - **문제 설명**: 애플리케이션 재편집 시 오류: `code=4: only Gray, RGB, and CMYK colorspaces supported` 201 | - **현상**: 텍스트 블록 편집 시 지원되지 않는 색상 공간 발생 202 | - **현재 해결책**: 지원되지 않는 색상 공간을 포함한 텍스트 블록 건너뛰기 203 | - **해결 접근 방식**: 지원되지 않는 색상 공간을 포함한 페이지 전체를 OCR 모드로 처리 204 | - **재현 샘플**: [지원되지 않는 색상 공간의 PDF 샘플 보기](https://github.com/CBIhalsen/PolyglotPDF/blob/main/static/colorspace_issue_sample.pdf) 205 | 206 | ### TODO 207 | - □ **사용자 정의 용어집**: 사용자 정의 용어집을 지원하고, 특정 분야의 전문적인 번역을 위한 프롬프트 설정 208 | - □ **AI 재배치 기능**: 두 칸 PDF를 HTML 블로그의 한 줄 선형 읽기 형식으로 변환하여 모바일 장치에서 읽기 편하게 함 209 | - □ **다중 형식 내보내기**: 번역 결과를 PDF, HTML, Markdown 등 다양한 형식으로 내보내기 210 | - □ **다중 기기 동기화**: 컴퓨터에서 번역 완료한 후 모바일에서도 볼 수 있음 211 | - □ **향상된 병합 로직**: 현재 버전의 기본 병합 로직에서 글꼴 이름 감지를 모두 비활성화하고, 가로, 세로, x, y 범위 중복이 모두 병합되도록 함 212 | 213 | ### 폰트 최적화 214 | 현재 `main.py`의 `start` 함수에서는 기본 폰트 설정으로 텍스트를 삽입합니다: 215 | ```python 216 | # 현재 설정 217 | css=f"* {{font-family:{get_font_by_language(self.target_language)};font-size:auto;color: #111111 ;font-weight:normal;}}" 218 | ``` 219 | 220 | 폰트 표시는 다음 방법으로 최적화할 수 있습니다: 221 | 222 | 1. **기본 폰트 설정 변경** 223 | ```python 224 | # 사용자 정의 폰트 스타일 225 | css=f"""* {{ 226 | font-family: {get_font_by_language(self.target_language)}; 227 | font-size: auto; 228 | color: #111111; 229 | font-weight: normal; 230 | letter-spacing: 0.5px; # 자간 조정 231 | line-height: 1.5; # 행간 조정 232 | }}""" 233 | ``` 234 | 235 | 2. 
**사용자 정의 폰트 임베딩** 236 | 다음 단계로 사용자 정의 폰트를 임베딩할 수 있습니다: 237 | - 폰트 파일(.ttf, .otf 등)을 프로젝트의 `fonts` 디렉토리에 배치 238 | - CSS에서 `@font-face`를 사용하여 사용자 정의 폰트 선언 239 | ```python 240 | css=f""" 241 | @font-face {{ 242 | font-family: 'CustomFont'; 243 | src: url('fonts/your-font.ttf') format('truetype'); 244 | }} 245 | * {{ 246 | font-family: 'CustomFont', {get_font_by_language(self.target_language)}; 247 | font-size: auto; 248 | font-weight: normal; 249 | }} 250 | """ 251 | ``` 252 | 253 | ### 기본 원리 254 | 본 프로젝트는 Adobe Acrobat DC의 PDF 편집과 유사한 기본 원리를 채택하고, PyMuPDF를 사용하여 PDF 텍스트 블록을 인식하고 처리합니다: 255 | 256 | - **핵심 처리 흐름**: 257 | ```python 258 | # 페이지에서 텍스트 블록 가져오기 259 | blocks = page.get_text("dict")["blocks"] 260 | 261 | # 각 텍스트 블록 처리 262 | for block in blocks: 263 | if block.get("type") == 0: # 텍스트 블록 264 | bbox = block["bbox"] # 텍스트 블록의 경계 상자 가져오기 265 | text = "" 266 | font_info = None 267 | # 텍스트와 폰트 정보 수집 268 | for line in block["lines"]: 269 | for span in line["spans"]: 270 | text += span["text"] + " " 271 | ``` 272 | 이 방법으로 PDF 텍스트 블록을 직접 처리하여 원래 레이아웃을 유지한 채 효율적인 텍스트 추출과 수정을 실현합니다. 
273 | 274 | - **기술 선택**: 275 | - PyMuPDF를 사용하여 PDF 분석과 편집 수행 276 | - 텍스트 처리에 특화하여 문제의 복잡화 방지 277 | - 수식, 표, 페이지 재구성 등의 복잡한 AI 인식은 수행하지 않음 278 | 279 | - **복잡한 처리를 피하는 이유**: 280 | - 수식, 표, PDF 페이지 재구성의 AI 인식에는 심각한 성능 병목 현상 존재 281 | - 복잡한 AI 처리는 계산 비용이 높음 282 | - 처리 시간이 크게 증가(수십 초 이상 소요 가능) 283 | - 프로덕션 환경에서의 대규모 저비용 배포가 어려움 284 | - 온라인 서비스의 신속한 응답에 부적합 285 | 286 | - **프로젝트 위치**: 287 | - 레이아웃을 유지한 PDF 파일의 번역이 주목적 288 | - PDF의 AI 지원 읽기에 효율적인 구현 방법 제공 289 | - 최적의 성능과 비용 비율 추구 290 | 291 | - **성능**: 292 | - PolyglotPDF API 서비스의 응답 시간: 약 1초/페이지 293 | - 낮은 계산 리소스 소비로 확장 가능한 배포 가능 294 | - 비용 효율이 높아 상업적 사용에 적합 295 | -------------------------------------------------------------------------------- /README_TW.md: -------------------------------------------------------------------------------- 1 | # PolyglotPDF 2 | 3 | [![Python](https://img.shields.io/badge/python-3.8-blue.svg)](https://www.python.org/) 4 | [![PDF](https://img.shields.io/badge/pdf-documentation-brightgreen.svg)](https://example.com) 5 | [![LaTeX](https://img.shields.io/badge/latex-typesetting-orange.svg)](https://www.latex-project.org/) 6 | [![Translation](https://img.shields.io/badge/translation-supported-yellow.svg)](https://example.com) 7 | [![Math](https://img.shields.io/badge/math-formulas-red.svg)](https://example.com) 8 | [![PyMuPDF](https://img.shields.io/badge/PyMuPDF-1.24.0-blue.svg)](https://pymupdf.readthedocs.io/) 9 | 10 | ## 演示 11 | 12 | 13 | ### [🎬 觀看完整影片](https://github.com/CBIhalsen/PolyglotPDF/blob/main/demo.mp4) 14 | 翻譯API選項已新增LLMs。推薦模型:Doubao、Qwen、deepseek v3、gpt4-o-mini。色彩空間錯誤可透過填充PDF檔案的白色區域來解決。舊有的text to text翻譯API已被移除。 15 | 16 | 此外,我們正在考慮新增arXiv搜尋功能和arXiv論文的LaTeX翻譯後渲染功能。 17 | 18 | ### 頁面展示 19 |
20 | 21 | 22 |
23 |
24 | 25 | 26 |
27 | 28 | # 中國大型語言模型API申請 29 | 30 | ## Doubao & Deepseek 31 | 從火山引擎平台申請: 32 | - 申請地址:[火山引擎-Doubao](https://www.volcengine.com/product/doubao/) 33 | - 支援模型:Doubao、Deepseek系列模型 34 | 35 | ## 通義千問(Qwen) 36 | 從阿里雲平台申請: 37 | - 申請地址:[阿里雲-通義千問](https://cn.aliyun.com/product/tongyi?from_alibabacloud=&utm_content=se_1019997984) 38 | - 支援模型:Qwen-Max、Qwen-Plus等系列模型 39 | 40 | ## 概述 41 | PolyglotPDF是一款使用特殊技術,能夠超高速識別PDF文件中文字、表格、數學公式的先進PDF處理工具。通常能在1秒內完成處理,並支援OCR功能和完整的版面保持翻譯功能。整份文件的翻譯通常能在10秒內完成(速度依翻譯API提供商而異)。 42 | 43 | ## 主要特點 44 | - **超高速識別**:約1秒內完成PDF中文字、表格、數學公式的處理 45 | - **版面保持翻譯**:翻譯時完整保持原文件的格式 46 | - **OCR支援**:高效處理掃描版文件 47 | - **文字基礎PDF**:無需GPU 48 | - **快速翻譯**:約10秒完成PDF整體翻譯 49 | - **靈活API整合**:可與各種翻譯服務提供商連接 50 | - **網頁基礎比較介面**:支援原文與譯文並列比較 51 | - **強化OCR功能**:更準確的文字識別和處理能力 52 | - **離線翻譯支援**:使用小型翻譯模型 53 | 54 | ## 安裝與設定 55 | 56 |
57 | 標準安裝 58 | 59 | 1. 複製儲存庫: 60 | ```bash 61 | git clone https://github.com/CBIhalsen/Polyglotpdf.git 62 | cd Polyglotpdf 63 | ``` 64 | 65 | 2. 安裝相依套件: 66 | ```bash 67 | pip install -r requirements.txt 68 | ``` 69 | 70 | 3. 在config.json中設定API金鑰。不建議使用alicloud翻譯API。 71 | 72 | 4. 執行應用程式: 73 | ```bash 74 | python app.py 75 | ``` 76 | 77 | 5. 存取網頁介面: 78 | 在瀏覽器中開啟 `http://127.0.0.1:8000` 79 |
80 | 81 |
82 | Docker 使用說明 83 | 84 | ## 無持久化快速啟動 85 | 86 | 如果您想快速測試PolyglotPDF而不設置持久化目錄: 87 | 88 | ```bash 89 | # 先拉取映像 90 | docker pull 2207397265/polyglotpdf:latest 91 | 92 | # 不掛載卷的容器運行(容器刪除後數據將丟失) 93 | docker run -d -p 12226:12226 --name polyglotpdf 2207397265/polyglotpdf:latest 94 | ``` 95 | 96 | 這是嘗試PolyglotPDF最快的方式,但容器停止後,所有上傳的PDF和配置更改都會丟失。 97 | 98 | ## 持久化存儲安裝 99 | 100 | ```bash 101 | # 創建必要目錄 102 | mkdir -p config fonts static/original static/target static/merged_pdf 103 | 104 | # 創建配置文件 105 | nano config/config.json # 或使用任何文本編輯器 106 | # 將項目中的配置模板複製到該文件 107 | # 請注意填寫您的API金鑰等配置信息 108 | 109 | # 設置權限 110 | chmod -R 755 config fonts static 111 | ``` 112 | 113 | ## 快速啟動 114 | 115 | 使用以下命令拉取並運行 PolyglotPDF Docker 映像: 116 | 117 | ```bash 118 | # 拉取映像 119 | docker pull 2207397265/polyglotpdf:latest 120 | 121 | # 運行容器 122 | docker run -d -p 12226:12226 --name polyglotpdf \ 123 | -v ./config/config.json:/app/config.json \ 124 | -v ./fonts:/app/fonts \ 125 | -v ./static/original:/app/static/original \ 126 | -v ./static/target:/app/static/target \ 127 | -v ./static/merged_pdf:/app/static/merged_pdf \ 128 | 2207397265/polyglotpdf:latest 129 | ``` 130 | 131 | ## 訪問應用 132 | 133 | 容器啟動後,在瀏覽器中打開: 134 | ``` 135 | http://localhost:12226 136 | ``` 137 | 138 | ## 使用 Docker Compose 139 | 140 | 創建 `docker-compose.yml` 文件: 141 | 142 | ```yaml 143 | version: '3' 144 | services: 145 | polyglotpdf: 146 | image: 2207397265/polyglotpdf:latest 147 | ports: 148 | - "12226:12226" 149 | volumes: 150 | - ./config.json:/app/config.json # 配置文件 151 | - ./fonts:/app/fonts # 字體文件 152 | - ./static/original:/app/static/original # 原始PDF 153 | - ./static/target:/app/static/target # 翻譯後PDF 154 | - ./static/merged_pdf:/app/static/merged_pdf # 合併PDF 155 | restart: unless-stopped 156 | ``` 157 | 158 | 然後運行: 159 | 160 | ```bash 161 | docker-compose up -d 162 | ``` 163 | 164 | ## 常用 Docker 命令 165 | 166 | ```bash 167 | # 停止容器 168 | docker stop polyglotpdf 169 | 170 | # 重啟容器 171 | docker restart polyglotpdf 172 | 
173 | # 查看日誌 174 | docker logs polyglotpdf 175 | ``` 176 |
177 | 178 | ## 環境需求 179 | - Python 3.8+ 180 | - deepl==1.17.0 181 | - Flask==2.0.1 182 | - Flask-Cors==5.0.0 183 | - langdetect==1.0.9 184 | - Pillow==10.2.0 185 | - PyMuPDF==1.24.0 186 | - pytesseract==0.3.10 187 | - requests==2.31.0 188 | - tiktoken==0.6.0 189 | - Werkzeug==2.0.1 190 | 191 | ## 致謝 192 | 本專案受益於PyMuPDF強大的PDF處理和版面保持功能。 193 | 194 | ## 未來改進計劃 195 | - PDF聊天功能 196 | - 學術PDF搜尋整合 197 | - 進一步提升處理速度 198 | 199 | ### 待修正問題 200 | - **問題描述**:應用程式重新編輯時的錯誤:`code=4: only Gray, RGB, and CMYK colorspaces supported` 201 | - **現象**:編輯文字區塊時出現不支援的色彩空間 202 | - **目前解決方案**:跳過包含不支援色彩空間的文字區塊 203 | - **解決方向**:使用OCR模式處理包含不支援色彩空間的整個頁面 204 | - **重現範例**:[查看不支援色彩空間的PDF範例](https://github.com/CBIhalsen/PolyglotPDF/blob/main/static/colorspace_issue_sample.pdf) 205 | 206 | ### TODO 207 | - □ **自定義術語庫**:支援自定義術語庫,設置prompt進行領域專業翻譯 208 | - □ **AI重排功能**:把雙欄的PDF轉換成HTML部落格的單欄線性閱讀格式,便於移動端閱讀 209 | - □ **多格式匯出**:翻譯結果可以匯出為PDF、HTML、Markdown等格式 210 | - □ **多端同步**:電腦上翻譯完,手機上也能看 211 | - □ **增強合併邏輯**:現版本預設合併邏輯把檢測字體名字全部關閉,加上水平、垂直、x、y範圍重疊全部合併 212 | 213 | ### 字型最佳化 214 | 目前在`main.py`的`start`函數中,使用預設字型設定插入文字: 215 | ```python 216 | # 目前設定 217 | css=f"* {{font-family:{get_font_by_language(self.target_language)};font-size:auto;color: #111111 ;font-weight:normal;}}" 218 | ``` 219 | 220 | 字型顯示可透過以下方式最佳化: 221 | 222 | 1. **修改預設字型設定** 223 | ```python 224 | # 自訂字型樣式 225 | css=f"""* {{ 226 | font-family: {get_font_by_language(self.target_language)}; 227 | font-size: auto; 228 | color: #111111; 229 | font-weight: normal; 230 | letter-spacing: 0.5px; # 調整字距 231 | line-height: 1.5; # 調整行高 232 | }}""" 233 | ``` 234 | 235 | 2. 
def download_font_from_github(language, font_filename, target_path, timeout=30):
    """Download a font file from the PolyglotPDF-fonts GitHub repository.

    The file is fetched from ``{language}_fonts/{font_filename}`` on the
    ``main`` branch and written to ``target_path``.

    Args:
        language: Language code used to build the ``{language}_fonts`` folder name.
        font_filename: File name of the font inside that folder.
        target_path: Local path the downloaded bytes are written to.
        timeout: Seconds to wait for GitHub before giving up. Without a
            timeout ``requests`` can block forever and the ``Timeout``
            branch below could never trigger.

    Returns:
        True when the font was downloaded and saved; False when the file
        does not exist upstream or the request failed (a diagnostic is
        printed in both cases).
    """
    github_base_url = "https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF-fonts/main"
    font_folder = f"{language}_fonts"
    github_url = f"{github_base_url}/{font_folder}/{font_filename}"

    try:
        response = requests.get(github_url, timeout=timeout)

        # GitHub answers 404 when the font is not in the repository; explain
        # how to request it instead of raising.
        if response.status_code == 404:
            print("\n=== 字体文件未找到 ===")
            print(f"在GitHub仓库中未找到所需的字体文件:")
            print(f"- 语言: {language}")
            print(f"- 字体文件: {font_filename}")
            print(f"- 预期路径: {font_folder}/{font_filename}")
            print("\n请通过以下步骤请求添加字体:")
            print("1. 访问: https://github.com/CBIhalsen/PolyglotPDF-fonts")
            print("2. 创建新的Issue")
            # Fixed: this line was missing the f-prefix, so "{language}" was
            # printed literally.
            print(f"3. 标题: [Font Request] Add font for {language}")
            print("4. 内容:")
            print(f" - Language: {language}")
            print(f" - Font filename: {font_filename}")
            print(f" - Expected path: {font_folder}/{font_filename}")
            print(" - Additional details: (请描述使用场景和需求)\n")
            return False

        response.raise_for_status()  # any other HTTP error
    except requests.exceptions.RequestException as e:
        if isinstance(e, requests.exceptions.ConnectionError):
            print(f"网络连接错误: 无法连接到GitHub。请检查您的网络连接。")
        elif isinstance(e, requests.exceptions.Timeout):
            print(f"请求超时: GitHub响应时间过长。")
        else:
            print(f"下载字体文件失败: {str(e)}")
        return False

    # A bare file name has an empty dirname, and os.makedirs("") raises.
    target_dir = os.path.dirname(target_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(target_path, 'wb') as f:
        f.write(response.content)

    print(f"成功从GitHub下载字体文件到: {target_path}")
    return True


def check_glyph_coverage(font, text):
    """Return the characters of ``text`` that ``font`` has no glyph mapping for.

    Args:
        font: Object exposing ``getBestCmap()`` (a fontTools ``TTFont``).
        text: Characters to look up.

    Returns:
        List of characters, in input order, whose code point is absent from
        the font's best cmap. If the font has no usable cmap at all, every
        character is reported as missing.
    """
    # getBestCmap() may return None; treat that as an empty mapping.
    cmap = font.getBestCmap() or {}
    return [char for char in text if ord(char) not in cmap]


def subset_font(in_font_path, out_font_path, text, language):
    """Subset a font so it only keeps the glyphs needed for ``text``.

    If ``in_font_path`` does not exist, the font is first downloaded via
    ``download_font_from_github``. Characters the font cannot render are
    reported and left out of the subset.

    Args:
        in_font_path: Path of the source font file.
        out_font_path: Path the subset font is written to.
        text: Text whose characters must be kept.
        language: Language code, used only to locate the font on GitHub.

    Returns:
        None. Returns early, without writing anything, when the source font
        is missing and cannot be downloaded.
    """
    started = datetime.datetime.now()

    if not os.path.exists(in_font_path):
        print(f"输入字体文件不存在: {in_font_path}")
        print("尝试从GitHub下载字体文件...")
        font_filename = os.path.basename(in_font_path)
        if not download_font_from_github(language, font_filename, in_font_path):
            print("无法获取字体文件,子集化操作终止")
            return

    output_dir = os.path.dirname(out_font_path)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(f"创建输出目录: {output_dir}")

    # De-duplicate and sort the characters to keep.
    unique_chars = "".join(sorted(set(text)))

    font = TTFont(in_font_path)
    try:
        missing_chars = check_glyph_coverage(font, unique_chars)
        if missing_chars:
            print("\n=== 字形缺失警告 ===")
            print(f"字体文件 {os.path.basename(in_font_path)} 中未找到以下字符:")
            print("".join(missing_chars))
            print("这些字符将使用 PyMuPDF 默认字体进行显示")
            print("==================\n")

            # Only subset characters that actually have a glyph.
            missing = set(missing_chars)
            unique_chars = "".join(ch for ch in unique_chars if ch not in missing)

        subsetter = Subsetter(options=Options())
        subsetter.populate(text=unique_chars)
        subsetter.subset(font)

        font.save(out_font_path)
    finally:
        # Release the underlying font file even if subsetting fails.
        font.close()

    print(f"生成子集字体: {out_font_path} (仅包含所需字形)")

    elapsed_time = (datetime.datetime.now() - started).total_seconds()
    print(f"子集化运行时间: {elapsed_time} 秒")
def translate(texts, original_lang, target_lang):
    """Translate a batch of texts through the Youdao v2 (batch) API.

    Credentials are read from ``config.json`` in the current working
    directory, under ``translation_services -> youdao`` (``app_key`` and
    ``app_secret``).

    Args:
        texts: A string or an iterable of strings to translate.
        original_lang: Source language code (e.g. ``'auto'``).
        target_lang: Target language code; ``'zh'`` is sent as ``'zh-CHS'``.

    Returns:
        A list with one translation per input text, ``[]`` when there is
        nothing to translate, or ``None`` when the request or the response
        handling fails (the error is printed).

    Raises:
        ValueError: If ``config.json`` has no ``youdao`` credentials.
    """
    YOUDAO_URL = 'https://openapi.youdao.com/v2/api'
    REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection blocks forever

    # Normalise the input first so empty work needs neither config nor network.
    if isinstance(texts, str):
        texts = [texts]
    texts = list(texts)
    if not texts:
        return []

    with open("config.json", 'r', encoding='utf-8') as f:
        config = json.load(f)

    if target_lang == 'zh':
        target_lang = 'zh-CHS'
    service_name = "youdao"
    credentials = config['translation_services'].get(service_name)
    if not credentials:
        raise ValueError(f"Translation service '{service_name}' not found in config")

    def encrypt(sign_str):
        """Return the hex SHA-256 digest of ``sign_str``."""
        return hashlib.sha256(sign_str.encode('utf-8')).hexdigest()

    def truncate(q):
        """Shorten ``q`` for signing: first 10 chars + length + last 10 chars."""
        if q is None:
            return None
        size = len(q)
        return q if size <= 20 else q[0:10] + str(size) + q[size - 10:size]

    def do_request(data):
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        return requests.post(YOUDAO_URL, data=data, headers=headers,
                             timeout=REQUEST_TIMEOUT)

    try:
        data = {
            'from': original_lang,
            'to': target_lang,
            'signType': 'v3',
            'curtime': str(int(time.time())),
            'appKey': credentials['app_key'],
            'q': texts,
            'salt': str(uuid.uuid1()),
            # NOTE(review): placeholder vocabulary ID ("your user vocabulary
            # ID") is sent as-is; confirm whether it should come from config.
            'vocabId': "您的用户词表ID"
        }

        # Signature: sha256(appKey + truncate(all texts joined) + salt + curtime + appSecret)
        sign_str = (credentials['app_key'] +
                    truncate(''.join(texts)) +
                    data['salt'] +
                    data['curtime'] +
                    credentials['app_secret'])
        data['sign'] = encrypt(sign_str)

        response = do_request(data)
        response_data = json.loads(response.content.decode("utf-8"))

        # Report a failed call with its error code, not a bare KeyError.
        if "translateResults" not in response_data:
            raise ValueError(
                f"Youdao API returned no translations "
                f"(errorCode={response_data.get('errorCode')})"
            )

        translations = [result["translation"] for result in response_data["translateResults"]]
        print(translations)
        return translations

    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print(f"翻译出错: {str(e)}")
        return None


# Usage example
if __name__ == '__main__':
    texts = ["很久很久以前", '待输入的文字"2', "待输入的文字3"]
    original_lang = 'auto'
    target_lang = 'zh'

    results = translate(texts, original_lang=original_lang, target_lang=target_lang)

    if results:
        for original, translated in zip(texts, results):
            print(f"原文: {original}")
            print(f"译文: {translated}\n")
创建输出目录(供后面使用,onedir 模式下可自由放置打包产物) 33 | # #──────────────────────────────────────────────────────────────────────── 34 | # dist_app_dir = current_dir / "dist" / exe_name 35 | # os.makedirs(dist_app_dir, exist_ok=True) 36 | # 37 | # #──────────────────────────────────────────────────────────────────────── 38 | # # 3. 根据你的需要,检查关键资源文件 39 | # #──────────────────────────────────────────────────────────────────────── 40 | # required_files = { 41 | # 'app.py': True, 42 | # 'index.html': True, 43 | # 'config.json': True, 44 | # 'static': True, 45 | # 'recent.json': True 46 | # } 47 | # for file_name, required in required_files.items(): 48 | # file_path = current_dir / file_name 49 | # if not file_path.exists() and required: 50 | # print(f"错误: 必要文件 '{file_name}' 不存在") 51 | # sys.exit(1) 52 | # 53 | # #──────────────────────────────────────────────────────────────────────── 54 | # # 4. 构建 PyInstaller 的命令 55 | # # 使用 --onedir 模式,并设置可执行文件名称为 EbookTranslator 56 | # #──────────────────────────────────────────────────────────────────────── 57 | # pyinstaller_cmd = [ 58 | # sys.executable, '-m', 'PyInstaller', 59 | # '--noconfirm', 60 | # '--onedir', # onedir 模式 61 | # '--name', exe_name # 生成文件(文件夹)名 62 | # ] 63 | # 64 | # # 如果在 Windows 平台,并且有 icon.ico,就使用图标 65 | # icon_file = current_dir / "icon.ico" 66 | # if system == 'windows' and icon_file.exists(): 67 | # pyinstaller_cmd.extend(["--icon", str(icon_file)]) 68 | # 69 | # #──────────────────────────────────────────────────────────────────────── 70 | # # 5. 
设置 --add-data 参数,打包静态资源与需要的文件 71 | # #──────────────────────────────────────────────────────────────────────── 72 | # data_files = [] 73 | # if (current_dir / 'static').exists(): 74 | # data_files.append((str(current_dir / 'static'), 'static')) 75 | # if (current_dir / 'index.html').exists(): 76 | # data_files.append((str(current_dir / 'index.html'), '.')) 77 | # if (current_dir / 'config.json').exists(): 78 | # data_files.append((str(current_dir / 'config.json'), '.')) 79 | # if (current_dir / 'recent.json').exists(): 80 | # data_files.append((str(current_dir / 'recent.json'), '.')) 81 | # 82 | # for src, dst in data_files: 83 | # pyinstaller_cmd.extend(['--add-data', f"{src}{separator}{dst}"]) 84 | # 85 | # # 最后指定主脚本(app.py) 86 | # pyinstaller_cmd.append(str(current_dir / 'app.py')) 87 | # 88 | # #──────────────────────────────────────────────────────────────────────── 89 | # # 6. 打印并执行命令 90 | # #──────────────────────────────────────────────────────────────────────── 91 | # print("执行 PyInstaller 命令:\n", " ".join(map(str, pyinstaller_cmd))) 92 | # try: 93 | # subprocess.run(pyinstaller_cmd, check=True) 94 | # print("PyInstaller 打包完成") 95 | # except Exception as e: 96 | # print(f"PyInstaller 打包失败: {e}") 97 | # sys.exit(1) 98 | # 99 | # #──────────────────────────────────────────────────────────────────────── 100 | # # 7. 打包完成后,一般会在 dist/EbookTranslator 目录下看到: 101 | # # ├─ EbookTranslator.exe (Windows) 或 EbookTranslator(其它系统) 102 | # # ├─ 静态资源、依赖库、.. 
def main():
    """Build the Flask app with PyInstaller (onedir) and copy resources beside it.

    Steps: make sure PyInstaller is available, recreate ``dist/EbookTranslator``,
    verify the required project files exist, run PyInstaller on ``app.py``,
    copy HTML/config/static resources into the output folder, then remove
    the ``build`` directory and the generated ``.spec`` file. Exits with
    status 1 when a required file is missing or PyInstaller fails.
    """
    # 1. Locate the project directory and make sure PyInstaller is installed.
    project_dir = Path(__file__).parent.absolute()
    print(f"当前目录: {project_dir}")

    try:
        import PyInstaller
        print("PyInstaller 已安装")
    except ImportError:
        print("安装 PyInstaller...")
        subprocess.run([sys.executable, "-m", "pip", "install", "pyinstaller"], check=True)

    system = platform.system().lower()
    print(f"当前系统: {system}")

    # Executable name (PyInstaller appends .exe on Windows).
    exe_name = "EbookTranslator"

    # 2. Start from a clean output directory.
    dist_app_dir = project_dir / "dist" / exe_name
    if dist_app_dir.exists():
        print(f"清理已存在的输出目录: {dist_app_dir}")
        shutil.rmtree(dist_app_dir)
    os.makedirs(dist_app_dir, exist_ok=True)

    # 3. Abort early when a required resource is missing.
    required_names = (
        'app.py',
        'index.html',
        'pdfviewer.html',
        'pdfviewer2.html',
        'merge_pdf.py',
        'config.json',
        'static',
    )
    for file_name in required_names:
        if not (project_dir / file_name).exists():
            print(f"错误: 必要文件 '{file_name}' 不存在")
            sys.exit(1)

    # 4. Assemble the PyInstaller command; resources are copied by hand later.
    pyinstaller_cmd = [sys.executable, '-m', 'PyInstaller', '--noconfirm', '--onedir', '--name', exe_name]

    icon_file = project_dir / "icon.ico"
    if system == 'windows' and icon_file.exists():
        pyinstaller_cmd += ["--icon", str(icon_file)]

    # The entry script goes last.
    pyinstaller_cmd.append(str(project_dir / 'app.py'))

    # 5. Run PyInstaller.
    print("执行 PyInstaller 命令:\n", " ".join(map(str, pyinstaller_cmd)))
    try:
        subprocess.run(pyinstaller_cmd, check=True)
        print("PyInstaller 打包完成")
    except Exception as e:
        print(f"PyInstaller 打包失败: {e}")
        sys.exit(1)

    # 6. Copy resources next to the executable so they stay editable.
    print("\n开始复制资源文件到输出目录...")

    def copy_if_present(file_name):
        source = project_dir / file_name
        if source.exists():
            print(f"复制 {file_name} 到 {dist_app_dir}")
            shutil.copy2(source, dist_app_dir / file_name)

    for file_name in ('index.html', 'pdfviewer.html', 'pdfviewer2.html',
                      'merge_pdf.py', 'config.json', 'recent.json'):
        copy_if_present(file_name)

    static_src = project_dir / 'static'
    if static_src.exists():
        static_dest = dist_app_dir / 'static'
        print(f"复制 static 目录到 {static_dest}")
        if static_dest.exists():
            shutil.rmtree(static_dest)
        shutil.copytree(static_src, static_dest)

    for file_name in ('README.md', 'LICENSE', 'requirements.txt'):
        copy_if_present(file_name)

    # 7. Remove PyInstaller's temporary artefacts.
    build_dir = project_dir / "build"
    spec_file = project_dir / f"{exe_name}.spec"

    if build_dir.exists():
        print(f"清理 build 目录: {build_dir}")
        shutil.rmtree(build_dir)
    if spec_file.exists():
        print(f"删除 spec 文件: {spec_file}")
        spec_file.unlink()

    # 8. Summary.
    exe_suffix = '.exe' if system == 'windows' else ''
    print("\n打包完成!")
    print(f"应用程序位于: {dist_app_dir}")
    print(f"可执行文件: {dist_app_dir / exe_name}{exe_suffix}")
    print("所有资源文件已直接复制到输出目录,可以直接查看和编辑。")


if __name__ == "__main__":
    main()
def convert_to_pdf(input_file, output_file=None):
    """
    Convert a document that PyMuPDF can open (EPUB, XPS, FB2, ...) into a PDF file.

    Args:
        input_file (str): Path of the source document.
        output_file (str, optional): Path of the PDF to write. When None, the
            PDF is written next to the input file with a ``.pdf`` extension.

    Returns:
        bool: True when the PDF file exists after the conversion, False on any
        failure (the error is printed, never raised).
    """
    try:
        # Normalise separators so the same path works on every platform.
        input_file = os.path.normpath(input_file)

        if not os.path.exists(input_file):
            print(f"错误:输入文件 '{input_file}' 不存在")
            return False

        # Derive the output path from the input path when none was given.
        if output_file is None:
            file_dir = os.path.dirname(input_file)
            file_name = os.path.basename(input_file)
            name_without_ext = os.path.splitext(file_name)[0]
            output_file = os.path.join(file_dir, f"{name_without_ext}.pdf")

        # Make sure the destination directory exists.
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        print(f"正在处理文件: {input_file}")
        print(f"输出文件将保存为: {output_file}")

        # 1. Open the source document and render it to an in-memory PDF.
        #    The context manager closes the document even if conversion fails,
        #    and releases the source file before the output is written.
        with fitz.open(input_file) as doc:
            print(f"文档页数: {len(doc)}")
            pdf_bytes = doc.convert_to_pdf()

        # 2. Re-open the byte stream as a real PDF document and save it.
        with fitz.open("pdf", pdf_bytes) as pdf_doc:
            pdf_doc.save(output_file)

        # 3. Confirm the file really landed on disk.
        if os.path.exists(output_file):
            print(f"转换成功!PDF文件已保存为: {output_file}")
            return True
        else:
            print("转换似乎完成,但输出文件未找到")
            return False

    except fitz.FileDataError as e:
        print(f"文件格式错误或文件损坏:{str(e)}")
    except PermissionError as e:
        print(f"权限错误:无法访问或写入文件 - {str(e)}")
    except Exception as e:
        print(f"转换失败,错误类型: {type(e).__name__}")
        print(f"错误详情: {str(e)}")
        # Print the full stack trace to help debugging unexpected failures.
        import traceback
        traceback.print_exc()

    return False
def download_file(url, dest_folder, file_name, timeout=60):
    """
    Download ``url`` and store it as ``file_name`` inside ``dest_folder``.

    The body is streamed to a temporary ``.part`` file and renamed once the
    transfer is complete, so a large model file is never held in memory and an
    interrupted transfer never leaves a truncated file under the final name.

    Args:
        url (str): Address of the file to fetch.
        dest_folder (str): Folder that receives the file (created if missing).
        file_name (str): Name of the file inside ``dest_folder``.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        bool: True when the file was saved, False when the server did not
        answer with status 200. Network errors raised by ``requests`` propagate.
    """
    os.makedirs(dest_folder, exist_ok=True)
    dest_path = os.path.join(dest_folder, file_name)
    part_path = dest_path + ".part"

    try:
        with requests.get(url, allow_redirects=True, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download {file_name}. Status code: {response.status_code}")
                return False
            with open(part_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)
        # Publish the file under its final name only once it is complete.
        os.replace(part_path, dest_path)
        return True
    finally:
        # Still present only when the transfer failed part-way through.
        if os.path.exists(part_path):
            os.remove(part_path)
def download_model_files(model_name):
    """
    Download the files of a Hugging Face model into ``translation_models/<name>``.

    If the model folder already exists the model is treated as downloaded and
    nothing is fetched. If a run stores no file at all, the empty folder is
    removed again so that a later call retries instead of skipping the model.
    A partially downloaded folder is kept, because some of the listed files
    are optional and may legitimately be missing on the server.

    Args:
        model_name (str): Repository id, e.g. ``"Helsinki-NLP/opus-mt-en-es"``.
    """
    files_to_download = [
        "config.json",
        "pytorch_model.bin",
        "tokenizer_config.json",
        "vocab.json",
        "source.spm",
        "target.spm"  # the two .spm files may be absent if the model does not use SentencePiece
    ]

    # The folder is named after the last component of the repository id.
    model_folder_name = model_name.split('/')[-1]
    model_folder = os.path.join("translation_models", model_folder_name)

    if os.path.exists(model_folder):
        return

    os.makedirs(model_folder)

    base_url = f"https://huggingface.co/{model_name}/resolve/main/"
    try:
        for file_name in files_to_download:
            download_url = base_url + file_name
            print(f"Downloading {file_name}...")
            download_file(download_url, model_folder, file_name)
    finally:
        # Do not leave an empty folder behind: it would make every later call
        # believe the model is already available.
        if not os.listdir(model_folder):
            os.rmdir(model_folder)
def merge_pdfs_horizontally(pdf1_path, pdf2_path, output_path, spacing=0):
    """
    Merge two PDFs page by page, placing each pair of pages side by side.

    :param pdf1_path: path of the PDF shown on the left
    :param pdf2_path: path of the PDF shown on the right
    :param output_path: path of the merged PDF to write
    :param spacing: horizontal gap between the two pages, in points
    :raises FileNotFoundError: if either input file does not exist
    :raises ValueError: if a PDF is empty or the page counts differ
    """
    if not os.path.exists(pdf1_path):
        raise FileNotFoundError(f"找不到第一个PDF文件: {pdf1_path}")
    if not os.path.exists(pdf2_path):
        raise FileNotFoundError(f"找不到第二个PDF文件: {pdf2_path}")

    # Context managers close all three documents even when validation,
    # rendering or saving raises.
    with fitz.open(pdf1_path) as doc1, fitz.open(pdf2_path) as doc2, fitz.open() as result_doc:
        if doc1.page_count == 0 or doc2.page_count == 0:
            raise ValueError("Both PDFs must have at least one page")

        if doc1.page_count != doc2.page_count:
            raise ValueError("Both PDFs must have the same number of pages")

        for page_num in range(doc1.page_count):
            rect1 = doc1[page_num].rect
            rect2 = doc2[page_num].rect

            # The new page is wide enough for both pages plus the gap and as
            # tall as the taller of the two.
            x_shift = rect1.width + spacing
            new_width = rect1.width + rect2.width + spacing
            new_height = max(rect1.height, rect2.height)
            new_page = result_doc.new_page(width=new_width, height=new_height)

            # Placement is controlled entirely by the target rectangle; the
            # fourth positional parameter of show_pdf_page is keep_proportion,
            # not a matrix, so no matrix is passed.
            new_page.show_pdf_page(rect1, doc1, page_num)
            new_page.show_pdf_page(
                fitz.Rect(x_shift, 0, x_shift + rect2.width, new_height),
                doc2, page_num)

        # A bare file name has no directory part; os.makedirs('') would fail.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        result_doc.save(output_path)
def create_pdf_thumbnail(pdf_path, width=400):
    """
    Render the first page of a PDF as a PNG thumbnail.

    The image is saved as ``<pdf name>.png`` in a ``thumbnail`` folder located
    one level above the folder that contains the PDF.

    Args:
        pdf_path: Path of the PDF file.
        width: Width of the thumbnail in pixels.

    Returns:
        The path of the saved PNG, or None if anything went wrong (the error
        is printed, never raised).
    """
    try:
        # Open the PDF first so that no folder is created for a file that is
        # missing or unreadable; the context manager closes it on every path.
        with fitz.open(pdf_path) as doc:
            first_page = doc[0]

            # PDF file name without its extension.
            pdf_filename = os.path.splitext(os.path.basename(pdf_path))[0]

            # <folder containing the PDF>/../thumbnail
            pdf_absolute_path = os.path.abspath(pdf_path)
            parent_dir = os.path.dirname(os.path.dirname(pdf_absolute_path))
            thumbnail_dir = os.path.join(parent_dir, 'thumbnail')
            os.makedirs(thumbnail_dir, exist_ok=True)

            output_path = os.path.join(thumbnail_dir, f"{pdf_filename}.png")

            # Scale the page uniformly so the rendered image has the requested width.
            zoom = width / first_page.rect.width
            matrix = fitz.Matrix(zoom, zoom)

            pix = first_page.get_pixmap(matrix=matrix, alpha=False)
            pix.save(output_path)

        print(f"缩略图已保存到: {output_path}")
        return output_path

    except Exception as e:
        print(f"生成缩略图时发生错误: {str(e)}")
        return None
-------------------------------------------------------------------------------- /pdfviewer.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Enhanced Split PDF Viewer 7 | 8 | 132 | 133 | 134 |
135 | 136 |
137 | 138 |
139 | 145 |
146 | 149 | 152 |
153 |
154 |
155 | 156 | 157 |
158 | 159 | 160 |
161 | 162 |
163 | 169 |
170 | 173 | 176 |
177 |
178 |
179 |
180 | 181 | 282 | 283 | 284 | -------------------------------------------------------------------------------- /pdfviewer2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Single PDF Viewer 9 | 37 | 38 | 39 |
40 |
41 | 42 | 43 |
44 |
45 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /recent.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "index": 0, 4 | "date": "2025-04-13 01:34:09", 5 | "name": "2403.20127v1.pdf", 6 | "original_language": "auto", 7 | "target_language": "zh", 8 | "read": "0", 9 | "statue": "1" 10 | } 11 | ] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | deepl==1.17.0 2 | Flask==2.0.1 3 | flask-cors 4 | Pillow==10.2.0 5 | PyMuPDF==1.24.0 6 | pytesseract==0.3.10 7 | requests==2.31.0 8 | tiktoken==0.6.0 9 | Werkzeug==2.0.1 10 | aiohttp 11 | fontTools 12 | 13 | -------------------------------------------------------------------------------- /static/1.js: -------------------------------------------------------------------------------- 1 | 2 | // 在全局范围定义变量 3 | 4 | 5 | // 显示主页 6 | function showHome() { 7 | document.getElementById('recentread').innerHTML = 'Recent Reading'; 8 | document.getElementById('articleContainer').style.display = ''; 9 | document.getElementById('viewAllSection').style.display = 'flex'; 10 | document.querySelector('.sidebar-menu a[onclick="showHome()"]').classList.add('active'); 11 | document.querySelector('.sidebar-menu a[onclick="showAllRecent()"]').classList.remove('active'); 12 | document.querySelector('.sidebar-menu a[onclick="showSetup()"]').classList.remove('active'); // 添加这行 13 | loadArticles(true,true); 14 | document.getElementById('t-container').style.display = ''; 15 | } 16 | 17 | function showAllRecent() { 18 | document.getElementById('recentread').innerHTML = 'Recent Reading'; 19 | 20 | document.getElementById('articleContainer').style.display = ''; 21 | document.getElementById('viewAllSection').style.display = 'none'; 22 | document.querySelector('.sidebar-menu 
a[onclick="showHome()"]').classList.remove('active'); 23 | document.querySelector('.sidebar-menu a[onclick="showAllRecent()"]').classList.add('active'); 24 | document.querySelector('.sidebar-menu a[onclick="showSetup()"]').classList.remove('active'); // 添加这行 25 | loadArticles(false,true); 26 | document.getElementById('t-container').style.display = ''; 27 | } 28 | // 添加新的函数处理 Setup steps 29 | function showSetup() { 30 | // 隐藏其他部分(如果需要的话) 31 | 32 | 33 | document.getElementById('recentread').innerHTML = 'config.json'; 34 | document.getElementById('articleContainer').style.display = 'none'; 35 | document.getElementById('viewAllSection').style.display = 'none'; 36 | 37 | 38 | // 移除其他菜单项的 active 类 39 | document.querySelector('.sidebar-menu a[onclick="showHome()"]').classList.remove('active'); 40 | document.querySelector('.sidebar-menu a[onclick="showAllRecent()"]').classList.remove('active'); 41 | 42 | // 给 Setup steps 添加 active 类 43 | document.querySelector('.sidebar-menu a[onclick="showSetup()"]').classList.add('active'); 44 | document.getElementById('t-container').style.display = 'block'; 45 | } 46 | 47 | // 显示上传模态框 48 | function showUpload() { 49 | document.getElementById('uploadModal').style.display = 'block'; 50 | document.getElementById('upload_content-1').style.display = 'block'; 51 | document.getElementById('upload_content-2').style.display = 'none'; 52 | document.getElementById('languageSelection').style.display = 'none'; 53 | 54 | } 55 | 56 | 57 | 58 | // 显示设置模态框 59 | function showSettings() { 60 | document.getElementById('settingsModal').style.display = 'block'; 61 | } 62 | 63 | 64 | async function loadArticles(isLimited,first_reload) { 65 | const container = document.getElementById('articleContainer'); 66 | if (first_reload) { 67 | const record_show_staute = document.getElementById('record_show_staute'); 68 | record_show_staute.setAttribute('data-value', isLimited); 69 | } 70 | 71 | 72 | 73 | 74 | try { 75 | container.innerHTML = '
正在加载数据...
'; 76 | 77 | const response = await fetch('/recent.json'); 78 | if (!response.ok) { 79 | throw new Error(`HTTP error! status: ${response.status}`); 80 | } 81 | 82 | const data = await response.json(); 83 | container.innerHTML = ''; 84 | 85 | if (data.length === 0) { 86 | container.innerHTML = '
No reading records yet
'; 87 | return; 88 | } 89 | 90 | // 根据 index 排序(从大到小) 91 | let sortedArticles = [...data].sort((a, b) => b.index - a.index); 92 | 93 | // 如果需要限制显示数量 94 | if (isLimited) { 95 | sortedArticles = sortedArticles.slice(0, 3); 96 | } 97 | 98 | sortedArticles.forEach(article => { 99 | const articleCard = document.createElement('a'); 100 | articleCard.className = 'article-card'; 101 | 102 | // 上半部分div 103 | const topDiv = document.createElement('div'); 104 | topDiv.className = 'article-top'; 105 | topDiv.innerHTML = ` 106 | ${article.name} 107 | 108 | `; 109 | 110 | 111 | // 下半部分div 112 | const bottomDiv = document.createElement('div'); 113 | bottomDiv.className = 'article-bottom'; 114 | 115 | // 文章标题 116 | const titleDiv = document.createElement('div'); 117 | titleDiv.className = 'article-title'; 118 | titleDiv.innerHTML = `

${article.name}

`; 119 | 120 | // 信息行div 121 | const infoDiv = document.createElement('div'); 122 | infoDiv.className = 'article-info'; 123 | infoDiv.innerHTML = ` 124 | ${article.author || 'Unknown author'} 125 | ${article.date} 126 | ${article.original_language} - ${article.target_language} 127 | `; 128 | 129 | bottomDiv.appendChild(titleDiv); 130 | bottomDiv.appendChild(infoDiv); 131 | 132 | // 状态指示器 133 | const statusIndicator = document.createElement('div'); 134 | statusIndicator.className = 'status-indicator'; 135 | 136 | if (parseInt(article.statue) === 0) { 137 | statusIndicator.innerHTML = ''; 138 | articleCard.className += ' disabled'; 139 | articleCard.addEventListener('click', (e) => { 140 | e.preventDefault(); 141 | showToast('Translation is not complete yet, unable to view at this time.'); 142 | }); 143 | } else { 144 | statusIndicator.innerHTML = ''; 145 | articleCard.addEventListener('click', () => { 146 | const targetFileName = `${article.name.replace(/\.pdf$/, '')}_${article.target_language}.pdf`; 147 | const url = `/pdfviewer.html?name=${encodeURIComponent(article.name)}&name_target_language=${encodeURIComponent(targetFileName)}&index=${encodeURIComponent(article.index)}`; 148 | window.open(url, '_blank'); 149 | }); 150 | articleCard.style.cursor = 'pointer'; 151 | 152 | } 153 | bottomDiv.appendChild(statusIndicator); 154 | 155 | // 阅读状态标签 156 | const readStatus = document.createElement('div'); 157 | readStatus.className = `read-status ${parseInt(article.read) === 0 ? 'unread' : 'read'}`; 158 | readStatus.textContent = parseInt(article.read) === 0 ? 
'Unread' : 'Read'; 159 | 160 | // 三点菜单按钮 161 | const menuButton = document.createElement('button'); 162 | menuButton.className = 'menu-button'; 163 | menuButton.innerHTML = ''; 164 | 165 | articleCard.appendChild(topDiv); 166 | articleCard.appendChild(bottomDiv); 167 | articleCard.appendChild(readStatus); 168 | articleCard.appendChild(menuButton); 169 | 170 | container.appendChild(articleCard); 171 | 172 | // 菜单按钮点击事件 173 | menuButton.addEventListener('click', (e) => { 174 | e.preventDefault(); 175 | e.stopPropagation(); 176 | showMenu(e, article, e.currentTarget); 177 | }); 178 | }); 179 | } catch (error) { 180 | console.error('加载数据失败:', error); 181 | container.innerHTML = ` 182 |
183 | 加载数据失败,请稍后重试
184 | ${error.message} 185 |
186 | `; 187 | } 188 | 189 | } 190 | 191 | // 显示菜单函数 192 | 193 | 194 | 195 | 196 | // Toast提示函数 197 | function showToast(message) { 198 | const toast = document.createElement('div'); 199 | toast.className = 'toast'; 200 | toast.textContent = message; 201 | document.body.appendChild(toast); 202 | 203 | setTimeout(() => { 204 | toast.remove(); 205 | }, 2000); 206 | } 207 | 208 | 209 | // 显示菜单函数 210 | function showMenu(event, article) { 211 | const menu = document.createElement('div'); 212 | articleId = article.index 213 | article_name= article.name 214 | article_tl = article.target_language 215 | article_ol = article.original_language 216 | console.log(2,articleId) 217 | menu.className = 'article-menu'; 218 | menu.innerHTML = ` 219 | 220 | 221 | 222 | `; 223 | 224 | // 定位菜单 225 | menu.style.position = 'absolute'; 226 | menu.style.top = `${event.pageY}px`; 227 | menu.style.left = `${event.pageX}px`; 228 | 229 | document.body.appendChild(menu); 230 | 231 | // 点击其他地方关闭菜单 232 | document.addEventListener('click', function closeMenu(e) { 233 | if (!menu.contains(e.target) && e.target !== event.target) { 234 | menu.remove(); 235 | document.removeEventListener('click', closeMenu); 236 | } 237 | }); 238 | } 239 | 240 | 241 | function open_bilingual(articleId,article_name,article_tl,article_ol) { 242 | const url = `/pdfviewer2.html?name=${encodeURIComponent(article_name)}&target_language=${encodeURIComponent(article_tl)}&index=${encodeURIComponent(articleId)}&original_language=${encodeURIComponent(article_ol)}`; 243 | window.open(url, '_blank'); 244 | } 245 | 246 | 247 | // Toast提示函数 248 | function showToast(message) { 249 | const toast = document.createElement('div'); 250 | toast.className = 'toast'; 251 | toast.textContent = message; 252 | document.body.appendChild(toast); 253 | 254 | setTimeout(() => { 255 | toast.remove(); 256 | }, 2000); 257 | } 258 | 259 | 260 | 261 | // 页面加载完成后初始化 262 | document.addEventListener('DOMContentLoaded', function() { 263 | showHome(); 264 | 
/**
 * Return the value represented by the translation switch (`toggle2`):
 * its `data-on` attribute when checked, otherwise its `data-off` attribute.
 *
 * Both attributes are read from `toggle2`; the off-value was previously read
 * from the OCR switch (`toggle`).
 */
function getValue2() {
    return toggle2.checked ?
        toggle2.getAttribute('data-on') :
        toggle2.getAttribute('data-off');
}
Loading data...
'; 22 | try { 23 | const response = await fetch('/recent.json'); 24 | if (!response.ok) { 25 | throw new Error(`HTTP error! status: ${response.status}`); 26 | } 27 | const data = await response.json(); 28 | container.innerHTML = ''; 29 | 30 | if (!data || data.length === 0) { 31 | container.innerHTML = '
No records to batch manage
'; 32 | return; 33 | } 34 | 35 | // 按 index 倒序 36 | const sortedData = data.sort((a, b) => b.index - a.index); 37 | 38 | // 将卡片渲染到 container 39 | sortedData.forEach(item => { 40 | const card = document.createElement('div'); 41 | card.className = 'batch-card'; 42 | card.dataset.indexId = item.index; // 存一下,方便后续操作 43 | 44 | // 已读 / 未读 45 | const readStatus = item.read === "1" ? "Read" : "Unread"; 46 | 47 | // 注意:后端返回没有作者的话,可以用Unknown 48 | const author = item.author || "Unknown author"; 49 | const original_lan = item.original_language ; 50 | const target_lan = item.target_language; 51 | 52 | card.innerHTML = ` 53 |
${item.name}
54 |
55 |

Date: ${item.date}

56 |

Author: ${author}

57 |

Status: ${readStatus} || Convertion: 58 | 59 | ${original_lan} to ${target_lan}

60 | 61 |
62 | `; 63 | 64 | // 点击选择或取消选择 65 | card.addEventListener('click', () => { 66 | if (selectedBatchIds.has(item.index)) { 67 | selectedBatchIds.delete(item.index); 68 | card.classList.remove('selected'); 69 | } else { 70 | selectedBatchIds.add(item.index); 71 | card.classList.add('selected'); 72 | } 73 | }); 74 | 75 | container.appendChild(card); 76 | }); 77 | } catch (error) { 78 | console.error('加载数据失败:', error); 79 | container.innerHTML = `
Failed to load data
${error.message}
`; 80 | } 81 | } 82 | 83 | // 全选 / 取消全选 84 | function toggleSelectAll() { 85 | const container = document.getElementById('batchGrid'); 86 | const cards = container.querySelectorAll('.batch-card'); 87 | 88 | // 如果有一个未选,则本次点击后全选,否则取消全选 89 | let shouldSelectAll = false; 90 | if (selectedBatchIds.size < cards.length) { 91 | // 还有没选的,进行全选 92 | shouldSelectAll = true; 93 | } 94 | 95 | cards.forEach(card => { 96 | const indexId = parseInt(card.dataset.indexId, 10); 97 | if (shouldSelectAll) { 98 | selectedBatchIds.add(indexId); 99 | card.classList.add('selected'); 100 | } else { 101 | selectedBatchIds.delete(indexId); 102 | card.classList.remove('selected'); 103 | } 104 | }); 105 | } 106 | 107 | // 批量删除 108 | async function handleBatchDelete() { 109 | if (selectedBatchIds.size === 0) { 110 | alert('No articles selected!'); 111 | return; 112 | } 113 | 114 | // 简单确认 115 | if (!confirm('Are you sure you want to delete the selected items?')) { 116 | return; 117 | } 118 | 119 | // 发送到后端 120 | try { 121 | // 假设后端你新加了一个 /delete_batch 接口 122 | const response = await fetch('/delete_batch', { 123 | method: 'POST', 124 | headers: { 125 | 'Content-Type': 'application/json' 126 | }, 127 | body: JSON.stringify({ 128 | articleIds: Array.from(selectedBatchIds) 129 | }) 130 | }); 131 | 132 | if (!response.ok) throw new Error('Delete failed'); 133 | 134 | // 删除成功后刷新弹窗数据 135 | selectedBatchIds.clear(); 136 | loadBatchData(); 137 | getecount(); 138 | } catch (error) { 139 | console.error('删除失败:', error); 140 | alert('Delete failed, please try again!'); 141 | } 142 | } 143 | 144 | // 生成思维导图 145 | function handleMindMap() { 146 | if (selectedBatchIds.size === 0) { 147 | alert('No articles selected for mind map!'); 148 | return; 149 | } 150 | 151 | // 这里演示直接在控制台输出,你可以改为实际的请求 152 | console.log('生成思维导图,选中的ID:', Array.from(selectedBatchIds)); 153 | alert('Pretend to generate Mind Map for selected items'); 154 | } 155 | 156 | // 总结 157 | function handleSummary() { 158 | if (selectedBatchIds.size 
=== 0) { 159 | alert('No articles selected for summary!'); 160 | return; 161 | } 162 | 163 | // 同上,这里可以改成实际的后端接口 164 | console.log('生成总结,选中的ID:', Array.from(selectedBatchIds)); 165 | alert('Pretend to generate Summary for selected items'); 166 | } 167 | -------------------------------------------------------------------------------- /static/Figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/Figure_1.png -------------------------------------------------------------------------------- /static/Line-model-demo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/Line-model-demo.pdf -------------------------------------------------------------------------------- /static/Line-model-demo_zh.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/Line-model-demo_zh.pdf -------------------------------------------------------------------------------- /static/PolyglotPDF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/PolyglotPDF.png -------------------------------------------------------------------------------- /static/colorspace_issue_sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/colorspace_issue_sample.pdf -------------------------------------------------------------------------------- /static/demo.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/demo.gif -------------------------------------------------------------------------------- /static/demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/demo.mp4 -------------------------------------------------------------------------------- /static/merged_pdf/2403.20127v1_auto_zh.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/merged_pdf/2403.20127v1_auto_zh.pdf -------------------------------------------------------------------------------- /static/original/2403.20127v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/original/2403.20127v1.pdf -------------------------------------------------------------------------------- /static/original/2501.05450v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/original/2501.05450v1.pdf -------------------------------------------------------------------------------- /static/original/demo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/original/demo.pdf -------------------------------------------------------------------------------- /static/page1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/page1.png -------------------------------------------------------------------------------- /static/page2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/page2.jpeg -------------------------------------------------------------------------------- /static/page3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/page3.png -------------------------------------------------------------------------------- /static/page4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/page4.png -------------------------------------------------------------------------------- /static/setup.css: -------------------------------------------------------------------------------- 1 | 2 | select.t-input { 3 | width: 300px; 4 | padding: 8px; 5 | border: 1px solid #ccc; 6 | border-radius: 4px; 7 | background-color: white; 8 | transition: border-color 0.2s ease-in-out, box-shadow 0.2s ease-in-out; 9 | } 10 | 11 | select.t-input:focus { 12 | outline: none; 13 | border-color: #007bff; 14 | box-shadow: 0 0 5px rgba(0, 123, 255, 0.25); 15 | } 16 | 17 | /* 容器与标题等基础样式 */ 18 | .t-container { 19 | display: none; 20 | max-width: 90%; 21 | 22 | margin: 0 auto; 23 | font-family: Arial, sans-serif; 24 | } 25 | .t-header-container { 26 | display: flex; 27 | justify-content: space-between; 28 | align-items: center; 29 | margin-bottom: 20px; 30 | } 31 | .t-section { 32 | border: 1px solid #ddd; 33 | margin: 10px 0; 34 | padding: 10px; 35 | border-radius: 4px; 36 | background: #fff; 37 | } 38 | 39 | 
/* 展开/折叠区域的基础样式 */ 40 | .t-section-header { 41 | display: flex; 42 | justify-content: space-between; 43 | align-items: center; 44 | cursor: pointer; 45 | 46 | } 47 | 48 | /* 展开/折叠按钮:这里使用了一个“加号”图标,有旋转和淡入效果 */ 49 | .t-toggle-btn { 50 | background: none; 51 | border: none; 52 | font-size: 18px; 53 | cursor: pointer; 54 | transition: transform 0.4s ease; 55 | width: 30px; 56 | height: 30px; 57 | border-radius: 4px; 58 | display: flex; 59 | align-items: center; 60 | justify-content: center; 61 | color: #000; 62 | position: relative; 63 | } 64 | .t-toggle-btn:hover { 65 | background: #f0f0f0; 66 | } 67 | .t-toggle-btn.t-active { 68 | transform: rotate(45deg); 69 | color: #007bff; 70 | } 71 | 72 | /* 用于内容区域的动画展开:max-height + 透明度平滑过渡 */ 73 | .t-content { 74 | max-height: 0; 75 | overflow: hidden; 76 | transition: max-height 0.4s ease, opacity 0.4s ease; 77 | opacity: 0; 78 | } 79 | .t-content.t-active { 80 | max-height: 1000px; /* 根据内容高度可适度加大 */ 81 | opacity: 1; 82 | } 83 | 84 | /* 子区域卡片 */ 85 | .t-sub-section { 86 | margin-left: 20px; 87 | padding: 10px; 88 | border: 2px solid #eee; 89 | margin-top: 10px; 90 | border-radius: 4px; 91 | background-color: white; 92 | } 93 | 94 | /* 输入框分组与标签 */ 95 | .t-input-group { 96 | margin: 10px 0; 97 | } 98 | .t-input-group label { 99 | display: inline-block; 100 | width: 150px; 101 | font-weight: bold; 102 | color: #555; 103 | } 104 | 105 | /* 数量显示 */ 106 | .t-count-display { 107 | padding: 5px 10px; 108 | background-color: #f8f9fa; 109 | border: 1px solid #ddd; 110 | border-radius: 4px; 111 | display: inline-block; 112 | } 113 | 114 | /* 美化输入框 */ 115 | .t-input { 116 | width: 300px; 117 | padding: 8px; 118 | border: 1px solid #ccc; 119 | border-radius: 4px; 120 | transition: border-color 0.2s ease-in-out, box-shadow 0.2s ease-in-out; 121 | } 122 | .t-input:focus { 123 | outline: none; 124 | border-color: #007bff; 125 | box-shadow: 0 0 5px rgba(0, 123, 255, 0.25); 126 | } 127 | 128 | /* 保存按钮 */ 129 | .t-save-btn { 130 | 
background-color: #6366f1; 131 | color: white; 132 | border: none; 133 | padding: 10px 20px; 134 | border-radius: 5px; 135 | cursor: pointer; 136 | transition: background-color 0.3s; 137 | } 138 | 139 | .t-save-btn:hover { 140 | background-color: #4f46e5; 141 | } 142 | 143 | .t-save-btn.success { 144 | background-color: #22c55e; 145 | } 146 | 147 | .t-ppc { 148 | /* 基础排版 */ 149 | width: 120px; 150 | padding: 8px; 151 | font-size: 14px; 152 | 153 | /* 边框与圆角 */ 154 | border: 1px solid #ccc; 155 | border-radius: 4px; 156 | 157 | /* 其他外观 */ 158 | color: #333; 159 | background-color: #f9f9f9; 160 | outline: none; 161 | 162 | /* 过渡,提升交互体验 */ 163 | transition: border-color 0.3s, box-shadow 0.3s; 164 | } 165 | 166 | .t-ppc:focus { 167 | /* 获取焦点时边框颜色改变,比如蓝色 */ 168 | border-color: #4A90E2; 169 | box-shadow: 0 0 5px rgba(74,144,226,0.5); 170 | } 171 | -------------------------------------------------------------------------------- /static/setup.js: -------------------------------------------------------------------------------- 1 | // 页面加载时获取配置 2 | fetch('/config_json') 3 | .then(response => response.json()) 4 | .then(data => { 5 | initializeUI(data); 6 | }); 7 | 8 | // 初始化UI 9 | // 初始化UI 10 | function initializeUI(data) { 11 | document.getElementById('t-count').textContent = data.count; 12 | document.getElementById('t-count').value = data.count; 13 | document.getElementById('t-ppc').textContent = data.PPC; 14 | document.getElementById('t-ppc').value = data.PPC; 15 | document.getElementById('count_article').textContent += data.count; 16 | 17 | 18 | console.log('count', data.count); 19 | 20 | // 初始化翻译服务 (这部分代码保持不变) 21 | const translationServices = document.getElementById('t-translation-services'); 22 | Object.entries(data.translation_services).forEach(([service, config]) => { 23 | const serviceDiv = createServiceSection(service, config); 24 | translationServices.appendChild(serviceDiv); 25 | }); 26 | 27 | // 初始化OCR服务 (这部分代码保持不变) 28 | const ocrServices = 
document.getElementById('t-ocr-services'); 29 | Object.entries(data.ocr_services).forEach(([service, config]) => { 30 | const serviceDiv = createServiceSection(service, config); 31 | ocrServices.appendChild(serviceDiv); 32 | }); 33 | 34 | // 初始化默认配置 35 | const defaultServices = document.getElementById('t-default-services'); 36 | console.log('api',data.default_services.Translation_api) 37 | const defaultConfig = { 38 | 'ocr_model': { 39 | type: 'select', 40 | options: ['true', 'false'], 41 | value: data.default_services.ocr_model 42 | }, 43 | 'Enable_translation': { 44 | type: 'select', 45 | options: ['true', 'false'], 46 | value: data.default_services.Enable_translation 47 | }, 48 | 'Translation_api': { 49 | type: 'select', 50 | options: ['Doubao', 'Qwen', 'deepseek', 'openai', 'deepl', 'youdao','Grok', 'ThirdParty', 'GLM', 'bing'], 51 | value: data.default_services.Translation_api 52 | } 53 | }; 54 | 55 | // 在 initializeUI 函数中修改相关部分 56 | Object.entries(defaultConfig).forEach(([key, config]) => { 57 | const inputGroup = document.createElement('div'); 58 | inputGroup.className = 't-input-group'; 59 | 60 | const select = document.createElement('select'); 61 | select.className = 't-input'; 62 | 63 | config.options.forEach(option => { 64 | const optionElement = document.createElement('option'); 65 | optionElement.value = option; 66 | optionElement.textContent = option; 67 | 68 | // 修改选项匹配逻辑 69 | if (key === 'Translation_api') { 70 | // 直接比较字符串值 71 | optionElement.selected = (option === config.value); 72 | console.log(`Translation API option: ${option}, config value: ${config.value}, selected: ${optionElement.selected}`); 73 | } else if (key === 'ocr_model' || key === 'Enable_translation' ) { 74 | const optionBool = option.toLowerCase() === 'true'; 75 | optionElement.selected = (optionBool === config.value); 76 | } 77 | 78 | select.appendChild(optionElement); 79 | }); 80 | if (key === 'Enable_translation') { 81 | inputGroup.innerHTML = ``; 82 | } else { 83 | 
inputGroup.innerHTML = ``; 84 | } 85 | 86 | 87 | inputGroup.appendChild(select); 88 | defaultServices.appendChild(inputGroup); 89 | }); 90 | 91 | } 92 | 93 | // 创建服务配置区域 94 | function createServiceSection(serviceName, config) { 95 | const section = document.createElement('div'); 96 | section.className = 't-sub-section'; 97 | 98 | const header = document.createElement('div'); 99 | header.className = 't-section-header'; 100 | header.innerHTML = ` 101 |

${serviceName}

102 | 103 | `; 104 | 105 | const content = document.createElement('div'); 106 | content.className = 't-content'; 107 | 108 | Object.entries(config).forEach(([key, value]) => { 109 | const inputGroup = document.createElement('div'); 110 | inputGroup.className = 't-input-group'; 111 | inputGroup.innerHTML = ` 112 | 113 | 114 | `; 115 | content.appendChild(inputGroup); 116 | }); 117 | 118 | 119 | section.appendChild(header); 120 | section.appendChild(content); 121 | 122 | return section; 123 | } 124 | 125 | // 添加展开/折叠功能 126 | document.addEventListener('click', function(e) { 127 | if (e.target.classList.contains('t-toggle-btn')) { 128 | const button = e.target; 129 | const content = button.closest('.t-section-header').nextElementSibling; 130 | button.classList.toggle('t-active'); 131 | content.classList.toggle('t-active'); 132 | } 133 | }); 134 | 135 | // 添加自动保存功能 136 | let saveTimeout; 137 | document.addEventListener('input', function(e) { 138 | if (e.target.classList.contains('t-input')) { 139 | clearTimeout(saveTimeout); 140 | saveTimeout = setTimeout(() => { 141 | // 收集当前所有配置数据 142 | const config = collectConfig(); 143 | // 发送到后端 144 | fetch('/update_config', { 145 | method: 'POST', 146 | headers: { 147 | 'Content-Type': 'application/json', 148 | }, 149 | body: JSON.stringify(config) 150 | }); 151 | }, 5000); 152 | } 153 | }); 154 | async function saveall() { 155 | const saveall = document.getElementById('saveall'); 156 | 157 | 158 | // 添加切换事件监听 159 | 160 | 161 | try { 162 | // 发送数据到后端 163 | 164 | const config = collectConfig(); 165 | 166 | const response = await fetch('/save_all', { 167 | method: 'POST', 168 | headers: { 169 | 'Content-Type': 'application/json', 170 | }, 171 | body: JSON.stringify(config) 172 | }); 173 | 174 | if (!response.ok) { 175 | throw new Error('保存失败'); 176 | } 177 | 178 | // 显示成功状态 179 | saveall.innerHTML = '✓'; 180 | saveall.classList.add('success'); 181 | 182 | // 2秒后恢复按钮状态 183 | setTimeout(() => { 184 | saveall.innerHTML = '保存所有修改'; 185 
| saveall.classList.remove('success'); 186 | }, 2000); 187 | 188 | 189 | 190 | } catch (error) { 191 | console.error('保存设置失败:', error); 192 | alert('保存设置失败,请重试'); 193 | } 194 | } 195 | // 保存所有修改 196 | document.querySelector('.t-save-btn').addEventListener('click', function() { 197 | const config = collectConfig(); 198 | fetch('/save_all', { 199 | method: 'POST', 200 | headers: { 201 | 'Content-Type': 'application/json', 202 | }, 203 | body: JSON.stringify(config) 204 | }); 205 | }); 206 | 207 | // 收集所有配置数据 208 | // 收集所有配置数据 209 | function collectConfig() { 210 | const config = { 211 | count: document.getElementById('t-count').value, 212 | PPC: parseInt(document.getElementById('t-ppc').value, 10), 213 | translation_services: {}, 214 | ocr_services: {}, 215 | default_services: {} 216 | }; 217 | 218 | // 收集翻译服务配置 219 | const translationServices = document.getElementById('t-translation-services'); 220 | [...translationServices.getElementsByClassName('t-sub-section')].forEach(section => { 221 | const serviceName = section.querySelector('h4').textContent; 222 | config.translation_services[serviceName] = {}; 223 | [...section.getElementsByClassName('t-input-group')].forEach(group => { 224 | const key = group.querySelector('label').textContent.replace(':', ''); 225 | const value = group.querySelector('input').value; 226 | config.translation_services[serviceName][key] = value; 227 | }); 228 | }); 229 | 230 | // 收集OCR服务配置 231 | const ocrServices = document.getElementById('t-ocr-services'); 232 | [...ocrServices.getElementsByClassName('t-sub-section')].forEach(section => { 233 | const serviceName = section.querySelector('h4').textContent; 234 | config.ocr_services[serviceName] = {}; 235 | [...section.getElementsByClassName('t-input-group')].forEach(group => { 236 | const key = group.querySelector('label').textContent.replace(':', ''); 237 | const value = group.querySelector('input').value; 238 | config.ocr_services[serviceName][key] = value; 239 | }); 240 | }); 241 | 242 | // 
收集默认配置 243 | // 收集默认配置 244 | const defaultServices = document.getElementById('t-default-services'); 245 | [...defaultServices.getElementsByClassName('t-input-group')].forEach(group => { 246 | const key = group.querySelector('label').textContent.replace(':', ''); 247 | let value = group.querySelector('select').value; 248 | 249 | // 对特定key进行布尔值转换 250 | if(key === 'ocr_model' || key === 'Enable_translation' ) { 251 | value = value === 'true' ? true : false; 252 | } 253 | 254 | 255 | config.default_services[key] = value; 256 | }); 257 | 258 | 259 | return config; 260 | } 261 | 262 | // 在加载翻译服务配置时,确保处理Grok选项 263 | function loadTranslationServices(config) { 264 | const container = document.getElementById('t-translation-services'); 265 | // ...existing code... 266 | 267 | // 确保在创建服务配置UI时包含Grok 268 | // 使用正确的键名'Grok'而不是'grok' 269 | if (config.translation_services && config.translation_services.Grok) { 270 | const grokDiv = document.createElement('div'); 271 | grokDiv.className = 't-service'; 272 | grokDiv.innerHTML = ` 273 |

Grok Translate API

274 |
275 | 276 | 277 |
278 |
279 | 280 | 281 |
282 | `; 283 | container.appendChild(grokDiv); 284 | } 285 | 286 | // 确保在创建服务配置UI时包含GLM 287 | if (config.translation_services && config.translation_services.GLM) { 288 | const glmDiv = document.createElement('div'); 289 | glmDiv.className = 't-service'; 290 | glmDiv.innerHTML = ` 291 |

GLM Translate API

292 |
293 | 294 | 295 |
296 |
297 | 298 | 299 |
300 | `; 301 | container.appendChild(glmDiv); 302 | } 303 | 304 | // 添加ThirdParty服务配置 305 | if (config.translation_services && config.translation_services.ThirdParty) { 306 | const thirdPartyDiv = document.createElement('div'); 307 | thirdPartyDiv.className = 't-sub-section'; 308 | thirdPartyDiv.innerHTML = ` 309 |
310 |

ThirdParty

311 | 312 |
313 |
314 |
315 | 316 | 317 |
318 |
319 | 320 | 321 |
322 |
323 | 324 | 325 |
326 |
327 | `; 328 | container.appendChild(thirdPartyDiv); 329 | } else { 330 | // 如果ThirdParty配置不存在,则创建一个默认的 331 | const thirdPartyDiv = document.createElement('div'); 332 | thirdPartyDiv.className = 't-sub-section'; 333 | thirdPartyDiv.innerHTML = ` 334 |
335 |

ThirdParty

336 | 337 |
338 |
339 |
340 | 341 | 342 |
343 |
344 | 345 | 346 |
347 |
348 | 349 | 350 |
351 |
352 | `; 353 | container.appendChild(thirdPartyDiv); 354 | } 355 | 356 | // 添加Bing服务配置UI 357 | if (config.translation_services && config.translation_services.bing) { 358 | const bingDiv = document.createElement('div'); 359 | bingDiv.className = 't-service'; 360 | bingDiv.innerHTML = ` 361 |

Bing Translate API

362 |
363 | 364 |
365 | `; 366 | container.appendChild(bingDiv); 367 | } else { 368 | // 如果Bing配置不存在,则创建一个默认的 369 | const bingDiv = document.createElement('div'); 370 | bingDiv.className = 't-sub-section'; 371 | bingDiv.innerHTML = ` 372 |
373 |

Bing

374 | 375 |
376 |
377 |
378 | 379 |
380 |
381 | `; 382 | container.appendChild(bingDiv); 383 | } 384 | } 385 | 386 | -------------------------------------------------------------------------------- /static/target/2403.20127v1_zh.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/target/2403.20127v1_zh.pdf -------------------------------------------------------------------------------- /static/target/2501.05450v1_zh.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/target/2501.05450v1_zh.pdf -------------------------------------------------------------------------------- /static/thumbnail/...txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/thumbnail/...txt -------------------------------------------------------------------------------- /static/thumbnail/2403.20127v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/thumbnail/2403.20127v1.png -------------------------------------------------------------------------------- /static/thumbnail/2501.05450v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/thumbnail/2501.05450v1.png -------------------------------------------------------------------------------- /static/thumbnail/2g2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/thumbnail/2g2.png -------------------------------------------------------------------------------- /static/thumbnail/32g2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/thumbnail/32g2.png -------------------------------------------------------------------------------- /static/thumbnail/High-precision real-time autonomous driving targetdetection based on YOLOv8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/thumbnail/High-precision real-time autonomous driving targetdetection based on YOLOv8.png -------------------------------------------------------------------------------- /static/thumbnail/g2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/thumbnail/g2.png -------------------------------------------------------------------------------- /static/thumbnail/g55.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/thumbnail/g55.png -------------------------------------------------------------------------------- /static/thumbnail/g6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CBIhalsen/PolyglotPDF/f476650ba6563f574d8cbdcc0840bab27fcc7d35/static/thumbnail/g6.png -------------------------------------------------------------------------------- /static/thumbnail/gl1.png: 
"""Rebuild ``recent.json`` from the merged PDFs in ``static/merged_pdf``.

File: update_recent.py

All paths (``recent.json``, ``config.json``, ``static/merged_pdf``) are
relative to the current working directory, so run the script from the
directory that contains them.
"""
import datetime
import glob
import json
import os
import re
import shutil
from collections import OrderedDict
from typing import List, Dict, Any

# Merged files are named "<original stem>_<source lang>_<target lang>.pdf".
# The first group is greedy, so when the stem itself contains underscores the
# *last* two underscore-separated tokens are taken as the language codes.
_MERGED_NAME_RE = re.compile(r"(.+)_(\w+)_(\w+)\.pdf$")


def parse_merged_filename(filename: str) -> Dict[str, str]:
    """Split a merged-PDF file name into original name and language codes.

    Args:
        filename: Base name of the merged file, e.g. ``"paper_auto_zh.pdf"``.

    Returns:
        A dict with the keys ``original_name`` (always ending in ``.pdf``),
        ``original_language`` and ``target_language``.  When the name does
        not follow the ``<stem>_<src>_<dst>.pdf`` pattern, the languages
        fall back to ``"auto"`` and ``"zh"``.

    >>> parse_merged_filename("paper_auto_zh.pdf")["original_name"]
    'paper.pdf'
    """
    match = _MERGED_NAME_RE.match(filename)
    if match is None:
        # Not in the expected format: keep the stem and use the defaults.
        stem = filename.rsplit(".", 1)[0]
        return {
            "original_name": stem + ".pdf",
            "original_language": "auto",
            "target_language": "zh"
        }

    stem, source_lang, target_lang = match.groups()
    return {
        "original_name": stem + ".pdf",
        "original_language": source_lang,
        "target_language": target_lang
    }


def get_file_info(file_path: str) -> Dict[str, Any]:
    """Build the ``recent.json`` record for one merged PDF.

    Args:
        file_path: Path to an existing merged PDF.

    Returns:
        An ``OrderedDict`` whose keys appear in the order they are written
        to ``recent.json``.  ``index`` is a placeholder (0) that the caller
        overwrites.

    Raises:
        OSError: If ``file_path`` cannot be stat-ed.
    """
    filename = os.path.basename(file_path)
    # NOTE: os.path.getctime is platform dependent (see the os.path docs);
    # it is kept as-is because existing records were produced with it.
    timestamp = os.path.getctime(file_path)
    date_str = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')

    parsed = parse_merged_filename(filename)

    return OrderedDict([
        ("index", 0),  # temporary value, replaced by the caller
        ("date", date_str),
        ("name", parsed["original_name"]),
        ("original_language", parsed["original_language"]),
        ("target_language", parsed["target_language"]),
        ("read", "0"),  # unread by default
        ("statue", "1"),  # key spelling kept exactly as the file format uses it
    ])


def update_config_count(count: int) -> bool:
    """Set the ``count`` field of ``config.json`` to ``count``.

    Args:
        count: The value to store.

    Returns:
        ``True`` when the file was rewritten, ``False`` when it is missing
        or any error occurred (the error is printed, never raised).
    """
    config_path = "config.json"
    try:
        if not os.path.exists(config_path):
            print("错误: 找不到config.json文件")
            return False

        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        config["count"] = count

        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, ensure_ascii=False, indent=2)

        print(f"已更新config.json的count值为: {count}")
        return True
    except Exception as e:  # deliberate best-effort boundary: report, don't crash
        print(f"更新config.json的count值时发生错误: {e}")
        return False


def validate_json_file(file_path: str) -> bool:
    """Return whether ``file_path`` exists and contains parseable JSON.

    Args:
        file_path: Path of the JSON file to check.

    Returns:
        ``True`` if the file exists and ``json.load`` succeeds, otherwise
        ``False`` (a parse/read error is printed, never raised).
    """
    try:
        if not os.path.exists(file_path):
            return False
        with open(file_path, "r", encoding="utf-8") as f:
            json.load(f)
        return True
    except Exception as e:  # deliberate best-effort boundary: report, don't crash
        print(f"JSON文件格式无效: {e}")
        return False


def update_recent_json() -> None:
    """Regenerate ``recent.json`` from scratch and sync ``config.json``.

    Steps:
      1. Copy an existing ``recent.json`` to ``recent.json.bak``.
      2. Make sure ``static/merged_pdf`` exists and list its ``*.pdf`` files.
      3. Build one record per file and number them from 0.
      4. Write and re-validate ``recent.json``, then store the number of
         records as ``count`` in ``config.json``.

    If writing or validation fails and a backup file exists, the backup is
    copied back over ``recent.json``.  Errors are printed, never raised.
    """
    merged_path = os.path.join("static", "merged_pdf")

    # Back up the current file so a failed rewrite can be rolled back.
    if os.path.exists("recent.json"):
        try:
            shutil.copy2("recent.json", "recent.json.bak")
            print("已创建备份文件: recent.json.bak")
        except OSError as e:
            print(f"创建备份文件失败: {e}")

    if not os.path.exists(merged_path):
        print(f"警告: 目录不存在 {merged_path}")
        try:
            os.makedirs(merged_path, exist_ok=True)
        except OSError as e:
            print(f"创建目录失败: {e}")

    # glob.glob returns entries in file-system order, which is arbitrary.
    # Sort so the same set of files always receives the same indices.
    merged_files = sorted(glob.glob(os.path.join(merged_path, "*.pdf")))

    new_entries: List[Dict[str, Any]] = [get_file_info(path) for path in merged_files]

    # Indices are assigned from 0 in listing order.
    for i, entry in enumerate(new_entries):
        entry["index"] = i

    try:
        # Serialize first so a serialization error cannot truncate the file.
        json_str = json.dumps(new_entries, ensure_ascii=False, indent=2)

        with open("recent.json", "w", encoding="utf-8") as f:
            f.write(json_str)

        # Read the file back to make sure what landed on disk is valid JSON.
        if not validate_json_file("recent.json"):
            raise ValueError("写入的JSON文件验证失败")

        # Keep config.json's count in step with the number of records.
        update_config_count(len(new_entries))

        print(f"已重置并更新recent.json,共 {len(new_entries)} 条记录")
    except Exception as e:  # script boundary: report and try to roll back
        print(f"更新recent.json文件失败: {e}")
        if os.path.exists("recent.json.bak"):
            try:
                shutil.copy2("recent.json.bak", "recent.json")
                print("已从备份恢复recent.json文件")
            except OSError as e2:
                print(f"从备份恢复失败: {e2}")


if __name__ == "__main__":
    update_recent_json()