├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── custom.md │ └── feature_request.md ├── .gitignore ├── GPTOCRGUI.py ├── GPTOCRGUI.spec ├── LICENSE ├── README.md ├── build_on_macos.md ├── build_on_windows.md ├── ocrgui.icns ├── ocrgui.ico ├── ocrgui.jpg ├── processors ├── __init__.py ├── image_encoder.py └── markdown_processor.py ├── requirements.txt ├── setup └── PillOCR.iss └── utils ├── __init__.py ├── config_manager.py ├── hotkey_manager.py └── path_tools.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 
4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # PyInstaller 默认生成目录 2 | /build/ 3 | /dist/ 4 | # python vnev 5 | /venv/ 6 | # 二进制文件 7 | *.exe 8 | *.dmg 9 | *.app 10 | GPTOCRGUI-deepseek.py 11 | # 平台临时文件(按需补充) 12 | .DS_Store # macOS 13 | Thumbs.db # Windows 14 | 15 | # Python 编译缓存 16 | __pycache__/ 17 | *.py[cod] 18 | 19 | # IDE 配置文件(示例) 20 | .idea/ 21 | .vscode/ 22 | -------------------------------------------------------------------------------- /GPTOCRGUI.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import pystray 4 | import pyperclip 5 | import platform 6 | #import keyboard 7 | import threading 8 | import tkinter as tk 9 | from tkinter import ttk 10 | from PIL import Image, ImageGrab, ImageDraw, ImageTk 11 | import time 12 | from openai import OpenAI 13 | import httpx 14 | from utils.path_tools import 
def set_api_key(self, api_key):
    """Store the API key in the ``OPENAI_API_KEY`` environment variable.

    Args:
        api_key: The key to publish. Must be non-empty.

    Raises:
        ValueError: If ``api_key`` is empty (or otherwise falsy).
    """
    if api_key:
        # The key is only published through the process environment here;
        # this method does not build or touch the client object.
        os.environ['OPENAI_API_KEY'] = api_key
        return
    raise ValueError("API Key不能为空")
def process_image(self, image, max_tokens=1000):
    """Send ``image`` to the configured chat model and return Markdown text.

    Args:
        image: Image to transcribe. It is passed to
            ``self.image_encoder.encode_image`` and the result is embedded
            in a ``data:image/png;base64,`` URL.
        max_tokens: Completion token limit forwarded to the API. Defaults
            to 1000, the value that used to be hard-coded.

    Returns:
        The model reply with an enclosing ```markdown fence removed (when
        present) and then passed through
        ``self.markdown_processor.modify_wrappers``.

    Raises:
        Exception: If no client has been configured yet.
        ValueError: If the reply carries no text content.
    """
    if not self.client:
        raise Exception("请先设置 API Key 或推理接入点")

    data_url = f"data:image/png;base64,{self.image_encoder.encode_image(image)}"

    response = self.client.chat.completions.create(
        model=self.gpt_model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Convert to markdown. Use LaTeX for formulas. Return only markdown content."},
                    {
                        "type": "image_url",
                        "image_url": {"url": data_url}
                    }
                ],
            }
        ],
        max_tokens=max_tokens,
    )

    markdown_content = response.choices[0].message.content
    if markdown_content is None:
        # Without this guard re.sub() fails with an opaque
        # "expected string or bytes-like object" TypeError.
        raise ValueError("模型未返回文本内容")

    # Unwrap the reply only when the whole text is one ```markdown fence.
    markdown_content = re.sub(
        r'^```markdown\s*\n(.*?)\n```\s*$', r'\1', markdown_content, flags=re.DOTALL
    )
    return self.markdown_processor.modify_wrappers(markdown_content)
markdown_processor 的 set_wrappers 方法""" 148 | self.markdown_processor.set_wrappers(inline_wrapper, block_wrapper) 149 | 150 | class App: 151 | def __init__(self, root, processor): 152 | self.processor = processor 153 | self.processor.app = self 154 | self.processor.log_callback = self.log 155 | self.config_manager = ConfigManager() 156 | self.hotkey_manager = create_hotkey_manager(self.toggle_processing) 157 | self.hotkey_var = tk.StringVar(value='ctrl+shift+o') 158 | self.provider_var = tk.StringVar(value='OPENAI') # 确保 provider_var 在 load_settings 之前定义 159 | self.url_var = tk.StringVar(value='') 160 | self.log_text = tk.Text() # 确保 log_text 在 load_settings 之前定义 161 | self.root = root 162 | self.root.title("OCR") 163 | self.root.configure(bg='#ffffff') 164 | 165 | # 配置 ttk 样式 166 | style = ttk.Style() 167 | style.theme_use('clam') 168 | 169 | # 设置风格 170 | primary_color = '#95ec69' # 绿色,与成功状态的胶囊图标一致 171 | text_color = '#000000' # 黑色文字 172 | bg_color = '#ffffff' # 白色背景 173 | 174 | style.configure('TButton', padding=6, relief="flat", 175 | background=primary_color, foreground=text_color) 176 | style.map('TButton', 177 | background=[('active', primary_color)], 178 | foreground=[('active', text_color)]) 179 | style.configure('TLabel', background=bg_color, foreground=text_color) 180 | style.configure('TFrame', background=bg_color) 181 | style.configure('TLabelframe', background=bg_color) 182 | style.configure('TLabelframe.Label', background=bg_color, 183 | foreground=text_color, font=('Segoe UI', 9)) 184 | style.configure('TEntry', padding=6) 185 | style.configure('TCombobox', padding=6) 186 | 187 | # 初始化变量 188 | self.provider_var = tk.StringVar(value='OPENAI') 189 | self.api_key_var = tk.StringVar() 190 | self.proxy_var = tk.StringVar() 191 | self.model_var = tk.StringVar(value='gpt-4o') 192 | self.inline_var = tk.StringVar(value='$ $') 193 | self.block_var = tk.StringVar(value='$$ $$') 194 | 195 | # 定义服务商配置字典 196 | self.provider_settings = { 197 | 'OPENAI': { 198 | 
'api_key': '', 199 | 'proxy': '', 200 | 'model': 'gpt-4o' 201 | }, 202 | '火山引擎': { 203 | 'api_key': '', 204 | 'proxy': '', 205 | 'model': '' 206 | }, 207 | '自定义': { 208 | 'url':'', 209 | 'api_key': '', 210 | 'proxy': '', 211 | 'model': '' 212 | } 213 | } 214 | 215 | # 主容器,采用两栏布局 216 | main_frame = ttk.Frame(root, padding=20, style='TFrame') 217 | main_frame.pack(fill=tk.BOTH, expand=True) 218 | 219 | # 左右两栏 220 | left_frame = ttk.Frame(main_frame, style='TFrame') 221 | left_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=False, padx=(0, 20)) 222 | 223 | right_frame = ttk.Frame(main_frame, style='TFrame') 224 | right_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) 225 | 226 | # 左侧设置项 227 | self.provider_frame = ttk.LabelFrame(left_frame, text="服务商选择", padding=10, style='TLabelframe') 228 | self.provider_frame.pack(fill=tk.X, pady=(0, 10)) 229 | # 添加服务商映射 230 | self.PROVIDER_MAPPING = { 231 | 'OPENAI': 'OPENAI', 232 | '火山引擎': '火山引擎', 233 | '自定义': '自定义' 234 | } 235 | # 反向映射用于保存 236 | self.PROVIDER_REVERSE_MAPPING = {v: k for k, v in self.PROVIDER_MAPPING.items()} 237 | self.provider_dropdown = ttk.Combobox(self.provider_frame, 238 | textvariable=self.provider_var, 239 | values=list(self.PROVIDER_MAPPING.values()), 240 | state='readonly') 241 | self.provider_dropdown.pack(fill=tk.X) 242 | self.provider_dropdown.bind('<>', self.on_provider_change) 243 | 244 | # 自定义 URL 配置,先隐藏,只有选择自定义才显示 245 | self.custom_url_frame = ttk.LabelFrame(left_frame, text="Base_Url", padding=10, style='TLabelframe') 246 | self.url_entry = ttk.Entry(self.custom_url_frame, textvariable=self.url_var) 247 | self.url_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=10) 248 | ttk.Button(self.custom_url_frame, text="保存", command=self.save_custom_url).pack(side=tk.RIGHT) 249 | 250 | # API Key 设置 251 | api_frame = ttk.LabelFrame(left_frame, text="API Key", padding=10, style='TLabelframe') 252 | api_frame.pack(fill=tk.X, pady=(0, 10)) 253 | 254 | self.api_key_entry = ttk.Entry(api_frame, 
textvariable=self.api_key_var, show="•") 255 | self.api_key_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 10)) 256 | 257 | self.save_api_button = ttk.Button(api_frame, text="保存", command=self.save_api_key) 258 | self.save_api_button.pack(side=tk.RIGHT) 259 | 260 | # 代理设置 261 | proxy_frame = ttk.LabelFrame(left_frame, text="代理设置", padding=10, style='TLabelframe') 262 | proxy_frame.pack(fill=tk.X, pady=(0, 10)) 263 | 264 | ttk.Label(proxy_frame, text="HTTP代理:").pack(side=tk.LEFT) 265 | self.proxy_entry = ttk.Entry(proxy_frame, textvariable=self.proxy_var) 266 | self.proxy_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=10) 267 | ttk.Button(proxy_frame, text="保存", command=self.save_proxy).pack(side=tk.RIGHT) 268 | 269 | # 模型选择 270 | self.model_frame = ttk.LabelFrame(left_frame, text="模型选择(请确保模型具有视觉功能)", padding=10, style='TLabelframe') 271 | self.model_dropdown = ttk.Combobox(self.model_frame, textvariable=self.model_var, 272 | state='readonly') 273 | ttk.Button(self.model_frame, text="保存", command=self.save_model_choice).pack(side=tk.RIGHT) 274 | self.model_dropdown.pack(fill=tk.X) 275 | self.model_dropdown.bind('<>', self.save_model_choice) 276 | 277 | # 模型输入框 278 | self.model_entry_frame= ttk.LabelFrame(left_frame, text="模型(请确保模型具有视觉功能)", padding=10, style='TLabelframe') 279 | self.model_entry=ttk.Entry(self.model_entry_frame, textvariable=self.model_var) 280 | self.model_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 10)) 281 | ttk.Button(self.model_entry_frame, text="保存", command=self.save_model_choice).pack(side=tk.RIGHT) 282 | 283 | # 推理接入点框架 284 | self.endpoint_frame = ttk.LabelFrame(left_frame, text="推理接入点(请确保模型具有视觉功能)", padding=10, style='TLabelframe') 285 | self.endpoint_entry = ttk.Entry(self.endpoint_frame, textvariable=self.model_var) 286 | self.endpoint_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 10)) 287 | ttk.Button(self.endpoint_frame, text="保存", command=self.save_model_choice).pack(side=tk.RIGHT) 288 | 289 
| # LaTeX 设置 290 | latex_frame = ttk.LabelFrame(left_frame, text="LaTeX 设置", padding=10, style='TLabelframe') 291 | latex_frame.pack(fill=tk.X, pady=(0, 10)) 292 | 293 | inline_frame = ttk.Frame(latex_frame, style='TFrame') 294 | inline_frame.pack(fill=tk.X, pady=(0, 5)) 295 | ttk.Label(inline_frame, text="行内公式包装符:").pack(side=tk.LEFT) 296 | inline_combo = ttk.Combobox(inline_frame, textvariable=self.inline_var, 297 | values=['$ $', '\\( \\)']) 298 | inline_combo.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(10, 0)) 299 | 300 | block_frame = ttk.Frame(latex_frame, style='TFrame') 301 | block_frame.pack(fill=tk.X) 302 | ttk.Label(block_frame, text="行间公式包装符:").pack(side=tk.LEFT) 303 | block_combo = ttk.Combobox(block_frame, textvariable=self.block_var, 304 | values=['$$ $$', '\\[ \\]']) 305 | block_combo.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(10, 0)) 306 | 307 | # 根据平台决定是否显示热键设置UI 308 | if HotkeyManager.should_show_ui(): 309 | hotkey_frame = ttk.LabelFrame(left_frame, text="快捷键设置", padding=10, style='TLabelframe') 310 | hotkey_frame.pack(fill=tk.X, pady=(0, 10)) 311 | 312 | hotkey_input_frame = ttk.Frame(hotkey_frame, style='TFrame') 313 | hotkey_input_frame.pack(fill=tk.X, pady=(0, 5)) 314 | ttk.Label(hotkey_input_frame, text="启动/停止快捷键:").pack(side=tk.LEFT) 315 | self.hotkey_entry = ttk.Entry(hotkey_input_frame, textvariable=self.hotkey_var) 316 | self.hotkey_entry.bind('', self.capture_hotkey) 317 | self.hotkey_entry.bind('', self.finalize_hotkey) 318 | self.hotkey_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(10, 10)) 319 | save_hotkey_button = ttk.Button(hotkey_input_frame, text="保存", command=self.save_hotkey) 320 | save_hotkey_button.pack(side=tk.RIGHT) 321 | 322 | # 右侧日志显示 323 | log_frame = ttk.LabelFrame(right_frame, text="日志", padding=10, style='TLabelframe') 324 | log_frame.pack(fill=tk.BOTH, expand=True, pady=(0, 10)) 325 | 326 | self.log_text = tk.Text(log_frame, height=6, font=('Consolas', 9), 327 | bg='#f0f0f0', relief='flat', 
def debounced_update_wrappers(self, *args):
    """Schedule ``update_wrappers`` once wrapper edits have settled.

    Registered as a write-trace on the inline/block wrapper variables, so
    it may fire on every keystroke. Each call cancels the pending run and
    schedules a new one, so ``update_wrappers`` executes only after the
    delay has passed without further changes.

    The delay is scheduled with ``root.after`` rather than
    ``threading.Timer``: ``update_wrappers`` reads Tk variables and writes
    to the log widget, and a ``Timer`` would run it on a worker thread.

    Args:
        *args: Trace arguments supplied by Tk; unused.
    """
    DEBOUNCE_MS = 2000  # 2 second debounce window

    # Drop the previously scheduled run, if any. Cancelling an id that has
    # already fired is harmless.
    if self.debounce_timer is not None:
        self.root.after_cancel(self.debounce_timer)

    # Keep the returned id so the next call can cancel this run.
    self.debounce_timer = self.root.after(DEBOUNCE_MS, self.update_wrappers)
def capture_hotkey(self, event):
    """Turn a key event into a ``mod+mod+key`` string in ``hotkey_var``.

    Args:
        event: Tk key event; ``event.state`` and ``event.keysym`` are read.

    Returns:
        Always ``"break"``, so the keystroke is not also typed into the
        entry widget.
    """
    # Keysyms (lower-cased) reported when a modifier key itself is pressed.
    # They are not usable as the main key of a hotkey.
    modifier_keysyms = {
        'shift_l', 'shift_r',
        'control_l', 'control_r',
        'alt_l', 'alt_r',
        'meta_l', 'meta_r',
        'super_l', 'super_r',
        'win_l', 'win_r',
        'caps_lock', 'num_lock',
    }

    # Bits of event.state this code maps to modifier names.
    modifiers = []
    if event.state & 0x0001:
        modifiers.append('shift')
    if event.state & 0x0004:
        modifiers.append('ctrl')
    if event.state & 0x0008:
        modifiers.append('alt')

    key = event.keysym.lower()
    # Ignore modifier-only presses. Previously they overwrote the hotkey
    # with values such as "control_l" or "shift+shift_l".
    if key not in modifier_keysyms and key not in modifiers:
        self.hotkey_var.set('+'.join(modifiers + [key]))
    return "break"  # suppress the default text input
def toggle_processing(self, icon=None, item=None):
    """Flip between the running and stopped states, then rebuild the menu.

    Args:
        icon: Unused; accepted so the method can serve as a tray-menu
            callback.
        item: Unused; see ``icon``.
    """
    # Choose the transition from the current state, then perform it.
    transition = self.stop_processing if self.running_state else self.start_processing
    transition()

    # Rebuild the menu so its first entry reflects the new state.
    self.icon.menu = self.create_menu()
def update_client_settings(self):
    """Push the selected provider's stored settings into the processor.

    The API key is applied first, then the proxy, then the model. The
    model is only pushed for the three known providers.
    """
    provider_name = self.provider_var.get()
    stored = self.provider_settings.get(provider_name, {})

    # API key, then proxy.
    self.processor.set_api_key(stored.get('api_key', ''))
    self.processor.set_proxy(stored.get('proxy', ''))

    # Model: the value used when no model is stored differs per provider.
    fallback_model = {
        'OPENAI': 'gpt-4o',
        '火山引擎': '',
        '自定义': '',
    }
    if provider_name in fallback_model:
        self.processor.set_gpt_model(
            stored.get('model', fallback_model[provider_name])
        )
def save_settings(self):
    """Persist all settings, then apply them to the processor.

    The current UI values are stored under the selected provider in
    ``self.provider_settings`` and written through ``self.config_manager``
    together with the LaTeX wrappers and the hotkey. The processor is
    refreshed only after a successful write.

    Writing and refreshing are reported separately, so a client problem
    (for example an empty API key) is no longer logged as a failed save.

    Raises:
        KeyError: If the selected provider is not in
            ``PROVIDER_REVERSE_MAPPING``.
    """
    display_provider = self.provider_var.get()
    current_provider = self.PROVIDER_REVERSE_MAPPING[display_provider]

    # Fields shared by every provider.
    settings = {
        'api_key': self.api_key_var.get().strip(),
        'proxy': self.proxy_var.get().strip(),
        'model': self.model_var.get().strip(),
    }
    if current_provider == '自定义':
        # The custom provider also stores its base URL; it stays the first
        # key, as before.
        settings = {'url': self.url_var.get().strip(), **settings}

    self.provider_settings[current_provider] = settings

    config = {
        'current_provider': current_provider,
        'provider_settings': self.provider_settings,
        'latex_settings': {
            'inline_wrapper': self.inline_var.get(),
            'block_wrapper': self.block_var.get()
        },
        'hotkey': self.hotkey_var.get()
    }

    try:
        self.config_manager.save(config)
    except Exception as e:
        self.log(f"保存设置失败: {e}")
        return  # as before, skip the client refresh when the write failed

    try:
        self.update_client_settings()
    except Exception as e:
        self.log(f"更新客户端设置失败: {e}")
def load_settings(self):
    """Load the saved configuration and apply it to the UI and processor.

    Restores provider settings, the selected provider, the LaTeX wrappers
    and the hotkey, registers the hotkey, then calls
    ``apply_provider_settings``. Any failure is logged and not raised.

    A configuration with a partial ``latex_settings`` entry no longer
    aborts loading: each missing wrapper falls back to its default.
    """
    default_inline = '$ $'
    default_block = '$$ $$'

    try:
        config = self.config_manager.load()

        # Use the built-in defaults only when the file holds no
        # provider settings at all.
        if 'provider_settings' not in config:
            self.provider_settings = {
                'OPENAI': {'api_key': '', 'proxy': '', 'model': 'gpt-4o'},
                '火山引擎': {'api_key': '', 'proxy': '', 'model': ''},
                '自定义': {'url': '', 'api_key': '', 'proxy': '', 'model': ''}
            }
        else:
            self.provider_settings = config['provider_settings']
        current_provider = config.get('current_provider', 'OPENAI')
        self.provider_var.set(current_provider)

        # LaTeX wrappers: default each key on its own so that one missing
        # key does not raise KeyError and skip everything below.
        latex_settings = config.get('latex_settings') or {}
        self.inline_var.set(latex_settings.get('inline_wrapper', default_inline))
        self.block_var.set(latex_settings.get('block_wrapper', default_block))

        # Hand the wrappers to the processor.
        self.processor.set_wrappers(
            self.inline_var.get(),
            self.block_var.get()
        )

        # Hotkey.
        self.hotkey_var.set(config.get('hotkey', 'ctrl+shift+o'))
        self.register_hotkey()

        # Provider-specific UI and client state.
        self.apply_provider_settings()
    except Exception as e:
        self.log(f"加载配置失败: {e}")
# Build specification for the PillOCR application (entry script GPTOCRGUI.py).
# NOTE(review): Analysis, PYZ, EXE and BUNDLE are not imported in this file;
# presumably PyInstaller provides them when it executes the spec - confirm.
import platform

block_cipher = None

icon_file = 'ocrgui.ico'  # default: the Windows icon
if platform.system() == "Darwin":  # building on macOS
    icon_file = 'ocrgui.icns'  # use the macOS icon instead

a = Analysis(
    ['GPTOCRGUI.py'],
    binaries=[],
    # Both icon files are shipped regardless of the build platform, together
    # with the source files of the two local packages.
    datas=[
        ('ocrgui.ico', '.'),
        ('ocrgui.icns', '.'),
        ('utils/*.py', 'utils'),
        ('processors/*.py', 'processors'),
    ],
    hiddenimports=[
        'PIL',
        'openai',
        'pystray',
        'httpx',
        'utils.path_tools',
        'utils.config_manager',
        'processors.image_encoder',
        'processors.markdown_processor',
        'keyboard'
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False  # changed to False
)

pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.zipfiles,
    a.datas,
    name='PillOCR',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    console=False,
    disable_windowed_traceback=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    # The executable gets an icon only on Windows; on macOS the icon is set
    # on the BUNDLE below.
    icon=icon_file if platform.system() == "Windows" else None
)

# On macOS, additionally wrap the executable in a .app bundle.
if platform.system() == "Darwin":
    app = BUNDLE(
        exe,
        name='PillOCR.app',
        icon=icon_file,
        bundle_identifier='com.pebblestudio.pillocr',
        info_plist={
            # NOTE(review): presumably marks the app as a Dock-less agent
            # (tray-only) application - confirm against the Info.plist docs.
            'LSUIElement': 'YES',
            'CFBundleShortVersionString': '0.0.1',
            'CFBundleName': 'PillOCR',
            'CFBundleDisplayName': 'PillOCR',
            'NSHighResolutionCapable': True,
        }
    )
Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 
62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |
3 | image 4 |

PillOCR

5 | 一个接近无感的OCR工具

6 |
7 | 8 | --- 9 | ## 动机 10 | 现在已经有许多用于公式识别的工具,也有许多优秀的免费工具,如[SimpleTex](https://simpletex.cn/)等。 11 | 这些软件使用时往往需要经历打开软件窗口→截图或上传图像→复制识别结果并粘贴到编辑器的过程。 12 | 在连续写作时,重复上述操作难免觉得麻烦,且打开、关闭窗口的过程会打断写作思路。 13 | 有些软件可以设置截图识别且识别完成不自动打开窗口,但这样又无从得知识别是否已经完成。 14 | 于是我做了这个小工具,献给那些和我有同样感受的同学。 15 | 16 | ## 原理 17 | 本工具基于大模型api,其会检测剪贴板中的图片,将其自动发送给大模型,并将大模型的返回结果处理后粘贴到剪贴板中。 18 | 19 | ## 特点 20 | - 轻量化。该工具本质上只是一个UI,并不会在本地进行图片识别,因此对电脑算力要求不高。使用本地模型识别的好处是完全免费,但有些时候我们日常携带的用来写作的机器未必有足够的算力。 21 | - 价格便宜。现在许多大模型api的价格已经足够低。以火山引擎的Doubao-1.5-vision-lite为例,本工具设置max_tokens为1000,而Doubao-vision-pro-32k api的价格为0.0045元/千tokens,即识别一张图约0.5分钱。且有些大模型api还会赠送免费额度。 22 | - 比较稳定。不依赖于某一家提供的服务,如果某天你使用的大模型api提供商倒闭了,可以另换一家。 23 | 24 | ## 模型推荐 25 | - 火山引擎的Doubao-1.5-vision-lite,若觉得精准度不够可以使用Doubao-1.5-vision-pro,价格比前者贵一倍。火山引擎赠送500,000tokens的免费额度。 26 | 27 | 因为火山引擎的免费额度我还没用完,所以暂无其他推荐。大家有推荐的模型可以告诉我,我会添加到此处。 28 | 29 | ## 同类型工具推荐 30 | - [SimpleTex](https://simpletex.cn/),该软件功能非常强大,支持在识别结果上直接编辑,且支持转化为MathML和Typst(话说我或许也可以在这个工具中加入该功能?)。 31 | - [MixTeX](https://github.com/RQLuo/MixTeX-Latex-OCR),离线OCR软件,完全免费,效果非常不错。如果机器性能还可以的话强烈推荐。 32 | - [MinerU](https://mineru.net/),适合将整本pdf批量转化为markdown,可用于构建RAG使用的知识库。配合[RAGFlow](https://github.com/infiniflow/ragflow)食用很香。 33 | - [Mathpix](https://mathpix.com/),老牌公式识别软件,就是免费额度略少。 34 | ## 未来计划 35 | 如果用的人比较多,我也许会用tauri重写该工具。可能会增加一些功能,比如: 36 | - 支持MathML和Typst(刚刚想到); 37 | - 识别+翻译; 38 | - 添加其他显示语言; 39 | 但作者今年即将毕业,升学/工作还无着落,且Rust仍在学习中……因此短期内如果工具没有严重问题可能会暂时搁置该项目。 40 | 41 | ## 打赏支持 42 | 如果这个工具对您有帮助的话,就请我吃个鸡腿吧~ 43 | 44 | image 45 | 46 | 47 | -------------------------------------------------------------------------------- /build_on_macos.md: -------------------------------------------------------------------------------- 1 | # Build on MacOS 2 | ## Create virtual environment 3 | ``` 4 | python -m venv venv 5 | ``` 6 | ## Activate virtual environment 7 | 8 | ``` 9 | source venv/bin/activate 10 | ``` 11 | 12 | ### Install requirements 13 | ``` 14 | pip install -r requirements.txt 15 | ``` 16 | ### Build .app
file using .spec file 17 | ``` 18 | pyinstaller GPTOCRGUI.spec 19 | ``` 20 | ### Use create-dmg to build dmg file 21 | Install create-dmg 22 | ``` 23 | brew install create-dmg 24 | ``` 25 | create dmg 26 | 27 | ``` 28 | create-dmg \ 29 | --volname "PillOCR setup" \ 30 | --volicon "ocrgui.icns" \ 31 | --window-pos 200 120 \ 32 | --window-size 800 400 \ 33 | --icon-size 100 \ 34 | --icon "PillOCR.app" 200 190 \ 35 | --hide-extension "PillOCR.app" \ 36 | --app-drop-link 600 185 \ 37 | "PillOCR.dmg" \ 38 | "dist/PillOCR.app" 39 | ``` -------------------------------------------------------------------------------- /build_on_windows.md: -------------------------------------------------------------------------------- 1 | # Build on Windows 2 | ## Create virtual environment 3 | ``` 4 | python -m venv venv 5 | ``` 6 | ## Activate virtual environment 7 | ### On CMD 8 | ``` 9 | venv\Scripts\activate.bat 10 | ``` 11 | ### On PowerShell 12 | ``` 13 | venv\Scripts\Activate.ps1 14 | ``` 15 | 16 | ### Install requirements 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | ### Build .exe file using .spec file 21 | ```powershell 22 | pyinstaller GPTOCRGUI.spec 23 | ``` 24 | ### Use Inno Setup Compiler to build setup 25 | Use Inno Setup Compiler to run `.\setup\PillOCR.iss`. 
import base64
import io
from PIL import Image


class ImageEncoder:
    """Turn PIL images into base64 text."""

    def encode_image(self, image: Image.Image) -> str:
        """Return *image* as base64 text of its PNG serialisation.

        The image is saved with ``format='PNG'`` into an in-memory buffer;
        the buffer's bytes are then base64-encoded and decoded as UTF-8.
        """
        with io.BytesIO() as png_buffer:
            image.save(png_buffer, format='PNG')
            png_bytes = png_buffer.getvalue()
        encoded = base64.b64encode(png_bytes)
        return encoded.decode('utf-8')
import re


class MarkdownProcessor:
    """Rewrite LaTeX math delimiters in a piece of text.

    ``\\[ ... \\]`` and ``\\( ... \\)`` spans are re-wrapped with configurable
    delimiters. Each wrapper setting is a single string holding the left and
    right delimiter separated by one space (``'$ $'``, ``'$$ $$'``).
    """

    # One complete formula: an opening ``$`` or ``$$`` that is not part of a
    # longer run, content containing no ``$``, and the same delimiter again.
    # Matching whole pairs keeps the closing ``$`` of one formula from being
    # treated as the opening ``$`` of the next one.
    _DOLLAR_MATH = re.compile(r'(?<!\$)(\$\$?)(?!\$)([^\$]+?)\1(?!\$)')

    def __init__(self):
        self.inline_wrapper = '$ $'
        self.block_wrapper = '$$ $$'

    def set_wrappers(self, inline_wrapper: str, block_wrapper: str):
        """Store new wrapper settings.

        Args:
            inline_wrapper: ``'<left> <right>'`` pair used for ``\\( \\)``.
            block_wrapper: ``'<left> <right>'`` pair used for ``\\[ \\]``.
        """
        self.inline_wrapper = inline_wrapper
        self.block_wrapper = block_wrapper

    def modify_wrappers(self, text: str) -> str:
        """Return *text* with its math delimiters rewritten.

        Steps, in order:

        1. Runs of three or more ``$`` are deleted.
        2. ``\\[ ... \\]`` (may span lines) is re-wrapped with the block
           wrapper, its content stripped of surrounding whitespace.
        3. ``\\( ... \\)`` (single line) is re-wrapped with the inline
           wrapper, content stripped likewise.
        4. In ``$ ... $`` / ``$$ ... $$`` formulas padded with whitespace on
           both sides, the padding is removed.

        A wrapper setting that does not split into exactly two parts on a
        single space causes its step to be skipped.
        """
        text = re.sub(r'\${3,}', '', text)

        block_parts = self.block_wrapper.split(' ')
        if len(block_parts) == 2:
            block_left, block_right = block_parts
            text = re.sub(
                r'\\\[(.*?)\\\]',
                lambda m: f'{block_left}{m.group(1).strip()}{block_right}',
                text,
                flags=re.DOTALL,
            )

        inline_parts = self.inline_wrapper.split(' ')
        if len(inline_parts) == 2:
            inline_left, inline_right = inline_parts
            text = re.sub(
                r'\\\((.*?)\\\)',
                lambda m: f'{inline_left}{m.group(1).strip()}{inline_right}',
                text,
            )

        # The previous pattern (``\$\s+...\s+\$``) could start at the closing
        # ``$`` of one formula and end at the opening ``$`` of the next,
        # turning ``$a$ and $b$`` into ``$a$and$b$``. Working on complete
        # delimiter pairs avoids touching the prose between formulas.
        return self._DOLLAR_MATH.sub(self._strip_math_padding, text)

    @staticmethod
    def _strip_math_padding(match):
        """Drop whitespace padding inside one matched ``$``/``$$`` formula."""
        delimiter, content = match.group(1), match.group(2)
        stripped = content.strip()
        padded_both_sides = content[0].isspace() and content[-1].isspace()
        if stripped and padded_both_sides:
            return f'{delimiter}{stripped}{delimiter}'
        return match.group(0)
13 | AppId={{BA2F81A6-2C46-41F7-9B64-36ED2337A2CA} 14 | AppName={#MyAppName} 15 | AppVersion={#MyAppVersion} 16 | ;AppVerName={#MyAppName} {#MyAppVersion} 17 | AppPublisher={#MyAppPublisher} 18 | DefaultDirName={autopf}\PillOCR-Light 19 | ; "ArchitecturesAllowed=x64compatible" specifies that Setup cannot run 20 | ; on anything but x64 and Windows 11 on Arm. 21 | ArchitecturesAllowed=x64compatible 22 | ; "ArchitecturesInstallIn64BitMode=x64compatible" requests that the 23 | ; install be done in "64-bit mode" on x64 or Windows 11 on Arm, 24 | ; meaning it should use the native 64-bit Program Files directory and 25 | ; the 64-bit view of the registry. 26 | ArchitecturesInstallIn64BitMode=x64compatible 27 | DisableProgramGroupPage=yes 28 | ; Uncomment the following line to run in non administrative install mode (install for current user only.) 29 | ;PrivilegesRequired=lowest 30 | PrivilegesRequiredOverridesAllowed=dialog 31 | OutputDir=. 32 | OutputBaseFilename=PillOCR-setup 33 | SetupIconFile={#MyAppRoot}\ocrgui.ico 34 | Compression=lzma 35 | SolidCompression=yes 36 | WizardStyle=modern 37 | 38 | [Languages] 39 | Name: "chinese"; MessagesFile: "compiler:Languages\ChineseSimplified.isl" 40 | 41 | [Tasks] 42 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked 43 | 44 | [Files] 45 | Source: "{#MyAppRoot}\dist\{#MyAppExeName}"; DestDir: "{app}"; Flags: ignoreversion 46 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files 47 | 48 | [Icons] 49 | Name: "{autoprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}" 50 | Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon 51 | 52 | [Run] 53 | Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent -------------------------------------------------------------------------------- /utils/__init__.py: 
import json
import os
import platform


class ConfigManager:
    """Read and write the application's JSON settings file.

    The file lives in a per-user ``PillOCR`` directory that is created when
    the manager is constructed.
    """

    def __init__(self, config_file='config.json'):
        """Resolve the settings directory, create it, and remember the path.

        Args:
            config_file: File name of the settings file inside the directory.
        """
        config_dir = self._resolve_config_dir()
        # exist_ok makes a separate existence check unnecessary.
        os.makedirs(config_dir, exist_ok=True)
        self.config_file = os.path.join(config_dir, config_file)

    @staticmethod
    def _resolve_config_dir():
        """Return the per-user settings directory for the current platform.

        Windows uses ``%APPDATA%`` and macOS ``~/Library/Application Support``.
        Every other platform previously left the directory undefined and
        crashed; it now uses ``$XDG_CONFIG_HOME`` or ``~/.config``. A missing
        ``APPDATA`` variable falls back to ``~/AppData/Roaming``.
        """
        system = platform.system()
        home = os.path.expanduser("~")
        if system == "Windows":
            base = os.getenv("APPDATA") or os.path.join(home, "AppData", "Roaming")
        elif system == "Darwin":
            base = os.path.join(home, "Library", "Application Support")
        else:
            base = os.getenv("XDG_CONFIG_HOME") or os.path.join(home, ".config")
        return os.path.join(base, "PillOCR")

    def load(self):
        """Return the parsed settings, or ``{}`` when the file does not exist.

        Raises:
            ValueError: The file exists but is not valid JSON.
        """
        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            return {}
        except json.JSONDecodeError as e:
            raise ValueError(f"配置文件格式错误: {e}") from e

    def save(self, config):
        """Write *config* to the settings file as indented UTF-8 JSON."""
        with open(self.config_file, 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=4, ensure_ascii=False)
import platform
import threading

# Platform detection, evaluated once at import time.
CURRENT_PLATFORM = platform.system()
IS_WINDOWS = CURRENT_PLATFORM == "Windows"
IS_MACOS = CURRENT_PLATFORM == "Darwin"

# The ``keyboard`` package is only imported on Windows; the flag records
# whether that import succeeded.
KEYBOARD_AVAILABLE = False
if IS_WINDOWS:
    try:
        import keyboard
    except ImportError:
        pass
    else:
        KEYBOARD_AVAILABLE = True


class HotkeyManager:
    """Base class holding the callback and the currently tracked hotkey."""

    def __init__(self, callback=None):
        """
        Args:
            callback: Function to run when the hotkey fires.
        """
        self.callback = callback
        self.current_hotkey = None
        self.is_active = False

    def register_hotkey(self, hotkey_str):
        """Register a hotkey.

        Args:
            hotkey_str: Hotkey string such as 'ctrl+shift+o'.

        Returns:
            bool: Whether registration succeeded.
        """
        raise NotImplementedError("子类必须实现此方法")

    def unregister_hotkey(self, hotkey_str=None):
        """Unregister a hotkey.

        Args:
            hotkey_str: Hotkey to remove; ``None`` means the current one.

        Returns:
            bool: Whether removal succeeded.
        """
        raise NotImplementedError("子类必须实现此方法")

    def set_callback(self, callback):
        """Replace the stored callback.

        Args:
            callback: The new callback function.
        """
        self.callback = callback

    def _forget_if_current(self, hotkey_str):
        """Clear the tracked hotkey when *hotkey_str* is None or equal to it."""
        if hotkey_str is None or hotkey_str == self.current_hotkey:
            self.current_hotkey = None
            self.is_active = False

    @staticmethod
    def is_supported():
        """Return ``KEYBOARD_AVAILABLE``, i.e. whether global hotkeys can work.

        Returns:
            bool: Whether the current platform is supported.
        """
        return KEYBOARD_AVAILABLE

    @staticmethod
    def should_show_ui():
        """Return whether hotkey-related UI elements should be shown.

        Returns:
            bool: True on Windows only.
        """
        return IS_WINDOWS


class WindowsHotkeyManager(HotkeyManager):
    """Hotkey management for Windows, backed by the ``keyboard`` package."""

    def register_hotkey(self, hotkey_str):
        if not KEYBOARD_AVAILABLE:
            return False

        try:
            # Drop whatever is currently registered before adding the new one.
            self.unregister_hotkey()
            keyboard.add_hotkey(hotkey_str, self.callback)
        except Exception:
            return False

        self.current_hotkey = hotkey_str
        self.is_active = True
        return True

    def unregister_hotkey(self, hotkey_str=None):
        if not KEYBOARD_AVAILABLE:
            return False

        try:
            target = hotkey_str or self.current_hotkey
            if target:
                keyboard.remove_hotkey(target)
                self._forget_if_current(hotkey_str)
        except Exception:
            return False
        return True


class MacOSHotkeyManager(HotkeyManager):
    """No-op hotkey manager: records state but registers nothing."""

    def register_hotkey(self, hotkey_str):
        # Not supported here: remember the string, stay inactive, and still
        # report success.
        self.current_hotkey = hotkey_str
        self.is_active = False
        return True

    def unregister_hotkey(self, hotkey_str=None):
        # Not supported here: only the bookkeeping is reset; always succeeds.
        self._forget_if_current(hotkey_str)
        return True


def create_hotkey_manager(callback=None):
    """Create the hotkey manager matching the current platform.

    Args:
        callback: Function to run when the hotkey fires.

    Returns:
        HotkeyManager: ``WindowsHotkeyManager`` on Windows, otherwise
        ``MacOSHotkeyManager`` (macOS and every other platform).
    """
    manager_cls = WindowsHotkeyManager if IS_WINDOWS else MacOSHotkeyManager
    return manager_cls(callback)
import os
import sys


def get_absolute_path(relative_path):
    """Return the absolute path of a bundled resource file.

    When ``sys._MEIPASS`` is set (packaged environment), the resource is
    looked up under ``<_MEIPASS>/_internal`` first (the layout the original
    comment attributes to PyInstaller 6.0 and above) and under ``_MEIPASS``
    itself otherwise. Without ``_MEIPASS`` (development environment) the
    path is resolved against the current working directory.

    Args:
        relative_path: Resource path relative to the base directory.

    Returns:
        str: The base directory joined with *relative_path*.
    """
    # Check for the attribute explicitly, not through a blanket
    # ``except Exception``, so unrelated errors are not silently
    # reinterpreted as "running from source".
    bundle_root = getattr(sys, '_MEIPASS', None)
    if bundle_root is None:
        return os.path.join(os.path.abspath("."), relative_path)

    internal_root = os.path.join(bundle_root, '_internal')
    if os.path.exists(os.path.join(internal_root, relative_path)):
        return os.path.join(internal_root, relative_path)
    return os.path.join(bundle_root, relative_path)