├── .gitignore
├── assets
│   ├── chat_with_chatgpt.gif
│   ├── init_resubmit_button.gif
│   └── init_submit_button.gif
├── chatgpt_auto_script.py
├── chatgpt_chat_script.py
├── cursor.py
├── demo.py
├── frog_eye.py
├── openai_api.py
├── readme.md
├── readme_cn.md
├── readme_jp.md
├── requirements.txt
├── search_in_browser.py
├── test_openai_api.py
└── windows_api.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
debug/
__pycache__/
detected_images/
chats.json

--------------------------------------------------------------------------------
/assets/chat_with_chatgpt.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MaoXiaoYuZ/AutoChatScript/a826633a5e0305d10a136a58e2f09969927466bd/assets/chat_with_chatgpt.gif

--------------------------------------------------------------------------------
/assets/init_resubmit_button.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MaoXiaoYuZ/AutoChatScript/a826633a5e0305d10a136a58e2f09969927466bd/assets/init_resubmit_button.gif

--------------------------------------------------------------------------------
/assets/init_submit_button.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MaoXiaoYuZ/AutoChatScript/a826633a5e0305d10a136a58e2f09969927466bd/assets/init_submit_button.gif

--------------------------------------------------------------------------------
/chatgpt_auto_script.py:
--------------------------------------------------------------------------------
# Automation script for the ChatGPT web page. It only performs the UI
# automation itself and does not store any data.

import functools
import os
import pyautogui
import pyperclip
import time
import re
import random
import string
from PIL import Image
import numpy as np

import frog_eye
import cursor
import search_in_browser
import windows_api

from pynput.mouse import Listener


class ChatGPTAutoScript:
    def __init__(self):
        self.retry_button_path = "detected_images/retry_button.png"
        self.submit_button_path = "detected_images/submit_button.png"
        self.resubmit_button_path = "detected_images/resubmit_button.png"
        self._on_focus = False

        os.makedirs("detected_images", exist_ok=True)

    def focus_window(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # The first positional argument is assumed to be the instance the
            # decorated method is called on.
            self = args[0]
            cursor_pos = pyautogui.position()
            flag = False
            if not self._on_focus:
                self._focus_chat_input()
                self._on_focus = True
                flag = True
            try:
                return func(*args, **kwargs)
            finally:
                if flag:
                    pyautogui.moveTo(cursor_pos)
                    self._on_focus = False

        return wrapper

    def init_window_rect(self):
        self.window_rect = windows_api.get_mouse_window_rect()

    def test(self):
        self.new_chat()

        prompt = "Translate to Chinese: This is a test."
        print("User:", prompt)
        response = self.submit(prompt)
        print("ChatGPT:", response)

        prompt = "Translate to Chinese: This is the second test."
65 | print("User:", prompt) 66 | response = self.resubmit(prompt) 67 | print("ChatGPT:", response) 68 | 69 | def init_submit_button(self): 70 | cursor_pos = pyautogui.position() 71 | 72 | init_submit_button_flag = True 73 | if os.path.exists(self.submit_button_path) or hasattr(self, "submit_button_image"): 74 | if not hasattr(self, "submit_button_image"): 75 | self.submit_button_image = Image.open(self.submit_button_path) 76 | if (region := self.locate_image(self.submit_button_image, confidence=0.9)): 77 | self.submit_button_region = self.pad_image_region(region) 78 | cursor_pos = pyautogui.position() 79 | pyautogui.moveTo(region[0], region[1]) 80 | self.init_window_rect() 81 | pyautogui.moveTo(cursor_pos) 82 | init_submit_button_flag = False 83 | 84 | if init_submit_button_flag: 85 | pyautogui.alert(text='程序将监听鼠标点击,请将鼠标移动到ChatGPT网页空白处,并点击一次。\nThe program will listen for mouse clicks, please move the mouse to a blank area of the ChatGPT webpage and click once.', title='初始化自动脚本 (Initialize AutoChatScript)', button='ok') 86 | 87 | # 定义点击事件处理函数 88 | def on_click(x, y, button, pressed): 89 | if pressed: 90 | print(f"Mouse clicked at ({x}, {y})") 91 | return False # 返回False以停止监听 92 | 93 | # 启动监听 94 | with Listener(on_click=on_click) as listener: 95 | listener.join() 96 | 97 | print("开始初始化...") 98 | self.init_window_rect() 99 | self.submit_button_region, self.submit_button_image = self.locate_submit_button() 100 | 101 | 102 | # if os.path.exists(self.retry_button_path): 103 | # self.retry_button_image = Image.open(self.retry_button_path) 104 | # else: 105 | # _, self.retry_button_image = self.locate_retry_button() 106 | 107 | # if os.path.exists(self.resubmit_button_path): 108 | # self.resubmit_button_image = Image.open(self.resubmit_button_path) 109 | # else: 110 | # _, self.resubmit_button_image = self.locate_resubmit_button() 111 | 112 | pyautogui.moveTo(cursor_pos) 113 | 114 | def wait_submit_image(self, timeout=0): 115 | print('waiting submit button...') 116 | pos = self.wait_image(self.submit_button_image, region=self.submit_button_region, confidence=0.9, timeout=timeout) 117 | if pos is None: 118 | print('submit button not found, reinitializing...') 119 | self.init_submit_button() 120 | pos = self.wait_image(self.submit_button_image, region=self.submit_button_region, confidence=0.9, timeout=timeout) 121 | if pos is None: 122 | raise Exception("未能定位到submit按钮!(Failed to locate the submit button!)") 123 | print('submit button found!') 124 | return pos 125 | 126 | def init_resubmit_button(self): 127 | if os.path.exists(self.resubmit_button_path): 128 | self.resubmit_button_image = Image.open(self.resubmit_button_path) 129 | else: 130 | pyautogui.alert(text='点击ok后,程序将自动检测修改按钮图片。\nAfter clicking ok, the program will automatically detect the resubmit button image.', title='初始化自动脚本 (Initialize AutoChatScript)', button='ok') 131 | 132 | while (response := self.copy_last_response()) is None: 133 | pyautogui.alert(text='请展示有对话内容的网页,以便程序检测修改按钮位置。\nPlease display a webpage with dialogue content so the program can detect the position of the resubmit button.', title='初始化自动脚本 (Initialize AutoChatScript)', button='ok') 134 | 135 | _, self.resubmit_button_image = self.locate_resubmit_button(response) 136 | 137 | def screenshot(self, filepath=None, region=None): 138 | if region is None: 139 | left, top, right, bottom = self.window_rect 140 | else: 141 | left, top, right, bottom = region 142 | region = (int(left), int(top), int(right - left), int(bottom - top)) 143 | if filepath is not None: 144 | return 
            return pyautogui.screenshot(filepath, region=region)
        else:
            return pyautogui.screenshot(region=region)

    def locateCenterOnScreen(self, image, region=None, confidence=1):
        if region is None:
            left, top, right, bottom = self.window_rect
        else:
            left, top, right, bottom = region
        region = (int(left), int(top), int(right - left), int(bottom - top))

        return pyautogui.locateCenterOnScreen(image, region=region, confidence=confidence)

    def _focus_chat_input(self):
        pos = self.wait_submit_image()
        left, top, right, bottom = self.submit_button_region
        pyautogui.click(left, (top + bottom) // 2)
        return pos

    def _focus_chat_input_by_keyboard(self):
        pyautogui.hotkey("shift", "esc")

    @focus_window
    def new_chat(self):
        pyautogui.hotkey("ctrl", "shift", "o")

    def _wait_last_response(self):
        # self.scroll_to_bottom()
        # wait_start = self.wait_stationary(delay=1, timeout=10, reverse=True)
        # wait_finish = self.wait_stationary(delay=1, timeout=10, reverse=False)
        time.sleep(0.5)  # give the browser time to react to the submit click

        wait_start = self.wait_image_disappear(self.submit_button_image, region=self.submit_button_region, confidence=0.9, timeout=30)

        if wait_start:
            wait_finish = self.wait_image(self.submit_button_image, region=self.submit_button_region, confidence=0.9, timeout=120)
            if wait_finish:
                response = self.copy_last_response()
                if response is None:
                    pyautogui.alert(text='请手动复制,程序将监听下一次的复制内容。\nPlease copy manually, the program will listen for the next copy action.', title='复制快捷键未起效!(Copy shortcut key is not effective!)', button='OK')
                else:
                    if len(response) > 500:
                        continue_generating_button, _ = search_in_browser.locate_text("Continue generating")
                        if continue_generating_button:
                            cursor_pos = pyautogui.position()
                            pyautogui.click(continue_generating_button[0].mean(axis=0).tolist())
                            pyautogui.moveTo(cursor_pos)
                            return self._wait_last_response()
                        else:
                            return response
                    else:
                        return response
            else:
                pyautogui.alert(text='请手动复制,程序将监听下一次的复制内容。\nPlease copy manually, the program will listen for the next copy action.', title='ChatGPT回复超时!(ChatGPT response timeout!)', button='OK')
        else:
            pyautogui.alert(text='请手动复制,程序将监听下一次的复制内容。\nPlease copy manually, the program will listen for the next copy action.', title='ChatGPT未响应!(ChatGPT is not responding!)', button='OK')

        return self.wait_for_clip_copy()

    def wait_for_clip_copy(self):
        # Write a random sentinel string to the clipboard, then poll until the
        # user (or the page) replaces it with new content.
        clip_state = pyperclip.paste()
        initial_clipboard_content = self.generate_password()
        pyperclip.copy(initial_clipboard_content)
        while pyperclip.paste() == initial_clipboard_content:
            time.sleep(0.1)
        new_clipboard_content = pyperclip.paste()
        pyperclip.copy(clip_state)
        return new_clipboard_content

    def generate_password(self, length=12):
        # Character pool for the random sentinel string
        characters = string.ascii_letters + string.digits + "!@#$%^&*()"
        # Randomly pick characters to build it
        password = ''.join(random.choice(characters) for i in range(length))
        return password

    @focus_window
    def copy_last_response(self):
        # Same sentinel trick as in wait_for_clip_copy: if the copy hotkey had
        # no effect, the clipboard still holds the sentinel and None is returned.
        clip_state = pyperclip.paste()
        initial_clipboard_content = self.generate_password()
        pyperclip.copy(initial_clipboard_content)

        pyautogui.hotkey("ctrl", "shift", "c")

        if pyperclip.paste() == initial_clipboard_content:
            return None

        new_clipboard_content = pyperclip.paste()
        pyperclip.copy(clip_state)
        return new_clipboard_content

    def submit(self, prompt):
        debug = False
        if debug:
            return debug

        cursor_pos = pyautogui.position()
        pos = self._focus_chat_input()
        pyperclip.copy(prompt)
        pyautogui.hotkey("ctrl", "a", "v")
        pyautogui.scroll(-1_000_000)

        pos = self.wait_submit_image(timeout=0.5)  # the browser needs time to process the paste and scroll events
        pyautogui.click(pos)
        pyautogui.moveTo(cursor_pos)

        response = None
        try:
            response = self._wait_last_response()
        except Exception as e:
            print(e)

        return response

    def match_code_block(self, response):
        # A trailing ``` is appended so an unterminated code block at the end
        # of the response is still matched.
        pattern = r"(?<=[\r\n])```.*?```(?=[\r\n])"
        matches = re.findall(pattern, '\n' + response + '```\n', re.DOTALL)
        return matches

    def scroll_to_bottom(self, clicks=-1_000_000):
        pos = (self.window_rect[0] + self.window_rect[2]) // 2, (self.window_rect[1] + self.window_rect[3]) // 2
        pyautogui.moveTo(pos)
        pyautogui.scroll(clicks)

    def resubmit(self, prompt, response=None):
        if not hasattr(self, "resubmit_button_image"):
            self.init_resubmit_button()

        if response is None:
            response = self.copy_last_response()

        button_left_top, button_right_bottom, line_height = self.estimate_resubmit_button_region(response)

        cursor_pos = pyautogui.position()
        pyautogui.moveTo(button_left_top[0] + line_height, button_left_top[1] + line_height)
        pos = self.wait_image(self.resubmit_button_image, region=(*button_left_top, self.window_rect[2], button_right_bottom[1]), confidence=0.9, timeout=0)

        if pos is None:
            if os.path.exists(self.resubmit_button_path):
                os.remove(self.resubmit_button_path)
            if hasattr(self, "resubmit_button_image"):
                del self.resubmit_button_image
            return self.resubmit(prompt, response)
        else:
            pyautogui.click(pos)

        pyperclip.copy(prompt)
        pyautogui.hotkey("ctrl", "a", "v")
        pyautogui.hotkey("tab", "enter")
        pyautogui.moveTo(cursor_pos)

        try:
            response = self._wait_last_response()
        except Exception as e:
            print(e)

        return response

    def wait_image(self, image, region=None, confidence=1, timeout=10, debug=False):
        t0 = time.time()
        while True:
            try:
                if debug: self.screenshot(f"debug/wait_image/{t0:.0f}-{time.time()-t0:.3f}.png", region=region)
                pos = self.locateCenterOnScreen(image, region=region, confidence=confidence)
                return pos
            except Exception:
                if time.time() - t0 > timeout:
                    return None
                time.sleep(0.5 if debug else 0.1)

    def wait_image_disappear(self, image, region=None, confidence=1, timeout=10, debug=False):
        t0 = time.time()
        pos = True
        while pos:
            try:
                if debug: self.screenshot(f"debug/wait_image_disappear/{t0:.0f}-{time.time()-t0:.3f}.png", region=region)
                pos = self.locateCenterOnScreen(image, region=region, confidence=confidence)
            except Exception:
                return True
            if time.time() - t0 > timeout:
                return False
            time.sleep(0.5 if debug else 0.1)

    def wait_stationary(self, delay=2, timeout=10, reverse=False, debug=False):
        t0 = time.time()
        img_before = self.screenshot()
        cur_delay = delay
        if debug: os.makedirs(f"debug/wait_stationary/{t0}")
        while True:
            time.sleep(0.5)
            img_after = self.screenshot()
            ret_change_ratio = []
            is_stationary = frog_eye.is_stationary(img_before, img_after, ret_change_ratio=ret_change_ratio)
            if reverse: is_stationary = not is_stationary
            if is_stationary:
                cur_delay -= 0.5
                if debug: img_after.save(f"debug/wait_stationary/{t0}/{time.time()}-{ret_change_ratio[0]}-{cur_delay}.png")
                if cur_delay <= 0:
                    return True
            else:
                if debug: img_after.save(f"debug/wait_stationary/{t0}/{time.time()}-{ret_change_ratio[0]}-{cur_delay}.png")
                cur_delay = delay
                if time.time() - t0 > timeout:
                    return False
            img_before = img_after

    def locate_image(self, image, confidence=1):
        try:
            location = pyautogui.locateOnScreen(image, confidence=confidence)
        except Exception:
            return None

        if location is None:
            return None
        return location.left, location.top, location.left + location.width, location.top + location.height

    def pad_image_region(self, region, pad=50):
        left, top, right, bottom = region
        return (left - pad, top - pad, right + pad, bottom + pad)

    def manual_locate_image_region(self, image_file):
        while not input("Move the mouse over the button, then press Enter"):
            if cursor.get_cursor_state() != 'HAND':
                print("Please move the mouse onto the button!")
            else:
                cursor_pos = np.asarray(pyautogui.position())
                region = cursor.detect_cur_button_boundary(cursor_pos, cursor_pos - 200, cursor_pos + 200, step=5)
                image = self.screenshot(image_file, region=region)
                pyautogui.moveTo(cursor_pos[0], cursor_pos[1])
                return self.pad_image_region(region), image

    @focus_window
    def locate_retry_button(self):
        last_response = self.copy_last_response()
        last_para = [e for e in last_response.split("\n") if e.strip()][-1]
        last_para = last_para.replace('`', '')

        text_rects, line_height = search_in_browser.locate_text(last_para)
        left_top_rect = text_rects[0]
        right_bottom_rect = text_rects[-1]
        next_line_left_top = (left_top_rect[:, 0].min(axis=0), right_bottom_rect[:, 1].max())

        button_right_bottom = next_line_left_top[0] + line_height * 10, next_line_left_top[1] + line_height * 2

        button_boundary_list = cursor.detect_button_boundary(
            next_line_left_top,
            button_right_bottom,
            step=line_height // 2,
            sub_step=line_height // 8,
            only_first=True)

        if len(button_boundary_list) == 1:
            image = self.screenshot(self.retry_button_path, region=button_boundary_list[0])
        else:
            assert False, "未能定位到重试按钮!(Failed to locate the retry button!)"

        return self.pad_image_region(button_boundary_list[0]), image

    @focus_window
    def estimate_resubmit_button_region(self, response):
        for left_top_text in response.split("\r\n"):
            if left_top_text.startswith('```'):
                continue
            left_top_text = left_top_text.replace("`", "")
            if len(left_top_text) < 7:
                continue
            break
        left_top_text = left_top_text.replace('`', '')

        text_rects, line_height = search_in_browser.locate_text(left_top_text)
        left_top_rect = text_rects[0]

        button_left_top = left_top_rect.min(axis=0) + np.array([-line_height, -line_height * 5])
        button_right_bottom = left_top_rect.max(axis=0)

        # if button_left_top is on the top of the window, scroll to the bottom
        if (button_left_top[1] - self.window_rect[1]) / (self.window_rect[3] - self.window_rect[1]) < 0.25:
            self.scroll_to_bottom(clicks=(self.window_rect[3] - self.window_rect[1]) // 4)

            text_rects, line_height = search_in_browser.locate_text(left_top_text)
            left_top_rect = text_rects[0]

            button_left_top = left_top_rect.min(axis=0) + np.array([-line_height * 2, -line_height * 7])
            button_right_bottom = left_top_rect.max(axis=0)

        return button_left_top.tolist(), button_right_bottom.tolist(), int(line_height)

    @focus_window
    def locate_resubmit_button(self, response):
        button_left_top, button_right_bottom, line_height = self.estimate_resubmit_button_region(response)

        button_boundary_list = cursor.detect_button_boundary(
            button_left_top,
            button_right_bottom,
            step=line_height // 2,
            sub_step=line_height // 8,
            only_first=True)

        if len(button_boundary_list) == 1:
            left, top, right, bottom = button_boundary_list[0]
            pyautogui.moveTo(left, top - line_height)
            image = self.screenshot(self.resubmit_button_path, region=button_boundary_list[0])
        else:
            assert False, "未能定位到resubmit按钮!(Failed to locate the resubmit button!)"

        return self.pad_image_region(button_boundary_list[0]), image

    def locate_submit_button(self):
        self._focus_chat_input_by_keyboard()

        #random_string = self.generate_password()
        random_string = "This Say a is Test!"
        pyperclip.copy(random_string)
        pyautogui.hotkey("ctrl", "a", "v")

        text_rects, line_height = search_in_browser.locate_text(random_string)
        text_rect = text_rects[0]

        button_left_top = int(text_rect[:, 0].max()), int(text_rect[:, 1].min())
        #button_right_bottom = windows_api.get_mouse_window_rect()[2] - line_height // 4, int(text_rect[:, 1].max())
        button_right_bottom = self.window_rect[2:]

        button_boundary_list = cursor.detect_button_boundary(
            button_left_top,
            button_right_bottom,
            step=line_height // 2,
            sub_step=line_height // 8,
            only_first=True)

        if len(button_boundary_list) == 1:
            image = self.screenshot(self.submit_button_path, region=button_boundary_list[0])
        else:
            assert False, "未能定位到submit按钮!(Failed to locate the submit button!)"

        return self.pad_image_region(button_boundary_list[0]), image

    def demo(self):
        self.init_submit_button()
        while True:
            prompt = input("User(q to quit, r to resubmit): ")
            if prompt == 'q':
                break
            elif prompt == 'r':
                prompt = input("\t resubmit:")
                response = self.resubmit(prompt)
                print("ChatGPT:", response)
            elif len(prompt) == 0:
                print("Input cannot be empty!")
            else:
                response = self.submit(prompt)
                print("ChatGPT:", response)


if __name__ == "__main__":
    chatgpt = ChatGPTAutoScript()
    #response = chatgpt.submit("你好")
    #print(response)
    chatgpt.demo()

--------------------------------------------------------------------------------
/chatgpt_chat_script.py:
--------------------------------------------------------------------------------
# Wraps chatgpt_auto_script.py: stores the conversation data and provides an
# API-like chat interface on top of the raw automation.

import json
import os
import time
from typing import List, Literal, Optional

from pydantic import BaseModel

from chatgpt_auto_script import ChatGPTAutoScript

class ChatMessage(BaseModel):
    role: Literal['user', 'assistant', 'system']
    content: Optional[str]

class Chat(BaseModel):
    chatid: str
    name: Optional[str] = None
    messages: List[ChatMessage]

class ChatGPTChatScript:
    def __init__(self):
        self.auto_script: ChatGPTAutoScript = ChatGPTAutoScript()
        self.chats: List[Chat] = []

        self.auto_script.init_submit_button()

        if os.path.exists('chats.json'):
            self.load_chats('chats.json')

    def save_chats(self, filename: str):
        """
        Save the chats list to a file.
        """
        with open(filename, 'w', encoding='utf-8') as file:
            # model_dump() converts each Pydantic model into a plain dict
            json_data = [chat.model_dump() for chat in self.chats]
            json.dump(json_data, file, ensure_ascii=False, indent=1)

    def load_chats(self, filename: str):
        """
        Load the chats list from a file.
        """
        with open(filename, 'r', encoding='utf-8') as file:
            json_data = json.load(file)
            # Chat.model_validate converts each dict back into a Chat object
            self.chats = [Chat.model_validate(chat) for chat in json_data]

    def find_chat(self, messages: List[ChatMessage]):
        # Find the stored chat whose message history starts with the given messages
        for chat in self.chats:
            if len(messages) <= len(chat.messages) and all(len(msg.content) == len(chat_msg.content) and msg.content == chat_msg.content for msg, chat_msg in zip(messages, chat.messages)):
                return chat
        return None

    def submit(self, prompt):
        response = self.auto_script.submit(prompt)
        response_msg = ChatMessage(role='assistant', content=response)
        return response_msg

    def auto_chat(self, messages: List[ChatMessage]):
        messages = messages.copy()
        # Accepts a messages list like the API does, and automatically decides
        # whether to start a new chat, submit, or resubmit.
        assert len(messages) > 0 and messages[-1].role == 'user'

        if len(messages) == 1:
            if (chat := self.find_chat(messages)) is None:
                action = 'new'
            else:
                action = 'resubmit'
        else:
            if (chat := self.find_chat(messages[:-1])) is None:
                raise Exception('Previous messages not found')
            else:
                if len(chat.messages) == len(messages) - 1:
                    action = 'submit'
                else:
                    action = 'resubmit'

        # check
        # if action != 'new' and chat.messages[-1].content != self.auto_script.copy_last_response():
        #     print('[WARN]Current chat is not consistent with last response')
        # Consistency with the last on-screen response is not checked for now.

        if action == 'new':
            self.auto_script.new_chat()
            chat_id = str(int(time.time() * 1000))  # Use milliseconds for uniqueness
            chat = Chat(chatid=chat_id, messages=messages)

            response = self.auto_script.submit(messages[-1].content)
            response_msg = ChatMessage(role='assistant', content=response)
            chat.messages.append(response_msg)

            self.chats.append(chat)
        elif action == 'submit':
            response = self.auto_script.submit(messages[-1].content)
            response_msg = ChatMessage(role='assistant', content=response)
            chat.messages.append(messages[-1])
            chat.messages.append(response_msg)
        elif action == 'resubmit':
            response = self.auto_script.resubmit(messages[-1].content, chat.messages[len(messages)].content)
            response_msg = ChatMessage(role='assistant', content=response)
            chat.messages = messages + [response_msg, ]

        self.save_chats('chats.json')
        return response_msg

if __name__ == '__main__':
    chat_script = ChatGPTChatScript()
    messages = [
        ChatMessage(role='user', content='Hello, how are you?'),
        ChatMessage(role='assistant', content="I'm here and ready to assist you. How can I help you today?"),
How can I help you today?"), 113 | ChatMessage(role='user', content='What can you do for me ?'), 114 | ] 115 | chat_script.auto_chat(messages) 116 | print(chat_script.chats) -------------------------------------------------------------------------------- /cursor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import win32gui 5 | import win32con 6 | import functools 7 | import pyautogui 8 | 9 | 10 | from sklearn.cluster import DBSCAN 11 | import numpy as np 12 | 13 | 14 | def modify_pyautogui_settings(func): 15 | @functools.wraps(func) # 保持被修饰函数的元数据 16 | def wrapper(*args, **kwargs): 17 | # 保存当前pyautogui设置 18 | pyautogui_values = (pyautogui.PAUSE, pyautogui.DARWIN_CATCH_UP_TIME, pyautogui.FAILSAFE) 19 | 20 | # 临时修改设置 21 | pyautogui.PAUSE, pyautogui.DARWIN_CATCH_UP_TIME, pyautogui.FAILSAFE = 0.01, 0.01, False 22 | 23 | try: 24 | # 执行被修饰的函数 25 | return func(*args, **kwargs) 26 | except Exception as e: 27 | raise e 28 | finally: 29 | # 恢复原来的pyautogui设置 30 | pyautogui.PAUSE, pyautogui.DARWIN_CATCH_UP_TIME, pyautogui.FAILSAFE = pyautogui_values 31 | 32 | return wrapper 33 | 34 | 35 | def get_cursor_state(): 36 | """ 37 | 获取当前鼠标光标的状态。 38 | """ 39 | 40 | # 获取光标的图标ID,这个ID可以用来判断光标的类型 41 | cursor_id = win32gui.GetCursorInfo()[1] 42 | 43 | # 例如,可以通过比较cursor_id与系统预定义的光标类型来判断光标状态 44 | if cursor_id == win32gui.LoadCursor(0, win32con.IDC_ARROW): 45 | return "ARROW" 46 | elif cursor_id == win32gui.LoadCursor(0, win32con.IDC_IBEAM): 47 | return "IBEAM" 48 | elif cursor_id == win32gui.LoadCursor(0, win32con.IDC_HAND): 49 | return "HAND" 50 | else: 51 | print("Unknown cursor.") 52 | return None 53 | 54 | 55 | @modify_pyautogui_settings 56 | def detect_cur_button_boundary(start_pos, left_top, right_bottom, step=20): 57 | directions = ['left', 'right', 'up', 'down'] 58 | bounds = {} 59 | 60 | # 定义搜索边界 61 | search_boundaries = { 62 | 'left': left_top[0], 63 | 'right': right_bottom[0], 64 | 'up': left_top[1], 65 | 'down': right_bottom[1] 66 | } 67 | 68 | cursor_pos = pyautogui.position() 69 | 70 | start_pos = (int(start_pos[0]), int(start_pos[1])) 71 | 72 | for direction in directions: 73 | x, y = start_pos 74 | flag = True 75 | while True: 76 | if direction == 'left': 77 | x -= step 78 | elif direction == 'right': 79 | x += step 80 | elif direction == 'up': 81 | y -= step 82 | elif direction == 'down': 83 | y += step 84 | else: 85 | break 86 | 87 | if x < search_boundaries['left'] or x > search_boundaries['right'] or y < search_boundaries['up'] or y > search_boundaries['down']: 88 | # 如果到达了搜索区域的边界,结束搜索 89 | bounds[direction] = (x, y) 90 | break 91 | 92 | if pyautogui.position() == cursor_pos: 93 | if flag: # 给浏览器一点时间检测鼠标悬停 94 | pyautogui.moveTo(x, y, duration=0.2) 95 | flag = False 96 | else: 97 | pyautogui.moveTo(x, y) 98 | cursor_pos = (x, y) 99 | else: 100 | pyautogui.alert(text='检测到人为鼠标移动!(Detected artificial mouse movement!) 
                pyautogui.alert(text='检测到人为鼠标移动!(Detected artificial mouse movement!)', title='程序终止(Program Abort)', button='OK')
                return

            if get_cursor_state() != "HAND":
                # As soon as the cursor is no longer a hand, record the boundary point and stop
                bounds[direction] = (x, y)
                break

    # Left-top and right-bottom corners of the detected rectangle
    left = bounds.get('left', start_pos)[0]
    right = bounds.get('right', start_pos)[0]
    top = bounds.get('up', start_pos)[1]
    bottom = bounds.get('down', start_pos)[1]

    return int(left), int(top), int(right), int(bottom)

@modify_pyautogui_settings
def detect_button_boundary(left_top, right_bottom, step=20, sub_step=5, only_first=False):
    width = right_bottom[0] - left_top[0]
    height = right_bottom[1] - left_top[1]
    # 2-D visited grid over the scan lattice, initialized to False
    visited = np.full((height // step + 1, width // step + 1), False, dtype=bool)

    all_boundaries = []

    for idx_y, y in enumerate(range(left_top[1], right_bottom[1], step)):
        for idx_x, x in enumerate(range(left_top[0], right_bottom[0], step)):
            if visited[idx_y, idx_x]:
                continue  # skip points that fall inside an already-detected button

            pyautogui.moveTo(x, y)

            if get_cursor_state() == "HAND":
                boundary = left, top, right, bottom = detect_cur_button_boundary((x, y), left_top, right_bottom, step=sub_step)
                if only_first:
                    return [boundary, ]

                assert boundary not in all_boundaries
                all_boundaries.append(boundary)

                for by in range(top, bottom + 1, step):
                    for bx in range(left, right + 1, step):
                        idx_visited_x = (bx - left) // step
                        idx_visited_y = (by - top) // step
                        visited[idx_visited_y, idx_visited_x] = True
    return all_boundaries

def merge_positions_using_dbscan(positions, eps=10, min_samples=2):
    """
    Merge hand-cursor positions into clusters using DBSCAN.

    :param positions: list of hand-cursor positions, e.g. [(x1, y1), (x2, y2), ...]
    :param eps: DBSCAN neighborhood radius
    :param min_samples: minimum number of samples required to form a cluster
    :return: positions grouped by cluster, e.g. [[(x11, y11), (x12, y12), ...], ...]
    """
    if not positions:
        return []

    # Convert the positions to a NumPy array for DBSCAN
    X = np.array(positions)

    # Run DBSCAN
    db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
    labels = db.labels_

    # Group the positions by cluster label
    clusters = {}
    for label, position in zip(labels, positions):
        if label not in clusters:
            clusters[label] = []
        clusters[label].append(position)

    # Drop noise points (label -1) and return only valid clusters
    clusters = [np.array(clusters[label]) for label in clusters if label != -1]
    return clusters

@modify_pyautogui_settings
def detect_hand_location(left_top, right_bottom, step=20):
    t0 = time.time()
    i = 0
    left, top = left_top
    right, bottom = right_bottom

    hand_positions = []

    cursor_pos = pyautogui.position()

    for y in range(top, bottom, step):
        for x in range(left, right, step):  # sweep the mouse across the region in step-pixel increments
            if pyautogui.position() == cursor_pos:
                pyautogui.moveTo(x, y)
                cursor_pos = (x, y)
            else:
                pyautogui.alert(text='检测到人为鼠标移动!(Detected artificial mouse movement!)', title='程序终止(Program Abort)', button='OK')
                return
            i += 1
            state = get_cursor_state()
            if state == "HAND":
                hand_positions.append((x, y))

            if time.time() - t0 > 60:
                print("Timeout.")
                return

    print(f"Time: {time.time() - t0:.2f} seconds")

    return merge_positions_using_dbscan(hand_positions, eps=step+1, min_samples=2)

if __name__ == '__main__':
    button_boundary_list = detect_button_boundary((0, 0), (100, 500))
    if len(button_boundary_list) == 1:
        left, top, right, bottom = button_boundary_list[0]
        os.makedirs("detected_images", exist_ok=True)
        pyautogui.screenshot(os.path.join("detected_images", "Button.png"), region=(left, top, right - left, bottom - top))
        exit(0)
    result = detect_hand_location((0, 0), (100, 500))
    os.makedirs("cursor_area", exist_ok=True)
    for hand in result:
        left_top = hand.min(axis=0)
        right_bottom = hand.max(axis=0)
        left, top, right, bottom = left_top[0].item(), left_top[1].item(), right_bottom[0].item(), right_bottom[1].item()
        pyautogui.screenshot(os.path.join("cursor_area", f"HAND-{left}-{top}-{right}-{bottom}.png"), region=(left, top, right - left, bottom - top))
        print(hand.min(axis=0), hand.max(axis=0))
        print()

--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
# Console demo for this project
from chatgpt_auto_script import ChatGPTAutoScript

auto_script = ChatGPTAutoScript()
auto_script.demo()

--------------------------------------------------------------------------------
/frog_eye.py:
--------------------------------------------------------------------------------
import colorsys
import random
import cv2

import numpy as np

def is_stationary(frame1, frame2, threshold=30, change_ratio_threshold=0.01, ret_change_ratio=None):
    frame1, frame2 = np.asarray(frame1), np.asarray(frame2)

    # Convert both frames to grayscale to simplify the comparison
    gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

    # Absolute per-pixel difference between the two frames
    diff = cv2.absdiff(gray1, gray2)

    # Threshold the difference to flag changed regions
    _, thresh = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)

    non_zero_count = np.count_nonzero(thresh)

    # The ratio of changed (non-zero) pixels decides whether the frame is stationary
    total_pixels = frame1.shape[0] * frame1.shape[1]

    change_ratio = non_zero_count / total_pixels

    #print("change_ratio", change_ratio)
    if ret_change_ratio is not None:
        ret_change_ratio.append(change_ratio)

    return change_ratio < change_ratio_threshold


def find_contours(img_before, img_after, thresh=30):
    img_before, img_after = np.asarray(img_before), np.asarray(img_after)

    # Convert to grayscale to speed up processing
    gray_before = cv2.cvtColor(img_before, cv2.COLOR_BGR2GRAY)
    gray_after = cv2.cvtColor(img_after, cv2.COLOR_BGR2GRAY)

    # Difference between the two images
    diff = cv2.absdiff(gray_before, gray_after)
    _, thresh = cv2.threshold(diff, thresh, 255, cv2.THRESH_BINARY)

    # Find the contours of the changed regions
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    return contours

def point_in_contour(point, contour):
    dist = cv2.pointPolygonTest(contour, point, False)

    return dist >= 0

def random_color():
    h = random.random()  # random hue
    s = random.uniform(0.5, 1.0)  # random saturation
    v = random.uniform(0.5, 1.0)  # random value (brightness)

    s, v = 1, 1  # fixed at full saturation and brightness

    return tuple(round(i * 255) for i in colorsys.hsv_to_rgb(h, s, v))


def vis_contours(img, contours, delay=0):
    img_vis = np.asarray(img).copy()
    cv2.drawContours(img_vis, contours, -1, (0, 255, 0), 2)  # contourIdx=-1 draws every contour
    cv2.imshow('Contours', img_vis)
    cv2.waitKey(delay)
    #cv2.destroyAllWindows()

def find_optimal_highlight_rect(img_before, img_after, width_height_ratio=None, vis=False):
    img_before, img_after = np.asarray(img_before), np.asarray(img_after)

    # Find the contours of the changed regions
    contours = find_contours(img_before, img_after)

    if vis:
        img_vis = img_after.copy()

    valid_rects = []

    for contour in contours:
        # # Approximate each contour
        # perimeter = cv2.arcLength(contour, True)
        # approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)

        # # Keep only contours that approximate a rectangle (four vertices)
        # if len(approx) == 4:
        rect = cv2.minAreaRect(contour)  # minimum-area bounding rectangle
        width, height = max(rect[1]), min(rect[1])

        if rect[2] % 90 != 0:
            continue

        if not (width >= 5 and 400 >= height >= 5):
            continue

        if cv2.contourArea(contour) == 0 or width * height / cv2.contourArea(contour) > 1.1:
            continue

        valid_rects.append(rect)

        if vis:
            cv2.drawContours(img_vis, [contour], 0, (0, 255, 0), 2)

    if vis:
        if not valid_rects:
            for contour in contours:
                cv2.drawContours(img_vis, [contour], 0, (0, 255, 0), 2)
        cv2.imshow('Changes', img_vis)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    if not valid_rects:
        return None

    # select the rect whose aspect ratio is closest to the given aspect ratio
    if width_height_ratio is not None:
        valid_rects = sorted(valid_rects, key=lambda r: abs(max(r[1]) / (min(r[1]) + 1e-6) - width_height_ratio))

    best_rect = valid_rects[0]

    box = cv2.boxPoints(best_rect)
    box = np.intp(box)  # np.intp; the np.int0 alias was removed in NumPy 2.0

    return box

def find_all_highlight_rect(img_before, img_after, width_height_ratio=None, vis=False):
    img_before, img_after = np.asarray(img_before), np.asarray(img_after)

    # Find the contours of the changed regions
    contours = find_contours(img_before, img_after)

    if vis:
        img_vis = img_after.copy()

    valid_rects = []

    for contour in contours:
        rect = cv2.minAreaRect(contour)  # minimum-area bounding rectangle
        width, height = max(rect[1]), min(rect[1])

        if not (width >= 5 and 400 >= height >= 5):
            continue

        if rect[2] % 90 != 0:
            continue

        if cv2.contourArea(contour) == 0 or width * height / cv2.contourArea(contour) > 1.1:
            continue

        valid_rects.append(rect)

        if vis:
            # draw each kept contour in a random color
            cv2.drawContours(img_vis, [contour], 0, random_color(), 2)

    if vis:
        if not valid_rects:
            for contour in contours:
                cv2.drawContours(img_vis, [contour], 0, random_color(), 2)
        cv2.imshow('Changes', img_vis)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    if not valid_rects:
        return None

    boxes = []
    for rect in valid_rects:
        box = cv2.boxPoints(rect)
        boxes.append(np.intp(box))

    return boxes


if __name__ == '__main__':
    img_before = cv2.imread('img_before.png')
    img_after = cv2.imread('img_after.png')

    boxes = find_all_highlight_rect(img_before, img_after, width_height_ratio=10, vis=True)
    print(len(boxes))
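
    # A quick stationarity check on the same image pair (a sketch; like the
    # lines above, it assumes img_before.png and img_after.png exist next to
    # this script):
    print(is_stationary(img_before, img_after))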
--------------------------------------------------------------------------------
/openai_api.py:
--------------------------------------------------------------------------------
import base64
import time
from argparse import ArgumentParser
from contextlib import asynccontextmanager
from typing import Dict, List, Literal, Optional, Union

import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response


from chatgpt_chat_script import ChatGPTChatScript, ChatMessage

class BasicAuthMiddleware(BaseHTTPMiddleware):

    def __init__(self, app, username: str, password: str):
        super().__init__(app)
        self.required_credentials = base64.b64encode(
            f'{username}:{password}'.encode()).decode()

    async def dispatch(self, request: Request, call_next):
        authorization: str = request.headers.get('Authorization')
        if authorization:
            try:
                schema, credentials = authorization.split()
                if credentials == self.required_credentials:
                    return await call_next(request)
            except ValueError:
                pass

        headers = {'WWW-Authenticate': 'Basic'}
        return Response(status_code=401, headers=headers)


@asynccontextmanager
async def lifespan(app: FastAPI):
    yield


app = FastAPI(lifespan=lifespan)

app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)


class ModelCard(BaseModel):
    id: str
    object: str = 'model'
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = 'owner'
    root: Optional[str] = None
    parent: Optional[str] = None
    permission: Optional[list] = None


class ModelList(BaseModel):
    object: str = 'list'
    data: List[ModelCard] = []

class DeltaMessage(BaseModel):
    role: Optional[Literal['user', 'assistant', 'system']] = None
    content: Optional[str] = None


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    functions: Optional[List[Dict]] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    max_length: Optional[int] = None
    stream: Optional[bool] = False
    stop: Optional[List[str]] = None


class ChatCompletionResponseChoice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: Literal['stop', 'length', 'function_call']


class ChatCompletionResponseStreamChoice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[Literal['stop', 'length']]


class ChatCompletionResponse(BaseModel):
    model: str
    object: Literal['chat.completion', 'chat.completion.chunk']
    choices: List[Union[ChatCompletionResponseChoice,
                        ChatCompletionResponseStreamChoice]]
    created: Optional[int] = Field(default_factory=lambda: int(time.time()))


chat_script = ChatGPTChatScript()

@app.get('/v1/models', response_model=ModelList)
async def list_models():
    model_card = ModelCard(id='gpt-3.5-turbo')
    return ModelList(data=[model_card])


@app.post('/v1/chat/completions', response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
    chat_messages = request.messages

    #response_msg = chat_script.auto_chat(chat_messages)
    response_msg = chat_script.submit(chat_messages[-1].content)

    choice_data = ChatCompletionResponseChoice(
        index=0,
        message=response_msg,
        finish_reason='stop',
    )
    return ChatCompletionResponse(model=request.model,
                                  choices=[choice_data],
                                  object='chat.completion')


def _get_args():
    parser = ArgumentParser()
    parser.add_argument('--api-auth', help='API authentication credentials')
    parser.add_argument('--server-port',
                        type=int,
                        default=8000,
                        help='Demo server port.')
    parser.add_argument(
        '--server-name',
        type=str,
        default='127.0.0.1',
        help=
        'Demo server name. Default: 127.0.0.1, which is only visible from the local computer.'
        ' If you want other computers to access your server, use 0.0.0.0 instead.',
    )
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = _get_args()

    if args.api_auth:
        app.add_middleware(BasicAuthMiddleware,
                           username=args.api_auth.split(':')[0],
                           password=args.api_auth.split(':')[1])

    uvicorn.run(app, host=args.server_name, port=args.server_port, workers=1)

--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# AutoChatScript

[English](readme.md) | [中文](readme_cn.md) | [日本語](readme_jp.md)

AutoChatScript is an automation script built on the PyAutoGUI library that automates conversations with the ChatGPT website. The project relies entirely on reading the screen and driving the mouse and keyboard; it does not reverse-engineer any web requests and does not require any browser plugins.

## Features

- **Automated Initialization**: The script initializes itself; no manual screenshots are required.
- **Cross-Platform Support**: Support for platforms beyond Windows is planned.
- **Full Operation Support**: Supports the full set of operations: creating new conversations, submitting, regenerating, and editing messages.
- **Support for More Websites**: The project is not limited to ChatGPT; more chat websites will be supported in the future.

## Installation Guide

Before starting, ensure you have Python 3.8 or higher installed on your computer (the scripts use syntax introduced in Python 3.8, such as the walrus operator).

1. **Clone the Repository**

First, clone the project repository to your local computer:

```
git clone https://github.com/MaoXiaoYuZ/AutoChatScript.git
cd AutoChatScript
```

2. **Install Dependencies**

Use pip to install the required Python libraries:

```
pip install -r requirements.txt
```

## Usage Instructions

Before running the script, make sure you have opened the web version of ChatGPT and logged into your account.

1. **Start Demo**

```
python demo.py
```

Below is a demonstration of the demo running:

![Chat with ChatGPT](assets/chat_with_chatgpt.gif "Demo running live")

The demo initializes each feature the first time it is needed; shown below is the automatic initialization of the submit button after running demo.py.

![Init Submit Button](assets/init_submit_button.gif "Initialize Submit Button")

Below is the initialization of the resubmit button the first time the resubmit (edit and rewrite) feature is used.

![Init ReSubmit Button](assets/init_resubmit_button.gif "Initialize Resubmit Button")
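
If you prefer to drive the automation from your own code instead of the console demo, `ChatGPTAutoScript` can be used directly. A minimal sketch, based on demo.py and the class's built-in test method (the one-time guided initialization must run before the first submit):

```
from chatgpt_auto_script import ChatGPTAutoScript

auto_script = ChatGPTAutoScript()
auto_script.init_submit_button()  # one-time, guided initialization

response = auto_script.submit("Translate to Chinese: This is a test.")
print("ChatGPT:", response)

# Rewrite the previous prompt in place and regenerate the answer
response = auto_script.resubmit("Translate to Chinese: This is the second test.")
print("ChatGPT:", response)
```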

2. **Provide OpenAI Format API**

```
python openai_api.py --server-port 8000
```
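
The server exposes an OpenAI-style `/v1/chat/completions` endpoint on the chosen port. A minimal client sketch, mirroring the bundled test_openai_api.py (the model name and key here are placeholders; the server only checks credentials when started with `--api-auth`):

```
import requests

payload = {
    'model': 'gpt-3.5-turbo',
    'messages': [{"role": "user", "content": "Hello!"}],
}
resp = requests.post('http://127.0.0.1:8000/v1/chat/completions',
                     headers={"Authorization": "Bearer none"},
                     json=payload)
print(resp.json()['choices'][0]['message']['content'])
```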

## Precautions

- This project currently only supports the Windows operating system.
- Ensure that the ChatGPT web version is already open on your screen before running the script.
- As page layout updates may affect script operation, please check the project repository regularly for updates.

## Contributions

We welcome any form of contribution, whether feature requests, bug reports, or code submissions. Please submit your contributions through the GitHub repository's Issues and Pull Requests.

--------------------------------------------------------------------------------
/readme_cn.md:
--------------------------------------------------------------------------------
# AutoChatScript

[English](readme.md) | [中文](readme_cn.md) | [日本語](readme_jp.md)

AutoChatScript是一个使用PyAutoGUI库实现的自动化脚本,旨在自动化与ChatGPT网页的对话。该项目完全依靠读取和使用鼠标、键盘、屏幕,不逆向任何网页请求,也不需要安装浏览器插件。

## 特性

- **自动化初始化**:自动初始化脚本,无需人为截图。
- **全平台支持**:后续支持除Windows外更多平台。
- **全操作支持**:支持新建对话、提交、重新生成、修改等一系列操作。
- **支持更多网站**:本项目不仅仅针对ChatGPT,后续将支持更多Chat网站。

## 安装指南

在开始之前,请确保您的计算机上已安装 Python 3.8 或更高版本(脚本使用了海象运算符等 3.8+ 语法)。

1. **克隆仓库**

首先,克隆该项目仓库到您的本地计算机:

```
git clone https://github.com/MaoXiaoYuZ/AutoChatScript.git
cd AutoChatScript
```

2. **安装依赖**

使用 pip 安装所需的 Python 库:

```
pip install -r requirements.txt
```

## 使用说明

在运行脚本前,请确保您已经打开了ChatGPT网页版,并登录到您的账户。

1. **启动Demo**

```
python demo.py
```

下面展示的是demo的运行实况:

![chat_with_chatgpt](assets/chat_with_chatgpt.gif "Demo的运行实况")

该demo会在需要用到某项功能时对该功能进行初始化,下面展示的是运行demo.py后程序自动初始化提交按钮的过程。

![Init Submit Button](assets/init_submit_button.gif "初始化提交按钮")

下面展示的是第一次使用重写提交功能时,初始化重新提交按钮的过程。

![Init reSubmit Button](assets/init_resubmit_button.gif "初始化重新提交按钮")

2. **提供OpenAI格式API**

```
python openai_api.py --server-port 8000
```

## 注意事项

- 该项目目前仅支持 Windows 操作系统。
- 确保在运行脚本之前,您的屏幕上已经打开了ChatGPT的网页版。
- 由于页面布局的更新可能会影响脚本的运行,请定期检查项目仓库以获取更新。

## 贡献

我们欢迎任何形式的贡献,无论是功能请求、bug 报告还是代码提交。请通过 GitHub 仓库的 Issues 和 Pull Requests 来提交您的贡献。

--------------------------------------------------------------------------------
/readme_jp.md:
--------------------------------------------------------------------------------
# AutoChatScript

[English](readme.md) | [中文](readme_cn.md) | [日本語](readme_jp.md)

AutoChatScriptは、PyAutoGUIライブラリを使用して実装された自動化スクリプトで、ChatGPT(ウェブサイト)とのチャットを自動化することを目的としています。このプロジェクトは、マウス、キーボード、画面の読み取りと使用に完全に依存しており、ウェブリクエストのリバースエンジニアリングやブラウザプラグインのインストールを必要としません。

## 特徴

- **自動化された初期化**:スクリプトを自動的に初期化し、手動のスクリーンショットは不要です。
- **クロスプラットフォームサポート**:将来的にはWindows以外のプラットフォームもサポートします。
- **完全な操作サポート**:新しい対話の作成、送信、再生成、修正などの一連の操作をサポートします。
- **より多くのウェブサイトのサポート**:このプロジェクトはChatGPTだけでなく、将来的にはより多くのチャットウェブサイトをサポートします。

## インストールガイド

開始する前に、コンピュータにPython 3.8以上がインストールされていることを確認してください(スクリプトはウォルラス演算子などPython 3.8以降の構文を使用しています)。

1. **リポジトリをクローン**

まず、プロジェクトのリポジトリをローカルコンピュータにクローンします:

```
git clone https://github.com/MaoXiaoYuZ/AutoChatScript.git
cd AutoChatScript
```

2. **依存関係のインストール**

必要なPythonライブラリをpipでインストールします:

```
pip install -r requirements.txt
```

## 使用方法

スクリプトを実行する前に、ChatGPTのウェブ版を開き、アカウントにログインしていることを確認してください。

1. **デモの開始**

```
python demo.py
```

以下はデモの実行の様子です:

![Chat with ChatGPT](assets/chat_with_chatgpt.gif "デモの実行の様子")

デモは、ある機能が最初に必要になった時点でその機能を初期化します。以下はdemo.pyを実行した後に送信ボタンを自動的に初期化するプロセスです。

![Init Submit Button](assets/init_submit_button.gif "送信ボタンの初期化")

以下は初めて再送信機能を使用する際に再送信ボタンを初期化するプロセスです。

![Init ReSubmit Button](assets/init_resubmit_button.gif "再送信ボタンの初期化")

2. **OpenAI形式のAPIを提供**

```
python openai_api.py --server-port 8000
```

## 注意事項

- このプロジェクトは現在、Windowsオペレーティングシステムのみをサポートしています。
- スクリプトを実行する前に、ChatGPTのウェブ版が画面に表示されていることを確認してください。
- ページレイアウトの更新がスクリプトの動作に影響を与える可能性があるため、定期的にプロジェクトリポジトリをチェックして更新を取得してください。

## 貢献

機能リクエスト、バグ報告、コードの提出など、どのような形での貢献も歓迎します。GitHubリポジトリのIssuesおよびPull Requestsを通じて貢献を提出してください。

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
pyautogui
pyperclip
opencv-python
# The packages below are also imported by the scripts in this repository:
numpy
scikit-learn
Pillow
pynput
pygetwindow
pywin32
pydantic
fastapi
uvicorn
requests
certifi

--------------------------------------------------------------------------------
/search_in_browser.py:
--------------------------------------------------------------------------------
from collections import Counter
import pyautogui
import pyperclip


def locate_text(keyword):
    """Locate on-screen occurrences of `keyword` via the browser's find-in-page.

    Opens the find bar (Ctrl+F) and searches for the keyword, which highlights
    every match; takes a screenshot, clears the search box (removing the
    highlights), takes a second screenshot, and diffs the two screenshots to
    recover the highlight rectangles. Returns (rects, line_height), with rects
    sorted in reading order (top-to-bottom, then left-to-right).
    """
    from frog_eye import find_all_highlight_rect

    pyautogui.hotkey('ctrl', 'f')

    pyperclip.copy(keyword)
    pyautogui.hotkey('ctrl', 'a', 'v')
    pyautogui.press('enter')
    img_before = pyautogui.screenshot()

    pyautogui.hotkey('ctrl', 'a', 'backspace')
    img_after = pyautogui.screenshot()

    pyautogui.press('esc')

    rects = find_all_highlight_rect(img_before, img_after, vis=False, width_height_ratio=len(keyword))

    if rects:
        # The most common rectangle height is taken as the text line height
        line_height = Counter([rect[:, 1].max() - rect[:, 1].min() for rect in rects]).most_common(1)[0][0]

        rects.sort(key=lambda e: (e[:, 1].min(axis=0) // line_height) * 10000 + e[:, 0].min(axis=0))
    else:
        line_height = None

    return rects, line_height

if __name__ == '__main__':
    import time
    time.sleep(2)
    keyword = "7tAYlnGUJYRw"
    rects, line_height = locate_text(keyword)
    pyautogui.moveTo(rects[0].mean(axis=0).tolist())
    print(len(rects))

--------------------------------------------------------------------------------
/test_openai_api.py:
--------------------------------------------------------------------------------
import requests
import certifi

MODEL = "gpt-3.5-turbo"
OPENAI_SECRET_KEY = "none"
# The local openai_api.py server ignores the model name and only checks the
# key when it is started with --api-auth.

def chat_with_chatgpt(prompt: str):
    payload = {
        'model': MODEL,
        'messages': [
            {"role": "user", "content": prompt}
        ]
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_SECRET_KEY}"
    }
    url = 'http://127.0.0.1:8000/v1/chat/completions'

    try:
        response = requests.post(url, headers=headers, json=payload, verify=certifi.where())

        # Check if the request was successful
        if response.status_code == 200:
            response_data = response.json()
            if "error" in response_data:
                print(f"OpenAI request failed with error {response_data['error']}")
                return None
            return response_data['choices'][0]['message']['content']
        else:
            print(f"Request failed with status code {response.status_code}")
            return None
    except Exception as e:
        print(f"Request failed: {e}")
        return None

response = chat_with_chatgpt('what can you do for me')
print(response)

--------------------------------------------------------------------------------
/windows_api.py:
--------------------------------------------------------------------------------
import pygetwindow as gw
import pyautogui

def get_mouse_window_rect():
    x, y = pyautogui.position()
    win = gw.getWindowsAt(x, y)
    if win:
        win = win[0]  # take the topmost window under the mouse
        return (win.left, win.top, win.left + win.width, win.top + win.height)
    else:
        return None
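
if __name__ == '__main__':
    # Minimal manual check (a sketch): run this module directly, then move the
    # mouse over the window you want to measure within two seconds, mirroring
    # the self-test at the bottom of search_in_browser.py.
    import time
    time.sleep(2)
    print(get_mouse_window_rect())

--------------------------------------------------------------------------------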