├── .gitignore
├── assets
│   ├── chat_with_chatgpt.gif
│   ├── init_resubmit_button.gif
│   └── init_submit_button.gif
├── chatgpt_auto_script.py
├── chatgpt_chat_script.py
├── cursor.py
├── demo.py
├── frog_eye.py
├── openai_api.py
├── readme.md
├── readme_cn.md
├── readme_jp.md
├── requirements.txt
├── search_in_browser.py
├── test_openai_api.py
└── windows_api.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
debug/
__pycache__/
detected_images/
chats.json

--------------------------------------------------------------------------------
/assets/chat_with_chatgpt.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MaoXiaoYuZ/AutoChatScript/a826633a5e0305d10a136a58e2f09969927466bd/assets/chat_with_chatgpt.gif

--------------------------------------------------------------------------------
/assets/init_resubmit_button.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MaoXiaoYuZ/AutoChatScript/a826633a5e0305d10a136a58e2f09969927466bd/assets/init_resubmit_button.gif

--------------------------------------------------------------------------------
/assets/init_submit_button.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MaoXiaoYuZ/AutoChatScript/a826633a5e0305d10a136a58e2f09969927466bd/assets/init_submit_button.gif

--------------------------------------------------------------------------------
/chatgpt_auto_script.py:
--------------------------------------------------------------------------------
# Automation script for the ChatGPT web page. It only performs the UI
# automation itself and does not store any data.

import functools
import os
import pyautogui
import pyperclip
import time
import re
import random
import string
from PIL import Image
import numpy as np

import frog_eye
import cursor
import search_in_browser
import windows_api

from pynput.mouse import Listener


class ChatGPTAutoScript:
    def __init__(self):
        self.retry_button_path = "detected_images/retry_button.png"
        self.submit_button_path = "detected_images/submit_button.png"
        self.resubmit_button_path = "detected_images/resubmit_button.png"
        self._on_focus = False

        os.makedirs("detected_images", exist_ok=True)

    def focus_window(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # The first positional argument is assumed to be the instance the
            # decorated method is called on.
            self = args[0]
            cursor_pos = pyautogui.position()
            flag = False
            if not self._on_focus:
                self._focus_chat_input()
                self._on_focus = True
                flag = True
            try:
                return func(*args, **kwargs)
            finally:
                if flag:
                    pyautogui.moveTo(cursor_pos)
                    self._on_focus = False

        return wrapper

    def init_window_rect(self):
        self.window_rect = windows_api.get_mouse_window_rect()

    def test(self):
        self.new_chat()

        prompt = "Translate to Chinese: This is a test."
        print("User:", prompt)
        response = self.submit(prompt)
        print("ChatGPT:", response)

        prompt = "Translate to Chinese: This is the second test."
65 | print("User:", prompt) 66 | response = self.resubmit(prompt) 67 | print("ChatGPT:", response) 68 | 69 | def init_submit_button(self): 70 | cursor_pos = pyautogui.position() 71 | 72 | init_submit_button_flag = True 73 | if os.path.exists(self.submit_button_path) or hasattr(self, "submit_button_image"): 74 | if not hasattr(self, "submit_button_image"): 75 | self.submit_button_image = Image.open(self.submit_button_path) 76 | if (region := self.locate_image(self.submit_button_image, confidence=0.9)): 77 | self.submit_button_region = self.pad_image_region(region) 78 | cursor_pos = pyautogui.position() 79 | pyautogui.moveTo(region[0], region[1]) 80 | self.init_window_rect() 81 | pyautogui.moveTo(cursor_pos) 82 | init_submit_button_flag = False 83 | 84 | if init_submit_button_flag: 85 | pyautogui.alert(text='程序将监听鼠标点击,请将鼠标移动到ChatGPT网页空白处,并点击一次。\nThe program will listen for mouse clicks, please move the mouse to a blank area of the ChatGPT webpage and click once.', title='初始化自动脚本 (Initialize AutoChatScript)', button='ok') 86 | 87 | # 定义点击事件处理函数 88 | def on_click(x, y, button, pressed): 89 | if pressed: 90 | print(f"Mouse clicked at ({x}, {y})") 91 | return False # 返回False以停止监听 92 | 93 | # 启动监听 94 | with Listener(on_click=on_click) as listener: 95 | listener.join() 96 | 97 | print("开始初始化...") 98 | self.init_window_rect() 99 | self.submit_button_region, self.submit_button_image = self.locate_submit_button() 100 | 101 | 102 | # if os.path.exists(self.retry_button_path): 103 | # self.retry_button_image = Image.open(self.retry_button_path) 104 | # else: 105 | # _, self.retry_button_image = self.locate_retry_button() 106 | 107 | # if os.path.exists(self.resubmit_button_path): 108 | # self.resubmit_button_image = Image.open(self.resubmit_button_path) 109 | # else: 110 | # _, self.resubmit_button_image = self.locate_resubmit_button() 111 | 112 | pyautogui.moveTo(cursor_pos) 113 | 114 | def wait_submit_image(self, timeout=0): 115 | print('waiting submit button...') 116 | pos = self.wait_image(self.submit_button_image, region=self.submit_button_region, confidence=0.9, timeout=timeout) 117 | if pos is None: 118 | print('submit button not found, reinitializing...') 119 | self.init_submit_button() 120 | pos = self.wait_image(self.submit_button_image, region=self.submit_button_region, confidence=0.9, timeout=timeout) 121 | if pos is None: 122 | raise Exception("未能定位到submit按钮!(Failed to locate the submit button!)") 123 | print('submit button found!') 124 | return pos 125 | 126 | def init_resubmit_button(self): 127 | if os.path.exists(self.resubmit_button_path): 128 | self.resubmit_button_image = Image.open(self.resubmit_button_path) 129 | else: 130 | pyautogui.alert(text='点击ok后,程序将自动检测修改按钮图片。\nAfter clicking ok, the program will automatically detect the resubmit button image.', title='初始化自动脚本 (Initialize AutoChatScript)', button='ok') 131 | 132 | while (response := self.copy_last_response()) is None: 133 | pyautogui.alert(text='请展示有对话内容的网页,以便程序检测修改按钮位置。\nPlease display a webpage with dialogue content so the program can detect the position of the resubmit button.', title='初始化自动脚本 (Initialize AutoChatScript)', button='ok') 134 | 135 | _, self.resubmit_button_image = self.locate_resubmit_button(response) 136 | 137 | def screenshot(self, filepath=None, region=None): 138 | if region is None: 139 | left, top, right, bottom = self.window_rect 140 | else: 141 | left, top, right, bottom = region 142 | region = (int(left), int(top), int(right - left), int(bottom - top)) 143 | if filepath is not None: 144 | return 
            return pyautogui.screenshot(filepath, region=region)
        else:
            return pyautogui.screenshot(region=region)

    def locateCenterOnScreen(self, image, region=None, confidence=1):
        if region is None:
            left, top, right, bottom = self.window_rect
        else:
            left, top, right, bottom = region
        region = (int(left), int(top), int(right - left), int(bottom - top))

        return pyautogui.locateCenterOnScreen(image, region=region, confidence=confidence)

    def _focus_chat_input(self):
        pos = self.wait_submit_image()
        left, top, right, bottom = self.submit_button_region
        pyautogui.click(left, (top + bottom) // 2)
        return pos

    def _focus_chat_input_by_keyboard(self):
        pyautogui.hotkey("shift", "esc")

    @focus_window
    def new_chat(self):
        pyautogui.hotkey("ctrl", "shift", "o")

    def _wait_last_response(self):
        # self.scroll_to_bottom()
        # wait_start = self.wait_stationary(delay=1, timeout=10, reverse=True)
        # wait_finish = self.wait_stationary(delay=1, timeout=10, reverse=False)
        time.sleep(0.5)  # give the browser time to react to the submit click

        wait_start = self.wait_image_disappear(self.submit_button_image, region=self.submit_button_region, confidence=0.9, timeout=30)

        if wait_start:
            wait_finish = self.wait_image(self.submit_button_image, region=self.submit_button_region, confidence=0.9, timeout=120)
            if wait_finish:
                response = self.copy_last_response()
                if response is None:
                    pyautogui.alert(text='请手动复制,程序将监听下一次的复制内容。\nPlease copy manually, the program will listen for the next copy action.', title='复制快捷键未起效!(Copy shortcut key is not effective!)', button='OK')
                else:
                    if len(response) > 500:
                        continue_generating_button, _ = search_in_browser.locate_text("Continue generating")
                        if continue_generating_button:
                            cursor_pos = pyautogui.position()
                            pyautogui.click(continue_generating_button[0].mean(axis=0).tolist())
                            pyautogui.moveTo(cursor_pos)
                            return self._wait_last_response()
                        else:
                            return response
                    else:
                        return response
            else:
                pyautogui.alert(text='请手动复制,程序将监听下一次的复制内容。\nPlease copy manually, the program will listen for the next copy action.', title='ChatGPT回复超时!(ChatGPT response timeout!)', button='OK')
        else:
            pyautogui.alert(text='请手动复制,程序将监听下一次的复制内容。\nPlease copy manually, the program will listen for the next copy action.', title='ChatGPT未响应!(ChatGPT is not responding!)', button='OK')

        return self.wait_for_clip_copy()

    def wait_for_clip_copy(self):
        # Write a random sentinel string to the clipboard, then poll until the
        # user (or the page) replaces it with new content.
        clip_state = pyperclip.paste()
        initial_clipboard_content = self.generate_password()
        pyperclip.copy(initial_clipboard_content)
        while pyperclip.paste() == initial_clipboard_content:
            time.sleep(0.1)
        new_clipboard_content = pyperclip.paste()
        pyperclip.copy(clip_state)
        return new_clipboard_content

    def generate_password(self, length=12):
        # Character pool for the random sentinel string
        characters = string.ascii_letters + string.digits + "!@#$%^&*()"
        # Randomly pick characters to build it
        password = ''.join(random.choice(characters) for i in range(length))
        return password

    @focus_window
    def copy_last_response(self):
        # Same sentinel trick as in wait_for_clip_copy: if the copy hotkey had
        # no effect, the clipboard still holds the sentinel and None is returned.
        clip_state = pyperclip.paste()
        initial_clipboard_content = self.generate_password()
        pyperclip.copy(initial_clipboard_content)

        pyautogui.hotkey("ctrl", "shift", "c")

        if pyperclip.paste() == initial_clipboard_content:
            return None

        new_clipboard_content = pyperclip.paste()
        pyperclip.copy(clip_state)
        return new_clipboard_content

    def submit(self, prompt):
        debug = False
        if debug:
            return debug

        cursor_pos = pyautogui.position()
        pos = self._focus_chat_input()
        pyperclip.copy(prompt)
        pyautogui.hotkey("ctrl", "a", "v")
        pyautogui.scroll(-1_000_000)

        pos = self.wait_submit_image(timeout=0.5)  # the browser needs time to process the paste and scroll events
        pyautogui.click(pos)
        pyautogui.moveTo(cursor_pos)

        response = None
        try:
            response = self._wait_last_response()
        except Exception as e:
            print(e)

        return response

    def match_code_block(self, response):
        # A trailing ``` is appended so an unterminated code block at the end
        # of the response is still matched.
        pattern = r"(?<=[\r\n])```.*?```(?=[\r\n])"
        matches = re.findall(pattern, '\n' + response + '```\n', re.DOTALL)
        return matches

    def scroll_to_bottom(self, clicks=-1_000_000):
        pos = (self.window_rect[0] + self.window_rect[2]) // 2, (self.window_rect[1] + self.window_rect[3]) // 2
        pyautogui.moveTo(pos)
        pyautogui.scroll(clicks)

    def resubmit(self, prompt, response=None):
        if not hasattr(self, "resubmit_button_image"):
            self.init_resubmit_button()

        if response is None:
            response = self.copy_last_response()

        button_left_top, button_right_bottom, line_height = self.estimate_resubmit_button_region(response)

        cursor_pos = pyautogui.position()
        pyautogui.moveTo(button_left_top[0] + line_height, button_left_top[1] + line_height)
        pos = self.wait_image(self.resubmit_button_image, region=(*button_left_top, self.window_rect[2], button_right_bottom[1]), confidence=0.9, timeout=0)

        if pos is None:
            if os.path.exists(self.resubmit_button_path):
                os.remove(self.resubmit_button_path)
            if hasattr(self, "resubmit_button_image"):
                del self.resubmit_button_image
            return self.resubmit(prompt, response)
        else:
            pyautogui.click(pos)

        pyperclip.copy(prompt)
        pyautogui.hotkey("ctrl", "a", "v")
        pyautogui.hotkey("tab", "enter")
        pyautogui.moveTo(cursor_pos)

        try:
            response = self._wait_last_response()
        except Exception as e:
            print(e)

        return response

    def wait_image(self, image, region=None, confidence=1, timeout=10, debug=False):
        t0 = time.time()
        while True:
            try:
                if debug: self.screenshot(f"debug/wait_image/{t0:.0f}-{time.time()-t0:.3f}.png", region=region)
                pos = self.locateCenterOnScreen(image, region=region, confidence=confidence)
                return pos
            except Exception:
                if time.time() - t0 > timeout:
                    return None
                time.sleep(0.5 if debug else 0.1)

    def wait_image_disappear(self, image, region=None, confidence=1, timeout=10, debug=False):
        t0 = time.time()
        pos = True
        while pos:
            try:
                if debug: self.screenshot(f"debug/wait_image_disappear/{t0:.0f}-{time.time()-t0:.3f}.png", region=region)
                pos = self.locateCenterOnScreen(image, region=region, confidence=confidence)
            except Exception:
                return True
            if time.time() - t0 > timeout:
                return False
            time.sleep(0.5 if debug else 0.1)

    def wait_stationary(self, delay=2, timeout=10, reverse=False, debug=False):
        t0 = time.time()
        img_before = self.screenshot()
        cur_delay = delay
        if debug: os.makedirs(f"debug/wait_stationary/{t0}")
        while True:
            time.sleep(0.5)
            img_after = self.screenshot()
            ret_change_ratio = []
            is_stationary = frog_eye.is_stationary(img_before, img_after, ret_change_ratio=ret_change_ratio)
            if reverse: is_stationary = not is_stationary
            if is_stationary:
                cur_delay -= 0.5
                if debug: img_after.save(f"debug/wait_stationary/{t0}/{time.time()}-{ret_change_ratio[0]}-{cur_delay}.png")
                if cur_delay <= 0:
                    return True
            else:
                if debug: img_after.save(f"debug/wait_stationary/{t0}/{time.time()}-{ret_change_ratio[0]}-{cur_delay}.png")
                cur_delay = delay
                if time.time() - t0 > timeout:
                    return False
            img_before = img_after

    def locate_image(self, image, confidence=1):
        try:
            location = pyautogui.locateOnScreen(image, confidence=confidence)
        except Exception:
            return None

        if location is None:
            return None
        return location.left, location.top, location.left + location.width, location.top + location.height

    def pad_image_region(self, region, pad=50):
        left, top, right, bottom = region
        return (left - pad, top - pad, right + pad, bottom + pad)

    def manual_locate_image_region(self, image_file):
        while not input("Move the mouse over the button, then press Enter"):
            if cursor.get_cursor_state() != 'HAND':
                print("Please move the mouse onto the button!")
            else:
                cursor_pos = np.asarray(pyautogui.position())
                region = cursor.detect_cur_button_boundary(cursor_pos, cursor_pos - 200, cursor_pos + 200, step=5)
                image = self.screenshot(image_file, region=region)
                pyautogui.moveTo(cursor_pos[0], cursor_pos[1])
                return self.pad_image_region(region), image

    @focus_window
    def locate_retry_button(self):
        last_response = self.copy_last_response()
        last_para = [e for e in last_response.split("\n") if e.strip()][-1]
        last_para = last_para.replace('`', '')

        text_rects, line_height = search_in_browser.locate_text(last_para)
        left_top_rect = text_rects[0]
        right_bottom_rect = text_rects[-1]
        next_line_left_top = (left_top_rect[:, 0].min(axis=0), right_bottom_rect[:, 1].max())

        button_right_bottom = next_line_left_top[0] + line_height * 10, next_line_left_top[1] + line_height * 2

        button_boundary_list = cursor.detect_button_boundary(
            next_line_left_top,
            button_right_bottom,
            step=line_height // 2,
            sub_step=line_height // 8,
            only_first=True)

        if len(button_boundary_list) == 1:
            image = self.screenshot(self.retry_button_path, region=button_boundary_list[0])
        else:
            assert False, "未能定位到重试按钮!(Failed to locate the retry button!)"

        return self.pad_image_region(button_boundary_list[0]), image

    @focus_window
    def estimate_resubmit_button_region(self, response):
        for left_top_text in response.split("\r\n"):
            if left_top_text.startswith('```'):
                continue
            left_top_text = left_top_text.replace("`", "")
            if len(left_top_text) < 7:
                continue
            break
        left_top_text = left_top_text.replace('`', '')

        text_rects, line_height = search_in_browser.locate_text(left_top_text)
        left_top_rect = text_rects[0]

        button_left_top = left_top_rect.min(axis=0) + np.array([-line_height, -line_height * 5])
        button_right_bottom = left_top_rect.max(axis=0)

        # if button_left_top is on the top of the window, scroll to the bottom
        if (button_left_top[1] - self.window_rect[1]) / (self.window_rect[3] - self.window_rect[1]) < 0.25:
            self.scroll_to_bottom(clicks=(self.window_rect[3] - self.window_rect[1]) // 4)

            text_rects, line_height = search_in_browser.locate_text(left_top_text)
            left_top_rect = text_rects[0]

            button_left_top = left_top_rect.min(axis=0) + np.array([-line_height * 2, -line_height * 7])
            button_right_bottom = left_top_rect.max(axis=0)

        return button_left_top.tolist(), button_right_bottom.tolist(), int(line_height)

    @focus_window
    def locate_resubmit_button(self, response):
        button_left_top, button_right_bottom, line_height = self.estimate_resubmit_button_region(response)

        button_boundary_list = cursor.detect_button_boundary(
            button_left_top,
            button_right_bottom,
            step=line_height // 2,
            sub_step=line_height // 8,
            only_first=True)

        if len(button_boundary_list) == 1:
            left, top, right, bottom = button_boundary_list[0]
            pyautogui.moveTo(left, top - line_height)
            image = self.screenshot(self.resubmit_button_path, region=button_boundary_list[0])
        else:
            assert False, "未能定位到resubmit按钮!(Failed to locate the resubmit button!)"

        return self.pad_image_region(button_boundary_list[0]), image

    def locate_submit_button(self):
        self._focus_chat_input_by_keyboard()

        #random_string = self.generate_password()
        random_string = "This Say a is Test!"
        pyperclip.copy(random_string)
        pyautogui.hotkey("ctrl", "a", "v")

        text_rects, line_height = search_in_browser.locate_text(random_string)
        text_rect = text_rects[0]

        button_left_top = int(text_rect[:, 0].max()), int(text_rect[:, 1].min())
        #button_right_bottom = windows_api.get_mouse_window_rect()[2] - line_height // 4, int(text_rect[:, 1].max())
        button_right_bottom = self.window_rect[2:]

        button_boundary_list = cursor.detect_button_boundary(
            button_left_top,
            button_right_bottom,
            step=line_height // 2,
            sub_step=line_height // 8,
            only_first=True)

        if len(button_boundary_list) == 1:
            image = self.screenshot(self.submit_button_path, region=button_boundary_list[0])
        else:
            assert False, "未能定位到submit按钮!(Failed to locate the submit button!)"

        return self.pad_image_region(button_boundary_list[0]), image

    def demo(self):
        self.init_submit_button()
        while True:
            prompt = input("User(q to quit, r to resubmit): ")
            if prompt == 'q':
                break
            elif prompt == 'r':
                prompt = input("\t resubmit:")
                response = self.resubmit(prompt)
                print("ChatGPT:", response)
            elif len(prompt) == 0:
                print("Input cannot be empty!")
            else:
                response = self.submit(prompt)
                print("ChatGPT:", response)


if __name__ == "__main__":
    chatgpt = ChatGPTAutoScript()
    #response = chatgpt.submit("你好")
    #print(response)
    chatgpt.demo()

--------------------------------------------------------------------------------
/chatgpt_chat_script.py:
--------------------------------------------------------------------------------
# Wraps chatgpt_auto_script.py: stores the conversation data and provides an
# API-like chat interface on top of the raw automation.

import json
import os
import time
from typing import List, Literal, Optional

from pydantic import BaseModel

from chatgpt_auto_script import ChatGPTAutoScript

class ChatMessage(BaseModel):
    role: Literal['user', 'assistant', 'system']
    content: Optional[str]

class Chat(BaseModel):
    chatid: str
    name: Optional[str] = None
    messages: List[ChatMessage]

class ChatGPTChatScript:
    def __init__(self):
        self.auto_script: ChatGPTAutoScript = ChatGPTAutoScript()
        self.chats: List[Chat] = []

        self.auto_script.init_submit_button()

        if os.path.exists('chats.json'):
            self.load_chats('chats.json')

    def save_chats(self, filename: str):
        """
        Save the chats list to a file.
        """
        with open(filename, 'w', encoding='utf-8') as file:
            # model_dump() converts each Pydantic model into a plain dict
            json_data = [chat.model_dump() for chat in self.chats]
            json.dump(json_data, file, ensure_ascii=False, indent=1)

    def load_chats(self, filename: str):
        """
        Load the chats list from a file.
        """
        with open(filename, 'r', encoding='utf-8') as file:
            json_data = json.load(file)
            # Chat.model_validate converts each dict back into a Chat object
            self.chats = [Chat.model_validate(chat) for chat in json_data]

    def find_chat(self, messages: List[ChatMessage]):
        # Find the stored chat whose message history starts with the given messages
        for chat in self.chats:
            if len(messages) <= len(chat.messages) and all(len(msg.content) == len(chat_msg.content) and msg.content == chat_msg.content for msg, chat_msg in zip(messages, chat.messages)):
                return chat
        return None

    def submit(self, prompt):
        response = self.auto_script.submit(prompt)
        response_msg = ChatMessage(role='assistant', content=response)
        return response_msg

    def auto_chat(self, messages: List[ChatMessage]):
        messages = messages.copy()
        # Accepts a messages list like the API does, and automatically decides
        # whether to start a new chat, submit, or resubmit.
        assert len(messages) > 0 and messages[-1].role == 'user'

        if len(messages) == 1:
            if (chat := self.find_chat(messages)) is None:
                action = 'new'
            else:
                action = 'resubmit'
        else:
            if (chat := self.find_chat(messages[:-1])) is None:
                raise Exception('Previous messages not found')
            else:
                if len(chat.messages) == len(messages) - 1:
                    action = 'submit'
                else:
                    action = 'resubmit'

        # check
        # if action != 'new' and chat.messages[-1].content != self.auto_script.copy_last_response():
        #     print('[WARN]Current chat is not consistent with last response')
        # Consistency with the last on-screen response is not checked for now.

        if action == 'new':
            self.auto_script.new_chat()
            chat_id = str(int(time.time() * 1000))  # Use milliseconds for uniqueness
            chat = Chat(chatid=chat_id, messages=messages)

            response = self.auto_script.submit(messages[-1].content)
            response_msg = ChatMessage(role='assistant', content=response)
            chat.messages.append(response_msg)

            self.chats.append(chat)
        elif action == 'submit':
            response = self.auto_script.submit(messages[-1].content)
            response_msg = ChatMessage(role='assistant', content=response)
            chat.messages.append(messages[-1])
            chat.messages.append(response_msg)
        elif action == 'resubmit':
            response = self.auto_script.resubmit(messages[-1].content, chat.messages[len(messages)].content)
            response_msg = ChatMessage(role='assistant', content=response)
            chat.messages = messages + [response_msg, ]

        self.save_chats('chats.json')
        return response_msg

if __name__ == '__main__':
    chat_script = ChatGPTChatScript()
    messages = [
        ChatMessage(role='user', content='Hello, how are you?'),
        ChatMessage(role='assistant', content="I'm here and ready to assist you. How can I help you today?"),
How can I help you today?"), 113 | ChatMessage(role='user', content='What can you do for me ?'), 114 | ] 115 | chat_script.auto_chat(messages) 116 | print(chat_script.chats) -------------------------------------------------------------------------------- /cursor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import win32gui 5 | import win32con 6 | import functools 7 | import pyautogui 8 | 9 | 10 | from sklearn.cluster import DBSCAN 11 | import numpy as np 12 | 13 | 14 | def modify_pyautogui_settings(func): 15 | @functools.wraps(func) # 保持被修饰函数的元数据 16 | def wrapper(*args, **kwargs): 17 | # 保存当前pyautogui设置 18 | pyautogui_values = (pyautogui.PAUSE, pyautogui.DARWIN_CATCH_UP_TIME, pyautogui.FAILSAFE) 19 | 20 | # 临时修改设置 21 | pyautogui.PAUSE, pyautogui.DARWIN_CATCH_UP_TIME, pyautogui.FAILSAFE = 0.01, 0.01, False 22 | 23 | try: 24 | # 执行被修饰的函数 25 | return func(*args, **kwargs) 26 | except Exception as e: 27 | raise e 28 | finally: 29 | # 恢复原来的pyautogui设置 30 | pyautogui.PAUSE, pyautogui.DARWIN_CATCH_UP_TIME, pyautogui.FAILSAFE = pyautogui_values 31 | 32 | return wrapper 33 | 34 | 35 | def get_cursor_state(): 36 | """ 37 | 获取当前鼠标光标的状态。 38 | """ 39 | 40 | # 获取光标的图标ID,这个ID可以用来判断光标的类型 41 | cursor_id = win32gui.GetCursorInfo()[1] 42 | 43 | # 例如,可以通过比较cursor_id与系统预定义的光标类型来判断光标状态 44 | if cursor_id == win32gui.LoadCursor(0, win32con.IDC_ARROW): 45 | return "ARROW" 46 | elif cursor_id == win32gui.LoadCursor(0, win32con.IDC_IBEAM): 47 | return "IBEAM" 48 | elif cursor_id == win32gui.LoadCursor(0, win32con.IDC_HAND): 49 | return "HAND" 50 | else: 51 | print("Unknown cursor.") 52 | return None 53 | 54 | 55 | @modify_pyautogui_settings 56 | def detect_cur_button_boundary(start_pos, left_top, right_bottom, step=20): 57 | directions = ['left', 'right', 'up', 'down'] 58 | bounds = {} 59 | 60 | # 定义搜索边界 61 | search_boundaries = { 62 | 'left': left_top[0], 63 | 'right': right_bottom[0], 64 | 'up': left_top[1], 65 | 'down': right_bottom[1] 66 | } 67 | 68 | cursor_pos = pyautogui.position() 69 | 70 | start_pos = (int(start_pos[0]), int(start_pos[1])) 71 | 72 | for direction in directions: 73 | x, y = start_pos 74 | flag = True 75 | while True: 76 | if direction == 'left': 77 | x -= step 78 | elif direction == 'right': 79 | x += step 80 | elif direction == 'up': 81 | y -= step 82 | elif direction == 'down': 83 | y += step 84 | else: 85 | break 86 | 87 | if x < search_boundaries['left'] or x > search_boundaries['right'] or y < search_boundaries['up'] or y > search_boundaries['down']: 88 | # 如果到达了搜索区域的边界,结束搜索 89 | bounds[direction] = (x, y) 90 | break 91 | 92 | if pyautogui.position() == cursor_pos: 93 | if flag: # 给浏览器一点时间检测鼠标悬停 94 | pyautogui.moveTo(x, y, duration=0.2) 95 | flag = False 96 | else: 97 | pyautogui.moveTo(x, y) 98 | cursor_pos = (x, y) 99 | else: 100 | pyautogui.alert(text='检测到人为鼠标移动!(Detected artificial mouse movement!) 
                pyautogui.alert(text='检测到人为鼠标移动!(Detected artificial mouse movement!)', title='程序终止(Program Abort)', button='OK')
                return

            if get_cursor_state() != "HAND":
                # As soon as the cursor is no longer a hand, record the boundary point and stop
                bounds[direction] = (x, y)
                break

    # Left-top and right-bottom corners of the detected rectangle
    left = bounds.get('left', start_pos)[0]
    right = bounds.get('right', start_pos)[0]
    top = bounds.get('up', start_pos)[1]
    bottom = bounds.get('down', start_pos)[1]

    return int(left), int(top), int(right), int(bottom)

@modify_pyautogui_settings
def detect_button_boundary(left_top, right_bottom, step=20, sub_step=5, only_first=False):
    width = right_bottom[0] - left_top[0]
    height = right_bottom[1] - left_top[1]
    # 2-D visited grid over the scan lattice, initialized to False
    visited = np.full((height // step + 1, width // step + 1), False, dtype=bool)

    all_boundaries = []

    for idx_y, y in enumerate(range(left_top[1], right_bottom[1], step)):
        for idx_x, x in enumerate(range(left_top[0], right_bottom[0], step)):
            if visited[idx_y, idx_x]:
                continue  # skip points that fall inside an already-detected button

            pyautogui.moveTo(x, y)

            if get_cursor_state() == "HAND":
                boundary = left, top, right, bottom = detect_cur_button_boundary((x, y), left_top, right_bottom, step=sub_step)
                if only_first:
                    return [boundary, ]

                assert boundary not in all_boundaries
                all_boundaries.append(boundary)

                for by in range(top, bottom + 1, step):
                    for bx in range(left, right + 1, step):
                        idx_visited_x = (bx - left) // step
                        idx_visited_y = (by - top) // step
                        visited[idx_visited_y, idx_visited_x] = True
    return all_boundaries

def merge_positions_using_dbscan(positions, eps=10, min_samples=2):
    """
    Merge hand-cursor positions into clusters using DBSCAN.

    :param positions: list of hand-cursor positions, e.g. [(x1, y1), (x2, y2), ...]
    :param eps: DBSCAN neighborhood radius
    :param min_samples: minimum number of samples required to form a cluster
    :return: positions grouped by cluster, e.g. [[(x11, y11), (x12, y12), ...], ...]
    """
    if not positions:
        return []

    # Convert the positions to a NumPy array for DBSCAN
    X = np.array(positions)

    # Run DBSCAN
    db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
    labels = db.labels_

    # Group the positions by cluster label
    clusters = {}
    for label, position in zip(labels, positions):
        if label not in clusters:
            clusters[label] = []
        clusters[label].append(position)

    # Drop noise points (label -1) and return only valid clusters
    clusters = [np.array(clusters[label]) for label in clusters if label != -1]
    return clusters

@modify_pyautogui_settings
def detect_hand_location(left_top, right_bottom, step=20):
    t0 = time.time()
    i = 0
    left, top = left_top
    right, bottom = right_bottom

    hand_positions = []

    cursor_pos = pyautogui.position()

    for y in range(top, bottom, step):
        for x in range(left, right, step):  # sweep the mouse across the region in step-pixel increments
            if pyautogui.position() == cursor_pos:
                pyautogui.moveTo(x, y)
                cursor_pos = (x, y)
            else:
                pyautogui.alert(text='检测到人为鼠标移动!(Detected artificial mouse movement!)', title='程序终止(Program Abort)', button='OK')
                return
            i += 1
            state = get_cursor_state()
            if state == "HAND":
                hand_positions.append((x, y))

            if time.time() - t0 > 60:
                print("Timeout.")
                return

    print(f"Time: {time.time() - t0:.2f} seconds")

    return merge_positions_using_dbscan(hand_positions, eps=step+1, min_samples=2)

if __name__ == '__main__':
    button_boundary_list = detect_button_boundary((0, 0), (100, 500))
    if len(button_boundary_list) == 1:
        left, top, right, bottom = button_boundary_list[0]
        os.makedirs("detected_images", exist_ok=True)
        pyautogui.screenshot(os.path.join("detected_images", "Button.png"), region=(left, top, right - left, bottom - top))
        exit(0)
    result = detect_hand_location((0, 0), (100, 500))
    os.makedirs("cursor_area", exist_ok=True)
    for hand in result:
        left_top = hand.min(axis=0)
        right_bottom = hand.max(axis=0)
        left, top, right, bottom = left_top[0].item(), left_top[1].item(), right_bottom[0].item(), right_bottom[1].item()
        pyautogui.screenshot(os.path.join("cursor_area", f"HAND-{left}-{top}-{right}-{bottom}.png"), region=(left, top, right - left, bottom - top))
        print(hand.min(axis=0), hand.max(axis=0))
        print()

--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
# Console demo for this project
from chatgpt_auto_script import ChatGPTAutoScript

auto_script = ChatGPTAutoScript()
auto_script.demo()

--------------------------------------------------------------------------------
/frog_eye.py:
--------------------------------------------------------------------------------
import colorsys
import random
import cv2

import numpy as np

def is_stationary(frame1, frame2, threshold=30, change_ratio_threshold=0.01, ret_change_ratio=None):
    frame1, frame2 = np.asarray(frame1), np.asarray(frame2)

    # Convert both frames to grayscale to simplify the comparison
    gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

    # Absolute per-pixel difference between the two frames
    diff = cv2.absdiff(gray1, gray2)

    # Threshold the difference to flag changed regions
    _, thresh = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)

    non_zero_count = np.count_nonzero(thresh)

    # The ratio of changed (non-zero) pixels decides whether the frame is stationary
    total_pixels = frame1.shape[0] * frame1.shape[1]

    change_ratio = non_zero_count / total_pixels

    #print("change_ratio", change_ratio)
    if ret_change_ratio is not None:
        ret_change_ratio.append(change_ratio)

    return change_ratio < change_ratio_threshold


def find_contours(img_before, img_after, thresh=30):
    img_before, img_after = np.asarray(img_before), np.asarray(img_after)

    # Convert to grayscale to speed up processing
    gray_before = cv2.cvtColor(img_before, cv2.COLOR_BGR2GRAY)
    gray_after = cv2.cvtColor(img_after, cv2.COLOR_BGR2GRAY)

    # Difference between the two images
    diff = cv2.absdiff(gray_before, gray_after)
    _, thresh = cv2.threshold(diff, thresh, 255, cv2.THRESH_BINARY)

    # Find the contours of the changed regions
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    return contours

def point_in_contour(point, contour):
    dist = cv2.pointPolygonTest(contour, point, False)

    return dist >= 0

def random_color():
    h = random.random()  # random hue
    s = random.uniform(0.5, 1.0)  # random saturation
    v = random.uniform(0.5, 1.0)  # random value (brightness)

    s, v = 1, 1  # fixed at full saturation and brightness

    return tuple(round(i * 255) for i in colorsys.hsv_to_rgb(h, s, v))


def vis_contours(img, contours, delay=0):
    img_vis = np.asarray(img).copy()
    cv2.drawContours(img_vis, contours, -1, (0, 255, 0), 2)  # contourIdx=-1 draws every contour
    cv2.imshow('Contours', img_vis)
    cv2.waitKey(delay)
    #cv2.destroyAllWindows()

def find_optimal_highlight_rect(img_before, img_after, width_height_ratio=None, vis=False):
    img_before, img_after = np.asarray(img_before), np.asarray(img_after)

    # Find the contours of the changed regions
    contours = find_contours(img_before, img_after)

    if vis:
        img_vis = img_after.copy()

    valid_rects = []

    for contour in contours:
        # # Approximate each contour
        # perimeter = cv2.arcLength(contour, True)
        # approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)

        # # Keep only contours that approximate a rectangle (four vertices)
        # if len(approx) == 4:
        rect = cv2.minAreaRect(contour)  # minimum-area bounding rectangle
        width, height = max(rect[1]), min(rect[1])

        if rect[2] % 90 != 0:
            continue

        if not (width >= 5 and 400 >= height >= 5):
            continue

        if cv2.contourArea(contour) == 0 or width * height / cv2.contourArea(contour) > 1.1:
            continue

        valid_rects.append(rect)

        if vis:
            cv2.drawContours(img_vis, [contour], 0, (0, 255, 0), 2)

    if vis:
        if not valid_rects:
            for contour in contours:
                cv2.drawContours(img_vis, [contour], 0, (0, 255, 0), 2)
        cv2.imshow('Changes', img_vis)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    if not valid_rects:
        return None

    # select the rect whose aspect ratio is closest to the given aspect ratio
    if width_height_ratio is not None:
        valid_rects = sorted(valid_rects, key=lambda r: abs(max(r[1]) / (min(r[1]) + 1e-6) - width_height_ratio))

    best_rect = valid_rects[0]

    box = cv2.boxPoints(best_rect)
    box = np.intp(box)  # np.intp; the np.int0 alias was removed in NumPy 2.0

    return box

def find_all_highlight_rect(img_before, img_after, width_height_ratio=None, vis=False):
    img_before, img_after = np.asarray(img_before), np.asarray(img_after)

    # Find the contours of the changed regions
    contours = find_contours(img_before, img_after)

    if vis:
        img_vis = img_after.copy()

    valid_rects = []

    for contour in contours:
        rect = cv2.minAreaRect(contour)  # minimum-area bounding rectangle
        width, height = max(rect[1]), min(rect[1])

        if not (width >= 5 and 400 >= height >= 5):
            continue

        if rect[2] % 90 != 0:
            continue

        if cv2.contourArea(contour) == 0 or width * height / cv2.contourArea(contour) > 1.1:
            continue

        valid_rects.append(rect)

        if vis:
            # draw each kept contour in a random color
            cv2.drawContours(img_vis, [contour], 0, random_color(), 2)

    if vis:
        if not valid_rects:
            for contour in contours:
                cv2.drawContours(img_vis, [contour], 0, random_color(), 2)
        cv2.imshow('Changes', img_vis)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    if not valid_rects:
        return None

    boxes = []
    for rect in valid_rects:
        box = cv2.boxPoints(rect)
        boxes.append(np.intp(box))

    return boxes


if __name__ == '__main__':
    img_before = cv2.imread('img_before.png')
    img_after = cv2.imread('img_after.png')

    boxes = find_all_highlight_rect(img_before, img_after, width_height_ratio=10, vis=True)
    print(len(boxes))
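
    # A quick stationarity check on the same image pair (a sketch; like the
    # lines above, it assumes img_before.png and img_after.png exist next to
    # this script):
    print(is_stationary(img_before, img_after))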
--------------------------------------------------------------------------------
/openai_api.py:
--------------------------------------------------------------------------------
import base64
import time
from argparse import ArgumentParser
from contextlib import asynccontextmanager
from typing import Dict, List, Literal, Optional, Union

import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response


from chatgpt_chat_script import ChatGPTChatScript, ChatMessage

class BasicAuthMiddleware(BaseHTTPMiddleware):

    def __init__(self, app, username: str, password: str):
        super().__init__(app)
        self.required_credentials = base64.b64encode(
            f'{username}:{password}'.encode()).decode()

    async def dispatch(self, request: Request, call_next):
        authorization: str = request.headers.get('Authorization')
        if authorization:
            try:
                schema, credentials = authorization.split()
                if credentials == self.required_credentials:
                    return await call_next(request)
            except ValueError:
                pass

        headers = {'WWW-Authenticate': 'Basic'}
        return Response(status_code=401, headers=headers)


@asynccontextmanager
async def lifespan(app: FastAPI):
    yield


app = FastAPI(lifespan=lifespan)

app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)


class ModelCard(BaseModel):
    id: str
    object: str = 'model'
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = 'owner'
    root: Optional[str] = None
    parent: Optional[str] = None
    permission: Optional[list] = None


class ModelList(BaseModel):
    object: str = 'list'
    data: List[ModelCard] = []

class DeltaMessage(BaseModel):
    role: Optional[Literal['user', 'assistant', 'system']] = None
    content: Optional[str] = None


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    functions: Optional[List[Dict]] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    max_length: Optional[int] = None
    stream: Optional[bool] = False
    stop: Optional[List[str]] = None


class ChatCompletionResponseChoice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: Literal['stop', 'length', 'function_call']


class ChatCompletionResponseStreamChoice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[Literal['stop', 'length']]


class ChatCompletionResponse(BaseModel):
    model: str
    object: Literal['chat.completion', 'chat.completion.chunk']
    choices: List[Union[ChatCompletionResponseChoice,
                        ChatCompletionResponseStreamChoice]]
    created: Optional[int] = Field(default_factory=lambda: int(time.time()))


chat_script = ChatGPTChatScript()

@app.get('/v1/models', response_model=ModelList)
async def list_models():
    model_card = ModelCard(id='gpt-3.5-turbo')
    return ModelList(data=[model_card])


@app.post('/v1/chat/completions', response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
    chat_messages = request.messages

    #response_msg = chat_script.auto_chat(chat_messages)
    response_msg = chat_script.submit(chat_messages[-1].content)

    choice_data = ChatCompletionResponseChoice(
        index=0,
        message=response_msg,
        finish_reason='stop',
    )
    return ChatCompletionResponse(model=request.model,
                                  choices=[choice_data],
                                  object='chat.completion')


def _get_args():
    parser = ArgumentParser()
    parser.add_argument('--api-auth', help='API authentication credentials')
    parser.add_argument('--server-port',
                        type=int,
                        default=8000,
                        help='Demo server port.')
    parser.add_argument(
        '--server-name',
        type=str,
        default='127.0.0.1',
        help=
        'Demo server name. Default: 127.0.0.1, which is only visible from the local computer.'
        ' If you want other computers to access your server, use 0.0.0.0 instead.',
    )
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = _get_args()

    if args.api_auth:
        app.add_middleware(BasicAuthMiddleware,
                           username=args.api_auth.split(':')[0],
                           password=args.api_auth.split(':')[1])

    uvicorn.run(app, host=args.server_name, port=args.server_port, workers=1)

--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# AutoChatScript

[English](readme.md) | [中文](readme_cn.md) | [日本語](readme_jp.md)

AutoChatScript is an automation script built on the PyAutoGUI library that automates conversations with the ChatGPT website. The project relies entirely on reading the screen and driving the mouse and keyboard; it does not reverse-engineer any web requests and does not require any browser plugins.

## Features

- **Automated Initialization**: The script initializes itself; no manual screenshots are required.
- **Cross-Platform Support**: Support for platforms beyond Windows is planned.
- **Full Operation Support**: Supports the full set of operations: creating new conversations, submitting, regenerating, and editing messages.
- **Support for More Websites**: The project is not limited to ChatGPT; more chat websites will be supported in the future.

## Installation Guide

Before starting, ensure you have Python 3.8 or higher installed on your computer (the scripts use syntax introduced in Python 3.8, such as the walrus operator).

1. **Clone the Repository**

First, clone the project repository to your local computer:

```
git clone https://github.com/MaoXiaoYuZ/AutoChatScript.git
cd AutoChatScript
```

2. **Install Dependencies**

Use pip to install the required Python libraries:

```
pip install -r requirements.txt
```

## Usage Instructions

Before running the script, make sure you have opened the web version of ChatGPT and logged into your account.

1. **Start Demo**

```
python demo.py
```

Below is a demonstration of the demo running:

![Chat with ChatGPT](assets/chat_with_chatgpt.gif "Demo running live")

The demo initializes each feature the first time it is needed; shown below is the automatic initialization of the submit button after running demo.py.

![Init Submit Button](assets/init_submit_button.gif "Initialize Submit Button")

Below is the initialization of the resubmit button the first time the resubmit (edit and rewrite) feature is used.

![Init ReSubmit Button](assets/init_resubmit_button.gif "Initialize Resubmit Button")
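
If you prefer to drive the automation from your own code instead of the console demo, `ChatGPTAutoScript` can be used directly. A minimal sketch, based on demo.py and the class's built-in test method (the one-time guided initialization must run before the first submit):

```
from chatgpt_auto_script import ChatGPTAutoScript

auto_script = ChatGPTAutoScript()
auto_script.init_submit_button()  # one-time, guided initialization

response = auto_script.submit("Translate to Chinese: This is a test.")
print("ChatGPT:", response)

# Rewrite the previous prompt in place and regenerate the answer
response = auto_script.resubmit("Translate to Chinese: This is the second test.")
print("ChatGPT:", response)
```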

2. **Provide OpenAI Format API**

```
python openai_api.py --server-port 8000
```
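
The server exposes an OpenAI-style `/v1/chat/completions` endpoint on the chosen port. A minimal client sketch, mirroring the bundled test_openai_api.py (the model name and key here are placeholders; the server only checks credentials when started with `--api-auth`):

```
import requests

payload = {
    'model': 'gpt-3.5-turbo',
    'messages': [{"role": "user", "content": "Hello!"}],
}
resp = requests.post('http://127.0.0.1:8000/v1/chat/completions',
                     headers={"Authorization": "Bearer none"},
                     json=payload)
print(resp.json()['choices'][0]['message']['content'])
```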

## Precautions

- This project currently only supports the Windows operating system.
- Ensure that the ChatGPT web version is already open on your screen before running the script.
- As page layout updates may affect script operation, please check the project repository regularly for updates.

## Contributions

We welcome any form of contribution, whether feature requests, bug reports, or code submissions. Please submit your contributions through the GitHub repository's Issues and Pull Requests.

--------------------------------------------------------------------------------
/readme_cn.md:
--------------------------------------------------------------------------------
# AutoChatScript

[English](readme.md) | [中文](readme_cn.md) | [日本語](readme_jp.md)

AutoChatScript是一个使用PyAutoGUI库实现的自动化脚本,旨在自动化与ChatGPT网页的对话。该项目完全依靠读取和使用鼠标、键盘、屏幕,不逆向任何网页请求,也不需要安装浏览器插件。

## 特性

- **自动化初始化**:自动初始化脚本,无需人为截图。
- **全平台支持**:后续支持除Windows外更多平台。
- **全操作支持**:支持新建对话、提交、重新生成、修改等一系列操作。
- **支持更多网站**:本项目不仅仅针对ChatGPT,后续将支持更多Chat网站。

## 安装指南

在开始之前,请确保您的计算机上已安装 Python 3.8 或更高版本(脚本使用了海象运算符等 3.8+ 语法)。

1. **克隆仓库**

首先,克隆该项目仓库到您的本地计算机:

```
git clone https://github.com/MaoXiaoYuZ/AutoChatScript.git
cd AutoChatScript
```

2. **安装依赖**

使用 pip 安装所需的 Python 库:

```
pip install -r requirements.txt
```

## 使用说明

在运行脚本前,请确保您已经打开了ChatGPT网页版,并登录到您的账户。

1. **启动Demo**

```
python demo.py
```

下面展示的是demo的运行实况:

![chat_with_chatgpt](assets/chat_with_chatgpt.gif "Demo的运行实况")

该demo会在需要用到某项功能时对该功能进行初始化,下面展示的是运行demo.py后程序自动初始化提交按钮的过程。

![Init Submit Button](assets/init_submit_button.gif "初始化提交按钮")

下面展示的是第一次使用重写提交功能时,初始化重新提交按钮的过程。

![Init reSubmit Button](assets/init_resubmit_button.gif "初始化重新提交按钮")

2. **提供OpenAI格式API**

```
python openai_api.py --server-port 8000
```

## 注意事项

- 该项目目前仅支持 Windows 操作系统。
- 确保在运行脚本之前,您的屏幕上已经打开了ChatGPT的网页版。
- 由于页面布局的更新可能会影响脚本的运行,请定期检查项目仓库以获取更新。

## 贡献

我们欢迎任何形式的贡献,无论是功能请求、bug 报告还是代码提交。请通过 GitHub 仓库的 Issues 和 Pull Requests 来提交您的贡献。

--------------------------------------------------------------------------------
/readme_jp.md:
--------------------------------------------------------------------------------
# AutoChatScript

[English](readme.md) | [中文](readme_cn.md) | [日本語](readme_jp.md)

AutoChatScriptは、PyAutoGUIライブラリを使用して実装された自動化スクリプトで、ChatGPT(ウェブサイト)とのチャットを自動化することを目的としています。このプロジェクトは、マウス、キーボード、画面の読み取りと使用に完全に依存しており、ウェブリクエストのリバースエンジニアリングやブラウザプラグインのインストールを必要としません。

## 特徴

- **自動化された初期化**:スクリプトを自動的に初期化し、手動のスクリーンショットは不要です。
- **クロスプラットフォームサポート**:将来的にはWindows以外のプラットフォームもサポートします。
- **完全な操作サポート**:新しい対話の作成、送信、再生成、修正などの一連の操作をサポートします。
- **より多くのウェブサイトのサポート**:このプロジェクトはChatGPTだけでなく、将来的にはより多くのチャットウェブサイトをサポートします。

## インストールガイド

開始する前に、コンピュータにPython 3.8以上がインストールされていることを確認してください(スクリプトはウォルラス演算子などPython 3.8以降の構文を使用しています)。

1. **リポジトリをクローン**

まず、プロジェクトのリポジトリをローカルコンピュータにクローンします:

```
git clone https://github.com/MaoXiaoYuZ/AutoChatScript.git
cd AutoChatScript
```

2. **依存関係のインストール**

必要なPythonライブラリをpipでインストールします:

```
pip install -r requirements.txt
```

## 使用方法

スクリプトを実行する前に、ChatGPTのウェブ版を開き、アカウントにログインしていることを確認してください。

1. **デモの開始**

```
python demo.py
```

以下はデモの実行の様子です:

![Chat with ChatGPT](assets/chat_with_chatgpt.gif "デモの実行の様子")

デモは、ある機能が最初に必要になった時点でその機能を初期化します。以下はdemo.pyを実行した後に送信ボタンを自動的に初期化するプロセスです。

![Init Submit Button](assets/init_submit_button.gif "送信ボタンの初期化")

以下は初めて再送信機能を使用する際に再送信ボタンを初期化するプロセスです。

![Init ReSubmit Button](assets/init_resubmit_button.gif "再送信ボタンの初期化")

2. **OpenAI形式のAPIを提供**

```
python openai_api.py --server-port 8000
```

## 注意事項

- このプロジェクトは現在、Windowsオペレーティングシステムのみをサポートしています。
- スクリプトを実行する前に、ChatGPTのウェブ版が画面に表示されていることを確認してください。
- ページレイアウトの更新がスクリプトの動作に影響を与える可能性があるため、定期的にプロジェクトリポジトリをチェックして更新を取得してください。

## 貢献

機能リクエスト、バグ報告、コードの提出など、どのような形での貢献も歓迎します。GitHubリポジトリのIssuesおよびPull Requestsを通じて貢献を提出してください。

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
pyautogui
pyperclip
opencv-python
# The packages below are also imported by the scripts in this repository:
numpy
scikit-learn
Pillow
pynput
pygetwindow
pywin32
pydantic
fastapi
uvicorn
requests
certifi

--------------------------------------------------------------------------------
/search_in_browser.py:
--------------------------------------------------------------------------------
from collections import Counter
import pyautogui
import pyperclip


def locate_text(keyword):
    """Locate on-screen occurrences of `keyword` via the browser's find-in-page.

    Opens the find bar (Ctrl+F) and searches for the keyword, which highlights
    every match; takes a screenshot, clears the search box (removing the
    highlights), takes a second screenshot, and diffs the two screenshots to
    recover the highlight rectangles. Returns (rects, line_height), with rects
    sorted in reading order (top-to-bottom, then left-to-right).
    """
    from frog_eye import find_all_highlight_rect

    pyautogui.hotkey('ctrl', 'f')

    pyperclip.copy(keyword)
    pyautogui.hotkey('ctrl', 'a', 'v')
    pyautogui.press('enter')
    img_before = pyautogui.screenshot()

    pyautogui.hotkey('ctrl', 'a', 'backspace')
    img_after = pyautogui.screenshot()

    pyautogui.press('esc')

    rects = find_all_highlight_rect(img_before, img_after, vis=False, width_height_ratio=len(keyword))

    if rects:
        # The most common rectangle height is taken as the text line height
        line_height = Counter([rect[:, 1].max() - rect[:, 1].min() for rect in rects]).most_common(1)[0][0]

        rects.sort(key=lambda e: (e[:, 1].min(axis=0) // line_height) * 10000 + e[:, 0].min(axis=0))
    else:
        line_height = None

    return rects, line_height

if __name__ == '__main__':
    import time
    time.sleep(2)
    keyword = "7tAYlnGUJYRw"
    rects, line_height = locate_text(keyword)
    pyautogui.moveTo(rects[0].mean(axis=0).tolist())
    print(len(rects))

--------------------------------------------------------------------------------
/test_openai_api.py:
--------------------------------------------------------------------------------
import requests
import certifi

MODEL = "gpt-3.5-turbo"
OPENAI_SECRET_KEY = "none"
# The local openai_api.py server ignores the model name and only checks the
# key when it is started with --api-auth.

def chat_with_chatgpt(prompt: str):
    payload = {
        'model': MODEL,
        'messages': [
            {"role": "user", "content": prompt}
        ]
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_SECRET_KEY}"
    }
    url = 'http://127.0.0.1:8000/v1/chat/completions'

    try:
        response = requests.post(url, headers=headers, json=payload, verify=certifi.where())

        # Check if the request was successful
        if response.status_code == 200:
            response_data = response.json()
            if "error" in response_data:
                print(f"OpenAI request failed with error {response_data['error']}")
                return None
            return response_data['choices'][0]['message']['content']
        else:
            print(f"Request failed with status code {response.status_code}")
            return None
    except Exception as e:
        print(f"Request failed: {e}")
        return None

response = chat_with_chatgpt('what can you do for me')
print(response)

--------------------------------------------------------------------------------
/windows_api.py:
--------------------------------------------------------------------------------
import pygetwindow as gw
import pyautogui

def get_mouse_window_rect():
    x, y = pyautogui.position()
    win = gw.getWindowsAt(x, y)
    if win:
        win = win[0]  # take the topmost window under the mouse
        return (win.left, win.top, win.left + win.width, win.top + win.height)
    else:
        return None
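
if __name__ == '__main__':
    # Minimal manual check (a sketch): run this module directly, then move the
    # mouse over the window you want to measure within two seconds, mirroring
    # the self-test at the bottom of search_in_browser.py.
    import time
    time.sleep(2)
    print(get_mouse_window_rect())

--------------------------------------------------------------------------------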