├── README.md
├── chat.py
└── requirements.txt

/README.md:
--------------------------------------------------------------------------------
# Self Support ChatBot

This is an initial version of a support chatbot that guides you through performing actions across websites and other software.

The bot uses the multimodal abilities of LLMs: it captures the screen and then reasons about what the next steps should be.

## Running

1. `pip install -r requirements.txt`
2. Put your OpenAI API key in the `api_key` variable in `chat.py`
3. Run `python chat.py`
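## How it works

On every "Submit" or "Next" click, `chat.py` performs one round trip: it grabs a screenshot with `pyautogui`, base64-encodes it, and sends it together with the request to the OpenAI chat completions endpoint. Below is a minimal sketch of that round trip; the environment-variable lookup and the example question are illustrative assumptions, while the script itself keeps the key in the `api_key` variable and reads the request from the chat widget's input box.

```python
import base64
import io
import os

import pyautogui
import requests

# Assumption for this sketch: the key comes from an environment variable.
# chat.py itself stores it in the api_key variable at the top of the file.
api_key = os.environ.get("OPENAI_API_KEY", "")

# 1. Capture the screen and encode it as a base64 PNG
screenshot = pyautogui.screenshot()
buffer = io.BytesIO()
screenshot.save(buffer, format="PNG")
image_b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")

# 2. Send the screenshot plus the user request to the vision chat endpoint
payload = {
    "model": "gpt-4-vision-preview",
    "messages": [{
        "role": "user",
        "content": [
            # Example question for illustration only
            {"type": "text", "text": "How do I open a private browsing window?"},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
        ],
    }],
    "max_tokens": 300,
}
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers={"Authorization": f"Bearer {api_key}"},
    json=payload,
)
print(response.json()["choices"][0]["message"]["content"])
```

The actual widget wraps this call in a Tkinter window and keeps a `conversation_history` list, so "Next" requests carry the earlier exchanges as context.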
## Future Plans

- [ ] Add support for Anthropic Claude 3
- [ ] Add support for open-source multimodal LLMs (LLaVA, BakLLaVA, etc.)
- [ ] Improve chat history
- [ ] Add the ability to upload technical guides to help the bot give better answers
- [ ] Add a caching mechanism based on a vector DB to reduce cost and improve latency

## Contributions

Contributions are welcome. Please open an issue to discuss the changes you would like to make.

--------------------------------------------------------------------------------
/chat.py:
--------------------------------------------------------------------------------
import tkinter as tk
from tkinter import scrolledtext
import pyautogui
import base64
import requests
import io

# OpenAI API key (fill this in before running)
api_key = ""

# Running history of the conversation, sent with every request for context
conversation_history = []


def encode_image(image_path):
    """Read an image file from disk and return its base64 encoding (currently unused)."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def submit_text():
    """Handle the Submit button: send the user's request plus a screenshot to the model."""
    user_input = input_box.get("1.0", "end-1c")
    conversation_history.append({"role": "user", "content": [{"type": "text", "text": user_input}]})
    output_box.configure(state='normal')
    output_box.insert(tk.END, "User: ", 'bold')  # Make 'User' label bold
    output_box.insert(tk.END, user_input + "\n")
    output_box.insert(tk.END, "\n")  # Add a line space

    # Capture the screen and encode it as a base64 PNG
    screenshot = pyautogui.screenshot()
    screenshot_bytes = io.BytesIO()
    screenshot.save(screenshot_bytes, format='PNG')
    screenshot_base64 = base64.b64encode(screenshot_bytes.getvalue()).decode('utf-8')

    prompt = """You are a technical support assistant in charge of helping the user perform some action.
Attached is a screenshot of the user's screen and a request from the user. Guide the user on what to do next. Keep your
answer as short and precise as possible. In case you don't know what to do, write "I don't know". Notice that the screenshot contains the chat
widget where the user communicates with you (titled "chat widget") - please ignore it.
====
user request: """ + user_input
    response = call_chat_with_image(screenshot_base64, prompt)

    output_box.insert(tk.END, "Assistant: ", 'bold')  # Make 'Assistant' label bold
    output_box.insert(tk.END, response + "\n")
    output_box.insert(tk.END, "\n")  # Add a line space
    output_box.see(tk.END)  # Scroll to the end of the output box

    output_box.configure(state='disabled')
    input_box.delete("1.0", tk.END)


def call_chat_with_image(image, text):
    """Send the conversation history plus the current prompt and screenshot to the OpenAI API."""
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": conversation_history + [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": text
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)

    print(response.json())  # Debug output of the full API response
    response_text = response.json()["choices"][0]["message"]["content"]
    conversation_history.append({"role": "assistant", "content": [{"type": "text", "text": response_text}]})

    return response_text


def next_text():
    """Handle the Next button: assume the last instruction was performed and ask for the next step."""
    output_box.configure(state='normal')

    prompt = """You are a technical support assistant in charge of helping the user perform some action.
Attached is a screenshot of the user's screen and the history of what the user requested and what you told the user to do.
Assume the user has performed the last action you told them to do. Guide the user on what to do next. Keep your
answer as short and precise as possible. In case you don't know what to do, write "I don't know". Notice that the screenshot contains the chat
widget where the user communicates with you (titled "chat widget") - please ignore it."""

    # Capture the screen and encode it as a base64 PNG
    screenshot = pyautogui.screenshot()
    screenshot_bytes = io.BytesIO()
    screenshot.save(screenshot_bytes, format='PNG')
    screenshot_base64 = base64.b64encode(screenshot_bytes.getvalue()).decode('utf-8')

    response = call_chat_with_image(screenshot_base64, prompt)
    output_box.insert(tk.END, "Assistant: ", 'bold')  # Make 'Assistant' label bold
    output_box.insert(tk.END, response + "\n")
    output_box.insert(tk.END, "\n")  # Add a line space
    output_box.see(tk.END)  # Scroll to the end of the output box

    output_box.configure(state='disabled')


def get_next_text():
    # Function to get the next text (placeholder, currently unused)
    # Replace this with your own logic
    return "This is the next text."

# Create the main window
window = tk.Tk()
window.title("Chat Widget")
window.attributes('-topmost', True)  # Keep the window always on top

# Create the output box
output_box = scrolledtext.ScrolledText(window, height=20, width=50, state='disabled', wrap="word")
output_box.pack()
output_box.tag_configure('bold', font=('Arial', 10, 'bold'))

# Create the input box
input_box = tk.Text(window, height=2, width=50, wrap="word")
input_box.pack()

# Create the submit button
submit_button = tk.Button(window, text="Submit", command=submit_text)
submit_button.pack()

# Create the next button
next_button = tk.Button(window, text="Next", command=next_text)
next_button.pack()

# Run the main event loop
window.mainloop()

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
tk
pyautogui
requests
--------------------------------------------------------------------------------