├── .gitattributes ├── .gitignore ├── README.md ├── ai_assistant.py ├── assets ├── animated_assistant.gif ├── messages-square.png ├── mic-off.png ├── mic-off.svg ├── mic.png ├── mic.svg └── static_assistant.png ├── chat_interface.py ├── datalake ├── Bakery_TEA.xlsx ├── business_plan.docx ├── data_by_cities.xlsx ├── eggs.pdf ├── electricity.png └── wages.pdf ├── smartest_kid.py └── tools ├── directory_tools.py ├── excel_tools.py ├── image_tools.py └── word_tools.py /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | .env 13 | 14 | # User-specific files (MonoDevelop/Xamarin Studio) 15 | *.userprefs 16 | 17 | # Mono auto generated files 18 | mono_crash.* 19 | 20 | # Build results 21 | [Dd]ebug/ 22 | [Dd]ebugPublic/ 23 | [Rr]elease/ 24 | [Rr]eleases/ 25 | x64/ 26 | x86/ 27 | [Ww][Ii][Nn]32/ 28 | [Aa][Rr][Mm]/ 29 | [Aa][Rr][Mm]64/ 30 | bld/ 31 | [Bb]in/ 32 | [Oo]bj/ 33 | [Oo]ut/ 34 | [Ll]og/ 35 | [Ll]ogs/ 36 | 37 | # Visual Studio 2015/2017 cache/options directory 38 | .vs/ 39 | # Uncomment if you have tasks that create the project's static files in wwwroot 40 | #wwwroot/ 41 | 42 | # Visual Studio 2017 auto generated files 43 | Generated\ Files/ 44 | 45 | # MSTest test Results 46 | [Tt]est[Rr]esult*/ 47 | [Bb]uild[Ll]og.* 48 | 49 | # NUnit 50 | *.VisualState.xml 51 | TestResult.xml 52 | nunit-*.xml 53 | 54 | # Build Results of an ATL Project 55 | [Dd]ebugPS/ 56 | [Rr]eleasePS/ 57 | dlldata.c 58 | 59 | # Benchmark Results 60 | BenchmarkDotNet.Artifacts/ 61 | 62 | # .NET Core 63 | project.lock.json 64 | project.fragment.lock.json 65 | artifacts/ 66 | 67 | # ASP.NET Scaffolding 68 | ScaffoldingReadMe.txt 69 | 70 | # StyleCop 71 | StyleCopReport.xml 72 | 73 | # Files built by Visual Studio 74 | *_i.c 75 | *_p.c 76 | *_h.h 77 | *.ilk 78 | *.meta 79 | *.obj 80 | *.iobj 81 | *.pch 82 | *.pdb 83 | *.ipdb 84 | *.pgc 85 | *.pgd 86 | *.rsp 87 | *.sbr 88 | *.tlb 89 | *.tli 90 | *.tlh 91 | *.tmp 92 | *.tmp_proj 93 | *_wpftmp.csproj 94 | *.log 95 | *.vspscc 96 | *.vssscc 97 | .builds 98 | *.pidb 99 | *.svclog 100 | *.scc 101 | 102 | # Chutzpah Test files 103 | _Chutzpah* 104 | 105 | # Visual C++ cache files 106 | ipch/ 107 | *.aps 108 | *.ncb 109 | *.opendb 110 | *.opensdf 111 | *.sdf 112 | *.cachefile 113 | *.VC.db 114 | *.VC.VC.opendb 115 | 116 | # Visual Studio profiler 117 | *.psess 118 | *.vsp 119 | *.vspx 120 | *.sap 121 | 122 | # Visual Studio Trace Files 123 | *.e2e 124 | 125 | # TFS 2012 Local Workspace 126 | $tf/ 127 | 128 | # Guidance Automation Toolkit 129 | *.gpState 130 | 131 | # ReSharper is a .NET coding add-in 132 | _ReSharper*/ 133 | *.[Rr]e[Ss]harper 134 | *.DotSettings.user 135 | 136 | # TeamCity is a build add-in 137 | _TeamCity* 138 | 139 | # DotCover is a Code Coverage Tool 140 | *.dotCover 141 | 142 | # AxoCover is a Code Coverage Tool 143 | .axoCover/* 144 | !.axoCover/settings.json 145 | 146 | # Coverlet is a free, cross platform Code Coverage Tool 147 | coverage*.json 148 | coverage*.xml 149 | coverage*.info 150 | 151 | # Visual Studio code coverage results 152 | *.coverage 153 | *.coveragexml 154 | 155 | # NCrunch 156 | _NCrunch_* 157 | .*crunch*.local.xml 158 | nCrunchTemp_* 159 | 160 | # MightyMoose 161 | *.mm.* 162 | 
AutoTest.Net/ 163 | 164 | # Web workbench (sass) 165 | .sass-cache/ 166 | 167 | # Installshield output folder 168 | [Ee]xpress/ 169 | 170 | # DocProject is a documentation generator add-in 171 | DocProject/buildhelp/ 172 | DocProject/Help/*.HxT 173 | DocProject/Help/*.HxC 174 | DocProject/Help/*.hhc 175 | DocProject/Help/*.hhk 176 | DocProject/Help/*.hhp 177 | DocProject/Help/Html2 178 | DocProject/Help/html 179 | 180 | # Click-Once directory 181 | publish/ 182 | 183 | # Publish Web Output 184 | *.[Pp]ublish.xml 185 | *.azurePubxml 186 | # Note: Comment the next line if you want to checkin your web deploy settings, 187 | # but database connection strings (with potential passwords) will be unencrypted 188 | *.pubxml 189 | *.publishproj 190 | 191 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 192 | # checkin your Azure Web App publish settings, but sensitive information contained 193 | # in these scripts will be unencrypted 194 | PublishScripts/ 195 | 196 | # NuGet Packages 197 | *.nupkg 198 | # NuGet Symbol Packages 199 | *.snupkg 200 | # The packages folder can be ignored because of Package Restore 201 | **/[Pp]ackages/* 202 | # except build/, which is used as an MSBuild target. 203 | !**/[Pp]ackages/build/ 204 | # Uncomment if necessary however generally it will be regenerated when needed 205 | #!**/[Pp]ackages/repositories.config 206 | # NuGet v3's project.json files produces more ignorable files 207 | *.nuget.props 208 | *.nuget.targets 209 | 210 | # Microsoft Azure Build Output 211 | csx/ 212 | *.build.csdef 213 | 214 | # Microsoft Azure Emulator 215 | ecf/ 216 | rcf/ 217 | 218 | # Windows Store app package directories and files 219 | AppPackages/ 220 | BundleArtifacts/ 221 | Package.StoreAssociation.xml 222 | _pkginfo.txt 223 | *.appx 224 | *.appxbundle 225 | *.appxupload 226 | 227 | # Visual Studio cache files 228 | # files ending in .cache can be ignored 229 | *.[Cc]ache 230 | # but keep track of directories ending in .cache 231 | !?*.[Cc]ache/ 232 | 233 | # Others 234 | ClientBin/ 235 | ~$* 236 | *~ 237 | *.dbmdl 238 | *.dbproj.schemaview 239 | *.jfm 240 | *.pfx 241 | *.publishsettings 242 | orleans.codegen.cs 243 | 244 | # Including strong name files can present a security risk 245 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 246 | #*.snk 247 | 248 | # Since there are multiple workflows, uncomment next line to ignore bower_components 249 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 250 | #bower_components/ 251 | 252 | # RIA/Silverlight projects 253 | Generated_Code/ 254 | 255 | # Backup & report files from converting an old project file 256 | # to a newer Visual Studio version. 
Backup files are not needed, 257 | # because we have git ;-) 258 | _UpgradeReport_Files/ 259 | Backup*/ 260 | UpgradeLog*.XML 261 | UpgradeLog*.htm 262 | ServiceFabricBackup/ 263 | *.rptproj.bak 264 | 265 | # SQL Server files 266 | *.mdf 267 | *.ldf 268 | *.ndf 269 | 270 | # Business Intelligence projects 271 | *.rdl.data 272 | *.bim.layout 273 | *.bim_*.settings 274 | *.rptproj.rsuser 275 | *- [Bb]ackup.rdl 276 | *- [Bb]ackup ([0-9]).rdl 277 | *- [Bb]ackup ([0-9][0-9]).rdl 278 | 279 | # Microsoft Fakes 280 | FakesAssemblies/ 281 | 282 | # GhostDoc plugin setting file 283 | *.GhostDoc.xml 284 | 285 | # Node.js Tools for Visual Studio 286 | .ntvs_analysis.dat 287 | node_modules/ 288 | 289 | # Visual Studio 6 build log 290 | *.plg 291 | 292 | # Visual Studio 6 workspace options file 293 | *.opt 294 | 295 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 296 | *.vbw 297 | 298 | # Visual Studio LightSwitch build output 299 | **/*.HTMLClient/GeneratedArtifacts 300 | **/*.DesktopClient/GeneratedArtifacts 301 | **/*.DesktopClient/ModelManifest.xml 302 | **/*.Server/GeneratedArtifacts 303 | **/*.Server/ModelManifest.xml 304 | _Pvt_Extensions 305 | 306 | # Paket dependency manager 307 | .paket/paket.exe 308 | paket-files/ 309 | 310 | # FAKE - F# Make 311 | .fake/ 312 | 313 | # CodeRush personal settings 314 | .cr/personal 315 | 316 | # Python Tools for Visual Studio (PTVS) 317 | __pycache__/ 318 | *.pyc 319 | 320 | # Cake - Uncomment if you are using it 321 | # tools/** 322 | # !tools/packages.config 323 | 324 | # Tabs Studio 325 | *.tss 326 | 327 | # Telerik's JustMock configuration file 328 | *.jmconfig 329 | 330 | # BizTalk build output 331 | *.btp.cs 332 | *.btm.cs 333 | *.odx.cs 334 | *.xsd.cs 335 | 336 | # OpenCover UI analysis results 337 | OpenCover/ 338 | 339 | # Azure Stream Analytics local run output 340 | ASALocalRun/ 341 | 342 | # MSBuild Binary and Structured Log 343 | *.binlog 344 | 345 | # NVidia Nsight GPU debugger configuration file 346 | *.nvuser 347 | 348 | # MFractors (Xamarin productivity tool) working folder 349 | .mfractor/ 350 | 351 | # Local History for Visual Studio 352 | .localhistory/ 353 | 354 | # BeatPulse healthcheck temp database 355 | healthchecksdb 356 | 357 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 358 | MigrationBackup/ 359 | 360 | # Ionide (cross platform F# VS Code tools) working folder 361 | .ionide/ 362 | 363 | # Fody - auto-generated XML schema 364 | FodyWeavers.xsd -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SmartestKid AI Assistant 2 | 3 | A Windows desktop AI assistant built in Python. Assistant (without tools) is ~1000 lines of python code, with super simple chat UI inspired by the original AI, SmarterChild. Uses Windows COM automation to interface with Microsoft Office (Word, Excel), Images, and your file system. Perfect for Windows users looking to explore AI-powered desktop automation. 
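For a sense of what "Windows COM automation" means here, the sketch below drives Excel the same way `tools/excel_tools.py` does (via `win32com`). This is a minimal illustration, not part of the project: it assumes Windows, a local Excel install, and the `pywin32` package, and the workbook path is purely hypothetical.

```python
import win32com.client

# Launch (or attach to) Excel through its COM interface.
excel = win32com.client.Dispatch("Excel.Application")
excel.Visible = True                # show the window so edits are visible
wb = excel.Workbooks.Add()          # create a new workbook
wb.Worksheets(1).Range("A1").Value = "hello from COM"   # write a cell in A1 notation
wb.SaveAs(r"C:\temp\demo.xlsx")     # illustrative path only
wb.Close()
excel.Quit()
```

The repo's `ExcelTools` wraps this same pattern with open-workbook tracking, A1-style cell writes, and cleanup.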
4 | 5 | ## Demo 6 | https://github.com/user-attachments/assets/a7b0ae86-53d6-4407-b2dd-ea6f4abb59e4 7 | 8 | ## Features 9 | 10 | - Toggle between voice and text input modes 11 | - Interface with Word, Excel, Images, and your file system (Windows only) 12 | - Cute draggable interface elements 13 | 14 | ## Requirements 15 | 16 | - Windows OS 17 | - Python 3.7+ 18 | - OpenAI API key for AI responses 19 | - Microsoft Office (for Word/Excel features) 20 | - Virtual environment (recommended) 21 | 22 | ## Setup 23 | 24 | 1. Clone the repository 25 | 2. Create and activate a virtual environment: 26 | ```bash 27 | python -m venv venv 28 | # On Windows: 29 | .\venv\Scripts\activate 30 | # On Unix/MacOS: 31 | source venv/bin/activate 32 | ``` 33 | 3. Install dependencies: 34 | ```bash 35 | pip install openai pillow pyaudio httpx python-dotenv pywin32 36 | ``` 37 | 4. Create a `.env` file in the root directory with your API key and datalake path: 38 | ```env 39 | # API Keys 40 | OPENAI_API_KEY=your_openai_api_key_here 41 | 42 | # Paths 43 | BASE_DIRECTORY=path/to/your/datalake 44 | ``` 45 | 5. Run the application: 46 | ```bash 47 | python smartest_kid.py 48 | ``` 49 | 50 | ## Usage 51 | 52 | - Click the microphone icon to toggle voice input 53 | - Click the message icon to toggle the chat interface 54 | - Drag the robot or chat window to reposition them 55 | - Press ESC to exit the application 56 | 57 | ## Project Structure 58 | 59 | - `smartest_kid.py`: Main application and robot animation logic 60 | - `chat_interface.py`: Chat UI implementation 61 | - `ai_assistant.py`: AI integration with the OpenAI API 62 | - `assets/`: Contains UI icons and robot character images 63 | - `tools/`: Contains tools for the assistant to use 64 | - `datalake/`: Contains data for the assistant to use 65 | - `.env`: Configuration and API keys 66 | 67 | ## License 68 | 69 | MIT License 70 | 71 | ## Contributing 72 | 73 | Want to contribute? Here are some areas we'd love help with: 74 | 1. Office Integration - Expand Excel/Word functionality and add new Office app support 75 | 2. Assistant Personality - Add Clippy-style emotions and contextual reactions (pls someone find these gifs) 76 | 3. New Tools - Integrate with more applications (PowerPoint, PDF readers, browsers, etc.) 77 | 78 | Feel free to open an issue or submit a pull request! 79 | 80 | ## Authors 81 | 82 | Victor Von Miller & Emmett Goodman 83 | -------------------------------------------------------------------------------- /ai_assistant.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import json 4 | import time 5 | from openai import OpenAI 6 | from tools.excel_tools import ExcelTools 7 | from tools.image_tools import ImageTools 8 | from tools.directory_tools import DirectoryTools 9 | from tools.word_tools import WordTools 10 | from dotenv import load_dotenv 11 | 12 | class ChatCompletionAssistant: 13 | def __init__(self, api_key=None): 14 | # Initialize OpenAI client with the provided API key 15 | self.client = OpenAI(api_key=api_key) 16 | 17 | # Initialize conversation with system message 18 | self.conversation = [ 19 | { 20 | "role": "system", 21 | "content": ( 22 | "You are an ultra smart modeling expert. You help people build and optimize models of all sorts. " 23 | "You have access to various tools such as analyzing files, processing Excel and Word documents, listing directories, and running calculations. "
24 | "When you use a tool, always explain what you found or what happened immediately after using it. " 25 | "Also, always, before using a tool, explain succinctly what tools you will use before you use them!" 26 | "Be interactive and conversational - if you need to use multiple tools, discuss the results of each one before moving to the next. " 27 | "In certain scenarios, you will be asked to plug numbers from one excel into another excel calculator and record. This may involve many loops writing in file A, file B, file A, etc. " 28 | ) 29 | } 30 | ] 31 | self.functions = [ 32 | { 33 | "type": "function", 34 | "function": { 35 | "name": "analyze_file", 36 | "description": "Analyze either an image or PDF file, using computer vision, to return valuable data or answer questions about it. Make sure you know the exact name of the file (i.e. might have to list directory tool first) before you open it!", 37 | "parameters": { 38 | "type": "object", 39 | "properties": { 40 | "question": { 41 | "type": "string", 42 | "description": "Question or instruction about what to analyze in the file. Be comprehensive." 43 | }, 44 | "file_name": { 45 | "type": "string", 46 | "description": "Name of the file in the datalake directory." 47 | } 48 | }, 49 | "required": ["file_name","question"] 50 | } 51 | } 52 | }, 53 | { 54 | "type": "function", 55 | "function": { 56 | "name": "process_excel", 57 | "description": "This function lets you create new excel files and edit existing ones. The input is the excel file name (file_path), as well as a set of tuples that represent data to put in the file (write_data). If the file_path doesnt exist in our data lake, it creates a new one. If the set of tuples is empty, we are not writing anything, just reading. At the end of the call, this function returns returns the post-edited state of the excel file. For example, if you just want to read the file, you can pass an empty list for write_data. However, if you're entering a value or values into an excel calculator, you dont need to do a distinct read because after you write, the function will return the new updated calculated state. write_data should be of the format {'A1': 42, 'B2': 'hello', 'C3': 'apple'}. Cell addresses MUST be in A1 notation (A1, B2, etc).", 58 | "parameters": { 59 | "type": "object", 60 | "properties": { 61 | "file_path": { 62 | "type": "string", 63 | "description": "Name or path of the Excel file to create or edit." 64 | }, 65 | "write_data": { 66 | "type": "object", 67 | "description": "REQUIRED for writing. Simple dictionary mapping cell addresses to values, e.g., {'A1': 42, 'B2': 'hello', 'C3': 'apple'}. Cell addresses MUST be in A1 notation (A1, B2, etc).", 68 | "additionalProperties": { 69 | "type": ["string", "number"] 70 | } 71 | } 72 | }, 73 | "required": ["file_path", "write_data"] 74 | } 75 | } 76 | }, 77 | { 78 | "type": "function", 79 | "function": { 80 | "name": "process_excel", 81 | "description": "Process an Excel file. When writing data, you MUST provide both file_path and write_data. Creates a new file if it doesn't exist. Cell addresses MUST be in A1 notation (A1, B2, etc).", 82 | "parameters": { 83 | "type": "object", 84 | "properties": { 85 | "file_path": { 86 | "type": "string", 87 | "description": "Name or path of the Excel file to process." 88 | }, 89 | "write_data": { 90 | "type": "object", 91 | "description": "REQUIRED for writing. Simple dictionary mapping cell addresses to values, e.g., {'A1': 42, 'B2': 'hello', 'C3': 'apple'}. 
Cell addresses MUST be in A1 notation (A1, B2, etc).", 92 | "additionalProperties": { 93 | "type": ["string", "number"] 94 | } 95 | } 96 | }, 97 | "required": ["file_path", "write_data"] 98 | } 99 | } 100 | }, 101 | { 102 | "type": "function", 103 | "function": { 104 | "name": "process_word", 105 | "description": "Process a Word document - read current content and optionally write new content", 106 | "parameters": { 107 | "type": "object", 108 | "properties": { 109 | "file_path": { 110 | "type": "string", 111 | "description": "Name of the Word document (e.g., 'mydoc.docx')" 112 | }, 113 | "content": { 114 | "type": "string", 115 | "description": "Optional content to write to the document" 116 | } 117 | }, 118 | "required": ["file_path"] 119 | } 120 | } 121 | }, 122 | { 123 | "type": "function", 124 | "function": { 125 | "name": "list_directory", 126 | "description": "List all files and directories in the DataLake directory", 127 | "parameters": { 128 | "type": "object", 129 | "properties": {}, 130 | "required": [] 131 | } 132 | } 133 | }, 134 | ] 135 | # Initialize local tool instances. 136 | self.excel_tools = ExcelTools() 137 | self.image_tools = ImageTools() 138 | self.directory_tools = DirectoryTools() 139 | self.word_tools = WordTools() 140 | 141 | def send_message(self, message: str, ui_callback=None): 142 | 143 | loop_counter = 0 144 | 145 | start_time = time.time() 146 | 147 | if message: 148 | self.conversation.append({"role": "user", "content": message}) 149 | complete_response = "" 150 | 151 | while True: # Keep going until we get a non-tool finish_reason 152 | 153 | loop_counter += 1 154 | 155 | try: 156 | completion = self.client.chat.completions.create( 157 | model="gpt-4o", 158 | messages=self.conversation, 159 | stream=True, 160 | tools=self.functions, 161 | tool_choice="auto" 162 | ) 163 | 164 | final_tool_calls = {} # Track complete tool calls 165 | 166 | for chunk in completion: 167 | delta = chunk.choices[0].delta 168 | 169 | # Handle regular content 170 | if delta.content: 171 | complete_response += delta.content 172 | if ui_callback: 173 | ui_callback(delta.content) 174 | 175 | # Handle tool calls 176 | if delta.tool_calls: 177 | for tool_call in delta.tool_calls: 178 | # Initialize if new tool call 179 | if tool_call.index not in final_tool_calls: 180 | final_tool_calls[tool_call.index] = { 181 | "id": tool_call.id, 182 | "type": "function", 183 | "function": { 184 | "name": tool_call.function.name, 185 | "arguments": "" 186 | } 187 | } 188 | 189 | # Accumulate arguments 190 | if tool_call.function and tool_call.function.arguments: 191 | final_tool_calls[tool_call.index]["function"]["arguments"] += tool_call.function.arguments 192 | 193 | # Check finish reason 194 | if chunk.choices[0].finish_reason == "tool_calls": 195 | for tool_call in final_tool_calls.values(): 196 | 197 | try: 198 | if ui_callback: 199 | ui_callback(f"\n\nRunning {tool_call['function']['name']}...\n\n") 200 | 201 | func_args = json.loads(tool_call["function"]["arguments"]) 202 | result = self.handle_function_call(tool_call["function"]["name"], func_args) 203 | 204 | # Add to conversation history 205 | self.conversation.append({ 206 | "role": "assistant", 207 | "content": None, 208 | "tool_calls": [tool_call] 209 | }) 210 | self.conversation.append({ 211 | "role": "tool", 212 | "tool_call_id": tool_call["id"], 213 | "content": str(result) 214 | }) 215 | except json.JSONDecodeError as e: 216 | print(f"[DEBUG] Error parsing arguments: {e}") 217 | break 218 | 219 | elif 
chunk.choices[0].finish_reason: 220 | print(f"[DEBUG] Finish reason: {chunk.choices[0].finish_reason}") 221 | if complete_response: 222 | self.conversation.append({"role": "assistant", "content": complete_response}) 223 | if ui_callback: 224 | ui_callback({"end_of_message": True}) 225 | return complete_response 226 | 227 | except Exception as e: 228 | print(f"Error in send_message: {str(e)}") 229 | return f"Error: {str(e)}" 230 | 231 | def handle_function_call(self, func_name: str, arguments: dict) -> str: 232 | print(f"[DEBUG] Handling function call: {func_name} with arguments {arguments}") 233 | if func_name == "process_excel": 234 | result = self.excel_tools.process_excel(arguments.get("file_path"), arguments.get("write_data")) 235 | elif func_name == "process_word": 236 | result = self.word_tools.process_word(arguments.get("file_path"), arguments.get("content")) 237 | elif func_name == "analyze_file": 238 | result = self.image_tools.analyze_file(arguments.get("question"), arguments.get("file_name")) 239 | elif func_name == "list_directory": 240 | result = self.directory_tools.list_directory() 241 | else: 242 | result = f"Function {func_name} not implemented." 243 | return str(result) 244 | 245 | def cleanup(self): 246 | self.excel_tools.cleanup() 247 | self.word_tools.cleanup() 248 | -------------------------------------------------------------------------------- /assets/animated_assistant.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/assets/animated_assistant.gif -------------------------------------------------------------------------------- /assets/messages-square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/assets/messages-square.png -------------------------------------------------------------------------------- /assets/mic-off.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/assets/mic-off.png -------------------------------------------------------------------------------- /assets/mic-off.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/mic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/assets/mic.png -------------------------------------------------------------------------------- /assets/mic.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/static_assistant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/assets/static_assistant.png -------------------------------------------------------------------------------- /chat_interface.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import ttk 3 | import time 4 | 5 | class ChatInterface(tk.Frame): 6 | def __init__(self, parent, 
assistant, width=400, height=500, message_queue=None): 7 | super().__init__(parent) 8 | self.assistant = assistant 9 | self.message_queue = message_queue 10 | self.chat_window_id = None 11 | self.receiving_assistant_message = False 12 | 13 | self.configure(width=width, height=height, bg='#ECE9D8') 14 | self.grid_propagate(False) 15 | self.pack_propagate(False) 16 | 17 | main_container = tk.Frame(self, bg='#ECE9D8') 18 | main_container.pack(fill=tk.BOTH, expand=True) 19 | 20 | header_frame = tk.Frame(main_container, bg='#2196F3', height=32) 21 | header_frame.pack(fill=tk.X) 22 | 23 | name_label = tk.Label(header_frame, text="SmartestLad", bg='#2196F3', fg='white', font=('Segoe UI', 10, 'bold')) 24 | name_label.pack(side=tk.LEFT, padx=12, pady=6) 25 | 26 | header_frame.bind("<Button-1>", self.start_move) 27 | header_frame.bind("<B1-Motion>", self.do_move) 28 | name_label.bind("<Button-1>", self.start_move) 29 | name_label.bind("<B1-Motion>", self.do_move) 30 | 31 | chat_height = int(height * 0.7) 32 | history_frame = tk.Frame(main_container, height=chat_height, bg='#ECE9D8') 33 | history_frame.pack(fill=tk.BOTH, expand=True, padx=2, pady=2) 34 | history_frame.pack_propagate(False) 35 | 36 | style = ttk.Style() 37 | style.theme_use('clam') 38 | style.configure("Vertical.TScrollbar", troughcolor="#FFFFFF", background="#D4D0C8", bordercolor="#D4D0C8") 39 | 40 | scrollbar = ttk.Scrollbar(history_frame, style="Vertical.TScrollbar", orient=tk.VERTICAL) 41 | scrollbar.pack(side=tk.RIGHT, fill=tk.Y) 42 | 43 | self.history = tk.Text(history_frame, wrap=tk.WORD, bg='white', font=('Segoe UI', 10), yscrollcommand=scrollbar.set) 44 | self.history.pack(fill=tk.BOTH, expand=True, padx=1, pady=1) 45 | scrollbar.config(command=self.history.yview) 46 | 47 | input_height = int(height * 0.3) 48 | input_frame = tk.Frame(main_container, height=input_height, bg='#ECE9D8') 49 | input_frame.pack(fill=tk.X, side=tk.BOTTOM, padx=2, pady=2) 50 | input_frame.pack_propagate(False) 51 | 52 | self.input_field = tk.Text(input_frame, wrap=tk.WORD, bg='white', font=('Segoe UI', 10), height=5) 53 | self.input_field.pack(fill=tk.BOTH, expand=True, padx=1, pady=1) 54 | 55 | self.input_field.insert('1.0', 'Enter your thoughts here...') 56 | self.input_field.tag_configure('placeholder', foreground='grey') 57 | self.input_field.tag_add('placeholder', '1.0', 'end') 58 | 59 | self.input_field.bind('<FocusIn>', self._on_focus_in) 60 | self.input_field.bind('<FocusOut>', self._on_focus_out) 61 | 62 | self._configure_styles() 63 | 64 | def handle_enter(event): 65 | if not event.state & 0x1: 66 | self.send_message() 67 | return 'break' 68 | 69 | self.input_field.bind('<Return>', handle_enter) 70 | 71 | def start_move(self, event): 72 | self._drag_data = {"x": event.x_root, "y": event.y_root} 73 | 74 | def do_move(self, event): 75 | if not hasattr(self, '_drag_data'): return 76 | dx = event.x_root - self._drag_data["x"] 77 | dy = event.y_root - self._drag_data["y"] 78 | self._drag_data.update({"x": event.x_root, "y": event.y_root}) 79 | if self.chat_window_id: 80 | coords = self.master.coords(self.chat_window_id) 81 | self.master.coords(self.chat_window_id, coords[0] + dx, coords[1] + dy) 82 | 83 | def add_message(self, message, sender_type='user'): 84 | # Only add newline if this is a new message (not a continuation) 85 | if not self.receiving_assistant_message: 86 | if self.history.get('1.0', tk.END).strip(): 87 | self.history.insert(tk.END, "\n") 88 | timestamp = time.strftime("(%I:%M:%S %p)") 89 | sender = "uberushaximus" if sender_type == 'user' else "SmartestLad" 90 | tag = 'user_message' if
sender_type == 'user' else 'assistant_message' 91 | 92 | self.history.insert(tk.END, f"{timestamp} ", 'timestamp') 93 | self.history.insert(tk.END, f"{sender}: ", tag) 94 | self.receiving_assistant_message = (sender_type == 'assistant') 95 | 96 | # Add the message fragment 97 | if isinstance(message, dict) and 'end_of_message' in message: 98 | self.receiving_assistant_message = False 99 | else: 100 | self.history.insert(tk.END, str(message)) 101 | self.history.see(tk.END) 102 | 103 | def add_user_message(self, message): 104 | self.add_message(message, sender_type='user') 105 | 106 | def add_assistant_message(self, message): 107 | self.add_message(message, sender_type='assistant') 108 | 109 | def _on_focus_in(self, event): 110 | self._handle_placeholder(event) 111 | 112 | def _on_focus_out(self, event): 113 | self._handle_placeholder(event, removing=False) 114 | 115 | def _handle_placeholder(self, event, removing=True): 116 | text = self.input_field.get('1.0', 'end-1c') 117 | if removing and text == 'Enter your thoughts here...': 118 | self.input_field.delete('1.0', tk.END) 119 | self.input_field.tag_remove('placeholder', '1.0', 'end') 120 | elif not removing and not text.strip(): 121 | self.input_field.insert('1.0', 'Enter your thoughts here...') 122 | self.input_field.tag_add('placeholder', '1.0', 'end') 123 | 124 | def send_message(self): 125 | message = self.input_field.get('1.0', tk.END).strip() 126 | if message and message != 'Enter your thoughts here...': 127 | # Clear input field first 128 | self.input_field.delete('1.0', tk.END) 129 | 130 | # Send to message queue if available 131 | if hasattr(self, 'message_queue') and self.message_queue: 132 | self._from_chat_interface = True 133 | self.message_queue.put(message) 134 | # Don't add the message here - let process_ai_messages handle it 135 | return 136 | 137 | # Only add directly if no message queue (fallback case) 138 | self.add_message(message) 139 | 140 | def _configure_styles(self): 141 | style = ttk.Style() 142 | style.theme_use('clam') 143 | style.configure("Vertical.TScrollbar", 144 | troughcolor="#FFFFFF", 145 | background="#D4D0C8", 146 | bordercolor="#D4D0C8") 147 | 148 | self.history.tag_configure('user_message', 149 | foreground='#FF0000', 150 | font=('Segoe UI', 10)) 151 | self.history.tag_configure('assistant_message', 152 | foreground='#2196F3', 153 | font=('Segoe UI', 10)) 154 | self.history.tag_configure('timestamp', 155 | foreground='#9E9E9E', 156 | font=('Segoe UI', 9)) -------------------------------------------------------------------------------- /datalake/Bakery_TEA.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/Bakery_TEA.xlsx -------------------------------------------------------------------------------- /datalake/business_plan.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/business_plan.docx -------------------------------------------------------------------------------- /datalake/data_by_cities.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/data_by_cities.xlsx -------------------------------------------------------------------------------- /datalake/eggs.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/eggs.pdf -------------------------------------------------------------------------------- /datalake/electricity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/electricity.png -------------------------------------------------------------------------------- /datalake/wages.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/wages.pdf -------------------------------------------------------------------------------- /smartest_kid.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import ttk 3 | from PIL import Image, ImageTk, ImageDraw, ImageFont, ImageSequence 4 | import os 5 | import wave 6 | import threading 7 | import time 8 | import pyaudio 9 | import audioop # For RMS calculations 10 | import queue 11 | from io import BytesIO 12 | from chat_interface import ChatInterface 13 | 14 | # Import the new ChatCompletion-based assistant. 15 | from ai_assistant import ChatCompletionAssistant 16 | 17 | # ------------------ Recording with VAD ------------------ 18 | 19 | def transcribe_file_with_whisper(audio_filename, app): 20 | """ 21 | Uses OpenAI's Whisper API to transcribe a local audio file. 22 | Returns only the final transcript string. 23 | """ 24 | try: 25 | transcribe_start = time.time() # Start timing transcription 26 | from openai import OpenAI 27 | 28 | print("Calling Whisper API...") 29 | app.is_transcribing = True 30 | 31 | client = OpenAI(api_key=app.assistant_api_key) 32 | 33 | with open(audio_filename, "rb") as audio_file: 34 | try: 35 | response = client.audio.transcriptions.create( 36 | model="whisper-1", 37 | file=audio_file, 38 | response_format="text" 39 | ) 40 | print("[WHISPER] Successfully transcribed audio") 41 | except Exception as whisper_error: 42 | print(f"[WHISPER] API Error: {str(whisper_error)}") 43 | raise 44 | 45 | transcript_str = response.strip() 46 | transcribe_duration = time.time() - transcribe_start 47 | print(f"[TIMING] Transcription completed in {transcribe_duration:.2f}s") 48 | 49 | with open("transcription.txt", "w") as f: 50 | f.write(transcript_str) 51 | 52 | return transcript_str 53 | 54 | except Exception as e: 55 | error_msg = f"Error during Whisper transcription: {str(e)}" 56 | app.log(error_msg) 57 | print(error_msg) 58 | return None 59 | finally: 60 | app.is_transcribing = False 61 | 62 | 63 | def record_and_transcribe_vad(app): 64 | """ 65 | Records audio until silence is detected using VAD. 66 | Saves the audio to a WAV file and transcribes it via Whisper. 67 | The transcript is then added to the AI processing queue. 
68 | """ 69 | audio_filename = "temp_recording.wav" 70 | transcript_filename = "transcription.txt" 71 | 72 | try: 73 | chunk = 1024 74 | sample_format = pyaudio.paInt16 75 | channels = 1 76 | rate = 16000 77 | silence_threshold = 200 78 | silence_chunks = 0 79 | 80 | pause_duration_sec = 2.5 81 | max_silence_chunks = int(pause_duration_sec / (chunk / rate)) 82 | min_chunks = int(0.5 / (chunk / rate)) 83 | frames = [] 84 | 85 | p = pyaudio.PyAudio() 86 | stream = p.open(format=sample_format, channels=channels, rate=rate, 87 | input=True, frames_per_buffer=chunk) 88 | app.log("Recording started (VAD enabled). Speak now...") 89 | app.is_listening = False 90 | voiced = False 91 | 92 | while True: 93 | if app.is_paused: 94 | # Always process what we have when muting 95 | break 96 | 97 | data = stream.read(chunk, exception_on_overflow=False) 98 | frames.append(data) 99 | rms = audioop.rms(data, 2) 100 | if rms > silence_threshold: 101 | silence_chunks = 0 102 | voiced = True 103 | app.is_listening = True 104 | else: 105 | if voiced: 106 | silence_chunks += 1 107 | if voiced and silence_chunks > max_silence_chunks and len(frames) > min_chunks: 108 | app.log("Silence detected. Finishing recording.") 109 | break 110 | 111 | # Close the stream after breaking from the loop 112 | stream.stop_stream() 113 | stream.close() 114 | p.terminate() 115 | 116 | # Always process the recording if we have enough voiced frames 117 | if voiced and len(frames) > min_chunks: 118 | wf = wave.open(audio_filename, 'wb') 119 | wf.setnchannels(channels) 120 | wf.setsampwidth(p.get_sample_size(sample_format)) 121 | wf.setframerate(rate) 122 | wf.writeframes(b''.join(frames)) 123 | wf.close() 124 | app.log("Recording finished. Audio saved to " + audio_filename) 125 | 126 | # Remove the pause check here - always process the final chunk 127 | transcript = transcribe_file_with_whisper(audio_filename, app) 128 | if transcript: 129 | app.log("Transcription: " + transcript) 130 | print("Transcription:", transcript) 131 | app.message_queue.put(transcript) 132 | else: 133 | app.log("Whisper transcription failed.") 134 | 135 | finally: 136 | try: 137 | if os.path.exists(audio_filename): 138 | os.remove(audio_filename) 139 | print(f"Cleaned up {audio_filename}") 140 | if os.path.exists(transcript_filename): 141 | os.remove(transcript_filename) 142 | print(f"Cleaned up {transcript_filename}") 143 | except Exception as e: 144 | print(f"Error cleaning up temporary files: {e}") 145 | 146 | def continuous_record_and_transcribe(app): 147 | while True: 148 | record_and_transcribe_vad(app) 149 | # Optionally, add a delay here if desired. 
150 | 151 | # ------------------ Animated Tkinter Application ------------------ 152 | 153 | class AnimatedCharacter: 154 | def __init__(self, canvas, x, y): 155 | self.canvas = canvas 156 | self.x = x 157 | self.y = y 158 | self.is_animated = False 159 | 160 | static_img = Image.open("assets/static_assistant.png") 161 | if static_img.mode != 'RGBA': 162 | static_img = static_img.convert('RGBA') 163 | data = static_img.getdata() 164 | new_data = [] 165 | for item in data: 166 | if item[0] > 240 and item[1] > 240 and item[2] > 240: 167 | new_data.append((255, 255, 255, 0)) 168 | else: 169 | new_data.append(item) 170 | static_img.putdata(new_data) 171 | self.static_image = ImageTk.PhotoImage(static_img) 172 | 173 | gif = Image.open("assets/animated_assistant.gif") 174 | self.animated_frames = [] 175 | for frame in ImageSequence.Iterator(gif): 176 | frame = frame.convert('RGBA') 177 | data = frame.getdata() 178 | new_data = [] 179 | for item in data: 180 | if item[0] < 15 and item[1] < 15 and item[2] < 15: 181 | new_data.append((0, 0, 0, 0)) 182 | else: 183 | new_data.append(item) 184 | frame.putdata(new_data) 185 | self.animated_frames.append(ImageTk.PhotoImage(frame)) 186 | 187 | self.image_id = canvas.create_image(x, y, image=self.static_image, anchor='center', tags='character') 188 | self.current_frame = 0 189 | 190 | def set_animated(self, animated): 191 | self.is_animated = animated 192 | if not animated: 193 | self.canvas.itemconfig(self.image_id, image=self.static_image) 194 | 195 | def update(self): 196 | if self.is_animated and self.animated_frames: 197 | self.current_frame = (self.current_frame + 1) % len(self.animated_frames) 198 | self.canvas.itemconfig(self.image_id, image=self.animated_frames[self.current_frame]) 199 | 200 | class App(tk.Tk): 201 | def __init__(self): 202 | super().__init__() 203 | self.title("Animated AI Avatar - Transparent Robot Listening") 204 | self.geometry("1800x1600") 205 | self.resizable(False, False) 206 | 207 | self.overrideredirect(True) 208 | self.attributes('-alpha', 0.0) 209 | self.wm_attributes("-transparentcolor", "SystemButtonFace") 210 | self.wm_attributes("-topmost", True) 211 | 212 | self.canvas = tk.Canvas( 213 | self, 214 | bg="SystemButtonFace", 215 | width=1800, 216 | height=1600, 217 | highlightthickness=0 218 | ) 219 | self.canvas.pack() 220 | self.after(100, lambda: self.attributes('-alpha', 1.0)) 221 | 222 | self._drag_data = {"x": 0, "y": 0, "item": None, "start_time": 0, "start_x": 0, "start_y": 0} 223 | self.bind("<Escape>", lambda e: self.cleanup()) 224 | 225 | self.is_listening = False 226 | self.is_transcribing = False 227 | self.is_ai_processing = False 228 | self.red_dot_id = None 229 | self.dot_visible = False 230 | self.is_paused = True 231 | 232 | self.chat_interface = None 233 | self.chat_window = None 234 | 235 | self.canvas.tag_bind("character", "<ButtonPress-1>", self.on_drag_start) 236 | self.canvas.tag_bind("character", "<ButtonRelease-1>", self.on_drag_stop) 237 | self.canvas.tag_bind("character", "<B1-Motion>", self.on_drag_motion) 238 | 239 | self.character = AnimatedCharacter(self.canvas, 400, 300) 240 | 241 | self.log_widget = tk.Text(self, height=5, width=100) 242 | self.log_widget.pack(padx=10, pady=10) 243 | 244 | # Load API key and initialize ChatCompletionAssistant.
245 | from os import getenv 246 | from dotenv import load_dotenv 247 | load_dotenv() 248 | self.assistant_api_key = getenv('OPENAI_API_KEY') 249 | if not self.assistant_api_key: 250 | print("Please set your OPENAI_API_KEY environment variable") 251 | exit(1) 252 | self.assistant = ChatCompletionAssistant(self.assistant_api_key) 253 | 254 | self.message_queue = queue.Queue() 255 | self._ai_lock = threading.Lock() 256 | 257 | threading.Thread(target=continuous_record_and_transcribe, args=(self,), daemon=True).start() 258 | self.animate_robot() 259 | threading.Thread(target=self.process_ai_messages, daemon=True).start() 260 | self.create_mode_toggles() 261 | self.after(100, self.toggle_messages) 262 | 263 | def log(self, message): 264 | self.log_widget.insert(tk.END, message + "\n") 265 | self.log_widget.see(tk.END) 266 | 267 | def animate_robot(self): 268 | # Show red dot only when listening (not when processing) 269 | if self.is_listening and not self.is_ai_processing: 270 | self.toggle_red_dot() 271 | elif self.red_dot_id is not None: 272 | self.canvas.delete(self.red_dot_id) 273 | self.red_dot_id = None 274 | self.dot_visible = False 275 | 276 | # Animate robot when AI is processing (both voice and text) 277 | if self.is_ai_processing: 278 | if not self.character.is_animated: 279 | self.character.set_animated(True) 280 | self.character.update() 281 | else: 282 | if self.character.is_animated: 283 | self.character.set_animated(False) 284 | 285 | self.after(150, self.animate_robot) 286 | 287 | def on_drag_start(self, event): 288 | self._drag_data["x"] = event.x_root 289 | self._drag_data["y"] = event.y_root 290 | self._drag_data["start_time"] = time.time() 291 | self._drag_data["start_x"] = event.x_root 292 | self._drag_data["start_y"] = event.y_root 293 | 294 | def on_drag_stop(self, event): 295 | self._drag_data.update({"x": 0, "y": 0, "item": None, "start_time": 0, "start_x": 0, "start_y": 0}) 296 | 297 | def on_drag_motion(self, event): 298 | delta_x = event.x_root - self._drag_data["x"] 299 | delta_y = event.y_root - self._drag_data["y"] 300 | x = self.winfo_x() + delta_x 301 | y = self.winfo_y() + delta_y 302 | self.geometry(f"+{x}+{y}") 303 | self._drag_data["x"] = event.x_root 304 | self._drag_data["y"] = event.y_root 305 | 306 | def toggle_mute(self): 307 | self.is_paused = not self.is_paused 308 | try: 309 | self.mic_button.configure(image=self.mic_icon_muted if self.is_paused else self.mic_icon_active) 310 | except Exception as e: 311 | print(f"Error in toggle_mute: {e}") 312 | 313 | def process_ai_messages(self): 314 | while True: 315 | message = self.message_queue.get() 316 | if message: 317 | try: 318 | print(f"Processing with AI: {message}") 319 | self.log("Processing with AI: " + message) 320 | 321 | # Add user message to chat interface 322 | if self.chat_interface: 323 | self.chat_interface.add_user_message(message) 324 | 325 | # Set AI processing state and animate 326 | self.is_ai_processing = True 327 | self.is_transcribing = False # Ensure transcribing is off 328 | self.is_listening = False # Ensure listening is off 329 | 330 | def single_callback(text_fragment): 331 | if self.chat_interface: 332 | self.chat_interface.add_assistant_message(text_fragment) 333 | # Keep AI processing true while streaming response 334 | self.is_ai_processing = True 335 | self.character.set_animated(True) 336 | 337 | # Use the new ChatCompletion-based assistant 338 | with self._ai_lock: 339 | self.assistant.send_message(message, ui_callback=single_callback) 340 | except Exception as e: 341 | 
self.log(f"AI Processing error: {str(e)}") 342 | finally: 343 | # Reset all states after complete response 344 | self.is_ai_processing = False 345 | self.character.set_animated(False) 346 | self.message_queue.task_done() 347 | 348 | def process_new_message(self, user_input): 349 | start_time = time.time() 350 | print(f"\n[{time.strftime('%H:%M:%S')}] Processing new message: {user_input}") 351 | def single_callback(text_fragment): 352 | self.chat_interface and self.chat_interface.add_assistant_message(text_fragment) 353 | with self._ai_lock: 354 | try: 355 | self.is_transcribing = True 356 | if self.chat_interface: 357 | if not hasattr(self, '_from_chat_interface') or not self._from_chat_interface: 358 | self.chat_interface.add_user_message(user_input) 359 | self._from_chat_interface = False 360 | response = self.assistant.send_message(user_input, ui_callback=single_callback) 361 | print(f"[TIMING] Total response completed in {time.time()-start_time:.2f}s") 362 | except Exception as e: 363 | print(f"[ERROR] after {time.time()-start_time:.2f}s: {str(e)}") 364 | finally: 365 | self.is_transcribing = False 366 | 367 | def cleanup(self): 368 | self.destroy() 369 | 370 | def create_mode_toggles(self): 371 | button_size = 64 372 | def create_button(icon_file, active=True, use_green=True): 373 | button = Image.new('RGBA', (button_size, button_size), (0, 0, 0, 0)) 374 | draw = ImageDraw.Draw(button) 375 | padding = 8 376 | circle_color = (220, 255, 220, 255) if active else (255, 220, 220, 255) 377 | draw.ellipse([padding, padding, button_size-padding, button_size-padding], fill=circle_color) 378 | icon = Image.open(icon_file).convert('RGBA') 379 | icon = icon.resize((28, 28), Image.Resampling.LANCZOS) 380 | icon_x = (button_size - icon.width) // 2 381 | icon_y = (button_size - icon.height) // 2 382 | final_button = Image.new('RGBA', (button_size, button_size), (0, 0, 0, 0)) 383 | final_button.paste(button, (0, 0), button) 384 | final_button.paste(icon, (icon_x, icon_y), icon) 385 | return ImageTk.PhotoImage(final_button) 386 | 387 | self.mic_icon_active = create_button("assets/mic.png", True, use_green=True) 388 | self.mic_icon_muted = create_button("assets/mic-off.png", False, use_green=True) 389 | self.messages_icon_active = create_button("assets/messages-square.png", True, use_green=False) 390 | self.messages_icon_muted = create_button("assets/messages-square.png", False, use_green=False) 391 | 392 | self.mic_button = tk.Button( 393 | self.canvas, 394 | image=self.mic_icon_muted, 395 | command=self.toggle_mute, 396 | relief='flat', 397 | bg='SystemButtonFace', 398 | activebackground='SystemButtonFace', 399 | bd=0, 400 | highlightthickness=0, 401 | cursor="hand2", 402 | width=64, 403 | height=64 404 | ) 405 | 406 | self.messages_button = tk.Button( 407 | self.canvas, 408 | image=self.messages_icon_active, 409 | command=self.toggle_messages, 410 | relief='flat', 411 | bg='SystemButtonFace', 412 | activebackground='SystemButtonFace', 413 | bd=0, 414 | highlightthickness=0, 415 | cursor="hand2", 416 | width=64, 417 | height=64 418 | ) 419 | 420 | self.mic_button_window = self.canvas.create_window( 421 | self.character.x - 32, 422 | self.character.y - 100, 423 | window=self.mic_button, 424 | anchor='center' 425 | ) 426 | 427 | self.messages_button_window = self.canvas.create_window( 428 | self.character.x + 32, 429 | self.character.y - 100, 430 | window=self.messages_button, 431 | anchor='center' 432 | ) 433 | 434 | def toggle_messages(self): 435 | if not self.chat_interface: 436 | 
self.chat_interface = ChatInterface( 437 | self.canvas, 438 | self.assistant, 439 | width=400, 440 | height=500, 441 | message_queue=self.message_queue 442 | ) 443 | self.chat_window = self.canvas.create_window( 444 | self.character.x + 120, 445 | self.character.y - 200, 446 | window=self.chat_interface, 447 | anchor='nw', 448 | tags='chat' 449 | ) 450 | self.chat_interface.chat_window_id = self.chat_window 451 | self.canvas.itemconfig(self.chat_window, state='normal') 452 | self.messages_button.configure(image=self.messages_icon_active) 453 | else: 454 | current_state = self.canvas.itemcget(self.chat_window, 'state') 455 | new_state = 'hidden' if current_state == 'normal' else 'normal' 456 | self.canvas.itemconfig(self.chat_window, state=new_state) 457 | new_icon = self.messages_icon_muted if new_state == 'hidden' else self.messages_icon_active 458 | self.messages_button.configure(image=new_icon) 459 | 460 | def toggle_red_dot(self): 461 | if not self.dot_visible: 462 | x = self.character.x + 50 463 | y = self.character.y - 50 464 | self.red_dot_id = self.canvas.create_oval( 465 | x-5, y-5, x+5, y+5, 466 | fill='red', 467 | outline='darkred', 468 | tags='red_dot' 469 | ) 470 | self.dot_visible = True 471 | else: 472 | if self.red_dot_id: 473 | self.canvas.delete(self.red_dot_id) 474 | self.red_dot_id = None 475 | self.dot_visible = False 476 | 477 | def main(): 478 | app = App() 479 | app.mainloop() 480 | 481 | if __name__ == "__main__": 482 | main() -------------------------------------------------------------------------------- /tools/directory_tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env')) 4 | 5 | class DirectoryTools: 6 | def __init__(self): 7 | self.base_directory = os.getenv("BASE_DIRECTORY") 8 | if not self.base_directory: 9 | print("Warning: BASE_DIRECTORY not set in environment variables") 10 | 11 | def list_directory(self) -> str: 12 | """List all files and directories in the base directory""" 13 | try: 14 | if not self.base_directory: 15 | return "Error: BASE_DIRECTORY environment variable is not set" 16 | 17 | if not os.path.exists(self.base_directory): 18 | return f"Error: Directory {self.base_directory} does not exist" 19 | 20 | items = os.listdir(self.base_directory) 21 | if not items: 22 | return "Directory is empty" 23 | 24 | result = [f"Contents of directory ({self.base_directory}):"] 25 | for item in items: 26 | full_path = os.path.join(self.base_directory, item) 27 | item_type = "Directory" if os.path.isdir(full_path) else "File" 28 | result.append(f"{item_type}: {item}") 29 | 30 | return "\n".join(result) 31 | except Exception as e: 32 | return f"Error listing directory: {str(e)}" -------------------------------------------------------------------------------- /tools/excel_tools.py: -------------------------------------------------------------------------------- 1 | import win32com.client 2 | import pythoncom # Add this import 3 | import time 4 | from typing import List, Dict 5 | import json 6 | import os 7 | 8 | # Class to represent a cell edit, if needed elsewhere. 
9 | class CellEdit: 10 | def __init__(self, cell_address: str, value: str): 11 | self.cell_address = cell_address 12 | self.value = value 13 | 14 | class ExcelTools: 15 | def __init__(self): 16 | self.excel_app = None 17 | self.open_workbooks = {} 18 | 19 | def _initialize_excel_app(self): 20 | """Initialize Excel application if not already initialized""" 21 | try: 22 | if self.excel_app is None: 23 | print("Initializing new Excel application...") 24 | # Initialize COM in this thread 25 | pythoncom.CoInitialize() 26 | self.excel_app = win32com.client.Dispatch("Excel.Application") 27 | # Set visibility after dispatch 28 | try: 29 | self.excel_app.Visible = True 30 | self.excel_app.DisplayAlerts = False 31 | except Exception as e: 32 | print(f"Warning: Could not set Excel visibility: {str(e)}") 33 | except Exception as e: 34 | print(f"Error initializing Excel: {str(e)}") 35 | raise 36 | 37 | def _activate_workbook(self, file_path: str): 38 | """ 39 | Helper method to activate an existing workbook by file path. 40 | If the workbook is already open, it is activated; otherwise, it is opened. 41 | Returns: (workbook, is_new_workbook) 42 | """ 43 | self._initialize_excel_app() 44 | normalized_path = os.path.abspath(file_path).lower() 45 | 46 | # Check if the workbook is already in our tracking dict 47 | if normalized_path in self.open_workbooks: 48 | try: 49 | self.open_workbooks[normalized_path].Activate() 50 | return self.open_workbooks[normalized_path], False 51 | except Exception as e: 52 | print(f"Warning: Could not activate tracked workbook: {str(e)}") 53 | del self.open_workbooks[normalized_path] 54 | 55 | # Check if the workbook is open in Excel 56 | try: 57 | for wb in self.excel_app.Workbooks: 58 | try: 59 | if wb.FullName.lower() == normalized_path: 60 | wb.Activate() 61 | self.open_workbooks[normalized_path] = wb 62 | return wb, False 63 | except: 64 | continue 65 | except: 66 | pass 67 | 68 | # Open or create the workbook 69 | try: 70 | if os.path.exists(file_path): 71 | wb = self.excel_app.Workbooks.Open(file_path) 72 | else: 73 | wb = self.excel_app.Workbooks.Add() 74 | wb.SaveAs(file_path) 75 | 76 | # For new workbooks, adjust the view 77 | active_window = wb.Windows(1) 78 | active_window.Zoom = 200 # Set zoom to 100% 79 | 80 | self.open_workbooks[normalized_path] = wb 81 | return wb, True 82 | except Exception as e: 83 | print(f"Error opening workbook: {str(e)}") 84 | raise 85 | 86 | def _get_full_path(self, file_path: str) -> str: 87 | """Helper method to get the full path, with better error handling and logging""" 88 | try: 89 | if os.path.isabs(file_path): 90 | print(f"Using absolute path: {file_path}") 91 | return file_path 92 | 93 | # Use the datalake directory relative to the project root 94 | base_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'datalake') 95 | 96 | # Create the datalake directory if it doesn't exist 97 | if not os.path.exists(base_dir): 98 | os.makedirs(base_dir) 99 | print(f"Created datalake directory at: {base_dir}") 100 | 101 | full_path = os.path.join(base_dir, file_path) 102 | print(f"Converted to full path: {full_path}") 103 | return full_path 104 | 105 | except Exception as e: 106 | print(f"Error in path handling: {str(e)}") 107 | raise 108 | 109 | def read_excel(self, file_path: str) -> str: 110 | """ 111 | Unified method to read a fixed range (A1:Z20) from the first worksheet of an Excel file. 112 | This method activates the target workbook and worksheet based on the provided file path. 
113 | 114 | Args: 115 | file_path (str): The full path of the Excel file. 116 | 117 | Returns: 118 | str: A formatted string with each non-empty cell address and its value. 119 | """ 120 | try: 121 | self._activate_workbook(file_path) 122 | state = [] 123 | 124 | # Define the range to read ("A1:Z20"). 125 | range_obj = self.worksheet.Range("A1:Z20") 126 | for row in range(1, 21): # Rows 1 to 20. 127 | for col in range(1, 27): # Columns 1 to 26 (A to Z). 128 | cell = range_obj.Cells(row, col) 129 | if cell.Text: 130 | col_letter = chr(64 + col) # Convert column number to letter. 131 | cell_address = f"{col_letter}{row}" 132 | state.append(f"{cell_address}: {cell.Text}") 133 | result = "\n".join(state) 134 | return result 135 | except Exception as e: 136 | error_msg = f"Error reading Excel file: {str(e)}" 137 | return error_msg 138 | 139 | def write_to_excel(self, file_path: str, data: dict) -> str: 140 | """ 141 | Unified method to write data to cells in the first worksheet of an Excel file. 142 | After writing, the workbook is saved. 143 | This method activates the target workbook and worksheet based on the provided file path before performing any write operations. 144 | 145 | Args: 146 | file_path (str): The full path of the Excel file. 147 | data (dict): A dictionary mapping cell addresses (e.g., "A1") to values to write. 148 | If data is nested under a sheet name (e.g., {"Sheet1": { "A1": "Valve Name", ... }}) 149 | or {"Sheet1": [ { ... } ]}), the method extracts the inner dictionary or processes the list payload. 150 | Additionally, if the nested payload (e.g., under "cells") is a list of dictionaries each having 151 | 'cell' and 'value' keys, the method converts the list into a flat dictionary. 152 | 153 | Returns: 154 | str: A status message summarizing the write operations. 155 | """ 156 | try: 157 | self._activate_workbook(file_path) 158 | # Try to make visible after activation 159 | try: 160 | self.excel_app.Visible = True 161 | except: 162 | pass 163 | print(f"Debug: Workbook and worksheet activated for writing using file {file_path}.") 164 | 165 | # Check for nested data payload under a single key. 166 | if isinstance(data, dict) and len(data) == 1: 167 | first_key = next(iter(data)) 168 | inner_data = data[first_key] 169 | # If the inner data is already a dictionary, extract it. 170 | if isinstance(inner_data, dict): 171 | print(f"Debug: Detected nested data payload under sheet name '{first_key}' (dictionary format). Extracting payload.") 172 | data = inner_data 173 | # If the inner data is a list, determine its structure. 174 | elif isinstance(inner_data, list) and len(inner_data) > 0: 175 | # Check if every item in the list is a dict with 'cell' and 'value' keys. 176 | if all(isinstance(item, dict) and 'cell' in item and 'value' in item for item in inner_data): 177 | # Convert the list into a flat dictionary with cell addresses as keys. 178 | print(f"Debug: Detected list of cell-value dictionaries under key '{first_key}'. Converting list to flat dictionary.") 179 | data = {item['cell']: item['value'] for item in inner_data} 180 | else: 181 | # Fallback: Extract the first element if it doesn't match the expected structure. 182 | print(f"Debug: Detected nested data payload under sheet name '{first_key}' (list format) but not in cell-value format. Extracting first element as payload.") 183 | data = inner_data[0] 184 | else: 185 | print("Debug: Data payload nested under sheet name, but format is not recognized. 
Proceeding with original data.") 186 | 187 | results = [] 188 | for cell_address, value in data.items(): 189 | cell = workbook.Worksheets(1).Range(cell_address) 190 | cell.Value2 = value 191 | print(f"Debug: Written {value} to {cell_address} in workbook {file_path}.") 192 | results.append(f"Wrote {value} to {cell_address}") 193 | time.sleep(0.1) # Small delay to allow Excel to refresh. 194 | workbook.Save() 195 | return "\n".join(results) 196 | except Exception as e: 197 | return f"Error writing to Excel file: {str(e)}" 198 | 199 | def create_new_workbook(self, new_file_path: str) -> str: 200 | """ 201 | Create a new Excel workbook, activate its first worksheet, and save it to the specified path. 202 | """ 203 | try: 204 | self._initialize_excel_app() 205 | # Create new workbook without affecting others 206 | self.workbook = self.excel_app.Workbooks.Add() 207 | try: 208 | self.excel_app.Visible = True 209 | except: 210 | pass 211 | 212 | # Save immediately to establish the file 213 | self.workbook.SaveAs(new_file_path) 214 | self.worksheet = self.workbook.Worksheets(1) 215 | self.worksheet.Select() 216 | print(f"Debug: New workbook created and saved at {new_file_path}.") 217 | return f"New workbook created and saved at {new_file_path}" 218 | except Exception as e: 219 | return f"Error creating new workbook: {str(e)}" 220 | 221 | def close_workbook(self, file_path: str) -> str: 222 | """ 223 | Close the workbook specified by file_path if it is open, saving changes before closing. 224 | 225 | Args: 226 | file_path (str): The full path of the Excel file to close. 227 | 228 | Returns: 229 | str: A status message indicating whether the workbook was closed. 230 | """ 231 | try: 232 | if self.excel_app is None: 233 | return "Excel application is not initialized." 234 | normalized_path = os.path.abspath(file_path).lower() 235 | workbook_found = None 236 | for wb in self.excel_app.Workbooks: 237 | try: 238 | if wb.FullName.lower() == normalized_path: 239 | workbook_found = wb 240 | break 241 | except Exception: 242 | continue 243 | 244 | if workbook_found: 245 | if self.workbook and self.workbook.FullName.lower() == normalized_path: # clear our reference before Close(); FullName is invalid afterwards 246 | self.workbook = None 247 | self.worksheet = None 248 | workbook_found.Save() 249 | workbook_found.Close() 250 | print(f"Debug: Closed workbook at {file_path}.") 251 | return f"Workbook at {file_path} closed successfully." 252 | else: 253 | return f"No open workbook found at {file_path}." 254 | except Exception as e: 255 | return f"Error closing workbook: {str(e)}" 256 | 257 | def cleanup(self): 258 | """ 259 | Clean up Excel resources by saving and closing all tracked workbooks 260 | and quitting the Excel application. 
261 | """ 262 | try: 263 | # Save and close all tracked workbooks 264 | for path, wb in self.open_workbooks.items(): 265 | try: 266 | wb.Save() 267 | wb.Close() 268 | except Exception as e: 269 | print(f"Error closing workbook {path}: {str(e)}") 270 | self.open_workbooks.clear() 271 | 272 | if self.excel_app: 273 | # Close any remaining open workbooks 274 | while self.excel_app.Workbooks.Count: 275 | self.excel_app.Workbooks(1).Close() 276 | self.excel_app.Quit() 277 | self.excel_app = None 278 | print("Debug: Excel application quit.") 279 | pythoncom.CoUninitialize() 280 | except Exception as e: 281 | print(f"Error during cleanup: {str(e)}") 282 | 283 | def process_excel(self, file_path, write_data=None): 284 | """Process Excel operations with better workbook state handling""" 285 | try: 286 | self._initialize_excel_app() 287 | 288 | # Convert to full path 289 | abs_path = self._get_full_path(file_path) 290 | if not abs_path.endswith('.xlsx'): 291 | abs_path += '.xlsx' 292 | 293 | print(f"Processing Excel file: {abs_path}") 294 | 295 | # Ensure we have a valid Excel instance 296 | try: 297 | _ = self.excel_app.Visible 298 | except: 299 | print("Reconnecting to Excel...") 300 | self.excel_app = win32com.client.Dispatch("Excel.Application") 301 | self.excel_app.Visible = True 302 | self.excel_app.DisplayAlerts = False 303 | 304 | # Make Excel visible 305 | self.excel_app.Visible = True 306 | 307 | # Get or create workbook 308 | workbook, is_new_workbook = self._activate_workbook(abs_path) 309 | worksheet = workbook.Worksheets(1) 310 | worksheet.Activate() 311 | 312 | # Only maximize if it's a new workbook 313 | if is_new_workbook: 314 | try: 315 | window = workbook.Windows(1) 316 | window.WindowState = -4137 # xlMaximized 317 | except Exception as e: 318 | print(f"Warning: Could not maximize window: {str(e)}") 319 | 320 | if write_data: 321 | print("Writing data to workbook...") 322 | for cell, value in write_data.items(): 323 | try: 324 | print(f"Writing {value} to {cell}") 325 | cell_range = worksheet.Range(cell) 326 | 327 | # Store original color and format 328 | original_color = cell_range.Interior.Color 329 | original_pattern = cell_range.Interior.Pattern 330 | 331 | # Set value and highlight cell 332 | cell_range.Value = value 333 | cell_range.Select() 334 | cell_range.Interior.Color = 0xFF9019 335 | 336 | # Force immediate update 337 | self.excel_app.ScreenUpdating = True 338 | 339 | time.sleep(0.1) 340 | 341 | # Restore the original color and pattern 342 | cell_range.Interior.Color = original_color 343 | cell_range.Interior.Pattern = original_pattern 344 | 345 | time.sleep(0.1) 346 | except Exception as e: 347 | print(f"Error writing to cell {cell}: {str(e)}") 348 | continue 349 | 350 | try: 351 | workbook.Save() 352 | print("Workbook saved successfully") 353 | except Exception as e: 354 | print(f"Error saving workbook: {str(e)}") 355 | time.sleep(1) 356 | workbook.Save() 357 | 358 | return f"Successfully wrote data to {abs_path}" 359 | else: 360 | print("Reading data from workbook...") 361 | data = {} 362 | used_range = worksheet.UsedRange 363 | for row in range(1, used_range.Rows.Count + 1): 364 | for col in range(1, used_range.Columns.Count + 1): 365 | cell = worksheet.Cells(row, col) 366 | if cell.Value is not None: 367 | col_letter = chr(64 + col) if col <= 26 else chr(64 + (col - 1) // 26) + chr(65 + (col - 1) % 26) 368 | cell_ref = f"{col_letter}{row}" 369 | data[cell_ref] = cell.Value 370 | return str(data) 371 | 372 | except Exception as e: 373 | print(f"Excel operation error: {str(e)}") 374 | try: 
375 | self.excel_app = win32com.client.Dispatch("Excel.Application") 376 | self.excel_app.Visible = True 377 | except: 378 | pass 379 | return f"Error processing Excel file: {str(e)}" -------------------------------------------------------------------------------- /tools/image_tools.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import requests 3 | import os 4 | from typing import Optional 5 | import PyPDF2 6 | from dotenv import load_dotenv 7 | import tkinter as tk 8 | from PIL import Image, ImageTk 9 | import time 10 | import psutil 11 | import subprocess 12 | import win32api 13 | load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env')) 14 | 15 | class ImageTools: 16 | def __init__(self): 17 | self.api_key = os.getenv("OPENAI_API_KEY") 18 | self.api_url = "https://api.openai.com/v1/chat/completions" 19 | self.base_directory = os.getenv("BASE_DIRECTORY") 20 | self.image_path = None 21 | self.pdf_path = None 22 | self.viewer = None 23 | self.pdf_process = None 24 | 25 | def show_file(self, file_path): 26 | """Display the file in a window""" 27 | if file_path.lower().endswith('.pdf'): 28 | print(f"Opening PDF: {file_path}") 29 | # Move mouse to main monitor center before opening PDF 30 | self.focus_main_monitor() 31 | # For PDFs, use system default viewer and track the process 32 | self.pdf_process = subprocess.Popen(['start', '', file_path], shell=True) 33 | # Wait 2 seconds then close 34 | time.sleep(2) 35 | print("Attempting to close PDF viewer...") 36 | self.close_pdf_viewer() 37 | else: 38 | # For images, use Tkinter 39 | self.viewer = tk.Toplevel() 40 | self.viewer.title("Image Analysis") 41 | self.viewer.attributes('-topmost', True) 42 | 43 | # Position window on main monitor 44 | main_screen_width = self.viewer.winfo_screenwidth() 45 | main_screen_height = self.viewer.winfo_screenheight() 46 | window_width = 800 47 | window_height = 600 48 | x = (main_screen_width - window_width) // 2 49 | y = (main_screen_height - window_height) // 2 50 | self.viewer.geometry(f"{window_width}x{window_height}+{x}+{y}") 51 | 52 | # Load and display image 53 | img = Image.open(file_path) 54 | # Resize if too large while maintaining aspect ratio 55 | max_size = (800, 600) 56 | img.thumbnail(max_size, Image.Resampling.LANCZOS) 57 | photo = ImageTk.PhotoImage(img) 58 | 59 | label = tk.Label(self.viewer, image=photo) 60 | label.image = photo # Keep a reference 61 | label.pack() 62 | 63 | def focus_main_monitor(self): 64 | """Move mouse to center of main monitor to help focus windows there""" 65 | try: 66 | # Get main monitor resolution 67 | width = win32api.GetSystemMetrics(0) 68 | height = win32api.GetSystemMetrics(1) 69 | # Move mouse to center of main monitor 70 | win32api.SetCursorPos((width // 2, height // 2)) 71 | except Exception as e: 72 | print(f"Could not move cursor to main monitor: {e}") 73 | 74 | def close_pdf_viewer(self): 75 | """Close any PDF viewer processes""" 76 | print("Searching for PDF viewer processes...") 77 | 78 | try: 79 | # Windows-specific: Use taskkill to close PDF viewer windows 80 | # This targets window titles containing "PDF" or specific viewer names 81 | subprocess.run([ 82 | 'taskkill', '/F', '/FI', 83 | 'WINDOWTITLE eq *PDF*' 84 | ], capture_output=True) 85 | 86 | # Also try to close specific PDF applications 87 | subprocess.run(['taskkill', '/F', '/IM', 'AcroRd32.exe'], capture_output=True) 88 | subprocess.run(['taskkill', '/F', '/IM', 'Acrobat.exe'], capture_output=True) 89 | 
subprocess.run(['taskkill', '/F', '/IM', 'SumatraPDF.exe'], capture_output=True) 90 | 91 | # For Edge PDF viewer specifically 92 | subprocess.run([ 93 | 'powershell', 94 | '-command', 95 | "Get-Process | Where-Object {$_.MainWindowTitle -like '*PDF*'} | Stop-Process -Force" 96 | ], capture_output=True) 97 | 98 | except Exception as e: 99 | print(f"Error during PDF viewer cleanup: {e}") 100 | 101 | def close_viewer(self): 102 | """Close the viewer window if it exists""" 103 | if self.viewer: 104 | print("Closing image viewer...") 105 | self.viewer.destroy() 106 | self.viewer = None 107 | print("Ensuring PDF viewer is closed...") 108 | self.close_pdf_viewer() 109 | 110 | def analyze_file(self, question: str, file_name: Optional[str] = None) -> str: 111 | """Analyze either an image or PDF file and answer questions about it""" 112 | try: 113 | # Always use the specified file if provided 114 | if file_name: 115 | file_path = os.path.join(self.base_directory, file_name) 116 | print(f"Debug: Using specified file: {file_name}") 117 | else: 118 | # Search for first image or PDF if no specific file was requested 119 | for file in os.listdir(self.base_directory): 120 | if file.lower().endswith(('.png', '.jpg', '.jpeg', '.pdf')): 121 | file_path = os.path.join(self.base_directory, file) 122 | print(f"Debug: Found first file: {file}") 123 | break 124 | else: 125 | return "Error: No image or PDF file found in the datalake directory" 126 | 127 | if not os.path.exists(file_path): 128 | return f"Error: File not found at {file_path}" 129 | 130 | print(f"Debug: Attempting to analyze file at: {file_path}") 131 | 132 | # Show the file before analysis 133 | self.show_file(file_path) 134 | 135 | # Determine file type and process accordingly 136 | if file_path.lower().endswith('.pdf'): 137 | # Process PDF 138 | pdf_text = "" 139 | with open(file_path, 'rb') as pdf_file: 140 | pdf_reader = PyPDF2.PdfReader(pdf_file) 141 | for page in pdf_reader.pages: 142 | pdf_text += page.extract_text() + "\n" 143 | 144 | payload = { 145 | "model": "gpt-4o", 146 | "messages": [ 147 | { 148 | "role": "user", 149 | "content": f"PDF Content:\n{pdf_text}\n\nQuestion: {question}" 150 | } 151 | ], 152 | "max_tokens": 500 153 | } 154 | else: 155 | # Process Image 156 | with open(file_path, "rb") as image_file: 157 | base64_image = base64.b64encode(image_file.read()).decode('utf-8') 158 | 159 | payload = { 160 | "model": "gpt-4o", 161 | "messages": [ 162 | { 163 | "role": "user", 164 | "content": [ 165 | { 166 | "type": "text", 167 | "text": question 168 | }, 169 | { 170 | "type": "image_url", 171 | "image_url": { 172 | "url": f"data:image/png;base64,{base64_image}", 173 | # "detail": "high" # Can be "low", "high", or "auto" 174 | } 175 | } 176 | ] 177 | } 178 | ], 179 | "max_tokens": 3000 180 | } 181 | 182 | headers = { 183 | "Authorization": f"Bearer {self.api_key}", 184 | "Content-Type": "application/json" 185 | } 186 | 187 | response = requests.post(self.api_url, headers=headers, json=payload) 188 | 189 | if response.status_code != 200: 190 | print(f"\nDebug: API Error: {response.text}", flush=True) 191 | return f"Error: API call failed with status code: {response.status_code}" 192 | 193 | result = response.json() 194 | 195 | # Close the viewer after analysis 196 | self.close_viewer() 197 | 198 | print("RETURNED FROM IMAGE TOOL", result['choices'][0]['message']['content']) 199 | 200 | return result['choices'][0]['message']['content'].strip() 201 | 202 | except Exception as e: 203 | print(f"\nDebug: Exception details: {str(e)}", 
flush=True) 204 | self.close_viewer() # Ensure viewer is closed even if there's an error 205 | return f"Error analyzing file: {str(e)}" -------------------------------------------------------------------------------- /tools/word_tools.py: -------------------------------------------------------------------------------- 1 | import win32com.client 2 | import os 3 | import time 4 | import pythoncom 5 | 6 | class WordTools: 7 | def __init__(self): 8 | self.word_app = None 9 | self.document = None 10 | self.base_directory = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'datalake') 11 | 12 | # Create the datalake directory if it doesn't exist 13 | if not os.path.exists(self.base_directory): 14 | os.makedirs(self.base_directory) 15 | print(f"Created datalake directory at: {self.base_directory}") 16 | 17 | def _initialize_word_app(self): 18 | """Initialize Microsoft Word application if not already running""" 19 | if self.word_app is None: 20 | # Initialize COM in this thread 21 | pythoncom.CoInitialize() 22 | self.word_app = win32com.client.Dispatch("Word.Application") 23 | self.word_app.Visible = True 24 | print("Debug: Word application initialized.") 25 | 26 | def process_word(self, file_path: str, content: str = None) -> str: 27 | """ 28 | Process a Word document - read current content and optionally write new content 29 | """ 30 | try: 31 | # Convert relative path to full path if needed 32 | if not os.path.isabs(file_path): 33 | file_path = os.path.join(self.base_directory, file_path) 34 | print(f"Debug: Converting to full path: {file_path}") 35 | 36 | print("Debug: About to initialize Word...") 37 | self._initialize_word_app() 38 | print("Debug: Word initialized successfully") 39 | 40 | # Create new document if it doesn't exist 41 | if not os.path.exists(file_path): 42 | print("Debug: Creating new document...") 43 | self.document = self.word_app.Documents.Add() 44 | print("Debug: Document added, about to save...") 45 | try: 46 | self.document.SaveAs(file_path) 47 | print(f"Debug: Document saved successfully at {file_path}") 48 | except Exception as save_error: 49 | print(f"Debug: Error saving document: {str(save_error)}") 50 | raise 51 | else: 52 | print(f"Debug: Opening existing document at {file_path}") 53 | self.document = self.word_app.Documents.Open(file_path) 54 | 55 | # Read initial content 56 | initial_content = self.document.Content.Text 57 | 58 | # Write new content if provided 59 | if content: 60 | print("Debug: Writing new content...") 61 | self.document.Content.Text = content 62 | self.document.Save() 63 | time.sleep(0.1) # Give Word time to process 64 | 65 | # Read final content 66 | final_content = self.document.Content.Text 67 | 68 | # Format result 69 | result_sections = [ 70 | "Initial Content:", 71 | initial_content.strip() if initial_content.strip() else "(Empty document)", 72 | "\nWrite Operations:" if content else "", 73 | f"Wrote new content: {content}" if content else "", 74 | "\nFinal Content:", 75 | final_content.strip() if final_content.strip() else "(Empty document)" 76 | ] 77 | 78 | return "\n".join(filter(None, result_sections)) 79 | 80 | except Exception as e: 81 | error_details = f"Error processing Word document: {str(e)}\nType: {type(e)}" 82 | print(f"Debug: {error_details}") 83 | return error_details 84 | 85 | def cleanup(self): 86 | """Clean up Word resources""" 87 | try: 88 | if self.document: 89 | self.document.Close() 90 | if self.word_app: 91 | self.word_app.Quit() 92 | except: 93 | pass 
--------------------------------------------------------------------------------
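Usage sketch (illustrative, not a file in this repository): the tool classes above are normally driven by the assistant entry points, but a short standalone script makes the intended call pattern easier to see. This is a minimal sketch that assumes Windows with Microsoft Office and pywin32 installed and that it is run from the project root; the file names budget_demo.xlsx and notes_demo.docx are made-up examples, not files from the datalake.

from tools.excel_tools import ExcelTools
from tools.word_tools import WordTools

excel = ExcelTools()
word = WordTools()
try:
    # process_excel resolves bare names against the datalake folder and appends .xlsx,
    # so a plain file name is enough; write_data maps cell addresses to values.
    print(excel.process_excel("budget_demo.xlsx",
                              write_data={"A1": "Item", "B1": "Cost", "A2": "Flour", "B2": 12.5}))
    # Calling it again without write_data reads the used range back as a dict-like string.
    print(excel.process_excel("budget_demo.xlsx"))

    # process_word creates the document in the datalake folder if needed, writes the
    # given text, and returns the before/after contents.
    print(word.process_word("notes_demo.docx", "Draft notes written by the demo script."))
finally:
    # Release the COM objects so Excel and Word do not keep running in the background.
    excel.cleanup()
    word.cleanup()

Running this should briefly pop up Excel and Word (both classes force Visible = True) and leave the two demo files in the datalake folder.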