├── .gitattributes
├── .gitignore
├── README.md
├── ai_assistant.py
├── assets
│   ├── animated_assistant.gif
│   ├── messages-square.png
│   ├── mic-off.png
│   ├── mic-off.svg
│   ├── mic.png
│   ├── mic.svg
│   └── static_assistant.png
├── chat_interface.py
├── datalake
│   ├── Bakery_TEA.xlsx
│   ├── business_plan.docx
│   ├── data_by_cities.xlsx
│   ├── eggs.pdf
│   ├── electricity.png
│   └── wages.pdf
├── smartest_kid.py
└── tools
    ├── directory_tools.py
    ├── excel_tools.py
    ├── image_tools.py
    └── word_tools.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Set default behavior to automatically normalize line endings.
3 | ###############################################################################
4 | * text=auto
5 |
6 | ###############################################################################
7 | # Set default behavior for command prompt diff.
8 | #
9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs diff=csharp
14 |
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln merge=binary
26 | #*.csproj merge=binary
27 | #*.vbproj merge=binary
28 | #*.vcxproj merge=binary
29 | #*.vcproj merge=binary
30 | #*.dbproj merge=binary
31 | #*.fsproj merge=binary
32 | #*.lsproj merge=binary
33 | #*.wixproj merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj merge=binary
36 | #*.wwaproj merge=binary
37 |
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg binary
44 | #*.png binary
45 | #*.gif binary
46 |
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | #
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the
52 | # entries below.
53 | ###############################################################################
54 | #*.doc diff=astextplain
55 | #*.DOC diff=astextplain
56 | #*.docx diff=astextplain
57 | #*.DOCX diff=astextplain
58 | #*.dot diff=astextplain
59 | #*.DOT diff=astextplain
60 | #*.pdf diff=astextplain
61 | #*.PDF diff=astextplain
62 | #*.rtf diff=astextplain
63 | #*.RTF diff=astextplain
64 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 | .env
13 |
14 | # User-specific files (MonoDevelop/Xamarin Studio)
15 | *.userprefs
16 |
17 | # Mono auto generated files
18 | mono_crash.*
19 |
20 | # Build results
21 | [Dd]ebug/
22 | [Dd]ebugPublic/
23 | [Rr]elease/
24 | [Rr]eleases/
25 | x64/
26 | x86/
27 | [Ww][Ii][Nn]32/
28 | [Aa][Rr][Mm]/
29 | [Aa][Rr][Mm]64/
30 | bld/
31 | [Bb]in/
32 | [Oo]bj/
33 | [Oo]ut/
34 | [Ll]og/
35 | [Ll]ogs/
36 |
37 | # Visual Studio 2015/2017 cache/options directory
38 | .vs/
39 | # Uncomment if you have tasks that create the project's static files in wwwroot
40 | #wwwroot/
41 |
42 | # Visual Studio 2017 auto generated files
43 | Generated\ Files/
44 |
45 | # MSTest test Results
46 | [Tt]est[Rr]esult*/
47 | [Bb]uild[Ll]og.*
48 |
49 | # NUnit
50 | *.VisualState.xml
51 | TestResult.xml
52 | nunit-*.xml
53 |
54 | # Build Results of an ATL Project
55 | [Dd]ebugPS/
56 | [Rr]eleasePS/
57 | dlldata.c
58 |
59 | # Benchmark Results
60 | BenchmarkDotNet.Artifacts/
61 |
62 | # .NET Core
63 | project.lock.json
64 | project.fragment.lock.json
65 | artifacts/
66 |
67 | # ASP.NET Scaffolding
68 | ScaffoldingReadMe.txt
69 |
70 | # StyleCop
71 | StyleCopReport.xml
72 |
73 | # Files built by Visual Studio
74 | *_i.c
75 | *_p.c
76 | *_h.h
77 | *.ilk
78 | *.meta
79 | *.obj
80 | *.iobj
81 | *.pch
82 | *.pdb
83 | *.ipdb
84 | *.pgc
85 | *.pgd
86 | *.rsp
87 | *.sbr
88 | *.tlb
89 | *.tli
90 | *.tlh
91 | *.tmp
92 | *.tmp_proj
93 | *_wpftmp.csproj
94 | *.log
95 | *.vspscc
96 | *.vssscc
97 | .builds
98 | *.pidb
99 | *.svclog
100 | *.scc
101 |
102 | # Chutzpah Test files
103 | _Chutzpah*
104 |
105 | # Visual C++ cache files
106 | ipch/
107 | *.aps
108 | *.ncb
109 | *.opendb
110 | *.opensdf
111 | *.sdf
112 | *.cachefile
113 | *.VC.db
114 | *.VC.VC.opendb
115 |
116 | # Visual Studio profiler
117 | *.psess
118 | *.vsp
119 | *.vspx
120 | *.sap
121 |
122 | # Visual Studio Trace Files
123 | *.e2e
124 |
125 | # TFS 2012 Local Workspace
126 | $tf/
127 |
128 | # Guidance Automation Toolkit
129 | *.gpState
130 |
131 | # ReSharper is a .NET coding add-in
132 | _ReSharper*/
133 | *.[Rr]e[Ss]harper
134 | *.DotSettings.user
135 |
136 | # TeamCity is a build add-in
137 | _TeamCity*
138 |
139 | # DotCover is a Code Coverage Tool
140 | *.dotCover
141 |
142 | # AxoCover is a Code Coverage Tool
143 | .axoCover/*
144 | !.axoCover/settings.json
145 |
146 | # Coverlet is a free, cross platform Code Coverage Tool
147 | coverage*.json
148 | coverage*.xml
149 | coverage*.info
150 |
151 | # Visual Studio code coverage results
152 | *.coverage
153 | *.coveragexml
154 |
155 | # NCrunch
156 | _NCrunch_*
157 | .*crunch*.local.xml
158 | nCrunchTemp_*
159 |
160 | # MightyMoose
161 | *.mm.*
162 | AutoTest.Net/
163 |
164 | # Web workbench (sass)
165 | .sass-cache/
166 |
167 | # Installshield output folder
168 | [Ee]xpress/
169 |
170 | # DocProject is a documentation generator add-in
171 | DocProject/buildhelp/
172 | DocProject/Help/*.HxT
173 | DocProject/Help/*.HxC
174 | DocProject/Help/*.hhc
175 | DocProject/Help/*.hhk
176 | DocProject/Help/*.hhp
177 | DocProject/Help/Html2
178 | DocProject/Help/html
179 |
180 | # Click-Once directory
181 | publish/
182 |
183 | # Publish Web Output
184 | *.[Pp]ublish.xml
185 | *.azurePubxml
186 | # Note: Comment the next line if you want to checkin your web deploy settings,
187 | # but database connection strings (with potential passwords) will be unencrypted
188 | *.pubxml
189 | *.publishproj
190 |
191 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
192 | # checkin your Azure Web App publish settings, but sensitive information contained
193 | # in these scripts will be unencrypted
194 | PublishScripts/
195 |
196 | # NuGet Packages
197 | *.nupkg
198 | # NuGet Symbol Packages
199 | *.snupkg
200 | # The packages folder can be ignored because of Package Restore
201 | **/[Pp]ackages/*
202 | # except build/, which is used as an MSBuild target.
203 | !**/[Pp]ackages/build/
204 | # Uncomment if necessary however generally it will be regenerated when needed
205 | #!**/[Pp]ackages/repositories.config
206 | # NuGet v3's project.json files produces more ignorable files
207 | *.nuget.props
208 | *.nuget.targets
209 |
210 | # Microsoft Azure Build Output
211 | csx/
212 | *.build.csdef
213 |
214 | # Microsoft Azure Emulator
215 | ecf/
216 | rcf/
217 |
218 | # Windows Store app package directories and files
219 | AppPackages/
220 | BundleArtifacts/
221 | Package.StoreAssociation.xml
222 | _pkginfo.txt
223 | *.appx
224 | *.appxbundle
225 | *.appxupload
226 |
227 | # Visual Studio cache files
228 | # files ending in .cache can be ignored
229 | *.[Cc]ache
230 | # but keep track of directories ending in .cache
231 | !?*.[Cc]ache/
232 |
233 | # Others
234 | ClientBin/
235 | ~$*
236 | *~
237 | *.dbmdl
238 | *.dbproj.schemaview
239 | *.jfm
240 | *.pfx
241 | *.publishsettings
242 | orleans.codegen.cs
243 |
244 | # Including strong name files can present a security risk
245 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
246 | #*.snk
247 |
248 | # Since there are multiple workflows, uncomment next line to ignore bower_components
249 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
250 | #bower_components/
251 |
252 | # RIA/Silverlight projects
253 | Generated_Code/
254 |
255 | # Backup & report files from converting an old project file
256 | # to a newer Visual Studio version. Backup files are not needed,
257 | # because we have git ;-)
258 | _UpgradeReport_Files/
259 | Backup*/
260 | UpgradeLog*.XML
261 | UpgradeLog*.htm
262 | ServiceFabricBackup/
263 | *.rptproj.bak
264 |
265 | # SQL Server files
266 | *.mdf
267 | *.ldf
268 | *.ndf
269 |
270 | # Business Intelligence projects
271 | *.rdl.data
272 | *.bim.layout
273 | *.bim_*.settings
274 | *.rptproj.rsuser
275 | *- [Bb]ackup.rdl
276 | *- [Bb]ackup ([0-9]).rdl
277 | *- [Bb]ackup ([0-9][0-9]).rdl
278 |
279 | # Microsoft Fakes
280 | FakesAssemblies/
281 |
282 | # GhostDoc plugin setting file
283 | *.GhostDoc.xml
284 |
285 | # Node.js Tools for Visual Studio
286 | .ntvs_analysis.dat
287 | node_modules/
288 |
289 | # Visual Studio 6 build log
290 | *.plg
291 |
292 | # Visual Studio 6 workspace options file
293 | *.opt
294 |
295 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
296 | *.vbw
297 |
298 | # Visual Studio LightSwitch build output
299 | **/*.HTMLClient/GeneratedArtifacts
300 | **/*.DesktopClient/GeneratedArtifacts
301 | **/*.DesktopClient/ModelManifest.xml
302 | **/*.Server/GeneratedArtifacts
303 | **/*.Server/ModelManifest.xml
304 | _Pvt_Extensions
305 |
306 | # Paket dependency manager
307 | .paket/paket.exe
308 | paket-files/
309 |
310 | # FAKE - F# Make
311 | .fake/
312 |
313 | # CodeRush personal settings
314 | .cr/personal
315 |
316 | # Python Tools for Visual Studio (PTVS)
317 | __pycache__/
318 | *.pyc
319 |
320 | # Cake - Uncomment if you are using it
321 | # tools/**
322 | # !tools/packages.config
323 |
324 | # Tabs Studio
325 | *.tss
326 |
327 | # Telerik's JustMock configuration file
328 | *.jmconfig
329 |
330 | # BizTalk build output
331 | *.btp.cs
332 | *.btm.cs
333 | *.odx.cs
334 | *.xsd.cs
335 |
336 | # OpenCover UI analysis results
337 | OpenCover/
338 |
339 | # Azure Stream Analytics local run output
340 | ASALocalRun/
341 |
342 | # MSBuild Binary and Structured Log
343 | *.binlog
344 |
345 | # NVidia Nsight GPU debugger configuration file
346 | *.nvuser
347 |
348 | # MFractors (Xamarin productivity tool) working folder
349 | .mfractor/
350 |
351 | # Local History for Visual Studio
352 | .localhistory/
353 |
354 | # BeatPulse healthcheck temp database
355 | healthchecksdb
356 |
357 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
358 | MigrationBackup/
359 |
360 | # Ionide (cross platform F# VS Code tools) working folder
361 | .ionide/
362 |
363 | # Fody - auto-generated XML schema
364 | FodyWeavers.xsd
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SmartestKid AI Assistant
2 |
3 | A Windows desktop AI assistant built in Python. The assistant (without tools) is roughly 1,000 lines of Python, with a super simple chat UI inspired by the original AI, SmarterChild. It uses Windows COM automation to interface with Microsoft Office (Word, Excel), images, and your file system. Perfect for Windows users looking to explore AI-powered desktop automation.
4 |
5 | ## Demo
6 | https://github.com/user-attachments/assets/a7b0ae86-53d6-4407-b2dd-ea6f4abb59e4
7 |
8 | ## Features
9 |
10 | - Toggle between voice and text input modes
11 | - Interface with Word, Excel, Images, and your file system (Windows only)
12 | - Cute draggable interface elements
13 |
14 | ## Requirements
15 |
16 | - Windows OS
17 | - Python 3.7+
18 | - OpenAI API key (`OPENAI_API_KEY`) for AI responses
19 | - Microsoft Office (for Word/Excel features)
20 | - Virtual environment (recommended)
21 |
22 | ## Setup
23 |
24 | 1. Clone the repository
25 | 2. Create and activate a virtual environment:
26 | ```bash
27 | python -m venv venv
28 | # On Windows:
29 | .\venv\Scripts\activate
30 | # On Unix/MacOS:
31 | source venv/bin/activate
32 | ```
33 | 3. Install dependencies:
34 | ```bash
35 | pip install openai pillow pyaudio pywin32 python-dotenv  # tkinter ships with the standard Python installer on Windows
36 | ```
37 | 4. Create a `.env` file in the root directory with your API keys:
38 | ```env
39 | # API Keys
40 | OPENAI_API_KEY=your_openai_api_key_here
41 |
42 | # Paths
43 | DATALAKE_DIRECTORY=path/to/your/datalake
44 | ```
45 | 5. Run the application:
46 | ```bash
47 | python smartest_kid.py
48 | ```
49 |
50 | ## Usage
51 |
52 | - Click the microphone icon to toggle voice input
53 | - Click the message icon to toggle the chat interface
54 | - Drag the robot or chat window to reposition them
55 | - Press ESC to exit the application
56 |
57 | ## Project Structure
58 |
59 | - `smartest_kid.py`: Main application and robot animation logic
60 | - `chat_interface.py`: Chat UI implementation
61 | - `ai_assistant.py`: AI integration with the OpenAI API
62 | - `assets/`: Contains UI icons and robot character images
63 | - `tools/`: Contains tools for the assistant to use
64 | - `datalake/`: Contains data for the assistant to use
65 | - `.env`: Configuration and API keys
66 |
67 | ## License
68 |
69 | MIT License
70 |
71 | ## Contributing
72 |
73 | Want to contribute? Here are some areas we'd love help with:
74 | 1. Office Integration - Expand Excel/Word functionality and add new Office app support
75 | 2. Assistant Personality - Add Clippy-style emotions and contextual reactions (pls someone find these gifs)
76 | 3. New Tools - Integrate with more applications (PowerPoint, PDF readers, browsers, etc.)
77 |
78 | Feel free to open an issue or submit a pull request!
79 |
80 | ## Authors
81 |
82 | Victor Von Miller & Emmett Goodman
83 |
--------------------------------------------------------------------------------
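A quick way to sanity-check the OpenAI wiring and the tools without launching the Tkinter UI is to drive `ChatCompletionAssistant` (defined in `ai_assistant.py`, below) from a small script. This is a minimal sketch, not part of the repo; it assumes the `.env` from the README is in place and that it is run from the project root:

```python
# headless_check.py -- hypothetical smoke test, not included in the repository
import os
from dotenv import load_dotenv
from ai_assistant import ChatCompletionAssistant

load_dotenv()
assistant = ChatCompletionAssistant(api_key=os.getenv("OPENAI_API_KEY"))

# send_message streams the reply: ui_callback receives text fragments, plus
# {"end_of_message": True} once the reply is complete.
def on_fragment(fragment):
    if isinstance(fragment, dict):
        print()                      # end of the streamed message
    else:
        print(fragment, end="", flush=True)

assistant.send_message("List the files in the datalake.", ui_callback=on_fragment)
assistant.cleanup()  # releases the Excel/Word COM objects
```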
/ai_assistant.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import json
4 | import time
5 | from openai import OpenAI
6 | from tools.excel_tools import ExcelTools
7 | from tools.image_tools import ImageTools
8 | from tools.directory_tools import DirectoryTools
9 | from tools.word_tools import WordTools
10 | from dotenv import load_dotenv
11 |
12 | class ChatCompletionAssistant:
13 | def __init__(self, api_key=None):
14 | # Initialize OpenAI client with the provided API key
15 | self.client = OpenAI(api_key=api_key)
16 |
17 | # Initialize conversation with system message
18 | self.conversation = [
19 | {
20 | "role": "system",
21 | "content": (
22 | "You are an ultra smart modeling expert. You help people build and optimize models of all sorts."
23 | "You have access to various tools such as analyzing files, processing Excel and Word documents, listing directories, and running calculations."
24 | "When you use a tool, always explain what you found or what happened immediately after using it. "
25 | "Also, always, before using a tool, explain succinctly what tools you will use before you use them!"
26 | "Be interactive and conversational - if you need to use multiple tools, discuss the results of each one before moving to the next. "
27 | "In certain scenarios, you will be asked to plug numbers from one excel into another excel calculator and record. This may involve many loops writing in file A, file B, file A, etc. "
28 | )
29 | }
30 | ]
31 | self.functions = [
32 | {
33 | "type": "function",
34 | "function": {
35 | "name": "analyze_file",
36 | "description": "Analyze either an image or PDF file, using computer vision, to return valuable data or answer questions about it. Make sure you know the exact name of the file (i.e. might have to list directory tool first) before you open it!",
37 | "parameters": {
38 | "type": "object",
39 | "properties": {
40 | "question": {
41 | "type": "string",
42 | "description": "Question or instruction about what to analyze in the file. Be comprehensive."
43 | },
44 | "file_name": {
45 | "type": "string",
46 | "description": "Name of the file in the datalake directory."
47 | }
48 | },
49 | "required": ["file_name","question"]
50 | }
51 | }
52 | },
53 | {
54 | "type": "function",
55 | "function": {
56 | "name": "process_excel",
57 | "description": "This function lets you create new excel files and edit existing ones. The input is the excel file name (file_path), as well as a set of tuples that represent data to put in the file (write_data). If the file_path doesnt exist in our data lake, it creates a new one. If the set of tuples is empty, we are not writing anything, just reading. At the end of the call, this function returns returns the post-edited state of the excel file. For example, if you just want to read the file, you can pass an empty list for write_data. However, if you're entering a value or values into an excel calculator, you dont need to do a distinct read because after you write, the function will return the new updated calculated state. write_data should be of the format {'A1': 42, 'B2': 'hello', 'C3': 'apple'}. Cell addresses MUST be in A1 notation (A1, B2, etc).",
58 | "parameters": {
59 | "type": "object",
60 | "properties": {
61 | "file_path": {
62 | "type": "string",
63 | "description": "Name or path of the Excel file to create or edit."
64 | },
65 | "write_data": {
66 | "type": "object",
67 | "description": "REQUIRED for writing. Simple dictionary mapping cell addresses to values, e.g., {'A1': 42, 'B2': 'hello', 'C3': 'apple'}. Cell addresses MUST be in A1 notation (A1, B2, etc).",
68 | "additionalProperties": {
69 | "type": ["string", "number"]
70 | }
71 | }
72 | },
73 | "required": ["file_path", "write_data"]
74 | }
75 | }
76 | },
101 | {
102 | "type": "function",
103 | "function": {
104 | "name": "process_word",
105 | "description": "Process a Word document - read current content and optionally write new content",
106 | "parameters": {
107 | "type": "object",
108 | "properties": {
109 | "file_path": {
110 | "type": "string",
111 | "description": "Name of the Word document (e.g., 'mydoc.docx')"
112 | },
113 | "content": {
114 | "type": "string",
115 | "description": "Optional content to write to the document"
116 | }
117 | },
118 | "required": ["file_path"]
119 | }
120 | }
121 | },
122 | {
123 | "type": "function",
124 | "function": {
125 | "name": "list_directory",
126 | "description": "List all files and directories in the DataLake directory",
127 | "parameters": {
128 | "type": "object",
129 | "properties": {},
130 | "required": []
131 | }
132 | }
133 | },
134 | ]
135 | # Initialize local tool instances.
136 | self.excel_tools = ExcelTools()
137 | self.image_tools = ImageTools()
138 | self.directory_tools = DirectoryTools()
139 | self.word_tools = WordTools()
140 |
141 | def send_message(self, message: str, ui_callback=None):
142 |
143 | loop_counter = 0
144 |
145 | start_time = time.time()
146 |
147 | if message:
148 | self.conversation.append({"role": "user", "content": message})
149 | complete_response = ""
150 |
151 | while True: # Keep going until we get a non-tool finish_reason
152 |
153 | loop_counter += 1
154 |
155 | try:
156 | completion = self.client.chat.completions.create(
157 | model="gpt-4o",
158 | messages=self.conversation,
159 | stream=True,
160 | tools=self.functions,
161 | tool_choice="auto"
162 | )
163 |
164 | final_tool_calls = {} # Track complete tool calls
165 |
166 | for chunk in completion:
167 | delta = chunk.choices[0].delta
168 |
169 | # Handle regular content
170 | if delta.content:
171 | complete_response += delta.content
172 | if ui_callback:
173 | ui_callback(delta.content)
174 |
175 | # Handle tool calls
176 | if delta.tool_calls:
177 | for tool_call in delta.tool_calls:
178 | # Initialize if new tool call
179 | if tool_call.index not in final_tool_calls:
180 | final_tool_calls[tool_call.index] = {
181 | "id": tool_call.id,
182 | "type": "function",
183 | "function": {
184 | "name": tool_call.function.name,
185 | "arguments": ""
186 | }
187 | }
188 |
189 | # Accumulate arguments
190 | if tool_call.function and tool_call.function.arguments:
191 | final_tool_calls[tool_call.index]["function"]["arguments"] += tool_call.function.arguments
192 |
193 | # Check finish reason
194 | if chunk.choices[0].finish_reason == "tool_calls":
195 | for tool_call in final_tool_calls.values():
196 |
197 | try:
198 | if ui_callback:
199 | ui_callback(f"\n\nRunning {tool_call['function']['name']}...\n\n")
200 |
201 | func_args = json.loads(tool_call["function"]["arguments"])
202 | result = self.handle_function_call(tool_call["function"]["name"], func_args)
203 |
204 | # Add to conversation history
205 | self.conversation.append({
206 | "role": "assistant",
207 | "content": None,
208 | "tool_calls": [tool_call]
209 | })
210 | self.conversation.append({
211 | "role": "tool",
212 | "tool_call_id": tool_call["id"],
213 | "content": str(result)
214 | })
215 | except json.JSONDecodeError as e:
216 | print(f"[DEBUG] Error parsing arguments: {e}")
217 | break
218 |
219 | elif chunk.choices[0].finish_reason:
220 | print(f"[DEBUG] Finish reason: {chunk.choices[0].finish_reason}")
221 | if complete_response:
222 | self.conversation.append({"role": "assistant", "content": complete_response})
223 | if ui_callback:
224 | ui_callback({"end_of_message": True})
225 | return complete_response
226 |
227 | except Exception as e:
228 | print(f"Error in send_message: {str(e)}")
229 | return f"Error: {str(e)}"
230 |
231 | def handle_function_call(self, func_name: str, arguments: dict) -> str:
232 | print(f"[DEBUG] Handling function call: {func_name} with arguments {arguments}")
233 | if func_name == "process_excel":
234 | result = self.excel_tools.process_excel(arguments.get("file_path"), arguments.get("write_data"))
235 | elif func_name == "process_word":
236 | result = self.word_tools.process_word(arguments.get("file_path"), arguments.get("content"))
237 | elif func_name == "analyze_file":
238 | result = self.image_tools.analyze_file(arguments.get("question"), arguments.get("file_name"))
239 | elif func_name == "list_directory":
240 | result = self.directory_tools.list_directory()
241 | else:
242 | result = f"Function {func_name} not implemented."
243 | return str(result)
244 |
245 | def cleanup(self):
246 | self.excel_tools.cleanup()
247 | self.word_tools.cleanup()
248 |
--------------------------------------------------------------------------------
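The most error-prone part of the schema above is the `write_data` contract for `process_excel`. As an illustration (hypothetical cell values, not taken from the repo), the arguments the model is expected to emit and the way `handle_function_call` routes them look like this:

```python
import json

# JSON arguments for a single process_excel tool call:
# cell addresses in A1 notation, values as strings or numbers.
arguments = json.dumps({
    "file_path": "Bakery_TEA.xlsx",
    "write_data": {"B4": 1200, "B5": "Flour (kg)"},
})

# ChatCompletionAssistant.handle_function_call parses the JSON and dispatches:
func_args = json.loads(arguments)
# result = self.excel_tools.process_excel(func_args.get("file_path"),
#                                         func_args.get("write_data"))
```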
/assets/animated_assistant.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/assets/animated_assistant.gif
--------------------------------------------------------------------------------
/assets/messages-square.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/assets/messages-square.png
--------------------------------------------------------------------------------
/assets/mic-off.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/assets/mic-off.png
--------------------------------------------------------------------------------
/assets/mic-off.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assets/mic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/assets/mic.png
--------------------------------------------------------------------------------
/assets/mic.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assets/static_assistant.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/assets/static_assistant.png
--------------------------------------------------------------------------------
/chat_interface.py:
--------------------------------------------------------------------------------
1 | import tkinter as tk
2 | from tkinter import ttk
3 | import time
4 |
5 | class ChatInterface(tk.Frame):
6 | def __init__(self, parent, assistant, width=400, height=500, message_queue=None):
7 | super().__init__(parent)
8 | self.assistant = assistant
9 | self.message_queue = message_queue
10 | self.chat_window_id = None
11 | self.receiving_assistant_message = False
12 |
13 | self.configure(width=width, height=height, bg='#ECE9D8')
14 | self.grid_propagate(False)
15 | self.pack_propagate(False)
16 |
17 | main_container = tk.Frame(self, bg='#ECE9D8')
18 | main_container.pack(fill=tk.BOTH, expand=True)
19 |
20 | header_frame = tk.Frame(main_container, bg='#2196F3', height=32)
21 | header_frame.pack(fill=tk.X)
22 |
23 | name_label = tk.Label(header_frame, text="SmartestLad", bg='#2196F3', fg='white', font=('Segoe UI', 10, 'bold'))
24 | name_label.pack(side=tk.LEFT, padx=12, pady=6)
25 |
26 | header_frame.bind("<Button-1>", self.start_move)
27 | header_frame.bind("<B1-Motion>", self.do_move)
28 | name_label.bind("<Button-1>", self.start_move)
29 | name_label.bind("<B1-Motion>", self.do_move)
30 |
31 | chat_height = int(height * 0.7)
32 | history_frame = tk.Frame(main_container, height=chat_height, bg='#ECE9D8')
33 | history_frame.pack(fill=tk.BOTH, expand=True, padx=2, pady=2)
34 | history_frame.pack_propagate(False)
35 |
36 | style = ttk.Style()
37 | style.theme_use('clam')
38 | style.configure("Vertical.TScrollbar", troughcolor="#FFFFFF", background="#D4D0C8", bordercolor="#D4D0C8")
39 |
40 | scrollbar = ttk.Scrollbar(history_frame, style="Vertical.TScrollbar", orient=tk.VERTICAL)
41 | scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
42 |
43 | self.history = tk.Text(history_frame, wrap=tk.WORD, bg='white', font=('Segoe UI', 10), yscrollcommand=scrollbar.set)
44 | self.history.pack(fill=tk.BOTH, expand=True, padx=1, pady=1)
45 | scrollbar.config(command=self.history.yview)
46 |
47 | input_height = int(height * 0.3)
48 | input_frame = tk.Frame(main_container, height=input_height, bg='#ECE9D8')
49 | input_frame.pack(fill=tk.X, side=tk.BOTTOM, padx=2, pady=2)
50 | input_frame.pack_propagate(False)
51 |
52 | self.input_field = tk.Text(input_frame, wrap=tk.WORD, bg='white', font=('Segoe UI', 10), height=5)
53 | self.input_field.pack(fill=tk.BOTH, expand=True, padx=1, pady=1)
54 |
55 | self.input_field.insert('1.0', 'Enter your thoughts here...')
56 | self.input_field.tag_configure('placeholder', foreground='grey')
57 | self.input_field.tag_add('placeholder', '1.0', 'end')
58 |
59 | self.input_field.bind('<FocusIn>', self._on_focus_in)
60 | self.input_field.bind('<FocusOut>', self._on_focus_out)
61 |
62 | self._configure_styles()
63 |
64 | def handle_enter(event):
65 | if not event.state & 0x1:
66 | self.send_message()
67 | return 'break'
68 |
69 | self.input_field.bind('<Return>', handle_enter)
70 |
71 | def start_move(self, event):
72 | self._drag_data = {"x": event.x_root, "y": event.y_root}
73 |
74 | def do_move(self, event):
75 | if not hasattr(self, '_drag_data'): return
76 | dx = event.x_root - self._drag_data["x"]
77 | dy = event.y_root - self._drag_data["y"]
78 | self._drag_data.update({"x": event.x_root, "y": event.y_root})
79 | if self.chat_window_id:
80 | coords = self.master.coords(self.chat_window_id)
81 | self.master.coords(self.chat_window_id, coords[0] + dx, coords[1] + dy)
82 |
83 | def add_message(self, message, sender_type='user'):
84 | # Only add newline if this is a new message (not a continuation)
85 | if not self.receiving_assistant_message:
86 | if self.history.get('1.0', tk.END).strip():
87 | self.history.insert(tk.END, "\n")
88 | timestamp = time.strftime("(%I:%M:%S %p)")
89 | sender = "uberushaximus" if sender_type == 'user' else "SmartestLad"
90 | tag = 'user_message' if sender_type == 'user' else 'assistant_message'
91 |
92 | self.history.insert(tk.END, f"{timestamp} ", 'timestamp')
93 | self.history.insert(tk.END, f"{sender}: ", tag)
94 | self.receiving_assistant_message = (sender_type == 'assistant')
95 |
96 | # Add the message fragment
97 | if isinstance(message, dict) and 'end_of_message' in message:
98 | self.receiving_assistant_message = False
99 | else:
100 | self.history.insert(tk.END, str(message))
101 | self.history.see(tk.END)
102 |
103 | def add_user_message(self, message):
104 | self.add_message(message, sender_type='user')
105 |
106 | def add_assistant_message(self, message):
107 | self.add_message(message, sender_type='assistant')
108 |
109 | def _on_focus_in(self, event):
110 | self._handle_placeholder(event)
111 |
112 | def _on_focus_out(self, event):
113 | self._handle_placeholder(event, removing=False)
114 |
115 | def _handle_placeholder(self, event, removing=True):
116 | text = self.input_field.get('1.0', 'end-1c')
117 | if removing and text == 'Enter your thoughts here...':
118 | self.input_field.delete('1.0', tk.END)
119 | self.input_field.tag_remove('placeholder', '1.0', 'end')
120 | elif not removing and not text.strip():
121 | self.input_field.insert('1.0', 'Enter your thoughts here...')
122 | self.input_field.tag_add('placeholder', '1.0', 'end')
123 |
124 | def send_message(self):
125 | message = self.input_field.get('1.0', tk.END).strip()
126 | if message and message != 'Enter your thoughts here...':
127 | # Clear input field first
128 | self.input_field.delete('1.0', tk.END)
129 |
130 | # Send to message queue if available
131 | if hasattr(self, 'message_queue') and self.message_queue:
132 | self._from_chat_interface = True
133 | self.message_queue.put(message)
134 | # Don't add the message here - let process_ai_messages handle it
135 | return
136 |
137 | # Only add directly if no message queue (fallback case)
138 | self.add_message(message)
139 |
140 | def _configure_styles(self):
141 | style = ttk.Style()
142 | style.theme_use('clam')
143 | style.configure("Vertical.TScrollbar",
144 | troughcolor="#FFFFFF",
145 | background="#D4D0C8",
146 | bordercolor="#D4D0C8")
147 |
148 | self.history.tag_configure('user_message',
149 | foreground='#FF0000',
150 | font=('Segoe UI', 10))
151 | self.history.tag_configure('assistant_message',
152 | foreground='#2196F3',
153 | font=('Segoe UI', 10))
154 | self.history.tag_configure('timestamp',
155 | foreground='#9E9E9E',
156 | font=('Segoe UI', 9))
--------------------------------------------------------------------------------
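`ChatInterface.add_message` is designed to be fed incrementally: the first fragment of an assistant reply prints the timestamp and the "SmartestLad:" tag, later fragments are appended verbatim, and the `{"end_of_message": True}` sentinel emitted by `ai_assistant.py` closes the message. A standalone sketch of that flow (assumes a display is available; `smartest_kid.py` instead embeds the frame in its canvas):

```python
import tkinter as tk
from chat_interface import ChatInterface

root = tk.Tk()
chat = ChatInterface(root, assistant=None)  # no message_queue: send_message falls back to add_message
chat.pack()

chat.add_user_message("How many eggs does the bakery need per week?")

# Simulate an assistant reply arriving as streamed fragments:
for fragment in ["Checking ", "eggs.pdf ", "now..."]:
    chat.add_assistant_message(fragment)
chat.add_assistant_message({"end_of_message": True})  # resets the streaming state

root.mainloop()
```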
/datalake/Bakery_TEA.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/Bakery_TEA.xlsx
--------------------------------------------------------------------------------
/datalake/business_plan.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/business_plan.docx
--------------------------------------------------------------------------------
/datalake/data_by_cities.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/data_by_cities.xlsx
--------------------------------------------------------------------------------
/datalake/eggs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/eggs.pdf
--------------------------------------------------------------------------------
/datalake/electricity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/electricity.png
--------------------------------------------------------------------------------
/datalake/wages.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alkali-Sim/SmartestKid/e0e00df185e618d513a3e1cc3a5619901eb5be24/datalake/wages.pdf
--------------------------------------------------------------------------------
/smartest_kid.py:
--------------------------------------------------------------------------------
1 | import tkinter as tk
2 | from tkinter import ttk
3 | from PIL import Image, ImageTk, ImageDraw, ImageFont, ImageSequence
4 | import os
5 | import wave
6 | import threading
7 | import time
8 | import pyaudio
9 | import audioop # For RMS calculations
10 | import queue
11 | from io import BytesIO
12 | from chat_interface import ChatInterface
13 |
14 | # Import the new ChatCompletion-based assistant.
15 | from ai_assistant import ChatCompletionAssistant
16 |
17 | # ------------------ Recording with VAD ------------------
18 |
19 | def transcribe_file_with_whisper(audio_filename, app):
20 | """
21 | Uses OpenAI's Whisper API to transcribe a local audio file.
22 | Returns only the final transcript string.
23 | """
24 | try:
25 | transcribe_start = time.time() # Start timing transcription
26 | from openai import OpenAI
27 |
28 | print("Calling Whisper API...")
29 | app.is_transcribing = True
30 |
31 | client = OpenAI(api_key=app.assistant_api_key)
32 |
33 | with open(audio_filename, "rb") as audio_file:
34 | try:
35 | response = client.audio.transcriptions.create(
36 | model="whisper-1",
37 | file=audio_file,
38 | response_format="text"
39 | )
40 | print("[WHISPER] Successfully transcribed audio")
41 | except Exception as whisper_error:
42 | print(f"[WHISPER] API Error: {str(whisper_error)}")
43 | raise
44 |
45 | transcript_str = response.strip()
46 | transcribe_duration = time.time() - transcribe_start
47 | print(f"[TIMING] Transcription completed in {transcribe_duration:.2f}s")
48 |
49 | with open("transcription.txt", "w") as f:
50 | f.write(transcript_str)
51 |
52 | return transcript_str
53 |
54 | except Exception as e:
55 | error_msg = f"Error during Whisper transcription: {str(e)}"
56 | app.log(error_msg)
57 | print(error_msg)
58 | return None
59 | finally:
60 | app.is_transcribing = False
61 |
62 |
63 | def record_and_transcribe_vad(app):
64 | """
65 | Records audio until silence is detected using VAD.
66 | Saves the audio to a WAV file and transcribes it via Whisper.
67 | The transcript is then added to the AI processing queue.
68 | """
69 | audio_filename = "temp_recording.wav"
70 | transcript_filename = "transcription.txt"
71 |
72 | try:
73 | chunk = 1024
74 | sample_format = pyaudio.paInt16
75 | channels = 1
76 | rate = 16000
77 | silence_threshold = 200
78 | silence_chunks = 0
79 |
80 | pause_duration_sec = 2.5
81 | max_silence_chunks = int(pause_duration_sec / (chunk / rate))
82 | min_chunks = int(0.5 / (chunk / rate))
83 | frames = []
84 |
85 | p = pyaudio.PyAudio()
86 | stream = p.open(format=sample_format, channels=channels, rate=rate,
87 | input=True, frames_per_buffer=chunk)
88 | app.log("Recording started (VAD enabled). Speak now...")
89 | app.is_listening = False
90 | voiced = False
91 |
92 | while True:
93 | if app.is_paused:
94 | # Always process what we have when muting
95 | break
96 |
97 | data = stream.read(chunk, exception_on_overflow=False)
98 | frames.append(data)
99 | rms = audioop.rms(data, 2)
100 | if rms > silence_threshold:
101 | silence_chunks = 0
102 | voiced = True
103 | app.is_listening = True
104 | else:
105 | if voiced:
106 | silence_chunks += 1
107 | if voiced and silence_chunks > max_silence_chunks and len(frames) > min_chunks:
108 | app.log("Silence detected. Finishing recording.")
109 | break
110 |
111 | # Close the stream after breaking from the loop
112 | stream.stop_stream()
113 | stream.close()
114 | p.terminate()
115 |
116 | # Always process the recording if we have enough voiced frames
117 | if voiced and len(frames) > min_chunks:
118 | wf = wave.open(audio_filename, 'wb')
119 | wf.setnchannels(channels)
120 | wf.setsampwidth(p.get_sample_size(sample_format))
121 | wf.setframerate(rate)
122 | wf.writeframes(b''.join(frames))
123 | wf.close()
124 | app.log("Recording finished. Audio saved to " + audio_filename)
125 |
126 | # Remove the pause check here - always process the final chunk
127 | transcript = transcribe_file_with_whisper(audio_filename, app)
128 | if transcript:
129 | app.log("Transcription: " + transcript)
130 | print("Transcription:", transcript)
131 | app.message_queue.put(transcript)
132 | else:
133 | app.log("Whisper transcription failed.")
134 |
135 | finally:
136 | try:
137 | if os.path.exists(audio_filename):
138 | os.remove(audio_filename)
139 | print(f"Cleaned up {audio_filename}")
140 | if os.path.exists(transcript_filename):
141 | os.remove(transcript_filename)
142 | print(f"Cleaned up {transcript_filename}")
143 | except Exception as e:
144 | print(f"Error cleaning up temporary files: {e}")
145 |
146 | def continuous_record_and_transcribe(app):
147 | while True:
148 | record_and_transcribe_vad(app)
149 | # Optionally, add a delay here if desired.
150 |
151 | # ------------------ Animated Tkinter Application ------------------
152 |
153 | class AnimatedCharacter:
154 | def __init__(self, canvas, x, y):
155 | self.canvas = canvas
156 | self.x = x
157 | self.y = y
158 | self.is_animated = False
159 |
160 | static_img = Image.open("assets/static_assistant.png")
161 | if static_img.mode != 'RGBA':
162 | static_img = static_img.convert('RGBA')
163 | data = static_img.getdata()
164 | new_data = []
165 | for item in data:
166 | if item[0] > 240 and item[1] > 240 and item[2] > 240:
167 | new_data.append((255, 255, 255, 0))
168 | else:
169 | new_data.append(item)
170 | static_img.putdata(new_data)
171 | self.static_image = ImageTk.PhotoImage(static_img)
172 |
173 | gif = Image.open("assets/animated_assistant.gif")
174 | self.animated_frames = []
175 | for frame in ImageSequence.Iterator(gif):
176 | frame = frame.convert('RGBA')
177 | data = frame.getdata()
178 | new_data = []
179 | for item in data:
180 | if item[0] < 15 and item[1] < 15 and item[2] < 15:
181 | new_data.append((0, 0, 0, 0))
182 | else:
183 | new_data.append(item)
184 | frame.putdata(new_data)
185 | self.animated_frames.append(ImageTk.PhotoImage(frame))
186 |
187 | self.image_id = canvas.create_image(x, y, image=self.static_image, anchor='center', tags='character')
188 | self.current_frame = 0
189 |
190 | def set_animated(self, animated):
191 | self.is_animated = animated
192 | if not animated:
193 | self.canvas.itemconfig(self.image_id, image=self.static_image)
194 |
195 | def update(self):
196 | if self.is_animated and self.animated_frames:
197 | self.current_frame = (self.current_frame + 1) % len(self.animated_frames)
198 | self.canvas.itemconfig(self.image_id, image=self.animated_frames[self.current_frame])
199 |
200 | class App(tk.Tk):
201 | def __init__(self):
202 | super().__init__()
203 | self.title("Animated AI Avatar - Transparent Robot Listening")
204 | self.geometry("1800x1600")
205 | self.resizable(False, False)
206 |
207 | self.overrideredirect(True)
208 | self.attributes('-alpha', 0.0)
209 | self.wm_attributes("-transparentcolor", "SystemButtonFace")
210 | self.wm_attributes("-topmost", True)
211 |
212 | self.canvas = tk.Canvas(
213 | self,
214 | bg="SystemButtonFace",
215 | width=1800,
216 | height=1600,
217 | highlightthickness=0
218 | )
219 | self.canvas.pack()
220 | self.after(100, lambda: self.attributes('-alpha', 1.0))
221 |
222 | self._drag_data = {"x": 0, "y": 0, "item": None, "start_time": 0, "start_x": 0, "start_y": 0}
223 | self.bind("<Escape>", lambda e: self.cleanup())
224 |
225 | self.is_listening = False
226 | self.is_transcribing = False
227 | self.is_ai_processing = False
228 | self.red_dot_id = None
229 | self.dot_visible = False
230 | self.is_paused = True
231 |
232 | self.chat_interface = None
233 | self.chat_window = None
234 |
235 | self.canvas.tag_bind("character", "<ButtonPress-1>", self.on_drag_start)
236 | self.canvas.tag_bind("character", "<ButtonRelease-1>", self.on_drag_stop)
237 | self.canvas.tag_bind("character", "<B1-Motion>", self.on_drag_motion)
238 |
239 | self.character = AnimatedCharacter(self.canvas, 400, 300)
240 |
241 | self.log_widget = tk.Text(self, height=5, width=100)
242 | self.log_widget.pack(padx=10, pady=10)
243 |
244 | # Load API key and initialize ChatCompletionAssistant.
245 | from os import getenv
246 | from dotenv import load_dotenv
247 | load_dotenv()
248 | self.assistant_api_key = getenv('OPENAI_API_KEY')
249 | if not self.assistant_api_key:
250 | print("Please set your OPENAI_API_KEY environment variable")
251 | exit(1)
252 | self.assistant = ChatCompletionAssistant(self.assistant_api_key)
253 |
254 | self.message_queue = queue.Queue()
255 | self._ai_lock = threading.Lock()
256 |
257 | threading.Thread(target=continuous_record_and_transcribe, args=(self,), daemon=True).start()
258 | self.animate_robot()
259 | threading.Thread(target=self.process_ai_messages, daemon=True).start()
260 | self.create_mode_toggles()
261 | self.after(100, self.toggle_messages)
262 |
263 | def log(self, message):
264 | self.log_widget.insert(tk.END, message + "\n")
265 | self.log_widget.see(tk.END)
266 |
267 | def animate_robot(self):
268 | # Show red dot only when listening (not when processing)
269 | if self.is_listening and not self.is_ai_processing:
270 | self.toggle_red_dot()
271 | elif self.red_dot_id is not None:
272 | self.canvas.delete(self.red_dot_id)
273 | self.red_dot_id = None
274 | self.dot_visible = False
275 |
276 | # Animate robot when AI is processing (both voice and text)
277 | if self.is_ai_processing:
278 | if not self.character.is_animated:
279 | self.character.set_animated(True)
280 | self.character.update()
281 | else:
282 | if self.character.is_animated:
283 | self.character.set_animated(False)
284 |
285 | self.after(150, self.animate_robot)
286 |
287 | def on_drag_start(self, event):
288 | self._drag_data["x"] = event.x_root
289 | self._drag_data["y"] = event.y_root
290 | self._drag_data["start_time"] = time.time()
291 | self._drag_data["start_x"] = event.x_root
292 | self._drag_data["start_y"] = event.y_root
293 |
294 | def on_drag_stop(self, event):
295 | self._drag_data.update({"x": 0, "y": 0, "item": None, "start_time": 0, "start_x": 0, "start_y": 0})
296 |
297 | def on_drag_motion(self, event):
298 | delta_x = event.x_root - self._drag_data["x"]
299 | delta_y = event.y_root - self._drag_data["y"]
300 | x = self.winfo_x() + delta_x
301 | y = self.winfo_y() + delta_y
302 | self.geometry(f"+{x}+{y}")
303 | self._drag_data["x"] = event.x_root
304 | self._drag_data["y"] = event.y_root
305 |
306 | def toggle_mute(self):
307 | self.is_paused = not self.is_paused
308 | try:
309 | self.mic_button.configure(image=self.mic_icon_muted if self.is_paused else self.mic_icon_active)
310 | except Exception as e:
311 | print(f"Error in toggle_mute: {e}")
312 |
313 | def process_ai_messages(self):
314 | while True:
315 | message = self.message_queue.get()
316 | if message:
317 | try:
318 | print(f"Processing with AI: {message}")
319 | self.log("Processing with AI: " + message)
320 |
321 | # Add user message to chat interface
322 | if self.chat_interface:
323 | self.chat_interface.add_user_message(message)
324 |
325 | # Set AI processing state and animate
326 | self.is_ai_processing = True
327 | self.is_transcribing = False # Ensure transcribing is off
328 | self.is_listening = False # Ensure listening is off
329 |
330 | def single_callback(text_fragment):
331 | if self.chat_interface:
332 | self.chat_interface.add_assistant_message(text_fragment)
333 | # Keep AI processing true while streaming response
334 | self.is_ai_processing = True
335 | self.character.set_animated(True)
336 |
337 | # Use the new ChatCompletion-based assistant
338 | with self._ai_lock:
339 | self.assistant.send_message(message, ui_callback=single_callback)
340 | except Exception as e:
341 | self.log(f"AI Processing error: {str(e)}")
342 | finally:
343 | # Reset all states after complete response
344 | self.is_ai_processing = False
345 | self.character.set_animated(False)
346 | self.message_queue.task_done()
347 |
348 | def process_new_message(self, user_input):
349 | start_time = time.time()
350 | print(f"\n[{time.strftime('%H:%M:%S')}] Processing new message: {user_input}")
351 | def single_callback(text_fragment):
352 | self.chat_interface and self.chat_interface.add_assistant_message(text_fragment)
353 | with self._ai_lock:
354 | try:
355 | self.is_transcribing = True
356 | if self.chat_interface:
357 | if not hasattr(self, '_from_chat_interface') or not self._from_chat_interface:
358 | self.chat_interface.add_user_message(user_input)
359 | self._from_chat_interface = False
360 | response = self.assistant.send_message(user_input, ui_callback=single_callback)
361 | print(f"[TIMING] Total response completed in {time.time()-start_time:.2f}s")
362 | except Exception as e:
363 | print(f"[ERROR] after {time.time()-start_time:.2f}s: {str(e)}")
364 | finally:
365 | self.is_transcribing = False
366 |
367 | def cleanup(self):
368 | self.destroy()
369 |
370 | def create_mode_toggles(self):
371 | button_size = 64
372 | def create_button(icon_file, active=True, use_green=True):
373 | button = Image.new('RGBA', (button_size, button_size), (0, 0, 0, 0))
374 | draw = ImageDraw.Draw(button)
375 | padding = 8
376 | circle_color = (220, 255, 220, 255) if active else (255, 220, 220, 255)
377 | draw.ellipse([padding, padding, button_size-padding, button_size-padding], fill=circle_color)
378 | icon = Image.open(icon_file).convert('RGBA')
379 | icon = icon.resize((28, 28), Image.Resampling.LANCZOS)
380 | icon_x = (button_size - icon.width) // 2
381 | icon_y = (button_size - icon.height) // 2
382 | final_button = Image.new('RGBA', (button_size, button_size), (0, 0, 0, 0))
383 | final_button.paste(button, (0, 0), button)
384 | final_button.paste(icon, (icon_x, icon_y), icon)
385 | return ImageTk.PhotoImage(final_button)
386 |
387 | self.mic_icon_active = create_button("assets/mic.png", True, use_green=True)
388 | self.mic_icon_muted = create_button("assets/mic-off.png", False, use_green=True)
389 | self.messages_icon_active = create_button("assets/messages-square.png", True, use_green=False)
390 | self.messages_icon_muted = create_button("assets/messages-square.png", False, use_green=False)
391 |
392 | self.mic_button = tk.Button(
393 | self.canvas,
394 | image=self.mic_icon_muted,
395 | command=self.toggle_mute,
396 | relief='flat',
397 | bg='SystemButtonFace',
398 | activebackground='SystemButtonFace',
399 | bd=0,
400 | highlightthickness=0,
401 | cursor="hand2",
402 | width=64,
403 | height=64
404 | )
405 |
406 | self.messages_button = tk.Button(
407 | self.canvas,
408 | image=self.messages_icon_active,
409 | command=self.toggle_messages,
410 | relief='flat',
411 | bg='SystemButtonFace',
412 | activebackground='SystemButtonFace',
413 | bd=0,
414 | highlightthickness=0,
415 | cursor="hand2",
416 | width=64,
417 | height=64
418 | )
419 |
420 | self.mic_button_window = self.canvas.create_window(
421 | self.character.x - 32,
422 | self.character.y - 100,
423 | window=self.mic_button,
424 | anchor='center'
425 | )
426 |
427 | self.messages_button_window = self.canvas.create_window(
428 | self.character.x + 32,
429 | self.character.y - 100,
430 | window=self.messages_button,
431 | anchor='center'
432 | )
433 |
434 | def toggle_messages(self):
435 | if not self.chat_interface:
436 | self.chat_interface = ChatInterface(
437 | self.canvas,
438 | self.assistant,
439 | width=400,
440 | height=500,
441 | message_queue=self.message_queue
442 | )
443 | self.chat_window = self.canvas.create_window(
444 | self.character.x + 120,
445 | self.character.y - 200,
446 | window=self.chat_interface,
447 | anchor='nw',
448 | tags='chat'
449 | )
450 | self.chat_interface.chat_window_id = self.chat_window
451 | self.canvas.itemconfig(self.chat_window, state='normal')
452 | self.messages_button.configure(image=self.messages_icon_active)
453 | else:
454 | current_state = self.canvas.itemcget(self.chat_window, 'state')
455 | new_state = 'hidden' if current_state == 'normal' else 'normal'
456 | self.canvas.itemconfig(self.chat_window, state=new_state)
457 | new_icon = self.messages_icon_muted if new_state == 'hidden' else self.messages_icon_active
458 | self.messages_button.configure(image=new_icon)
459 |
460 | def toggle_red_dot(self):
461 | if not self.dot_visible:
462 | x = self.character.x + 50
463 | y = self.character.y - 50
464 | self.red_dot_id = self.canvas.create_oval(
465 | x-5, y-5, x+5, y+5,
466 | fill='red',
467 | outline='darkred',
468 | tags='red_dot'
469 | )
470 | self.dot_visible = True
471 | else:
472 | if self.red_dot_id:
473 | self.canvas.delete(self.red_dot_id)
474 | self.red_dot_id = None
475 | self.dot_visible = False
476 |
477 | def main():
478 | app = App()
479 | app.mainloop()
480 |
481 | if __name__ == "__main__":
482 | main()
--------------------------------------------------------------------------------
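The voice-activity detection in `record_and_transcribe_vad` is controlled entirely by the chunk arithmetic near the top of the function. With the constants in the file, recording stops once speech has been detected (RMS above the 200 threshold at least once), more than 39 consecutive quiet reads (~2.5 s) follow, and more than 7 chunks (~0.45 s) have been captured in total:

```python
# Worked example using the same constants as record_and_transcribe_vad.
chunk, rate = 1024, 16000                 # samples per read, samples per second
chunk_duration = chunk / rate             # 0.064 s of audio per stream.read()

pause_duration_sec = 2.5
max_silence_chunks = int(pause_duration_sec / chunk_duration)   # 39 quiet reads ~= 2.5 s
min_chunks = int(0.5 / chunk_duration)                          # 7 reads ~= 0.45 s of audio

print(chunk_duration, max_silence_chunks, min_chunks)           # 0.064 39 7
```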
/tools/directory_tools.py:
--------------------------------------------------------------------------------
1 | import os
2 | from dotenv import load_dotenv
3 | load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))
4 |
5 | class DirectoryTools:
6 | def __init__(self):
7 | self.base_directory = os.getenv("BASE_DIRECTORY")
8 | if not self.base_directory:
9 | print("Warning: BASE_DIRECTORY not set in environment variables")
10 |
11 | def list_directory(self) -> str:
12 | """List all files and directories in the base directory"""
13 | try:
14 | if not self.base_directory:
15 | return "Error: BASE_DIRECTORY environment variable is not set"
16 |
17 | if not os.path.exists(self.base_directory):
18 | return f"Error: Directory {self.base_directory} does not exist"
19 |
20 | items = os.listdir(self.base_directory)
21 | if not items:
22 | return "Directory is empty"
23 |
24 | result = [f"Contents of directory ({self.base_directory}):"]
25 | for item in items:
26 | full_path = os.path.join(self.base_directory, item)
27 | item_type = "Directory" if os.path.isdir(full_path) else "File"
28 | result.append(f"{item_type}: {item}")
29 |
30 | return "\n".join(result)
31 | except Exception as e:
32 | return f"Error listing directory: {str(e)}"
--------------------------------------------------------------------------------
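Note that `DirectoryTools` reads `BASE_DIRECTORY`, not the `DATALAKE_DIRECTORY` shown in the README's `.env` example, so both variables may need to be set. Assuming `BASE_DIRECTORY` points at the `datalake/` folder from the tree above, `list_directory()` produces output along these lines (illustrative):

```python
from tools.directory_tools import DirectoryTools

print(DirectoryTools().list_directory())
# Contents of directory (/path/to/datalake):
# File: Bakery_TEA.xlsx
# File: business_plan.docx
# File: data_by_cities.xlsx
# File: eggs.pdf
# File: electricity.png
# File: wages.pdf
```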
/tools/excel_tools.py:
--------------------------------------------------------------------------------
1 | import win32com.client
2 | import pythoncom # Add this import
3 | import time
4 | from typing import List, Dict
5 | import json
6 | import os
7 |
8 | # Class to represent a cell edit, if needed elsewhere.
9 | class CellEdit:
10 | def __init__(self, cell_address: str, value: str):
11 | self.cell_address = cell_address
12 | self.value = value
13 |
14 | class ExcelTools:
15 | def __init__(self):
16 | self.excel_app = None
17 | self.open_workbooks = {}
18 |
19 | def _initialize_excel_app(self):
20 | """Initialize Excel application if not already initialized"""
21 | try:
22 | if self.excel_app is None:
23 | print("Initializing new Excel application...")
24 | # Initialize COM in this thread
25 | pythoncom.CoInitialize()
26 | self.excel_app = win32com.client.Dispatch("Excel.Application")
27 | # Set visibility after dispatch
28 | try:
29 | self.excel_app.Visible = True
30 | self.excel_app.DisplayAlerts = False
31 | except Exception as e:
32 | print(f"Warning: Could not set Excel visibility: {str(e)}")
33 | except Exception as e:
34 | print(f"Error initializing Excel: {str(e)}")
35 | raise
36 |
37 | def _activate_workbook(self, file_path: str):
38 | """
39 | Helper method to activate an existing workbook by file path.
40 | If the workbook is already open, it is activated; otherwise, it is opened.
41 | Returns: (workbook, is_new_workbook)
42 | """
43 | self._initialize_excel_app()
44 | normalized_path = os.path.abspath(file_path).lower()
45 |
46 | # Check if the workbook is already in our tracking dict
47 | if normalized_path in self.open_workbooks:
48 | try:
49 | self.open_workbooks[normalized_path].Activate()
50 | return self.open_workbooks[normalized_path], False
51 | except Exception as e:
52 | print(f"Warning: Could not activate tracked workbook: {str(e)}")
53 | del self.open_workbooks[normalized_path]
54 |
55 | # Check if the workbook is open in Excel
56 | try:
57 | for wb in self.excel_app.Workbooks:
58 | try:
59 | if wb.FullName.lower() == normalized_path:
60 | wb.Activate()
61 | self.open_workbooks[normalized_path] = wb
62 | return wb, False
63 | except:
64 | continue
65 | except:
66 | pass
67 |
68 | # Open or create the workbook
69 | try:
70 | if os.path.exists(file_path):
71 | wb = self.excel_app.Workbooks.Open(file_path)
72 | else:
73 | wb = self.excel_app.Workbooks.Add()
74 | wb.SaveAs(file_path)
75 |
76 | # For new workbooks, adjust the view
77 | active_window = wb.Windows(1)
78 | active_window.Zoom = 200 # Set zoom to 200%
79 |
80 | self.open_workbooks[normalized_path] = wb
81 | return wb, True
82 | except Exception as e:
83 | print(f"Error opening workbook: {str(e)}")
84 | raise
85 |
86 | def _get_full_path(self, file_path: str) -> str:
87 | """Helper method to get the full path, with better error handling and logging"""
88 | try:
89 | if os.path.isabs(file_path):
90 | print(f"Using absolute path: {file_path}")
91 | return file_path
92 |
93 | # Use the datalake directory relative to the project root
94 | base_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'datalake')
95 |
96 | # Create the datalake directory if it doesn't exist
97 | if not os.path.exists(base_dir):
98 | os.makedirs(base_dir)
99 | print(f"Created datalake directory at: {base_dir}")
100 |
101 | full_path = os.path.join(base_dir, file_path)
102 | print(f"Converted to full path: {full_path}")
103 | return full_path
104 |
105 | except Exception as e:
106 | print(f"Error in path handling: {str(e)}")
107 | raise
108 |
109 | def read_excel(self, file_path: str) -> str:
110 | """
111 | Unified method to read a fixed range (A1:Z20) from the first worksheet of an Excel file.
112 | This method activates the target workbook and worksheet based on the provided file path.
113 |
114 | Args:
115 | file_path (str): The full path of the Excel file.
116 |
117 | Returns:
118 | str: A formatted string with each non-empty cell address and its value.
119 | """
120 | try:
121 | wb, _ = self._activate_workbook(file_path)
122 | state = []
123 |
124 | # Define the range to read ("A1:Z20").
125 | range_obj = wb.Worksheets(1).Range("A1:Z20")
126 | for row in range(1, 21): # Rows 1 to 20.
127 | for col in range(1, 27): # Columns 1 to 26 (A to Z).
128 | cell = range_obj.Cells(row, col)
129 | if cell.Text:
130 | col_letter = chr(64 + col) # Convert column number to letter.
131 | cell_address = f"{col_letter}{row}"
132 | state.append(f"{cell_address}: {cell.Text}")
133 | result = "\n".join(state)
134 | return result
135 | except Exception as e:
136 | error_msg = f"Error reading Excel file: {str(e)}"
137 | return error_msg
138 |
139 | def write_to_excel(self, file_path: str, data: dict) -> str:
140 | """
141 | Unified method to write data to cells in the first worksheet of an Excel file.
142 | After writing, the workbook is saved.
143 | This method activates the target workbook and worksheet based on the provided file path before performing any write operations.
144 |
145 | Args:
146 | file_path (str): The full path of the Excel file.
147 | data (dict): A dictionary mapping cell addresses (e.g., "A1") to values to write.
148 |                      If data is nested under a sheet name (e.g., {"Sheet1": { "A1": "Valve Name", ... }}
149 |                      or {"Sheet1": [ { ... } ]}), the method extracts the inner dictionary or processes the list payload.
150 | Additionally, if the nested payload (e.g., under "cells") is a list of dictionaries each having
151 | 'cell' and 'value' keys, the method converts the list into a flat dictionary.
152 |
153 | Returns:
154 | str: A status message summarizing the write operations.
155 | """
156 | try:
157 |             self.workbook, _ = self._activate_workbook(file_path)  # bind the target workbook explicitly
158 |             self.worksheet = self.workbook.Worksheets(1)
159 |             try:
160 |                 self.excel_app.Visible = True  # Best effort: make Excel visible after activation
161 |             except Exception:
162 |                 pass
163 | print(f"Debug: Workbook and worksheet activated for writing using file {file_path}.")
164 |
165 | # Check for nested data payload under a single key.
166 | if isinstance(data, dict) and len(data) == 1:
167 | first_key = next(iter(data))
168 | inner_data = data[first_key]
169 | # If the inner data is already a dictionary, extract it.
170 | if isinstance(inner_data, dict):
171 | print(f"Debug: Detected nested data payload under sheet name '{first_key}' (dictionary format). Extracting payload.")
172 | data = inner_data
173 | # If the inner data is a list, determine its structure.
174 | elif isinstance(inner_data, list) and len(inner_data) > 0:
175 | # Check if every item in the list is a dict with 'cell' and 'value' keys.
176 | if all(isinstance(item, dict) and 'cell' in item and 'value' in item for item in inner_data):
177 | # Convert the list into a flat dictionary with cell addresses as keys.
178 | print(f"Debug: Detected list of cell-value dictionaries under key '{first_key}'. Converting list to flat dictionary.")
179 | data = {item['cell']: item['value'] for item in inner_data}
180 | else:
181 | # Fallback: Extract the first element if it doesn't match the expected structure.
182 | print(f"Debug: Detected nested data payload under sheet name '{first_key}' (list format) but not in cell-value format. Extracting first element as payload.")
183 | data = inner_data[0]
184 | else:
185 | print("Debug: Data payload nested under sheet name, but format is not recognized. Proceeding with original data.")
186 |
187 | results = []
188 | for cell_address, value in data.items():
189 | cell = self.worksheet.Range(cell_address)
190 | cell.Value2 = value
191 | print(f"Debug: Written {value} to {cell_address} in workbook {file_path}.")
192 | results.append(f"Wrote {value} to {cell_address}")
193 | time.sleep(0.1) # Small delay to allow Excel to refresh.
194 | self.workbook.Save()
195 | return "\n".join(results)
196 | except Exception as e:
197 | return f"Error writing to Excel file: {str(e)}"
198 |
199 | def create_new_workbook(self, new_file_path: str) -> str:
200 | """
201 | Create a new Excel workbook, activate its first worksheet, and save it to the specified path.
202 | """
203 | try:
204 | self._initialize_excel_app()
205 | # Create new workbook without affecting others
206 | self.workbook = self.excel_app.Workbooks.Add()
207 | try:
208 | self.excel_app.Visible = True
209 |             except Exception:
210 | pass
211 |
212 | # Save immediately to establish the file
213 | self.workbook.SaveAs(new_file_path)
214 | self.worksheet = self.workbook.Worksheets(1)
215 | self.worksheet.Select()
216 | print(f"Debug: New workbook created and saved at {new_file_path}.")
217 | return f"New workbook created and saved at {new_file_path}"
218 | except Exception as e:
219 | return f"Error creating new workbook: {str(e)}"
220 |
221 | def close_workbook(self, file_path: str) -> str:
222 | """
223 | Close the workbook specified by file_path if it is open, saving changes before closing.
224 |
225 | Args:
226 | file_path (str): The full path of the Excel file to close.
227 |
228 | Returns:
229 | str: A status message indicating whether the workbook was closed.
230 | """
231 | try:
232 | if self.excel_app is None:
233 | return "Excel application is not initialized."
234 |             normalized_path = os.path.abspath(file_path).lower()  # match the keys used by _activate_workbook
235 | workbook_found = None
236 | for wb in self.excel_app.Workbooks:
237 | try:
238 | if wb.FullName.lower() == normalized_path:
239 | workbook_found = wb
240 | break
241 | except Exception:
242 | continue
243 |
244 |             if workbook_found:
245 |                 if self.workbook and self.workbook.FullName.lower() == normalized_path:
246 |                     self.workbook = self.worksheet = None  # drop references before the COM object is closed
247 |                 workbook_found.Save()
248 |                 workbook_found.Close()
249 |                 self.open_workbooks.pop(normalized_path, None)  # keep the tracking dict in sync
250 |                 print(f"Debug: Closed workbook at {file_path}.")
251 | return f"Workbook at {file_path} closed successfully."
252 | else:
253 | return f"No open workbook found at {file_path}."
254 | except Exception as e:
255 | return f"Error closing workbook: {str(e)}"
256 |
257 | def cleanup(self):
258 | """
259 | Clean up Excel resources by saving and closing all tracked workbooks
260 | and quitting the Excel application.
261 | """
262 | try:
263 | # Save and close all tracked workbooks
264 | for path, wb in self.open_workbooks.items():
265 | try:
266 | wb.Save()
267 | wb.Close()
268 | except Exception as e:
269 | print(f"Error closing workbook {path}: {str(e)}")
270 | self.open_workbooks.clear()
271 |
272 | if self.excel_app:
273 | # Close any remaining open workbooks
274 | while self.excel_app.Workbooks.Count:
275 | self.excel_app.Workbooks(1).Close()
276 | self.excel_app.Quit()
277 | self.excel_app = None
278 | print("Debug: Excel application quit.")
279 | pythoncom.CoUninitialize()
280 | except Exception as e:
281 | print(f"Error during cleanup: {str(e)}")
282 |
283 | def process_excel(self, file_path, write_data=None):
284 | """Process Excel operations with better workbook state handling"""
285 | try:
286 | self._initialize_excel_app()
287 |
288 | # Convert to full path
289 | abs_path = self._get_full_path(file_path)
290 |             if not abs_path.lower().endswith(('.xlsx', '.xlsm', '.xls')):
291 |                 abs_path += '.xlsx'  # default to .xlsx when no Excel extension is given
292 |
293 | print(f"Processing Excel file: {abs_path}")
294 |
295 | # Ensure we have a valid Excel instance
296 | try:
297 | _ = self.excel_app.Visible
298 |             except Exception:
299 | print("Reconnecting to Excel...")
300 | self.excel_app = win32com.client.Dispatch("Excel.Application")
301 | self.excel_app.Visible = True
302 | self.excel_app.DisplayAlerts = True
303 |
304 | # Make Excel visible
305 | self.excel_app.Visible = True
306 |
307 | # Get or create workbook
308 | workbook, is_new_workbook = self._activate_workbook(abs_path)
309 | worksheet = workbook.Worksheets(1)
310 | worksheet.Activate()
311 |
312 | # Only maximize if it's a new workbook
313 | if is_new_workbook:
314 | try:
315 | window = workbook.Windows(1)
316 | window.WindowState = -4137 # xlMaximized
317 | except Exception as e:
318 | print(f"Warning: Could not maximize window: {str(e)}")
319 |
320 | if write_data:
321 | print(f"Writing data to workbook...")
322 | for cell, value in write_data.items():
323 | try:
324 | print(f"Writing {value} to {cell}")
325 | cell_range = worksheet.Range(cell)
326 |
327 | # Store original color and format
328 | original_color = cell_range.Interior.Color
329 | original_pattern = cell_range.Interior.Pattern
330 |
331 | # Set value and highlight cell
332 | cell_range.Value = value
333 | cell_range.Select()
334 | cell_range.Interior.Color = 0xFF9019
335 |
336 | # Force immediate update
337 | self.excel_app.ScreenUpdating = True
338 |
339 | time.sleep(0.1)
340 |
341 | # Fade back to original color
342 | cell_range.Interior.Color = original_color
343 | cell_range.Interior.Pattern = original_pattern
344 |
345 | time.sleep(0.1)
346 | except Exception as e:
347 | print(f"Error writing to cell {cell}: {str(e)}")
348 | continue
349 |
350 | try:
351 | workbook.Save()
352 | print("Workbook saved successfully")
353 | except Exception as e:
354 | print(f"Error saving workbook: {str(e)}")
355 | time.sleep(1)
356 | workbook.Save()
357 |
358 | return f"Successfully wrote data to {abs_path}"
359 | else:
360 | print("Reading data from workbook...")
361 | data = {}
362 | used_range = worksheet.UsedRange
363 | for row in range(1, used_range.Rows.Count + 1):
364 | for col in range(1, used_range.Columns.Count + 1):
365 | cell = worksheet.Cells(row, col)
366 | if cell.Value is not None:
367 |                         col_letter = chr(64 + col) if col <= 26 else chr(64 + (col - 1) // 26) + chr(65 + (col - 1) % 26)  # handles AA..ZZ correctly
368 | cell_ref = f"{col_letter}{row}"
369 | data[cell_ref] = cell.Value
370 | return str(data)
371 |
372 | except Exception as e:
373 | print(f"Excel operation error: {str(e)}")
374 | try:
375 | self.excel_app = win32com.client.Dispatch("Excel.Application")
376 | self.excel_app.Visible = True
377 |             except Exception:
378 | pass
379 | return f"Error processing Excel file: {str(e)}"
--------------------------------------------------------------------------------
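
A quick way to exercise the Excel tooling above is a small driver script. The sketch below is illustrative only: it assumes the class defined in tools/excel_tools.py is exported as ExcelTools (the class name is not shown above), that Excel and pywin32 are installed on Windows, and that Bakery_TEA.xlsx from datalake/ is the target workbook.

# Hypothetical usage sketch, not part of the repository.
from tools.excel_tools import ExcelTools  # assumed class name

excel = ExcelTools()
try:
    # process_excel resolves bare names against datalake/ and appends .xlsx when needed.
    print(excel.process_excel("Bakery_TEA.xlsx", write_data={"A1": "Item", "B1": "Price"}))
    print(excel.process_excel("Bakery_TEA.xlsx"))  # read back the used range

    # write_to_excel also accepts payloads nested under a sheet name, including a
    # list of {"cell": ..., "value": ...} dicts, which it flattens before writing.
    nested = {"Sheet1": [{"cell": "A2", "value": "Bread"}, {"cell": "B2", "value": 3.5}]}
    print(excel.write_to_excel(excel._get_full_path("Bakery_TEA.xlsx"), nested))
finally:
    excel.cleanup()  # saves and closes tracked workbooks, then quits Excel
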
/tools/image_tools.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import requests
3 | import os
4 | from typing import Optional
5 | import PyPDF2
6 | from dotenv import load_dotenv
7 | import tkinter as tk
8 | from PIL import Image, ImageTk
9 | import time
10 | import psutil
11 | import subprocess
12 | import win32api
13 | load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))
14 |
15 | class ImageTools:
16 | def __init__(self):
17 | self.api_key = os.getenv("OPENAI_API_KEY")
18 | self.api_url = "https://api.openai.com/v1/chat/completions"
19 | self.base_directory = os.getenv("BASE_DIRECTORY")
20 | self.image_path = None
21 | self.pdf_path = None
22 | self.viewer = None
23 | self.pdf_process = None
24 |
25 | def show_file(self, file_path):
26 | """Display the file in a window"""
27 | if file_path.lower().endswith('.pdf'):
28 | print(f"Opening PDF: {file_path}")
29 | # Move mouse to main monitor center before opening PDF
30 | self.focus_main_monitor()
31 |             # Open with the system default viewer; the Popen handle is only the 'start' stub, so cleanup happens in close_pdf_viewer()
32 |             self.pdf_process = subprocess.Popen(['start', '', file_path], shell=True)
33 | # Wait 2 seconds then close
34 | time.sleep(2)
35 | print("Attempting to close PDF viewer...")
36 | self.close_pdf_viewer()
37 | else:
38 | # For images, use Tkinter
39 | self.viewer = tk.Toplevel()
40 | self.viewer.title("Image Analysis")
41 | self.viewer.attributes('-topmost', True)
42 |
43 | # Position window on main monitor
44 | main_screen_width = self.viewer.winfo_screenwidth()
45 | main_screen_height = self.viewer.winfo_screenheight()
46 | window_width = 800
47 | window_height = 600
48 | x = (main_screen_width - window_width) // 2
49 | y = (main_screen_height - window_height) // 2
50 | self.viewer.geometry(f"{window_width}x{window_height}+{x}+{y}")
51 |
52 | # Load and display image
53 | img = Image.open(file_path)
54 | # Resize if too large while maintaining aspect ratio
55 | max_size = (800, 600)
56 | img.thumbnail(max_size, Image.Resampling.LANCZOS)
57 | photo = ImageTk.PhotoImage(img)
58 |
59 | label = tk.Label(self.viewer, image=photo)
60 |             label.image = photo  # Keep a reference so Tk does not garbage-collect the image
61 |             label.pack()
62 |             self.viewer.update()  # Render now; no mainloop runs during the blocking analysis call
62 |
63 | def focus_main_monitor(self):
64 | """Move mouse to center of main monitor to help focus windows there"""
65 | try:
66 | # Get main monitor resolution
67 | width = win32api.GetSystemMetrics(0)
68 | height = win32api.GetSystemMetrics(1)
69 | # Move mouse to center of main monitor
70 | win32api.SetCursorPos((width // 2, height // 2))
71 | except Exception as e:
72 | print(f"Could not move cursor to main monitor: {e}")
73 |
74 | def close_pdf_viewer(self):
75 | """Close any PDF viewer processes"""
76 | print("Searching for PDF viewer processes...")
77 |
78 | try:
79 | # Windows-specific: Use taskkill to close PDF viewer windows
80 | # This targets window titles containing "PDF" or specific viewer names
81 | subprocess.run([
82 | 'taskkill', '/F', '/FI',
83 | 'WINDOWTITLE eq *PDF*'
84 | ], capture_output=True)
85 |
86 | # Also try to close specific PDF applications
87 | subprocess.run(['taskkill', '/F', '/IM', 'AcroRd32.exe'], capture_output=True)
88 | subprocess.run(['taskkill', '/F', '/IM', 'Acrobat.exe'], capture_output=True)
89 | subprocess.run(['taskkill', '/F', '/IM', 'SumatraPDF.exe'], capture_output=True)
90 |
91 | # For Edge PDF viewer specifically
92 | subprocess.run([
93 | 'powershell',
94 | '-command',
95 | "Get-Process | Where-Object {$_.MainWindowTitle -like '*PDF*'} | Stop-Process -Force"
96 | ], capture_output=True)
97 |
98 | except Exception as e:
99 | print(f"Error during PDF viewer cleanup: {e}")
100 |
101 | def close_viewer(self):
102 | """Close the viewer window if it exists"""
103 | if self.viewer:
104 | print("Closing image viewer...")
105 | self.viewer.destroy()
106 | self.viewer = None
107 | print("Ensuring PDF viewer is closed...")
108 | self.close_pdf_viewer()
109 |
110 | def analyze_file(self, question: str, file_name: Optional[str] = None) -> str:
111 | """Analyze either an image or PDF file and answer questions about it"""
112 | try:
113 | # Always use the specified file if provided
114 | if file_name:
115 | file_path = os.path.join(self.base_directory, file_name)
116 | print(f"Debug: Using specified file: {file_name}")
117 | else:
118 | # Search for first image or PDF if no specific file was requested
119 | for file in os.listdir(self.base_directory):
120 | if file.lower().endswith(('.png', '.jpg', '.jpeg', '.pdf')):
121 | file_path = os.path.join(self.base_directory, file)
122 | print(f"Debug: Found first file: {file}")
123 | break
124 | else:
125 | return "Error: No image or PDF file found in the datalake directory"
126 |
127 | if not os.path.exists(file_path):
128 | return f"Error: File not found at {file_path}"
129 |
130 | print(f"Debug: Attempting to analyze file at: {file_path}")
131 |
132 | # Show the file before analysis
133 | self.show_file(file_path)
134 |
135 | # Determine file type and process accordingly
136 | if file_path.lower().endswith('.pdf'):
137 | # Process PDF
138 | pdf_text = ""
139 | with open(file_path, 'rb') as pdf_file:
140 | pdf_reader = PyPDF2.PdfReader(pdf_file)
141 | for page in pdf_reader.pages:
142 | pdf_text += page.extract_text() + "\n"
143 |
144 | payload = {
145 | "model": "gpt-4o",
146 | "messages": [
147 | {
148 | "role": "user",
149 | "content": f"PDF Content:\n{pdf_text}\n\nQuestion: {question}"
150 | }
151 | ],
152 | "max_tokens": 500
153 | }
154 | else:
155 | # Process Image
156 | with open(file_path, "rb") as image_file:
157 | base64_image = base64.b64encode(image_file.read()).decode('utf-8')
158 |
159 | payload = {
160 | "model": "gpt-4o",
161 | "messages": [
162 | {
163 | "role": "user",
164 | "content": [
165 | {
166 | "type": "text",
167 | "text": question
168 | },
169 | {
170 | "type": "image_url",
171 | "image_url": {
172 | "url": f"data:image/png;base64,{base64_image}",
173 | # "detail": "high" # Can be "low", "high", or "auto"
174 | }
175 | }
176 | ]
177 | }
178 | ],
179 | "max_tokens": 3000
180 | }
181 |
182 | headers = {
183 | "Authorization": f"Bearer {self.api_key}",
184 | "Content-Type": "application/json"
185 | }
186 |
187 | response = requests.post(self.api_url, headers=headers, json=payload)
188 |
189 | if response.status_code != 200:
190 | print(f"\nDebug: API Error: {response.text}", flush=True)
191 | return f"Error: API call failed with status code: {response.status_code}"
192 |
193 | result = response.json()
194 |
195 | # Close the viewer after analysis
196 | self.close_viewer()
197 |
198 | print("RETURNED FROM IMAGE TOOL", result['choices'][0]['message']['content'])
199 |
200 | return result['choices'][0]['message']['content'].strip()
201 |
202 | except Exception as e:
203 | print(f"\nDebug: Exception details: {str(e)}", flush=True)
204 | self.close_viewer() # Ensure viewer is closed even if there's an error
205 | return f"Error analyzing file: {str(e)}"
--------------------------------------------------------------------------------
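
ImageTools.analyze_file sends either a base64-encoded image or extracted PDF text to the chat completions endpoint while briefly showing the file on screen. A minimal sketch, assuming OPENAI_API_KEY and BASE_DIRECTORY are set in the repo's .env and that a Tk root exists for the image viewer; both file names come from datalake/.

# Hypothetical usage sketch, not part of the repository.
import tkinter as tk
from tools.image_tools import ImageTools

root = tk.Tk()
root.withdraw()  # keep the root hidden; show_file only needs it for its Toplevel viewer

tools = ImageTools()
print(tools.analyze_file("What trend does this chart show?", "electricity.png"))
print(tools.analyze_file("Summarize the main points.", "eggs.pdf"))
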
/tools/word_tools.py:
--------------------------------------------------------------------------------
1 | import win32com.client
2 | import os
3 | import time
4 | import pythoncom
5 |
6 | class WordTools:
7 | def __init__(self):
8 | self.word_app = None
9 | self.document = None
10 | self.base_directory = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'datalake')
11 |
12 | # Create the datalake directory if it doesn't exist
13 | if not os.path.exists(self.base_directory):
14 | os.makedirs(self.base_directory)
15 | print(f"Created datalake directory at: {self.base_directory}")
16 |
17 | def _initialize_word_app(self):
18 | """Initialize Microsoft Word application if not already running"""
19 | if self.word_app is None:
20 | # Initialize COM in this thread
21 | pythoncom.CoInitialize()
22 | self.word_app = win32com.client.Dispatch("Word.Application")
23 | self.word_app.Visible = True
24 | print("Debug: Word application initialized.")
25 |
26 | def process_word(self, file_path: str, content: str = None) -> str:
27 | """
28 | Process a Word document - read current content and optionally write new content
29 | """
30 | try:
31 | # Convert relative path to full path if needed
32 | if not os.path.isabs(file_path):
33 | file_path = os.path.join(self.base_directory, file_path)
34 | print(f"Debug: Converting to full path: {file_path}")
35 |
36 | print("Debug: About to initialize Word...")
37 | self._initialize_word_app()
38 | print("Debug: Word initialized successfully")
39 |
40 | # Create new document if it doesn't exist
41 | if not os.path.exists(file_path):
42 | print("Debug: Creating new document...")
43 | self.document = self.word_app.Documents.Add()
44 | print("Debug: Document added, about to save...")
45 | try:
46 | self.document.SaveAs(file_path)
47 | print(f"Debug: Document saved successfully at {file_path}")
48 | except Exception as save_error:
49 | print(f"Debug: Error saving document: {str(save_error)}")
50 | raise
51 | else:
52 | print(f"Debug: Opening existing document at {file_path}")
53 | self.document = self.word_app.Documents.Open(file_path)
54 |
55 | # Read initial content
56 | initial_content = self.document.Content.Text
57 |
58 | # Write new content if provided
59 | if content:
60 | print("Debug: Writing new content...")
61 | self.document.Content.Text = content
62 | self.document.Save()
63 | time.sleep(0.1) # Give Word time to process
64 |
65 | # Read final content
66 | final_content = self.document.Content.Text
67 |
68 | # Format result
69 | result_sections = [
70 | "Initial Content:",
71 | initial_content.strip() if initial_content.strip() else "(Empty document)",
72 | "\nWrite Operations:" if content else "",
73 | f"Wrote new content: {content}" if content else "",
74 | "\nFinal Content:",
75 | final_content.strip() if final_content.strip() else "(Empty document)"
76 | ]
77 |
78 | return "\n".join(filter(None, result_sections))
79 |
80 | except Exception as e:
81 | error_details = f"Error processing Word document: {str(e)}\nType: {type(e)}"
82 | print(f"Debug: {error_details}")
83 | return error_details
84 |
85 | def cleanup(self):
86 | """Clean up Word resources"""
87 | try:
88 | if self.document:
89 | self.document.Close()
90 | if self.word_app:
91 | self.word_app.Quit()
92 |         except Exception:
93 |             pass
--------------------------------------------------------------------------------
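
WordTools.process_word both reads and (optionally) rewrites a document in one call. A minimal sketch, assuming Word and pywin32 are installed; bare file names are resolved against the datalake directory created in __init__.

# Hypothetical usage sketch, not part of the repository.
from tools.word_tools import WordTools

word = WordTools()
try:
    print(word.process_word("business_plan.docx"))  # read (creates the file if missing)
    print(word.process_word("business_plan.docx",
                            content="Draft v2: revised revenue assumptions."))  # overwrite body text
finally:
    word.cleanup()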