├── .devcontainer └── devcontainer.json ├── .github └── workflows │ └── codeql.yml ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── docs ├── ARCHITECTURE.md └── Example Taxonomy.md ├── log.txt ├── poetry.toml ├── pyproject.toml ├── src ├── agents │ ├── README.md │ ├── agent.py │ ├── agents.py │ ├── pyproject.toml │ └── smart_agent │ │ ├── prompt.yaml │ │ └── smart_agent.py ├── api │ ├── README.md │ ├── api.py │ ├── main.py │ └── pyproject.toml ├── app │ ├── README.md │ ├── app.py │ └── pyproject.toml ├── distributed_cache │ ├── README.md │ ├── cache.py │ ├── distributedcache.py │ └── pyproject.toml ├── functions │ ├── README.md │ ├── functions.py │ ├── pyproject.toml │ └── search_vector_function.py ├── main.py ├── models │ ├── README.md │ ├── agent_configuration.py │ ├── agent_response.py │ ├── models.py │ ├── pyproject.toml │ └── settings.py ├── services │ ├── README.md │ ├── history.py │ ├── pyproject.toml │ └── services.py └── utils │ ├── README.md │ ├── poetry.lock │ ├── pyproject.toml │ ├── smart_agent_factory.py │ ├── utilities.py │ └── utils.py └── tests ├── README.md ├── pyproject.toml ├── test_api.py ├── test_search_vector_function.py ├── test_server.py ├── test_smart_agent.py ├── test_vectors.py └── tests.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "image": "mcr.microsoft.com/devcontainers/universal:2", 3 | "features": { 4 | "ghcr.io/devcontainers/features/python": { 5 | "version": "3.11" 6 | } 7 | }, 8 | "initializeCommand": "pip install poetry -U && poetry lock --no-update && poetry install", 9 | "customizations": { 10 | "vscode": { 11 | "extensions": [ 12 | "ms-python.python", 13 | "ms-python.pylint", 14 | "GitHub.copilot", 15 | "DavidAnson.vscode-markdownlint", 16 | "EditorConfig.EditorConfig", 17 | "ms-azuretools.vscode-bicep", 18 | 
"ms-dotnettools.dotnet-interactive-vscode" 19 | ] 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | branches: [ "main" ] 19 | schedule: 20 | - cron: '23 5 * * 4' 21 | 22 | jobs: 23 | analyze: 24 | name: Analyze (${{ matrix.language }}) 25 | # Runner size impacts CodeQL analysis time. To learn more, please see: 26 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 27 | # - https://gh.io/supported-runners-and-hardware-resources 28 | # - https://gh.io/using-larger-runners (GitHub.com only) 29 | # Consider using larger runners or machines with greater resources for possible analysis time improvements. 
30 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 31 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 32 | permissions: 33 | # required for all workflows 34 | security-events: write 35 | 36 | # required to fetch internal or private CodeQL packs 37 | packages: read 38 | 39 | # only required for workflows in private repositories 40 | actions: read 41 | contents: read 42 | 43 | strategy: 44 | fail-fast: false 45 | matrix: 46 | language: [ "python" ] 47 | # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' 48 | # Use `c-cpp` to analyze code written in C, C++ or both 49 | # Use 'java-kotlin' to analyze code written in Java, Kotlin or both 50 | # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 51 | # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, 52 | # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. 53 | # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how 54 | # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages 55 | steps: 56 | - name: Checkout repository 57 | uses: actions/checkout@v4 58 | 59 | # Initializes the CodeQL tools for scanning. 60 | - name: Initialize CodeQL 61 | uses: github/codeql-action/init@v3 62 | with: 63 | languages: ${{ matrix.language }} 64 | build-mode: ${{ matrix.build-mode }} 65 | # If you wish to specify custom queries, you can do so here or in a config file. 66 | # By default, queries listed here will override any specified in a config file. 
67 | # Prefix the list here with "+" to use these queries and those in the config file. 68 | 69 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 70 | # queries: security-extended,security-and-quality 71 | 72 | # If the analyze step fails for one of the languages you are analyzing with 73 | # "We were unable to automatically build your code", modify the matrix above 74 | # to set the build mode to "manual" for that language. Then modify this step 75 | # to build your code. 76 | # ℹ️ Command-line programs to run using the OS shell. 77 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 78 | - if: matrix.build-mode == 'manual' 79 | shell: bash 80 | run: | 81 | echo 'If you are using a "manual" build mode for one or more of the' \ 82 | 'languages you are analyzing, replace this with the commands to build' \ 83 | 'your code, for example:' 84 | echo ' make bootstrap' 85 | echo ' make release' 86 | exit 1 87 | 88 | - name: Perform CodeQL Analysis 89 | uses: github/codeql-action/analyze@v3 90 | with: 91 | category: "/language:${{matrix.language}}" 92 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # Project files 7 | dist/ 8 | .pytest_cache/ 9 | .venv/ 10 | .env 11 | poetry.lock 12 | ingestion/ 13 | processed_data/ 14 | images 15 | # User-specific files 16 | *.rsuser 17 | *.suo 18 | *.user 19 | *.userosscache 20 | *.sln.docstates 21 | 22 | # User-specific files (MonoDevelop/Xamarin Studio) 23 | *.userprefs 24 | 25 | # Mono auto generated files 26 | mono_crash.* 27 | 28 | # Build results 29 | [Dd]ebug/ 30 | [Dd]ebugPublic/ 31 | [Rr]elease/ 32 | [Rr]eleases/ 33 | x64/ 34 | x86/ 35 | [Ww][Ii][Nn]32/ 36 | [Aa][Rr][Mm]/ 37 | [Aa][Rr][Mm]64/ 38 | bld/ 39 | [Bb]in/ 40 | [Oo]bj/ 41 | [Ll]og/ 42 | [Ll]ogs/ 43 | 44 | # Visual Studio 2015/2017 cache/options directory 45 | .vs/ 46 | # Uncomment if you have tasks that create the project's static files in wwwroot 47 | #wwwroot/ 48 | 49 | # Visual Studio 2017 auto generated files 50 | Generated\ Files/ 51 | 52 | # MSTest test Results 53 | [Tt]est[Rr]esult*/ 54 | [Bb]uild[Ll]og.* 55 | 56 | # NUnit 57 | *.VisualState.xml 58 | TestResult.xml 59 | nunit-*.xml 60 | 61 | # Build Results of an ATL Project 62 | [Dd]ebugPS/ 63 | [Rr]eleasePS/ 64 | dlldata.c 65 | 66 | # Benchmark Results 67 | BenchmarkDotNet.Artifacts/ 68 | 69 | # .NET Core 70 | project.lock.json 71 | project.fragment.lock.json 72 | artifacts/ 73 | 74 | # ASP.NET Scaffolding 75 | ScaffoldingReadMe.txt 76 | 77 | # StyleCop 78 | StyleCopReport.xml 79 | 80 | # Files built by Visual Studio 81 | *_i.c 82 | *_p.c 83 | *_h.h 84 | *.ilk 85 | *.meta 86 | *.obj 87 | *.iobj 88 | *.pch 89 | *.pdb 90 | *.ipdb 91 | *.pgc 92 | *.pgd 93 | *.rsp 94 | *.sbr 95 | *.tlb 96 | *.tli 97 | *.tlh 98 | *.tmp 99 | *.tmp_proj 100 | *_wpftmp.csproj 101 | *.log 102 | *.tlog 103 | *.vspscc 104 | *.vssscc 105 | .builds 106 | *.pidb 107 | *.svclog 108 | *.scc 109 | 110 | # Chutzpah Test files 111 | _Chutzpah* 112 | 113 | # Visual C++ cache files 114 | ipch/ 115 | *.aps 116 | *.ncb 117 | 
*.opendb 118 | *.opensdf 119 | *.sdf 120 | *.cachefile 121 | *.VC.db 122 | *.VC.VC.opendb 123 | 124 | # Visual Studio profiler 125 | *.psess 126 | *.vsp 127 | *.vspx 128 | *.sap 129 | 130 | # Visual Studio Trace Files 131 | *.e2e 132 | 133 | # TFS 2012 Local Workspace 134 | $tf/ 135 | 136 | # Guidance Automation Toolkit 137 | *.gpState 138 | 139 | # ReSharper is a .NET coding add-in 140 | _ReSharper*/ 141 | *.[Rr]e[Ss]harper 142 | *.DotSettings.user 143 | 144 | # TeamCity is a build add-in 145 | _TeamCity* 146 | 147 | # DotCover is a Code Coverage Tool 148 | *.dotCover 149 | 150 | # AxoCover is a Code Coverage Tool 151 | .axoCover/* 152 | !.axoCover/settings.json 153 | 154 | # Coverlet is a free, cross platform Code Coverage Tool 155 | coverage*.json 156 | coverage*.xml 157 | coverage*.info 158 | 159 | # Visual Studio code coverage results 160 | *.coverage 161 | *.coveragexml 162 | 163 | # NCrunch 164 | _NCrunch_* 165 | .*crunch*.local.xml 166 | nCrunchTemp_* 167 | 168 | # MightyMoose 169 | *.mm.* 170 | AutoTest.Net/ 171 | 172 | # Web workbench (sass) 173 | .sass-cache/ 174 | 175 | # Installshield output folder 176 | [Ee]xpress/ 177 | 178 | # DocProject is a documentation generator add-in 179 | DocProject/buildhelp/ 180 | DocProject/Help/*.HxT 181 | DocProject/Help/*.HxC 182 | DocProject/Help/*.hhc 183 | DocProject/Help/*.hhk 184 | DocProject/Help/*.hhp 185 | DocProject/Help/Html2 186 | DocProject/Help/html 187 | 188 | # Click-Once directory 189 | publish/ 190 | 191 | # Publish Web Output 192 | *.[Pp]ublish.xml 193 | *.azurePubxml 194 | # Note: Comment the next line if you want to checkin your web deploy settings, 195 | # but database connection strings (with potential passwords) will be unencrypted 196 | *.pubxml 197 | *.publishproj 198 | 199 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 200 | # checkin your Azure Web App publish settings, but sensitive information contained 201 | # in these scripts will be unencrypted 202 | PublishScripts/ 203 | 204 | # NuGet Packages 205 | *.nupkg 206 | # NuGet Symbol Packages 207 | *.snupkg 208 | # The packages folder can be ignored because of Package Restore 209 | **/[Pp]ackages/* 210 | # except build/, which is used as an MSBuild target. 211 | !**/[Pp]ackages/build/ 212 | # Uncomment if necessary however generally it will be regenerated when needed 213 | #!**/[Pp]ackages/repositories.config 214 | # NuGet v3's project.json files produces more ignorable files 215 | *.nuget.props 216 | *.nuget.targets 217 | 218 | # Microsoft Azure Build Output 219 | csx/ 220 | *.build.csdef 221 | 222 | # Microsoft Azure Emulator 223 | ecf/ 224 | rcf/ 225 | 226 | # Windows Store app package directories and files 227 | AppPackages/ 228 | BundleArtifacts/ 229 | Package.StoreAssociation.xml 230 | _pkginfo.txt 231 | *.appx 232 | *.appxbundle 233 | *.appxupload 234 | 235 | # Visual Studio cache files 236 | # files ending in .cache can be ignored 237 | *.[Cc]ache 238 | # but keep track of directories ending in .cache 239 | !?*.[Cc]ache/ 240 | 241 | # Others 242 | ClientBin/ 243 | ~$* 244 | *~ 245 | *.dbmdl 246 | *.dbproj.schemaview 247 | *.jfm 248 | *.pfx 249 | *.publishsettings 250 | orleans.codegen.cs 251 | 252 | # Including strong name files can present a security risk 253 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 254 | #*.snk 255 | 256 | # Since there are multiple workflows, uncomment next line to ignore bower_components 257 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 258 | #bower_components/ 259 | 260 | # RIA/Silverlight projects 261 | Generated_Code/ 262 | 263 | # Backup & report files from converting an old project file 264 | # to a newer Visual Studio version. 
Backup files are not needed, 265 | # because we have git ;-) 266 | _UpgradeReport_Files/ 267 | Backup*/ 268 | UpgradeLog*.XML 269 | UpgradeLog*.htm 270 | ServiceFabricBackup/ 271 | *.rptproj.bak 272 | 273 | # SQL Server files 274 | *.mdf 275 | *.ldf 276 | *.ndf 277 | 278 | # Business Intelligence projects 279 | *.rdl.data 280 | *.bim.layout 281 | *.bim_*.settings 282 | *.rptproj.rsuser 283 | *- [Bb]ackup.rdl 284 | *- [Bb]ackup ([0-9]).rdl 285 | *- [Bb]ackup ([0-9][0-9]).rdl 286 | 287 | # Microsoft Fakes 288 | FakesAssemblies/ 289 | 290 | # GhostDoc plugin setting file 291 | *.GhostDoc.xml 292 | 293 | # Node.js Tools for Visual Studio 294 | .ntvs_analysis.dat 295 | node_modules/ 296 | 297 | # Visual Studio 6 build log 298 | *.plg 299 | 300 | # Visual Studio 6 workspace options file 301 | *.opt 302 | 303 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 304 | *.vbw 305 | 306 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
307 | *.vbp 308 | 309 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 310 | *.dsw 311 | *.dsp 312 | 313 | # Visual Studio 6 technical files 314 | *.ncb 315 | *.aps 316 | 317 | # Visual Studio LightSwitch build output 318 | **/*.HTMLClient/GeneratedArtifacts 319 | **/*.DesktopClient/GeneratedArtifacts 320 | **/*.DesktopClient/ModelManifest.xml 321 | **/*.Server/GeneratedArtifacts 322 | **/*.Server/ModelManifest.xml 323 | _Pvt_Extensions 324 | 325 | # Paket dependency manager 326 | .paket/paket.exe 327 | paket-files/ 328 | 329 | # FAKE - F# Make 330 | .fake/ 331 | 332 | # CodeRush personal settings 333 | .cr/personal 334 | 335 | # Python Tools for Visual Studio (PTVS) 336 | __pycache__/ 337 | *.pyc 338 | 339 | # Cake - Uncomment if you are using it 340 | # tools/** 341 | # !tools/packages.config 342 | 343 | # Tabs Studio 344 | *.tss 345 | 346 | # Telerik's JustMock configuration file 347 | *.jmconfig 348 | 349 | # BizTalk build output 350 | *.btp.cs 351 | *.btm.cs 352 | *.odx.cs 353 | *.xsd.cs 354 | 355 | # OpenCover UI analysis results 356 | OpenCover/ 357 | 358 | # Azure Stream Analytics local run output 359 | ASALocalRun/ 360 | 361 | # MSBuild Binary and Structured Log 362 | *.binlog 363 | 364 | # NVidia Nsight GPU debugger configuration file 365 | *.nvuser 366 | 367 | # MFractors (Xamarin productivity tool) working folder 368 | .mfractor/ 369 | 370 | # Local History for Visual Studio 371 | .localhistory/ 372 | 373 | # Visual Studio History (VSHistory) files 374 | .vshistory/ 375 | 376 | # BeatPulse healthcheck temp database 377 | healthchecksdb 378 | 379 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 380 | MigrationBackup/ 381 | 382 | # Ionide (cross platform F# VS Code tools) working folder 383 | .ionide/ 384 | 385 | # Fody - auto-generated XML schema 386 | FodyWeavers.xsd 387 | 388 | # VS Code files for those working on multiple tools 389 | .vscode/* 390 | 
!.vscode/settings.json 391 | !.vscode/tasks.json 392 | !.vscode/launch.json 393 | !.vscode/extensions.json 394 | *.code-workspace 395 | 396 | # Local History for Visual Studio Code 397 | .history/ 398 | 399 | # Windows Installer files from build outputs 400 | *.cab 401 | *.msi 402 | *.msix 403 | *.msm 404 | *.msp 405 | 406 | # JetBrains Rider 407 | *.sln.iml 408 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Api", 9 | "type": "debugpy", 10 | "request": "launch", 11 | "module": "main" 12 | }, 13 | { 14 | "name": "App", 15 | "type": "debugpy", 16 | "request": "launch", 17 | "module": "streamlit", 18 | "cwd": "${workspaceFolder}", 19 | "env": { 20 | "PYTHONPATH": "${workspaceFolder}/src" 21 | }, 22 | "args": [ 23 | "run", 24 | "${cwd}/src/app/app.py", 25 | "--server.port", 26 | "8000" 27 | ] 28 | } 29 | ], 30 | "compounds": [ 31 | { 32 | "name": "Server/Client", 33 | "configurations": [ 34 | "App", 35 | "Api" 36 | ] 37 | } 38 | ] 39 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.globalModuleInstallation": true, 3 | "python.languageServer": "Pylance", 4 | "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python.exe", 5 | "python.analysis.completeFunctionParens": true, 6 | "python.analysis.extraPaths": [ 7 | "${workspaceFolder}/bin/python", 8 | "${workspaceFolder}/src" 9 | ], 10 | "python.analysis.inlayHints.callArgumentNames": "all", 11 | "python.analysis.inlayHints.variableTypes": true, 12 | 
"python.analysis.inlayHints.pytestParameters": true, 13 | "python.analysis.inlayHints.functionReturnTypes": true, 14 | "python.testing.pytestEnabled": true, 15 | "python.testing.unittestEnabled": false, 16 | "python.autoComplete.extraPaths": [ 17 | "${workspaceFolder}/.venv/bin", 18 | ], 19 | "python.testing.pytestArgs": [ 20 | "${workspaceFolder}/tests" 21 | ], 22 | "python.analysis.typeCheckingMode": "standard" 23 | "python.testing.unittestEnabled": false, 24 | "python.autoComplete.extraPaths": [ 25 | "${workspaceFolder}/.venv/bin", 26 | ], 27 | "python.testing.pytestArgs": [ 28 | "${workspaceFolder}/tests" 29 | ], 30 | "python.analysis.typeCheckingMode": "standard" 31 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project 2 | 3 | The deepRAG project seeks to extend classic retrieval using vector databases to optionally utilize other search technologies for extended results. 4 | This will initially be done by utilizing graph databases to help answer queries related to content summaries that aren't directly related to snippets of content. 5 | 6 | The architecture of this project is located in [ARCHITECTURE.MD](./docs/ARCHITECTURE.md). 7 | 8 | ## Running the project 9 | 10 | Run the following command. 11 | ```poetry install``` 12 | 13 | Then, select the interpreter in the poetry cache. 14 | 15 | ## Contributing 16 | 17 | This project welcomes contributions and suggestions. 
Most contributions require you to agree to a 18 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 19 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 20 | 21 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 22 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 23 | provided by the bot. You will only need to do this once across all repos using our CLA. 24 | 25 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 26 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 27 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 28 | 29 | ## Trademarks 30 | 31 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 32 | trademarks or logos is subject to and must follow 33 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 34 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 35 | Any use of third-party trademarks or logos are subject to those third-party's policies. 
36 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 
22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. 
Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /docs/ARCHITECTURE.md: -------------------------------------------------------------------------------- 1 | # Architecture Reference Docs 2 | 3 | This document is currently a working document. The materials located here are subject to change. 4 | 5 | ## Logical Architecture 6 | 7 | ```mermaid 8 | graph TD 9 | subgraph AzureServices 10 | search[Azure AI Search] 11 | AOAI[Azure AI Services] 12 | app[Azure App Services] 13 | storage[Azure Storage] 14 | graphDb[Cosmos Db] 15 | end 16 | 17 | subgraph Agents 18 | rag[Vector RAG Agent] 19 | graphRag[Graph RAG Agent] 20 | end 21 | 22 | rag --> search --> storage --> app --> AOAI 23 | graphRag --> graphDb --> app --> AOAI 24 | app --> rag --> graphRag --> AOAI 25 | 26 | ``` 27 | 28 | ## Application Architecture 29 | 30 | ```mermaid 31 | sequenceDiagram 32 | autonumber 33 | participant rag as RAG Agent 34 | participant graphRag as Graph RAG Agent 35 | participant search as Azure AI Search 36 | participant AOAI as Azure AI Services 37 | participant app as Azure App Services 38 | participant storage as Azure Storage 39 | participant graphDb as Azure Cosmos Db 40 | 41 | user ->> app: prompt for information 42 | app ->> +rag: invoke vector rag agent to enhance prompt 43 | rag ->> search: grab vectorized content through semantic search. 
44 | search ->> storage: search through indexed content on storage 45 | storage-->>search: return relevant indexed documents 46 | search-->>rag: return relevant document chunks/partitions. 47 | rag-->>app: return enhanced prompt 48 | deactivate rag 49 | 50 | app ->> +graphRag: invoke graphRag agent to further enhance prompt. 51 | graphRag ->> graphDb: query relations from entity edges of rag retrieved entities 52 | graphDb-->>graphRag: return names of edges. 53 | graphRag ->> AOAI: request to filter related entities and generate a query from user prompt. 54 | AOAI-->>graphRag: return a structure query for execution. 55 | graphRag ->> graphDb: search with AOAI generated graph query 56 | graphDb-->>graphRag: return relevant entities. 57 | deactivate graphRag 58 | 59 | app ->> AOAI: send prompt enhanced by classic and graph enhanced RAG. 60 | AOAI-->app: return LLM response 61 | app -->> app: sanitize LLM response for correctness 62 | app-->>user: return sanitized response 63 | 64 | ``` 65 | 66 | ## Software Architecture 67 | 68 | ```mermaid 69 | sequenceDiagram 70 | autonumber 71 | actor u as User 72 | participant vRAG as Vector Agent 73 | participant gRAG as Graph Agent 74 | participant ai as LLM 75 | participant gDB as Graph Database 76 | participant vDB as Vector Database 77 | 78 | activate gRAG 79 | gRAG->>gDB: query graph ontology 80 | activate gDB 81 | gDB-->>gRAG: return nodes and edges 82 | deactivate gDB 83 | gRAG->>gRAG: Serialize ontology as rdf. 
84 | gRAG->>gRAG: Populate system prompt with serialized ontology 85 | deactivate gRAG 86 | critical Get user intent 87 | activate u 88 | activate vRAG 89 | u->>vRAG: Provide initial user prompt 90 | vRAG->>ai: Identify user intent from user prompt 91 | activate ai 92 | ai-->>vRAG: Return intent as goal 93 | deactivate ai 94 | deactivate vRAG 95 | option Gather initial documents 96 | activate vRAG 97 | vRAG->>vDB: Search k top results 98 | vDB-->>vRAG: Return k top results 99 | vRAG->>vRAG: Enhance prompt with results 100 | deactivate vRAG 101 | option Generate graph query 102 | activate gRAG 103 | gRAG->>ai: Generate graph query based on k documents from vRAG and ontology. 104 | activate ai 105 | ai-->>gRAG: return graph query for target language. 106 | deactivate ai 107 | deactivate gRAG 108 | option Execute graph 109 | activate gRAG 110 | gRAG->>gDB: Execute graph query 111 | activate gDB 112 | gDB-->>gRAG: Return graph query results 113 | deactivate gDB 114 | gRAG->>gRAG: store graph response 115 | deactivate gRAG 116 | option Generate additional prompts 117 | activate vRAG 118 | vRAG->>ai: Use results of graph query to create n number of related prompts. 119 | ai-->>vRAG: return a question for each related node in graph query results. 120 | vRAG->>vDB: search top k results for each generated prompt. 121 | vDB-->>vRAG: return top k related documents. 122 | vRAG->>vRAG: enhance prompt with vRAG results. 123 | deactivate vRAG 124 | end 125 | activate vRAG 126 | vRAG->>ai: submit enhanced user prompt to LLM 127 | activate ai 128 | ai-->>vRAG: return response to user prompt 129 | deactivate ai 130 | vRAG-->>u: return response to user 131 | deactivate u 132 | deactivate vRAG 133 | 134 | ``` 135 | -------------------------------------------------------------------------------- /docs/Example Taxonomy.md: -------------------------------------------------------------------------------- 1 | # Key Entities 2 | 1. Brand 3 | 2. Product 4 | 3. Campaign 5 | 4. 
Guideline 6 | 5. Market 7 | 6. Competitor 8 | 7. Influencer 9 | 8. Advertising Case 10 | 9. Aesthetic 11 | 10. Slogan 12 | 13 | # Attributes 14 | • Brand: 15 | o Name 16 | o Elements 17 | o Target Consumer 18 | o Slogan 19 | o Color Palette 20 | o Logo Usage 21 | o Accent Usage 22 | o Storyboard Guidelines 23 | o Social Media Guidelines 24 | 25 | • Product: 26 | o Name 27 | o Type (e.g. SaaS, On-Premise) 28 | o Popularity 29 | o Market (e.g., UK, US) 30 | 31 | • Campaign: 32 | o Name 33 | o Focus (e.g., Digital Experience, AI) 34 | o Market 35 | o Creative Assets 36 | o Concept 37 | o Toolkit 38 | 39 | • Guideline: 40 | o Type (e.g., Digital Asset, Tone of Voice Usage, Logo Usage) 41 | o Details 42 | 43 | • Market: 44 | o Name 45 | o Trends 46 | o Competitors 47 | o Influencers 48 | 49 | • Competitor: 50 | o Name 51 | o Products 52 | o Campaigns 53 | 54 | • Influencer: 55 | o Name 56 | o Market 57 | o Segment (e.g., Coffee, Lifestyle) 58 | 59 | • Advertising Case: 60 | o Market 61 | o Details 62 | 63 | • Aesthetic: 64 | o Style (e.g., Minimalist, Modern) 65 | o Target Audience 66 | 67 | # Relationships 68 | • Brand: 69 | o "has_product" -> Product 70 | o "runs_campaign" -> Campaign 71 | o "follows_guideline" -> Guideline 72 | o "targets_market" -> Market 73 | o "competes_with" -> Competitor 74 | o "collaborates_with" -> Influencer 75 | o "featured_in_advertising_case" -> Advertising Case 76 | • Product: 77 | o "belongs_to_brand" -> Brand 78 | o "popular_in_market" -> Market 79 | o "competes_with" -> Competitor 80 | o "follows_guideline" -> Guideline 81 | • Campaign: 82 | o "belongs_to_brand" -> Brand 83 | o "targets_market" -> Market 84 | o "uses_guideline" -> Guideline 85 | o "includes_toolkit" -> Toolkit 86 | 87 | • Guideline: 88 | o "applies_to_brand" -> Brand 89 | o "applies_to_product" -> Product 90 | o "applies_to_campaign" -> Campaign 91 | • Market: 92 | o "includes_product" -> Product 93 | o "includes_competitor" -> Competitor 94 | o "includes_influencer" 
-> Influencer 95 | 96 | • Competitor: 97 | o "competes_with_brand" -> Brand 98 | o "competes_with_product" -> Product 99 | 100 | • Influencer: 101 | o "collaborates_with_brand" -> Brand 102 | • Recipe: 103 | o "belongs_to_product" -> Product 104 | o "follows_aesthetic" -> Aesthetic 105 | • Translation: 106 | o "used_by_brand" -> Brand 107 | • Advertising Case: 108 | o "features_brand" -> Brand 109 | • Sustainability Claim: 110 | o "promoted_by_brand" -> Brand 111 | -------------------------------------------------------------------------------- /log.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/deepRAG/2c39bf17ecc2cb4fb179bab9663e841a22e0699e/log.txt -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = false 3 | path = "./" 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "deepRAG" 3 | version = "0.1.0" 4 | description = "RAG enhanced though multiple search types." 
class Agent():
    """Common base for chat agents.

    Seeds the conversation transcript from the configured persona and, when
    one is configured, the canned assistant greeting. Subclasses implement
    ``run`` to drive the actual model interaction.
    """

    def __init__(self,
                 logger: Logger,
                 agent_configuration: AgentConfiguration
                 ) -> None:
        self._logger: Logger = logger
        self._agent_configuration: AgentConfiguration = agent_configuration
        # Always start with the system persona; add the greeting as an
        # assistant turn only when an initial message is configured.
        seeded: list = [
            {"role": "system", "content": agent_configuration.persona}
        ]
        if agent_configuration.initial_message is not None:
            seeded.append(
                {"role": "assistant", "content": agent_configuration.initial_message}
            )
        self._conversation: list = seeded

    @abstractmethod
    def run(self, user_input: str | None, conversation=None, stream=False) -> AgentResponse:
        """Run one agent turn for *user_input*; subclasses must override."""
6 | You have access to Azure AI Search, which provides semantic search capabilities using natural language queries and metadata filtering. 7 | The data you access is organized according to the ontology below. 8 | As a smart research assistant, your goal is to identify the best relevant information to answer the user's question. 9 | The initial search result may include hints on the related content. Use hint to start a follow-up search to find related content if neccessary. 10 | Engage with the user to understand their needs, conduct the research, and respond with a curated list of content along with explanations on how they can be used to answer the user's question. 11 | Your final response should be in JSON format like this: 12 | { 13 | "overall_explanation": "The following headlines are inspired by the concept of customization and personal expression, which resonates well with young people. These suggestions are derived from the context of launching a product that allows users to make it their own.", 14 | "11_LAUNCHING_MAKE_YOUR_WORLD_2022-11-17/page_7.png": "This file provides a context for launching a product with a focus on personalization and making it your own.", 15 | "Brand_Context/page_18.png": "This file offers insights into brand context and how to position a product in a way that appeals to young people by emphasizing individuality and customization." 16 | } 17 | Just output the JSON content in your final response and do not add any other comment. 18 | 19 | # Ontology 20 | EntityClasses: 21 | - Brand: 22 | Description: "An entity representing a company's identity, values, and image." 23 | Attributes: 24 | - Name: "The official name of the brand." 25 | - TargetConsumer: "The primary demographic the brand aims to reach." 26 | - Slogan: "A memorable phrase representing the brand's essence." 27 | - ColorPalette: "The set of colors used for brand identity." 28 | - LogoUsage: "Guidelines for using the brand's logo." 
29 | - StoryboardGuidelines: "Rules for visual storytelling." 30 | - SocialMediaGuidelines: "Instructions for maintaining brand consistency on social media." 31 | 32 | - Product: 33 | Description: "An item or service offered by the brand." 34 | Attributes: 35 | - Name: "The official name of the product." 36 | - Type: "The category of the product, e.g., SaaS, On-Premise." 37 | - Popularity: "The level of market acceptance and usage." 38 | - Market: "The primary geographic or demographic market for the product." 39 | 40 | - Campaign: 41 | Description: "A series of coordinated activities aimed at promoting a product or brand." 42 | Attributes: 43 | - Name: "The official name of the campaign." 44 | - Focus: "The main theme or objective, e.g., Digital Experience, AI." 45 | - Market: "The target market for the campaign." 46 | - CreativeAssets: "Visual and textual materials used in the campaign." 47 | - Concept: "The central idea or message of the campaign." 48 | - Toolkit: "Resources and tools used to execute the campaign." 49 | 50 | - Guideline: 51 | Description: "A set of rules or instructions for maintaining brand consistency." 52 | Attributes: 53 | - Type: "The category of the guideline, e.g., Digital Asset, Tone of Voice Usage, Logo Usage." 54 | - Details: "Specific instructions and details." 55 | 56 | - Market: 57 | Description: "A defined geographic or demographic area where products are sold." 58 | Attributes: 59 | - Name: "The name of the market." 60 | - Trends: "Current market trends and dynamics." 61 | - Competitors: "Main competitors within the market." 62 | - Influencers: "Key influencers relevant to the market." 63 | 64 | - Competitor: 65 | Description: "A company or product competing with the brand." 66 | Attributes: 67 | - Name: "The name of the competitor." 68 | - Products: "Products offered by the competitor." 69 | - Campaigns: "Promotional campaigns run by the competitor." 
70 | 71 | - Influencer: 72 | Description: "A person who can influence the brand's target audience." 73 | Attributes: 74 | - Name: "The name of the influencer." 75 | - Market: "The market segment the influencer operates in." 76 | - Segment: "Specific niche or category, e.g., Coffee, Lifestyle." 77 | 78 | - AdvertisingCase: 79 | Description: "A documented instance of brand advertising." 80 | Attributes: 81 | - Market: "The market where the advertising case is relevant." 82 | - Details: "Specific details about the advertising case." 83 | 84 | - Aesthetic: 85 | Description: "The visual style and design elements used by the brand." 86 | Attributes: 87 | - Style: "The design style, e.g., Minimalist, Modern." 88 | - TargetAudience: "The audience for whom the aesthetic is designed." 89 | 90 | - Recipe: 91 | Description: "A detailed set of instructions for creating a product." 92 | Attributes: 93 | - Name: "The name of the recipe." 94 | - Ingredients: "The components required for the recipe." 95 | - Steps: "The sequence of actions to complete the recipe." 96 | 97 | - Claim: 98 | Description: "A statement made by the brand to promote its values or product benefits." 99 | Attributes: 100 | - Type: "The category of claim, e.g., Sustainability, Quality, Safety." 101 | - Details: "Specific details of the claim." 102 | 103 | - Slogan: 104 | Description: "A memorable phrase used in marketing to represent the brand's essence." 105 | Attributes: 106 | - Text: "The actual slogan text." 107 | - Language: "The language in which the slogan is written." 108 | - UsageContext: "The context or situation in which the slogan is used." 
109 | 110 | Relationships: 111 | - Brand: 112 | - has_product: Product 113 | - runs_campaign: Campaign 114 | - follows_guideline: Guideline 115 | - targets_market: Market 116 | - competes_with: Competitor 117 | - collaborates_with: Influencer 118 | - featured_in_advertising_case: AdvertisingCase 119 | - makes_claim: Claim 120 | - uses_slogan: Slogan 121 | 122 | - Product: 123 | - belongs_to_brand: Brand 124 | - popular_in_market: Market 125 | - competes_with: Competitor 126 | - follows_guideline: Guideline 127 | - includes_recipe: Recipe 128 | 129 | - Campaign: 130 | - belongs_to_brand: Brand 131 | - targets_market: Market 132 | - uses_guideline: Guideline 133 | - includes_toolkit: Toolkit 134 | 135 | - Guideline: 136 | - applies_to_brand: Brand 137 | - applies_to_product: Product 138 | - applies_to_campaign: Campaign 139 | 140 | - Market: 141 | - includes_product: Product 142 | - includes_competitor: Competitor 143 | - includes_influencer: Influencer 144 | 145 | - Competitor: 146 | - competes_with_brand: Brand 147 | - competes_with_product: Product 148 | 149 | - Influencer: 150 | - collaborates_with_brand: Brand 151 | 152 | - AdvertisingCase: 153 | - features_brand: Brand 154 | 155 | - Recipe: 156 | - belongs_to_product: Product 157 | - follows_aesthetic: Aesthetic 158 | 159 | - Claim: 160 | - made_by_brand: Brand 161 | 162 | - Slogan: 163 | - used_by_brand: Brand 164 | tools: 165 | - name: "search" 166 | description: "Semantic Search Engine to search for content" 167 | type: "function" 168 | parameters: 169 | type: "object" 170 | properties: 171 | - search_query: 172 | type: "string" 173 | description: "Natural language query to search for content" 174 | required: 175 | - "search_query" 176 | -------------------------------------------------------------------------------- /src/agents/smart_agent/smart_agent.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import inspect 3 | import json 4 | from logging 
class Smart_Agent(Agent):
    """Chat agent that answers questions by retrieving data from a vector
    database (exposed to the model as the "search" tool) and using the Azure
    OpenAI API to generate responses from the retrieved text and images.
    """

    def __init__(
        self,
        logger: Logger,
        agent_configuration: AgentConfiguration,
        client: AzureOpenAI,
        search_vector_function: SearchVectorFunction,
        init_history: List[dict],
        fs: fsspec.AbstractFileSystem,
        max_run_per_question: int = 10,
        max_question_to_keep: int = 3,
        max_question_with_detail_hist: int = 1,
        image_directory: str = "images",
    ) -> None:
        """Create the agent.

        :param client: Azure OpenAI client used for chat completions.
        :param search_vector_function: supplies the "search" tool implementation.
        :param init_history: previously persisted conversation; replaces the
            persona-seeded conversation when non-empty.
        :param fs: filesystem used to load result images (local, az, ...).
        :param max_run_per_question: cap on model/tool round-trips per question.
        :param max_question_to_keep: questions retained in history at all.
        :param max_question_with_detail_hist: questions whose tool detail
            messages are retained.
        :param image_directory: directory prefix joined onto image paths
            returned by search.
        """
        super().__init__(logger=logger, agent_configuration=agent_configuration)

        self.__client: AzureOpenAI = client
        self.__max_run_per_question: int = max_run_per_question
        self.__max_question_to_keep: int = max_question_to_keep
        self.__max_question_with_detail_hist: int = max_question_with_detail_hist
        # Tool schemas advertised to the model, built from configuration.
        self.__functions_spec: List[ChatCompletionToolParam] = [
            tool.to_openai_tool() for tool in self._agent_configuration.tools]
        if init_history:  # resume a persisted conversation when one exists
            self._conversation = init_history
        # Dispatch table: tool name chosen by the model -> local callable.
        self._functions_list = {
            "search": search_vector_function.search
        }
        self.__fs: fsspec.AbstractFileSystem = fs
        self.__image_directory: str = image_directory

    def clean_up_history(self, max_q_with_detail_hist=1, max_q_to_keep=2) -> None:
        """Trim the conversation to bound prompt size.

        Messages belonging to questions older than *max_q_to_keep* are dropped
        entirely; for questions between the detail window and the keep window,
        only non-user/non-assistant messages with empty content are dropped.
        """
        question_count = 0
        removal_indices = []

        # Walk newest-to-oldest; index 0 (the system prompt) is never touched.
        for idx in range(len(self._conversation) - 1, 0, -1):
            message = dict(self._conversation[idx])

            if message.get("role") == "user":
                question_count += 1

            if question_count >= max_q_with_detail_hist and question_count < max_q_to_keep:
                # NOTE(review): the empty-content check means tool messages
                # that carry content survive this pass -- confirm intended.
                if message.get("role") != "user" \
                        and message.get("role") != "assistant" \
                        and len(message.get("content") or []) == 0:
                    removal_indices.append(idx)

            if question_count >= max_q_to_keep:
                removal_indices.append(idx)

        # Indices were collected in descending order, so deleting in this
        # order never shifts a yet-to-be-deleted index.
        for index in removal_indices:
            del self._conversation[index]

    def reset_history_to_last_question(self) -> None:
        """Drop every message recorded after the most recent user question."""
        # Reads walk from the end while pop() also removes from the end, so
        # the index and the shrinking list stay in sync.
        for i in range(len(self._conversation) - 1, -1, -1):
            message = dict(self._conversation[i])

            if message.get("role") == "user":
                break

            self._conversation.pop()

    def run(self, user_input: str | None, conversation=None, stream=False) -> AgentResponse:
        """Answer *user_input*, looping model <-> tool calls until the model
        produces a final message or the per-question run cap is reached.
        """
        if user_input is None or len(user_input) == 0:
            # No input: replay the canned greeting.
            # NOTE(review): assumes index 1 holds the assistant
            # initial_message -- confirm for configurations without one.
            return AgentResponse(conversation=self._conversation, response=self._conversation[1]["content"])

        if conversation is not None and len(conversation) > 0:
            self._conversation = conversation

        run_count = 0

        self._conversation.append({"role": "user", "content": user_input})
        self.clean_up_history(
            max_q_with_detail_hist=self.__max_question_with_detail_hist, max_q_to_keep=self.__max_question_to_keep)

        while True:
            response_message: ChatCompletionMessage

            if run_count >= self.__max_run_per_question:
                # Give up on this question rather than loop on tools forever.
                self._logger.debug(
                    msg=f"Need to move on from this question due to max run count reached ({run_count} runs)")
                response_message = ChatCompletionMessage(
                    role="assistant",
                    content="I am unable to answer this question at the moment, please ask another question."
                )
                break

            response: ChatCompletion = self.__client.chat.completions.create(
                model=self._agent_configuration.model,
                messages=self._conversation,
                tools=self.__functions_spec,
                tool_choice='auto',
                temperature=0.2,
            )

            run_count += 1
            response_message = response.choices[0].message

            if response_message.content is None:
                response_message.content = ""

            tool_calls: List[ChatCompletionMessageToolCall] | None = response_message.tool_calls

            if tool_calls:
                # Record the assistant's tool request, execute the tools, then
                # ask the model again with the tool output in context.
                self._conversation.append(response_message)
                self.__verify_openai_tools(tool_calls=tool_calls)
                continue
            else:
                break

        return AgentResponse(
            streaming=stream,
            conversation=self._conversation,
            response=response_message.content
        )

    def __check_args(self, function, args) -> bool:
        """Return True when *args* match *function*'s signature: no unknown
        names, and every parameter without a default is supplied."""
        sig: inspect.Signature = inspect.signature(obj=function)
        params: MappingProxyType[str, inspect.Parameter] = sig.parameters

        for name in args:
            if name not in params:
                return False

        for name, param in params.items():
            if param.default is param.empty and name not in args:
                return False

        return True

    def __verify_openai_tools(self, tool_calls: List[ChatCompletionMessageToolCall]) -> None:
        """Validate and execute each tool call, appending tool results to the
        conversation. On any invalid call the assistant message is popped and
        processing stops so the model retries on the next loop iteration."""
        for tool_call in tool_calls:
            function_name: str = tool_call.function.name
            self._logger.debug(
                msg=f"Recommended Function call: {function_name}")

            # verify function exists
            if function_name not in self._functions_list:
                self._logger.debug(
                    msg=f"Function {function_name} does not exist, retrying")
                self._conversation.pop()
                break

            function_to_call = self._functions_list[function_name]

            try:
                function_args = json.loads(s=tool_call.function.arguments)
            except json.JSONDecodeError as e:
                self._logger.error(msg=e)
                self._conversation.pop()
                break

            if self.__check_args(function=function_to_call, args=function_args) is False:
                self._conversation.pop()
                break
            else:
                function_response = function_to_call(**function_args)

                if function_name == "search":
                    # Search results are expanded into multimodal content parts.
                    function_response = self.__generate_search_function_response(
                        function_response=function_response)

                self._conversation.append(
                    {
                        "tool_call_id": tool_call.id,
                        "role": "tool",
                        "name": function_name,
                        "content": function_response,
                    }
                )

    def __generate_search_function_response(self, function_response):
        """Convert raw search hits into multimodal chat content parts:
        a file-name text part, a base64 data-URL image part, and a HINT text
        part with related content, per hit."""
        search_function_response = []

        for item in function_response:
            image_path = os.path.join(self.__image_directory, item['image_path'])
            related_content = item['related_content']

            image_file: str | bytes = self.__fs.read_bytes(path=image_path)
            # read_bytes should return bytes; guard against str-returning
            # filesystem implementations. (Removed a dead first assignment
            # that was unconditionally overwritten here.)
            image_bytes: bytes = image_file.encode(
                encoding='utf-8') if isinstance(image_file, str) else image_file
            base64_image: str = base64.b64encode(
                s=image_bytes).decode(encoding='utf-8')
            # Fixed: Logger.debug takes a %-style format string, not
            # print-style positional fragments.
            self._logger.debug("image_path: %s", image_path)

            search_function_response.append(
                {"type": "text", "text": f"file_name: {image_path}"})
            search_function_response.append({"type": "image_url", "image_url": {
                "url": f"data:image/jpeg;base64,{base64_image}"}})
            search_function_response.append(
                {"type": "text", "text": f"HINT: The following kind of content might be related to this topic\n: {related_content}"})

        return search_function_response
"""The main server file for the LangChain server."""
from typing import Any
import uuid
import fsspec
from openai import AzureOpenAI
from fsspec.utils import get_protocol
from fastapi import FastAPI
from langserve import add_routes
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from models import Settings, AgentResponse
from utils import SmartAgentFactory
from agents import Smart_Agent
from functions import SearchVectorFunction
from logging import Logger
import ast
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential

# Application settings, loaded once at import time and shared by
# deep_rag_search and the __main__ bootstrap below.
settings: Settings = Settings(_env_file=".env")  # type: ignore


def deep_rag_search(input) -> Any | str | None:
    """Run one deep-RAG turn.

    *input* is the langserve payload: a dict carrying "question" and
    "session_id". A Smart_Agent is rebuilt for the session, run on the
    question, and its updated history persisted so the next call with the
    same session_id resumes the conversation.
    """
    question = input['question']
    session_id = input['session_id']
    # The prompt file may live on any fsspec-supported backend (local, az, ...).
    protocol: str = get_protocol(url=settings.smart_agent_prompt_location)
    fs: fsspec.AbstractFileSystem = fsspec.filesystem(protocol=protocol)
    agent: Smart_Agent = SmartAgentFactory.create_smart_agent(
        fs=fs, settings=settings, session_id=session_id)
    agent_response: AgentResponse = agent.run(
        user_input=question, conversation=[], stream=False)
    SmartAgentFactory.persist_history(smart_agent=agent, session_id=session_id, settings=settings)

    return agent_response.response


class Server:
    """Mounts the /vectorRAG and /deepRAG langserve routes on a FastAPI app."""

    def __init__(self, app: FastAPI, searchVectorFunction: SearchVectorFunction) -> None:
        self.app = app
        self.searchVectorFunction = searchVectorFunction
        # Plain vector search: the payload is forwarded as the query.
        add_routes(
            app=app,
            runnable=RunnableLambda(
                func=lambda input: self.vector_rag_search(input=input)),
            path="/vectorRAG",
        )
        # Deep RAG: agent-driven, session-aware retrieval.
        add_routes(
            app=app,
            runnable=RunnablePassthrough() | RunnableLambda(
                func=lambda input: deep_rag_search(input=input)),
            path="/deepRAG",
        )

    def vector_rag_search(self, question: str) -> Any | str | None:
        """Return raw vector-search results for *question*."""
        return self.searchVectorFunction.search(search_query=question)


if __name__ == "__main__":
    import uvicorn

    app = FastAPI(
        title="LangChain Server",
        version="1.0",
        description="A simple api server using Langchain's Runnable interfaces",
    )

    # Fixed: reuse the module-level `settings` instead of constructing a
    # second, identical Settings instance here.
    openai_client = AzureOpenAI(
        azure_endpoint=settings.openai_endpoint,
        api_key=settings.openai_key,
        api_version=settings.openai_api_version
    )

    search_client = SearchClient(
        endpoint=settings.azure_search_endpoint,
        index_name=settings.azure_search_index_name,
        credential=AzureKeyCredential(key=settings.azure_search_key)
    )

    # NOTE(review): Logger(name=...) instantiates a logger outside the
    # logging hierarchy; logging.getLogger would honour global configuration
    # -- confirm intent before changing.
    search_vector_function = SearchVectorFunction(
        logger=Logger(name="search_vector_function"),
        search_client=search_client,
        client=openai_client,
        model=settings.openai_embedding_deployment,
        image_directory=settings.smart_agent_image_path,
        storage_account_key=settings.azure_storage_account_key,
        storage_account_name=settings.azure_storage_account_name,
        container_name=settings.azure_container_name
    )

    server = Server(app=app, searchVectorFunction=search_vector_function)
    uvicorn.run(app=server.app, host=settings.api_host, port=settings.api_port)
import uvicorn
from fastapi import FastAPI
from models import Settings
from functions import SearchVectorFunction
from logging import Logger
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential
from openai import AzureOpenAI
from api import Server

# Entry-point script: builds the FastAPI app, the Azure OpenAI and Azure AI
# Search clients, the search tool, and starts the uvicorn server.
# NOTE(review): this mirrors the __main__ block in api.py -- confirm whether
# both entry points are intentionally maintained.
app = FastAPI(
    title="LangChain Server",
    version="1.0",
    description="A simple api server using Langchain's Runnable interfaces",
)

# All endpoints, keys, and deployment names come from the .env file.
settings: Settings = Settings(_env_file=".env")  # type: ignore

# Chat/embedding client for the Azure OpenAI service.
openai_client = AzureOpenAI(
    azure_endpoint=settings.openai_endpoint,
    api_key=settings.openai_key,
    api_version=settings.openai_api_version
)

# Azure AI Search client bound to the configured index.
search_client = SearchClient(
    endpoint=settings.azure_search_endpoint,
    index_name=settings.azure_search_index_name,
    credential=AzureKeyCredential(key=settings.azure_search_key)
)

# The "search" tool implementation shared by both RAG routes.
# NOTE(review): Logger(name=...) bypasses the logging hierarchy; consider
# logging.getLogger -- confirm intent.
search_vector_function = SearchVectorFunction(
    logger=Logger(name="search_vector_function"),
    search_client=search_client,
    client=openai_client,
    model=settings.openai_embedding_deployment,
    image_directory=settings.smart_agent_image_path,
    storage_account_key=settings.azure_storage_account_key,
    storage_account_name=settings.azure_storage_account_name,
    container_name=settings.azure_container_name

)

# Mount the routes and serve; uvicorn.run blocks until shutdown.
server = Server(app=app, searchVectorFunction=search_vector_function)
uvicorn.run(app=server.app, host=settings.api_host, port=settings.api_port)
5 | authors = ["Tyler Kendrick "] 6 | license = "MIT" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.11" 11 | langserve = "^0.2.2" 12 | uvicorn = "^0.23.2" 13 | pydantic_settings = "^2.3.4" 14 | redis = "^5.0.7" 15 | models = { path = "../models", develop = true } 16 | utils = { path = "../utils", develop = true } 17 | agents = { path = "../agents", develop = true } 18 | distributedcache = { path = "../distributed_cache", develop = true } 19 | services = { path = "../services", develop = true } 20 | fastapi = "^0.112.0" 21 | langchain-community = "^0.2.11" 22 | azure-identity = "^1.17.1" 23 | langchain-openai = "^0.1.20" 24 | azure-search-documents = "^11.5.1" 25 | 26 | [tool.poetry.group.dev.dependencies] 27 | env = "^0.1.0" 28 | 29 | [build-system] 30 | requires = ["poetry-core"] 31 | build-backend = "poetry.core.masonry.api" 32 | -------------------------------------------------------------------------------- /src/app/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/deepRAG/2c39bf17ecc2cb4fb179bab9663e841a22e0699e/src/app/README.md -------------------------------------------------------------------------------- /src/app/app.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import streamlit as st 3 | import os 4 | import json 5 | import pandas as pd 6 | import yaml 7 | import fsspec 8 | from typing import LiteralString 9 | from streamlit_extras.add_vertical_space import add_vertical_space 10 | from plotly.graph_objects import Figure as PlotlyFigure 11 | from matplotlib.figure import Figure as MatplotFigure 12 | from langserve.client import RemoteRunnable 13 | from utils import SmartAgentFactory 14 | from agents import Smart_Agent 15 | from models import ( 16 | AgentConfiguration, 17 | agent_configuration_from_dict, 18 | AgentResponse, 19 | Settings) 20 | import fsspec 21 | from fsspec.utils 
import get_protocol 22 | from logging import Logger 23 | 24 | settings: Settings = Settings(_env_file=".env") # type: ignore 25 | protocol: str = get_protocol(url=settings.smart_agent_prompt_location) 26 | fs: fsspec.AbstractFileSystem = fsspec.filesystem(protocol=protocol) 27 | logger=Logger(name="Frontend") 28 | with fs.open(path=settings.smart_agent_prompt_location, mode="r", encoding="utf-8") as file: 29 | agent_config_data = yaml.safe_load(stream=file) 30 | agent_config: AgentConfiguration = agent_configuration_from_dict( 31 | data=agent_config_data) 32 | 33 | if 'session_id' in st.session_state: 34 | session_id= st.session_state['session_id'] 35 | else: 36 | session_id = str(object=uuid.uuid4()) 37 | st.session_state['session_id'] = session_id 38 | # agent: Smart_Agent = SmartAgentFactory.create_smart_agent(fs=fs, settings=settings, session_id=session_id) 39 | remoteAgent = RemoteRunnable(f"http://{settings.api_host}:{settings.api_port}/deepRAG") 40 | st.set_page_config( 41 | layout="wide", page_title="Smart Research Copilot Demo Application with Multi-Modal AI", page_icon="🧠") 42 | style: LiteralString = f""" 43 | 49 | """ 50 | st.markdown(body=style, unsafe_allow_html=True) 51 | 52 | MAX_HIST = 3 53 | # Sidebar contents 54 | with st.sidebar: 55 | 56 | st.title(body='Deep RAG AI Copilot') 57 | st.markdown(body=''' 58 | ''') 59 | add_vertical_space(num_lines=5) 60 | if st.button(label='Clear Chat'): 61 | 62 | if 'history' in st.session_state: 63 | st.session_state['history'] = [] 64 | if 'session_id' in st.session_state: 65 | del st.session_state['session_id'] 66 | 67 | st.markdown(body=""" 68 | ### Sample Questions: 69 | 1. Suggest alternative headlines inspired from 'hack it the way you like it' for a new coffee concentrate product launch in Australia targeting young people 70 | 2. What is the slogan of NESCAFE? 71 | 3. Three separate steps of preparing an iced latte using NESCAFE coffee concentrate 72 | 4. 
Nescaf brand guidelines for creating TikTok content for Gen Z with a focus on natural and authentic imagery 73 | 5. Nescaf brand guidelines for creating a storyboard for an iced latte coffee recipe focusing on innovation 74 | """) 75 | st.write('') 76 | st.write('') 77 | st.write('') 78 | 79 | if 'history' not in st.session_state: 80 | st.session_state['history'] = [] 81 | if 'input' not in st.session_state: 82 | st.session_state['input'] = "" 83 | if 'question_count' not in st.session_state: 84 | st.session_state['question_count'] = 0 85 | 86 | history = st.session_state['history'] 87 | for message in history: 88 | if message.get("role") == "user": 89 | st.markdown(body=message.get("content")) 90 | else: 91 | with st.chat_message(name="assistant"): 92 | agent_response = message.get("content") 93 | json_response = None 94 | if "overall_explanation" in agent_response: 95 | try: 96 | agent_response = agent_response.strip("```json") 97 | json_response = json.loads(s=agent_response) 98 | st.markdown(body=json_response.get("overall_explanation")) 99 | except Exception as e: 100 | logger.error("exception json loading: "+str(e)) 101 | st.markdown(body=agent_response) 102 | if json_response: 103 | for item in json_response: 104 | if item != "overall_explanation": 105 | 106 | image_path: str = os.path.join( 107 | settings.smart_agent_image_path, item) 108 | st.markdown(body=json_response[item]) 109 | st.image(image=image_path) 110 | else: 111 | st.markdown(body=agent_response) 112 | if len(history) ==0: 113 | welcome_message = remoteAgent.invoke(input={"question":"", "session_id":session_id}) 114 | with st.chat_message(name="assistant"): 115 | st.markdown(body=welcome_message) 116 | history.append({"role": "assistant", "content": welcome_message}) 117 | 118 | 119 | user_input: str | None = st.chat_input(placeholder="You:") 120 | # Conditional display of AI generated responses as a function of user provided prompts 121 | if user_input: 122 | with 
st.chat_message(name="user"): 123 | st.markdown(body=user_input) 124 | 125 | history.append({"role": "user", "content": user_input}) 126 | try: 127 | agent_response:str = remoteAgent.invoke(input={"question":user_input, "session_id":session_id}) 128 | except Exception as e: 129 | agent_response = None 130 | logger.error("error in running agent, error is "+ str(e)) 131 | 132 | with st.chat_message(name="assistant"): 133 | history.append({"role": "assistant", "content": agent_response}) 134 | json_response = None 135 | if agent_response: 136 | if "overall_explanation" in agent_response: 137 | try: 138 | agent_response = agent_response.strip("```json") 139 | json_response = json.loads(s=agent_response) 140 | st.markdown(body=json_response.get("overall_explanation")) 141 | except Exception as e: 142 | logger.error("exception json load "+str(e)) 143 | logger.debug((agent_respose)) 144 | st.markdown(body=agent_response) 145 | if json_response: 146 | for item in json_response: 147 | if item != "overall_explanation": 148 | logger.debug("item is "+str(item)) 149 | 150 | image_path: str = os.path.join( 151 | settings.smart_agent_image_path, item) 152 | st.markdown(body=json_response[item]) 153 | st.image(image=image_path) 154 | st.session_state['history'] = history -------------------------------------------------------------------------------- /src/app/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "app" 3 | version = "0.1.0" 4 | description = "RAG enhanced though multiple search types." 
5 | authors = ["Tyler Kendrick "] 6 | license = "MIT" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.11" 11 | streamlit-extras = "^0.4.3" 12 | fsspec = "^2024.6.1" 13 | models = { path = "../models", develop = true } 14 | agents = { path = "../agents", develop = true } 15 | functions = { path = "../functions", develop = true } 16 | utils = { path = "../utils", develop = true } 17 | pydantic-settings = "^2.4.0" 18 | langserve = {extras = ["client"], version = "^0.2.2"} 19 | 20 | [tool.poetry.group.dev.dependencies] 21 | env = "^0.1.0" 22 | 23 | [build-system] 24 | requires = ["poetry-core"] 25 | build-backend = "poetry.core.masonry.api" 26 | -------------------------------------------------------------------------------- /src/distributed_cache/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/deepRAG/2c39bf17ecc2cb4fb179bab9663e841a22e0699e/src/distributed_cache/README.md -------------------------------------------------------------------------------- /src/distributed_cache/cache.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from typing import Protocol, TypeVar, Union 3 | 4 | KeyT = TypeVar("KeyT", contravariant=True) 5 | ResponseT = TypeVar("ResponseT", covariant=True) 6 | EncodableT = TypeVar("EncodableT", contravariant=True) 7 | ExpiryT = TypeVar("ExpiryT", contravariant=True) 8 | AbsExpiryT = TypeVar("AbsExpiryT", contravariant=True) 9 | 10 | class CacheProtocol(Protocol[KeyT, ResponseT, EncodableT, ExpiryT, AbsExpiryT]): 11 | @abstractmethod 12 | def get(self, name: KeyT) -> ResponseT: 13 | pass 14 | 15 | @abstractmethod 16 | def set( 17 | self, 18 | name: KeyT, 19 | value: EncodableT, 20 | ex: Union[ExpiryT, None] = None, 21 | px: Union[ExpiryT, None] = None, 22 | nx: bool = False, 23 | xx: bool = False, 24 | keepttl: bool = False, 25 | get: bool = False, 26 | exat: 
Union[AbsExpiryT, None] = None, 27 | pxat: Union[AbsExpiryT, None] = None, 28 | ) -> ResponseT: 29 | pass 30 | 31 | @abstractmethod 32 | def delete(self, *names: KeyT) -> ResponseT: 33 | pass 34 | -------------------------------------------------------------------------------- /src/distributed_cache/distributedcache.py: -------------------------------------------------------------------------------- 1 | """The main module for services.""" 2 | from cache import CacheProtocol 3 | 4 | __all__: list[str] = ["CacheProtocol"] 5 | -------------------------------------------------------------------------------- /src/distributed_cache/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "distributedcache" 3 | version = "0.1.0" 4 | description = "Common services" 5 | authors = ["Tyler Kendrick "] 6 | license = "MIT" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.11" 11 | 12 | [build-system] 13 | requires = ["poetry-core"] 14 | build-backend = "poetry.core.masonry.api" 15 | -------------------------------------------------------------------------------- /src/functions/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/deepRAG/2c39bf17ecc2cb4fb179bab9663e841a22e0699e/src/functions/README.md -------------------------------------------------------------------------------- /src/functions/functions.py: -------------------------------------------------------------------------------- 1 | """The main module for functions.""" 2 | from search_vector_function import SearchVectorFunction 3 | -------------------------------------------------------------------------------- /src/functions/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "functions" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Tyler Kendrick 
<145080887+Tyler-R-Kendrick@users.noreply.github.com>"] 6 | license = "MIT" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.11" 11 | openai = "^1.37.1" 12 | azure-search-documents = "^11.4.0" 13 | 14 | [build-system] 15 | requires = ["poetry-core"] 16 | build-backend = "poetry.core.masonry.api" 17 | -------------------------------------------------------------------------------- /src/functions/search_vector_function.py: -------------------------------------------------------------------------------- 1 | import os 2 | from logging import Logger 3 | from typing import Any, List, Dict 4 | from openai import AzureOpenAI 5 | from azure.search.documents import SearchItemPaged, SearchClient 6 | from azure.search.documents.models import ( 7 | QueryAnswerType, 8 | QueryCaptionType, 9 | QueryType, 10 | VectorizedQuery, 11 | ) 12 | from azure.storage.blob import BlobServiceClient, ContainerClient 13 | 14 | class SearchVectorFunction: 15 | """Search function that uses a vector database to search for related content""" 16 | 17 | def __init__( 18 | self, 19 | logger: Logger, 20 | search_client: SearchClient, 21 | client: AzureOpenAI, 22 | model: str, 23 | image_directory: str, 24 | storage_account_key: str, 25 | storage_account_name: str, 26 | container_name: str 27 | ) -> None: 28 | self.__logger: Logger = logger 29 | self.__search_client: SearchClient = search_client 30 | self.__client: AzureOpenAI = client 31 | self.__model: str = model 32 | self.__image_directory: str = image_directory 33 | self.__blob_service_client = BlobServiceClient( 34 | account_url=f"https://{storage_account_name}.blob.core.windows.net", 35 | credential=storage_account_key 36 | ) 37 | self.__container_client: ContainerClient = self.__blob_service_client.get_container_client(container_name) 38 | 39 | def search(self, search_query) -> list: 40 | """Search for related content based on a search query""" 41 | self.__logger.debug("search query: ", search_query) 42 | output = [] 
43 | vector_query = VectorizedQuery( 44 | vector=self.__get_text_embedding(text=search_query), 45 | k_nearest_neighbors=3, 46 | fields="contentVector" 47 | ) 48 | results: SearchItemPaged[Dict] = self.__search_client.search( 49 | query_type=QueryType.SEMANTIC, 50 | semantic_configuration_name='my-semantic-config', 51 | query_caption=QueryCaptionType.EXTRACTIVE, 52 | query_answer=QueryAnswerType.EXTRACTIVE, 53 | vector_queries=[vector_query], 54 | select=["topic", "file_name", "page_number", "related_content"], 55 | top=3 56 | ) 57 | for result in results: 58 | self.__logger.debug(msg=f"topic: {result['topic']}") 59 | self.__logger.debug("related_content: ", result['related_content']) 60 | page_image_name = f"{result['file_name']}/page_{result['page_number']}.png" 61 | page_image_local_path = os.path.join(self.__image_directory, page_image_name) 62 | print("page_image_name ", page_image_name) 63 | # Ensure the local directory exists 64 | os.makedirs(os.path.dirname(page_image_local_path), exist_ok=True) 65 | 66 | # Download the image from Azure Blob Storage to local directory 67 | self.__download_image_from_blob(page_image_name, page_image_local_path) 68 | output.append({ 69 | 'id': result['id'] if 'id' in result.keys() else None, 70 | 'image_path': page_image_name, 71 | 'related_content': result['related_content'] 72 | }) 73 | return output 74 | 75 | def __get_text_embedding(self, text: str) -> List[float]: 76 | text = text.replace("\n", " ") 77 | return self.__client.embeddings.create( 78 | input=[text], 79 | model=self.__model 80 | ).data[0].embedding 81 | 82 | def __download_image_from_blob(self, blob_name: str, download_file_path: str) -> None: 83 | """ 84 | Download an image from Azure Blob Storage to a local file 85 | """ 86 | with open(download_file_path, "wb") as download_file: 87 | blob_client = self.__container_client.get_blob_client(blob_name) 88 | download_stream = blob_client.download_blob() 89 | download_file.write(download_stream.readall()) 90 | 
-------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | import fsspec 2 | from fsspec.utils import get_protocol 3 | from pathlib import Path 4 | from agents import Smart_Agent 5 | from models import Settings 6 | from models import AgentResponse 7 | from utils import SmartAgentFactory 8 | 9 | env_path: Path = Path('.') / '.env' 10 | settings: Settings = Settings(_env_file=env_path) # type: ignore 11 | agent_path: str = settings.smart_agent_prompt_location 12 | protocol: str = get_protocol(url=agent_path) 13 | fs: fsspec.AbstractFileSystem = fsspec.filesystem(protocol=protocol) 14 | agent: Smart_Agent = SmartAgentFactory.create_smart_agent(fs=fs, settings=settings, session_id='session_id') 15 | agent_response: AgentResponse = agent.run(user_input="What is the slogan of NESCAFE?", conversation=[], stream=False) 16 | 17 | print(agent_response) 18 | -------------------------------------------------------------------------------- /src/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/deepRAG/2c39bf17ecc2cb4fb179bab9663e841a22e0699e/src/models/README.md -------------------------------------------------------------------------------- /src/models/agent_configuration.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Any, Literal 3 | from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam 4 | from openai.types.shared_params import FunctionDefinition 5 | 6 | @dataclass 7 | class Parameter(): 8 | """Parameter class to represent the parameters of a tool""" 9 | type: str 10 | properties: dict 11 | 12 | @staticmethod 13 | def from_dict(data: dict) -> "Parameter": 14 | """Converts a dictionary to a Parameter object""" 15 | return Parameter( 16 | 
type=data["type"], 17 | properties=data["properties"] 18 | ) 19 | 20 | def to_dict(self) -> dict: 21 | """Converts a Parameter object to a dictionary""" 22 | return { 23 | "type": self.type, 24 | "properties": { 25 | property_key: tool_property for property in self.properties for property_key, tool_property in property.items() 26 | } 27 | } 28 | 29 | @dataclass 30 | class Tool(): 31 | """Tool class to represent the tools that the agent can use""" 32 | name: str 33 | description: str 34 | type: Literal["function"] 35 | parameters: Parameter 36 | required: list[str] 37 | 38 | @staticmethod 39 | def from_dict(data: dict) -> "Tool": 40 | """Converts a dictionary to a Tool object""" 41 | return Tool( 42 | name=data["name"], 43 | description=data["description"], 44 | type=data["type"], 45 | parameters=Parameter.from_dict(data=data["parameters"]), 46 | required=data["required"] 47 | ) 48 | 49 | def to_openai_tool(self) -> ChatCompletionToolParam: 50 | """Converts a Tool object to a dictionary that matches the openai format""" 51 | return ChatCompletionToolParam( 52 | type=self.type, 53 | function=FunctionDefinition( 54 | name=self.name, 55 | description=self.description, 56 | #required=self.required, 57 | parameters=self.parameters.to_dict(), 58 | ) 59 | ) 60 | 61 | @dataclass 62 | class AgentConfiguration(): 63 | """AgentConfiguration class to represent the configuration of the agent""" 64 | persona: str 65 | model: str 66 | initial_message: str 67 | name: str 68 | tools: list[Tool] 69 | 70 | @staticmethod 71 | def from_dict(data: dict) -> "AgentConfiguration": 72 | """Converts a dictionary to an AgentConfiguration object""" 73 | return AgentConfiguration( 74 | persona=data["persona"], 75 | model=data["model"], 76 | initial_message=data["initial_message"], 77 | name=data["name"], 78 | tools=[Tool.from_dict(data=tool) for tool in data["tools"]] 79 | ) 80 | 81 | def agent_configuration_from_dict(data: dict) -> AgentConfiguration: 82 | """Converts a dictionary to an 
AgentConfiguration object""" 83 | return AgentConfiguration( 84 | persona=data["persona"], 85 | model=data["model"], 86 | initial_message=data["initial_message"], 87 | name=data["name"], 88 | tools=[Tool.from_dict(data=tool) for tool in data["tools"]] 89 | ) 90 | -------------------------------------------------------------------------------- /src/models/agent_response.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | @dataclass 4 | class AgentResponse: 5 | """Class to represent a response from an agent""" 6 | conversation: list 7 | response: str | None 8 | streaming: bool = False -------------------------------------------------------------------------------- /src/models/models.py: -------------------------------------------------------------------------------- 1 | """The main module for models.""" 2 | 3 | from agent_response import AgentResponse 4 | from agent_configuration import AgentConfiguration, agent_configuration_from_dict 5 | from settings import Settings 6 | -------------------------------------------------------------------------------- /src/models/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "models" 3 | version = "0.1.0" 4 | description = "Models for the app" 5 | authors = ["Tyler Kendrick"] 6 | license = "MIT" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.11" 11 | pydantic-settings = "^2.4.0" 12 | openai = "^1.37.1" 13 | 14 | [build-system] 15 | requires = ["poetry-core"] 16 | build-backend = "poetry.core.masonry.api" 17 | -------------------------------------------------------------------------------- /src/models/settings.py: -------------------------------------------------------------------------------- 1 | from pydantic_settings import BaseSettings 2 | from pydantic import Field 3 | 4 | class Settings(BaseSettings): 5 | """Settings for the application""" 6 
| openai_endpoint: str = Field(validation_alias='AZURE_OPENAI_ENDPOINT') 7 | openai_key: str = Field(validation_alias='AZURE_OPENAI_API_KEY') 8 | openai_embedding_deployment: str = Field(validation_alias='AZURE_OPENAI_EMB_DEPLOYMENT') 9 | openai_chat_deployment: str = Field(validation_alias='AZURE_OPENAI_CHAT_DEPLOYMENT') 10 | openai_api_version: str = Field(validation_alias='AZURE_OPENAI_API_VERSION') 11 | azure_search_endpoint: str = Field(validation_alias='AZURE_SEARCH_ENDPOINT') 12 | azure_search_key: str = Field(validation_alias='AZURE_SEARCH_KEY') 13 | azure_search_index_name: str = Field(validation_alias='AZURE_SEARCH_INDEX_NAME') 14 | azure_vision_key: str = Field(validation_alias='AZURE_AI_VISION_API_KEY') 15 | azure_vision_endpoint: str = Field(validation_alias='AZURE_AI_VISION_ENDPOINT') 16 | smart_agent_prompt_location: str = Field(validation_alias='SMART_AGENT_PROMPT_LOCATION') 17 | smart_agent_image_path: str = Field(validation_alias='IMAGE_PATH') 18 | azure_redis_endpoint: str = Field(validation_alias='AZURE_REDIS_ENDPOINT') 19 | azure_redis_key: str = Field(validation_alias='AZURE_REDIS_KEY') 20 | azure_storage_account_key: str = Field(validation_alias='AZURE_STORAGE_ACCOUNT_KEY') 21 | azure_storage_account_name: str = Field(validation_alias='AZURE_STORAGE_ACCOUNT_NAME') 22 | azure_container_name: str = Field(validation_alias='AZURE_CONTAINER_NAME') 23 | api_port: int = Field(validation_alias='API_PORT', default='8080') 24 | app_port: int = Field(validation_alias='APP_PORT', default='8000') 25 | app_host: str = Field(validation_alias='APP_HOST', default='localhost') 26 | api_host: str = Field(validation_alias='API_HOST', default='localhost') 27 | -------------------------------------------------------------------------------- /src/services/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/deepRAG/2c39bf17ecc2cb4fb179bab9663e841a22e0699e/src/services/README.md 
-------------------------------------------------------------------------------- /src/services/history.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import pickle 3 | from distributedcache import CacheProtocol 4 | 5 | class History: 6 | """History class""" 7 | 8 | def __init__(self, session_id: str, cache: CacheProtocol) -> None: 9 | """Constructor for History""" 10 | self.__session_id: str = session_id 11 | self.__cache: CacheProtocol = cache 12 | cache_history = self.__cache.get(name=self.__session_id) 13 | 14 | if cache_history is None: 15 | return 16 | 17 | self.history = pickle.loads(base64.b64decode(s=cache_history)) 18 | 19 | 20 | def set_history(self, history) -> None: 21 | """Set the history""" 22 | self.__cache.set(name=self.__session_id, value=base64.b64encode(s=pickle.dumps(obj=history))) 23 | 24 | def clean_up_history(self, max_q_with_detail_hist=1, max_q_to_keep=2) -> None: 25 | """Clean up the history""" 26 | 27 | cache_history = self.__cache.get(name=self.__session_id) 28 | 29 | if cache_history is None: 30 | return 31 | 32 | history = pickle.loads(base64.b64decode(s=cache_history)) 33 | question_count=0 34 | removal_indices=[] 35 | 36 | for idx in range(len(history)-1, 0, -1): 37 | message = dict(history[idx]) 38 | 39 | if message.get("role") == "user": 40 | question_count +=1 41 | 42 | if question_count>= max_q_with_detail_hist and question_count < max_q_to_keep: 43 | if message.get("role") != "user" \ 44 | and message.get("role") != "assistant" \ 45 | and len(message.get("content") or []) == 0: 46 | removal_indices.append(idx) 47 | 48 | if question_count >= max_q_to_keep: 49 | removal_indices.append(idx) 50 | 51 | # remove items with indices in removal_indices 52 | for index in removal_indices: 53 | del history[index] 54 | 55 | self.set_history(history=history) 56 | 57 | def reset_history_to_last_question(self) -> None: 58 | """Reset the history to the last question""" 59 | 60 | 
cache_history = self.__cache.get(name=self.__session_id) 61 | 62 | if cache_history is None: 63 | return 64 | 65 | history = pickle.loads(base64.b64decode(s=cache_history)) 66 | 67 | for i in range(len(history)-1, -1, -1): 68 | message = dict(history[i]) 69 | 70 | if message.get("role") == "user": 71 | break 72 | 73 | history.pop() 74 | 75 | self.set_history(history=history) 76 | -------------------------------------------------------------------------------- /src/services/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "services" 3 | version = "0.1.0" 4 | description = "Common services" 5 | authors = ["Tyler Kendrick "] 6 | license = "MIT" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.11" 11 | distributedcache = { path = "../distributed_cache", develop = true } 12 | 13 | [build-system] 14 | requires = ["poetry-core"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /src/services/services.py: -------------------------------------------------------------------------------- 1 | """The main module for services.""" 2 | from history import History 3 | -------------------------------------------------------------------------------- /src/utils/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/deepRAG/2c39bf17ecc2cb4fb179bab9663e841a22e0699e/src/utils/README.md -------------------------------------------------------------------------------- /src/utils/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "utils" 3 | version = "0.1.0" 4 | description = "RAG enhanced though multiple search types." 
5 | authors = ["Tyler Kendrick "] 6 | license = "MIT" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.11" 11 | azure-search-documents = "^11.4.0" 12 | azure-storage-blob = "^12.20.0" 13 | streamlit = "^1.36.0" 14 | matplotlib = "^3.9.0" 15 | plotly = "^5.23.0" 16 | openai = "^1.37.0" 17 | models = { path = "../models", develop = true } 18 | functions = { path = "../functions", develop = true } 19 | agents = { path = "../agents", develop = true } 20 | distributedcache = { path = "../distributed_cache", develop = true } 21 | redis = "^5.0.8" 22 | 23 | [tool.poetry.group.dev.dependencies] 24 | env = "^0.1.0" 25 | 26 | [build-system] 27 | requires = ["poetry-core"] 28 | build-backend = "poetry.core.masonry.api" 29 | -------------------------------------------------------------------------------- /src/utils/smart_agent_factory.py: -------------------------------------------------------------------------------- 1 | from typing import Awaitable 2 | import redis 3 | import yaml 4 | import fsspec 5 | from logging import Logger 6 | from openai import AzureOpenAI 7 | from azure.search.documents import SearchClient 8 | from azure.core.credentials import AzureKeyCredential 9 | from distributedcache import CacheProtocol 10 | from functions import SearchVectorFunction 11 | from models import AgentConfiguration, agent_configuration_from_dict 12 | from models import Settings 13 | import base64 14 | import pickle 15 | from agents import Smart_Agent 16 | from redis.commands.core import BasicKeyCommands 17 | from redis.typing import KeyT, ResponseT, AbsExpiryT, ExpiryT, EncodableT 18 | 19 | class SmartAgentFactory: 20 | @staticmethod 21 | def create_smart_agent(fs: fsspec.AbstractFileSystem, settings: Settings, session_id: str) -> Smart_Agent: 22 | with fs.open(path=settings.smart_agent_prompt_location, mode="r", encoding="utf-8") as file: 23 | agent_config_data = yaml.safe_load(stream=file) 24 | agent_config: AgentConfiguration = 
agent_configuration_from_dict(data=agent_config_data) 25 | 26 | search_client = SearchClient( 27 | endpoint=settings.azure_search_endpoint, 28 | index_name=settings.azure_search_index_name, 29 | credential=AzureKeyCredential(key=settings.azure_search_key) 30 | ) 31 | 32 | client = AzureOpenAI( 33 | api_key=settings.openai_key, 34 | api_version=settings.openai_api_version, 35 | azure_endpoint=settings.openai_endpoint, 36 | ) 37 | 38 | search_vector_function = SearchVectorFunction( 39 | logger=Logger(name="search_vector_function"), 40 | search_client=search_client, 41 | client=client, 42 | model=settings.openai_embedding_deployment, 43 | image_directory=settings.smart_agent_image_path, 44 | storage_account_key=settings.azure_storage_account_key, 45 | storage_account_name=settings.azure_storage_account_name, 46 | container_name=settings.azure_container_name 47 | ) 48 | 49 | redis_client: CacheProtocol[KeyT, ResponseT, EncodableT, ExpiryT, AbsExpiryT] = redis.Redis( 50 | host=settings.azure_redis_endpoint, 51 | port=6380, 52 | ssl=True, 53 | db=0, 54 | password=settings.azure_redis_key, 55 | decode_responses=True 56 | ) 57 | init_history=[] 58 | if session_id: 59 | 60 | raw_hist = redis_client.get(session_id) 61 | init_history = pickle.loads(base64.b64decode(s=raw_hist)) if raw_hist else [] 62 | return Smart_Agent( 63 | logger=Logger(name="smart_agent"), 64 | client=client, 65 | agent_configuration=agent_config, 66 | search_vector_function = search_vector_function, 67 | init_history=init_history, 68 | fs=fs, 69 | image_directory=settings.smart_agent_image_path, 70 | ) 71 | @staticmethod 72 | def persist_history(smart_agent:Smart_Agent, session_id: str, settings: Settings) -> None: 73 | redis_client: CacheProtocol[KeyT, ResponseT, EncodableT, ExpiryT, AbsExpiryT] = redis.Redis( 74 | host=settings.azure_redis_endpoint, 75 | port=6380, 76 | ssl=True, 77 | db=0, 78 | password=settings.azure_redis_key, 79 | decode_responses=True 80 | ) 81 | history = 
smart_agent._conversation 82 | redis_client.set(name=session_id, value=base64.b64encode(pickle.dumps(history))) 83 | redis_client.expire(name=session_id, time=3600) -------------------------------------------------------------------------------- /src/utils/utilities.py: -------------------------------------------------------------------------------- 1 | # Agent class 2 | # responsbility definition: expertise, scope, conversation script, style 3 | from pathlib import Path 4 | import json 5 | import os 6 | import base64 7 | from openai import AzureOpenAI 8 | import streamlit as st 9 | from azure.core.credentials import AzureKeyCredential 10 | from azure.search.documents import SearchClient 11 | import time 12 | from azure.search.documents.models import ( 13 | 14 | QueryAnswerType, 15 | QueryCaptionType, 16 | QueryType, 17 | VectorizedQuery, 18 | ) 19 | 20 | 21 | import pandas as pd 22 | from dotenv import load_dotenv 23 | import inspect 24 | import openai 25 | env_path = Path('..') / '.env' 26 | load_dotenv(dotenv_path=env_path) 27 | MAX_ERROR_RUN = 3 28 | MAX_RUN_PER_QUESTION = 10 29 | MAX_QUESTION_TO_KEEP = 3 30 | MAX_QUESTION_WITH_DETAIL_HIST = 1 31 | 32 | env: os._Environ[str] = os.environ 33 | getenv = os.getenv 34 | emb_engine = getenv("AZURE_OPENAI_EMB_DEPLOYMENT") 35 | chat_engine = getenv("AZURE_OPENAI_CHAT_DEPLOYMENT") 36 | client = AzureOpenAI( 37 | api_key=env.get("AZURE_OPENAI_API_KEY"), 38 | api_version=getenv("AZURE_OPENAI_API_VERSION"), 39 | azure_endpoint=env.get("AZURE_OPENAI_ENDPOINT"), 40 | ) 41 | max_conversation_len = 5 # Set the desired value of k 42 | 43 | 44 | emb_engine: str | None = getenv("AZURE_OPENAI_EMB_DEPLOYMENT") 45 | # azcs implementation 46 | searchservice: str | None = getenv("AZURE_SEARCH_ENDPOINT") 47 | index_name: str | None = getenv("AZURE_SEARCH_INDEX_NAME") 48 | key: str | None = getenv("AZURE_SEARCH_KEY") 49 | search_client = SearchClient( 50 | endpoint=searchservice, 51 | index_name=index_name, 52 | 
credential=AzureKeyCredential(key=getenv("AZURE_SEARCH_KEY")) 53 | ) 54 | 55 | 56 | # @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) 57 | # Function to generate embeddings for title and content fields, also used for query embeddings 58 | def get_embedding(text, model=emb_engine): 59 | text = text.replace("\n", " ") 60 | return client.embeddings.create(input=[text], model=model).data[0].embedding 61 | 62 | 63 | credential = AzureKeyCredential(key) 64 | 65 | 66 | def get_text_embedding(text, model=os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT")): 67 | text = text.replace("\n", " ") 68 | while True: 69 | try: 70 | embedding_response = client.embeddings.create( 71 | input=[text], model=model).data[0].embedding 72 | return embedding_response 73 | except openai.error.RateLimitError: 74 | print("Rate limit exceeded. Retrying after 10 seconds...") 75 | time.sleep(10) 76 | 77 | 78 | today = pd.Timestamp.today() 79 | # format today's date 80 | today = today.strftime("%Y-%m-%d") 81 | PERSONA = """ 82 | You are an intelligent AI assistant designed to help users find information most relevant to their questions. 83 | You have access to Azure AI Search, which provides semantic search capabilities using natural language queries and metadata filtering. 84 | The data you access is organized according to the ontology below. 85 | As a smart research assistant, your goal is to identify the best relevant information to answer the user's question. 86 | The initial search result may include hints on the related content. Use hint to start a follow-up search to find related content if neccessary. 87 | Engage with the user to understand their needs, conduct the research, and respond with a curated list of content along with explanations on how they can be used to answer the user's question. 
88 | Your final response should be in JSON format like this: 89 | { 90 | "overall_explanation": "The following headlines are inspired by the concept of customization and personal expression, which resonates well with young people. These suggestions are derived from the context of launching a product that allows users to make it their own.", 91 | "11_LAUNCHING_MAKE_YOUR_WORLD_2022-11-17/page_7.png": "This file provides a context for launching a product with a focus on personalization and making it your own.", 92 | "Brand_Context/page_18.png": "This file offers insights into brand context and how to position a product in a way that appeals to young people by emphasizing individuality and customization." 93 | } 94 | Just output the JSON content in your final response and do not add any other comment. 95 | 96 | # Ontology 97 | EntityClasses: 98 | - Brand: 99 | Description: "An entity representing a company's identity, values, and image." 100 | Attributes: 101 | - Name: "The official name of the brand." 102 | - TargetConsumer: "The primary demographic the brand aims to reach." 103 | - Slogan: "A memorable phrase representing the brand's essence." 104 | - ColorPalette: "The set of colors used for brand identity." 105 | - LogoUsage: "Guidelines for using the brand's logo." 106 | - StoryboardGuidelines: "Rules for visual storytelling." 107 | - SocialMediaGuidelines: "Instructions for maintaining brand consistency on social media." 108 | 109 | - Product: 110 | Description: "An item or service offered by the brand." 111 | Attributes: 112 | - Name: "The official name of the product." 113 | - Type: "The category of the product, e.g., SaaS, On-Premise." 114 | - Popularity: "The level of market acceptance and usage." 115 | - Market: "The primary geographic or demographic market for the product." 116 | 117 | - Campaign: 118 | Description: "A series of coordinated activities aimed at promoting a product or brand." 
119 | Attributes: 120 | - Name: "The official name of the campaign." 121 | - Focus: "The main theme or objective, e.g., Digital Experience, AI." 122 | - Market: "The target market for the campaign." 123 | - CreativeAssets: "Visual and textual materials used in the campaign." 124 | - Concept: "The central idea or message of the campaign." 125 | - Toolkit: "Resources and tools used to execute the campaign." 126 | 127 | - Guideline: 128 | Description: "A set of rules or instructions for maintaining brand consistency." 129 | Attributes: 130 | - Type: "The category of the guideline, e.g., Digital Asset, Tone of Voice Usage, Logo Usage." 131 | - Details: "Specific instructions and details." 132 | 133 | - Market: 134 | Description: "A defined geographic or demographic area where products are sold." 135 | Attributes: 136 | - Name: "The name of the market." 137 | - Trends: "Current market trends and dynamics." 138 | - Competitors: "Main competitors within the market." 139 | - Influencers: "Key influencers relevant to the market." 140 | 141 | - Competitor: 142 | Description: "A company or product competing with the brand." 143 | Attributes: 144 | - Name: "The name of the competitor." 145 | - Products: "Products offered by the competitor." 146 | - Campaigns: "Promotional campaigns run by the competitor." 147 | 148 | - Influencer: 149 | Description: "A person who can influence the brand's target audience." 150 | Attributes: 151 | - Name: "The name of the influencer." 152 | - Market: "The market segment the influencer operates in." 153 | - Segment: "Specific niche or category, e.g., Coffee, Lifestyle." 154 | 155 | - AdvertisingCase: 156 | Description: "A documented instance of brand advertising." 157 | Attributes: 158 | - Market: "The market where the advertising case is relevant." 159 | - Details: "Specific details about the advertising case." 160 | 161 | - Aesthetic: 162 | Description: "The visual style and design elements used by the brand." 
163 | Attributes: 164 | - Style: "The design style, e.g., Minimalist, Modern." 165 | - TargetAudience: "The audience for whom the aesthetic is designed." 166 | 167 | - Recipe: 168 | Description: "A detailed set of instructions for creating a product." 169 | Attributes: 170 | - Name: "The name of the recipe." 171 | - Ingredients: "The components required for the recipe." 172 | - Steps: "The sequence of actions to complete the recipe." 173 | 174 | - Claim: 175 | Description: "A statement made by the brand to promote its values or product benefits." 176 | Attributes: 177 | - Type: "The category of claim, e.g., Sustainability, Quality, Safety." 178 | - Details: "Specific details of the claim." 179 | 180 | - Slogan: 181 | Description: "A memorable phrase used in marketing to represent the brand's essence." 182 | Attributes: 183 | - Text: "The actual slogan text." 184 | - Language: "The language in which the slogan is written." 185 | - UsageContext: "The context or situation in which the slogan is used." 
def search(search_query):
    """Hybrid semantic + vector search over the Azure AI Search index.

    Embeds *search_query*, runs a semantic search with vector similarity on
    the ``contentVector`` field, and returns a list of dicts with
    ``image_path`` (pre-rendered page image on disk) and ``related_content``
    (hints the agent can use for follow-up searches).
    """
    print("search query: ", search_query)
    vector_query = VectorizedQuery(vector=get_text_embedding(
        search_query), k_nearest_neighbors=3, fields="contentVector")

    results = search_client.search(
        query_type=QueryType.SEMANTIC,
        semantic_configuration_name='my-semantic-config',
        query_caption=QueryCaptionType.EXTRACTIVE,
        query_answer=QueryAnswerType.EXTRACTIVE,
        vector_queries=[vector_query],
        select=["topic", "file_name", "page_number", "related_content"],
        top=3
    )
    # NOTE(review): Windows-style relative path; assumes page images were
    # pre-rendered under processed_data/<file_name>/page_<n>.png — confirm.
    images_directory = ".\\processed_data"
    output = []
    for result in results:
        print(f"topic: {result['topic']}")
        print("related_content: ", result['related_content'])

        page_image = os.path.join(
            images_directory, result['file_name'],
            "page_" + str(result['page_number'])) + ".png"
        output.append({'image_path': page_image,
                       'related_content': result['related_content']})
    return output


# Registry mapping tool names (as advertised to the model) to callables.
AVAILABLE_FUNCTIONS = {
    "search": search,
}


# OpenAI tool/function schema advertised to the chat model.
FUNCTIONS_SPEC = [
    {
        "type": "function",
        "function": {
            "name": "search",
            "description": "Semantic Search Engine to search for content",
            "parameters": {
                "type": "object",
                "properties": {
                    "search_query": {
                        "type": "string",
                        "description": "Natural language query to search for content"
                    }
                },
                "required": ["search_query"],
            },
        }
    },
]


def check_args(function, args):
    """Validate that the mapping *args* matches *function*'s signature.

    Returns False when *args* contains an unknown argument name or omits a
    required (no-default) parameter, and True otherwise.

    BUG FIX: the original fell off the end and implicitly returned None on
    success; callers compare the result with ``is False``, so returning True
    explicitly is both correct and backward compatible.
    """
    sig = inspect.signature(function)
    params = sig.parameters

    # Check if there are extra arguments
    for name in args:
        if name not in params:
            return False
    # Check if the required arguments are provided
    for name, param in params.items():
        if param.default is param.empty and name not in args:
            return False
    return True
def clean_up_history(history, max_q_with_detail_hist=1, max_q_to_keep=2):
    """Prune *history* in place so the prompt stays small.

    Walks the message list backwards counting "user" questions: questions
    older than *max_q_with_detail_hist* lose their non-user/non-assistant
    (tool) messages, and everything at or past *max_q_to_keep* questions
    back is removed entirely.  Index 0 (the system message) is never touched.
    """
    # start from end of history, count the messages with role user, if the count is more than max_q_with_detail_hist, remove messages from there with roles tool.
    # if the count is more than max_q_hist_to_keep, remove all messages from there until message number 1
    question_count = 0
    removal_indices = []
    for idx in range(len(history)-1, 0, -1):
        message = dict(history[idx])
        if message.get("role") == "user":
            question_count += 1
            # print("question_count added, it becomes: ", question_count)
        if question_count >= max_q_with_detail_hist and question_count < max_q_to_keep:
            # NOTE(review): this removes non-user/non-assistant messages only
            # when their content is *empty* (len(...) == 0); if the intent was
            # to drop bulky tool payloads, `> 0` may have been meant — confirm.
            if message.get("role") != "user" and message.get("role") != "assistant" and len(message.get("content")) == 0:
                removal_indices.append(idx)
        if question_count >= max_q_to_keep:
            removal_indices.append(idx)

    # remove items with indices in removal_indices
    # (collected back-to-front, so deleting in this order never shifts the
    # positions of indices still pending deletion)
    for index in removal_indices:
        del history[index]


def reset_history_to_last_question(history):
    """Pop messages off the end of *history* until the most recent "user"
    message is on top, then clear derived Streamlit display state."""
    # pop messages from history from last item to the message with role user
    for i in range(len(history)-1, -1, -1):
        message = dict(history[i])
        if message.get("role") == "user":
            break
        history.pop()
    # NOTE(review): deleting keys while iterating st.session_state may raise
    # at runtime depending on the mapping implementation — confirm.
    for session_item in st.session_state:
        if 'data_from_display' in session_item or 'comment_on_graph' in session_item:
            del st.session_state[session_item]


class Smart_Agent():
    """Tool-calling research agent backed by Azure OpenAI chat completions.

    Keeps a running ``conversation`` of OpenAI chat-message dicts, lets the
    model request tools declared in ``functions_spec`` (implemented by the
    callables in ``functions_list``, e.g. the module-level ``search``),
    executes them, and loops until the model produces a final text answer.
    """

    def __init__(self, persona, functions_spec, functions_list, name=None, init_message=None, engine=chat_engine):
        # Seed history with the system persona and an optional assistant greeting.
        if init_message is not None:
            init_hist = [{"role": "system", "content": persona},
                         {"role": "assistant", "content": init_message}]
        else:
            init_hist = [{"role": "system", "content": persona}]

        self.conversation = init_hist
        self.persona = persona
        self.engine = engine
        self.name = name

        self.functions_spec = functions_spec
        self.functions_list = functions_list
    # @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))

    def run(self, user_input, conversation=None, stream=False, ):
        """Answer *user_input*, executing tool calls requested by the model.

        Returns ``(stream, code, conversation, assistant_response, data)``.
        NOTE(review): when *user_input* is None a 2-tuple is returned instead,
        and ``self.conversation[1]`` assumes an init_message was supplied —
        confirm callers handle both shapes.
        """
        if user_input is None:  # if no input return init message
            return self.conversation, self.conversation[1]["content"]
        if conversation is not None:  # caller-supplied history replaces the agent's own
            self.conversation = conversation

        # NOTE(review): hard-coded user name — confirm whether this should
        # come from the caller / self.name.
        self.conversation.append(
            {"role": "user", "content": user_input, "name": "James"})
        clean_up_history(self.conversation, max_q_with_detail_hist=MAX_QUESTION_WITH_DETAIL_HIST,
                         max_q_to_keep=MAX_QUESTION_TO_KEEP)

        execution_error_count = 0
        code = ""  # leftover from a code-execution variant; never assigned below
        response_message = None
        data = {}  # leftover; returned unchanged
        execution_context = {}  # leftover; unused
        run_count = 0
        while True:
            # Give up on this question after too many tool-call rounds.
            if run_count >= MAX_RUN_PER_QUESTION:
                reset_history_to_last_question(self.conversation)
                print(
                    f"Need to move on from this question due to max run count reached ({run_count} runs)")
                response_message = {
                    "role": "assistant", "content": "I am unable to answer this question at the moment, please ask another question."}
                break
            # NOTE(review): execution_error_count is never incremented in this
            # method, so this branch is currently unreachable — confirm intent.
            if execution_error_count >= MAX_ERROR_RUN:
                reset_history_to_last_question(self.conversation)
                print(
                    f"resetting history due to too many errors ({execution_error_count} errors) in the code execution")
                execution_error_count = 0
            response = client.chat.completions.create(
                # The deployment name you chose when you deployed the GPT-35-turbo or GPT-4 model.
                model=self.engine,
                messages=self.conversation,
                tools=self.functions_spec,
                tool_choice='auto',
                temperature=0.2,
            )
            run_count += 1
            response_message = response.choices[0].message
            if response_message.content is None:
                response_message.content = ""
            tool_calls = response_message.tool_calls

            if tool_calls:
                # print("Tool calls: ")
                # extend conversation with assistant's reply
                self.conversation.append(response_message)
                for tool_call in tool_calls:
                    function_name = tool_call.function.name

                    print("Recommended Function call:")
                    print(function_name)
                    print()

                    # verify function exists; on failure undo the assistant
                    # message and ask the model again
                    if function_name not in self.functions_list:
                        # raise Exception("Function " + function_name + " does not exist")
                        print(("Function " + function_name +
                               " does not exist, retrying"))
                        self.conversation.pop()
                        break
                    function_to_call = self.functions_list[function_name]

                    # verify function has correct number of arguments
                    try:
                        function_args = json.loads(
                            tool_call.function.arguments)
                    except json.JSONDecodeError as e:
                        print(e)
                        self.conversation.pop()
                        break
                    if check_args(function_to_call, function_args) is False:
                        self.conversation.pop()
                        break

                    else:
                        function_response = function_to_call(**function_args)

                    # For search results, inline each page image (base64 data
                    # URL) plus its text hints so the model can read the pages.
                    if function_name == "search":
                        search_function_response = []
                        for item in function_response:
                            image_path = item['image_path']
                            related_content = item['related_content']

                            with open(image_path, "rb") as image_file:
                                base64_image = base64.b64encode(
                                    image_file.read()).decode('utf-8')
                            # path= "_".join(image_path.split("\\")[-2:])
                            print("image_path: ", image_path)

                            search_function_response.append(
                                {"type": "text", "text": f"file_name: {image_path}"})
                            search_function_response.append({"type": "image_url", "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"}})
                            search_function_response.append(
                                {"type": "text", "text": f"HINT: The following kind of content might be related to this topic\n: {related_content}"})

                        function_response = search_function_response
                    self.conversation.append(
                        {
                            "tool_call_id": tool_call.id,
                            "role": "tool",
                            "name": function_name,
                            "content": function_response,
                        }
                    )  # extend conversation with function response

                continue
            else:
                # print('no function call')
                break  # if no function call break out of loop as this indicates that the agent finished the research and is ready to respond to the user

        if not stream:
            self.conversation.append(response_message)
            if type(response_message) is dict:
                assistant_response = response_message.get('content')
            else:
                # NOTE(review): .dict() is pydantic-v1 style; model_dump() is
                # the v2 name — works today but emits a deprecation warning.
                assistant_response = response_message.dict().get('content')
            # conversation.append({"role": "assistant", "content": assistant_response})

        else:
            assistant_response = response_message

        return stream, code, self.conversation, assistant_response, data
-------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "tests" 3 | version = "0.1.0" 4 | description = "RAG enhanced through multiple search types." 5 | authors = ["Tyler Kendrick "] 6 | license = "MIT" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.11" 11 | asyncio = "^3.4.3" 12 | api = { path = "../src/api", develop = true } 13 | models = { path = "../src/models", develop = true } 14 | utils = { path = "../src/utils", develop = true } 15 | agents = { path = "../src/agents", develop = true } 16 | distributedcache = { path = "../src/distributed_cache", develop = true } 17 | services = { path = "../src/services", develop = true } 18 | sse-starlette = "^2.1.3" 19 | pytest-mock = "^3.14.0" 20 | 21 | [tool.poetry.group.dev.dependencies] 22 | env = "^0.1.0" 23 | 24 | [tool.poetry.group.test.dependencies] 25 | pytest = "^8.2.2" 26 | pytest-mock = "^3.14.0" 27 | 28 | [tool.pytest.ini_options] 29 | pythonpath = [ 30 | ".", 31 | "../src", 32 | ] 33 | 34 | [build-system] 35 | requires = ["poetry-core"] 36 | build-backend = "poetry.core.masonry.api" 37 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from typing import Any, List 3 | from langserve.client import RemoteRunnable 4 | from api import Server 5 | 6 | def test_vectorRAG_api_happy_path(): 7 | 8 | client = RemoteRunnable(url="http://localhost:8000/vectorRAG") 9 | response: List[Any] = client.invoke(input="What is a vector?") 10 | assert response is not None 11 | 12 | document = response.pop() 13 | print(document) 14 | assert document is not None 15 | assert document.get("image_path", None) is not None 16 | assert document.get("related_content", None) is not None 17 | --------------------------------------------------------------------------------
from unittest.mock import Mock, MagicMock
import pytest_mock
from typing import Any, List, Union
from openai import AzureOpenAI
from openai.types.create_embedding_response import CreateEmbeddingResponse, Usage
from openai.types.embedding import Embedding
from azure.search.documents import (SearchItemPaged, SearchClient)
from functions import SearchVectorFunction


def setup(mocker: pytest_mock.MockerFixture,
          embedding_input: Union[str, List[str], List[int], List[List[int]]],
          image_directory: str,
          documents: list[dict[str, Any]]) -> SearchVectorFunction:
    """Build a SearchVectorFunction wired to mocked OpenAI + Search clients.

    *embedding_input* (renamed from ``input``, which shadowed the builtin)
    only determines how many fake embeddings are produced; *documents* are
    what the mocked search client yields.
    """
    # `Mock | Mock` in the original annotations was a redundant union;
    # plain Mock is equivalent.
    mock_azure_openai: Mock = mocker.Mock(
        target=AzureOpenAI, embeddings=mocker.Mock(create=mocker.Mock()))
    mock_search_client: Mock = mocker.Mock(
        target=SearchClient, search=mocker.Mock(search=mocker.Mock()))
    mock_search_item_paged = MagicMock(spec=SearchItemPaged)

    # Define the behavior of the mock instance
    mock_search_item_paged.__iter__.return_value = iter(documents)

    mock_azure_openai.embeddings.create.return_value = CreateEmbeddingResponse(
        data=[
            Embedding(
                embedding=[0.23333 for _ in range(233)],
                index=i,
                object='embedding'
            ) for i in range(len(embedding_input))
        ],
        model="gpt-4",
        object='list',
        usage=Usage(
            prompt_tokens=2,
            total_tokens=2
        )
    )

    mock_search_client.search.return_value = mock_search_item_paged

    return SearchVectorFunction(
        logger=mocker.Mock(),
        search_client=mock_search_client,
        client=mock_azure_openai,
        model="gpt-4",
        image_directory=image_directory
    )


def test_valid_search_return(mocker: pytest_mock.MockerFixture,):
    """Test for a valid search return"""
    image_directory: str = "images"
    file_name: str = "page_1.png"
    related_content: str = "Hello World"
    page_number: int = 1
    documents: list[dict[str, Any]] = [
        {
            'id': '1',
            'name': 'Item 1',
            'topic': 'test',
            'related_content': related_content,
            'page_number': page_number,
            'file_name': file_name
        }
    ]

    search_vector_function: SearchVectorFunction = setup(
        mocker=mocker,
        embedding_input="search query",
        image_directory=image_directory,
        documents=documents
    )

    search_vector_response: list[Any] = search_vector_function.search(
        search_query="search query")

    # Guard: previously an empty response made the loop below pass vacuously.
    assert search_vector_response
    for t in search_vector_response:
        assert t['image_path'] == f"{image_directory}/{file_name}/page_{page_number}.png"
        assert t['related_content'] == related_content


def test_muliple_document_return(mocker: pytest_mock.MockerFixture):
    """Test for multiple document return"""
    image_directory: str = "images"
    documents: list[dict[str, Any]] = [
        {
            'id': '1',
            'name': 'Item 1',
            'topic': 'test',
            'related_content': "Hello World",
            'page_number': 1,
            'file_name': "page_1.png"
        }, {
            'id': '2',
            'name': 'Item 2',
            'topic': 'test',
            'related_content': None,
            'page_number': 2,
            'file_name': "page_2.png"
        }
    ]

    search_vector_function: SearchVectorFunction = setup(
        mocker=mocker,
        embedding_input="search query",
        image_directory=image_directory,
        documents=documents
    )

    search_vector_response: list[Any] = search_vector_function.search(
        search_query="search query")

    assert search_vector_response
    for search_document in search_vector_response:
        document: dict[str, Any] | None = next(
            (document for document in documents
             if document.get("id") == search_document["id"]), None)
        # Guard: previously a missing match returned None and the subscripts
        # below raised TypeError instead of a clear assertion failure.
        assert document is not None
        file_name = document["file_name"]
        page_number = document["page_number"]

        assert search_document['image_path'] == f"{image_directory}/{file_name}/page_{page_number}.png"
        assert search_document['related_content'] == document["related_content"]
import pytest
from fastapi.testclient import TestClient
from httpx import Response
from api import app


@pytest.fixture
def client() -> TestClient:
    """Provide a test client bound to the FastAPI application under test."""
    return TestClient(app=app)


def test_vector_rag(client: TestClient) -> None:
    """The /vectorRAG route answers a simple invoke request with HTTP 200."""
    payload = {"input": "NESCAF\u00c9's New Global Initiative"}
    response: Response = client.post(url="/vectorRAG/invoke", json=payload)
    assert response.status_code == 200


def test_deep_rag(client: TestClient) -> None:
    """The /deepRAG route answers a simple invoke request with HTTP 200."""
    payload = {"input": "NESCAF\u00c9's New Global Initiative"}
    response: Response = client.post(url="/deepRAG/invoke", json=payload)
    assert response.status_code == 200
def setup_mock_azure_openai_with_side_effects(
        mocker: pytest_mock.MockerFixture,
        chat_completion_response: str,
        chat_completion_finish_reason: Literal["stop", "tool_calls"] = 'stop',
        chat_completion_tool_calls: List[ChatCompletionMessageToolCall] = [],
        chat_completion_side_effect: List[ChatCompletion] = []
) -> Mock:
    """Mock AzureOpenAI whose chat.completions.create returns one completion
    built from the given response/finish-reason/tool-calls, followed by the
    completions in *chat_completion_side_effect* on later calls.

    NOTE(review): the mutable list defaults are shared across calls; they are
    only read here, so this is safe, but a None-sentinel would be cleaner.
    NOTE(review): using side_effect means the mock raises StopIteration once
    the list is exhausted — intentional for the tool-call flow tests.
    """
    mockAzureOpenAI: Mock | Mock = mocker.Mock(target=AzureOpenAI, chat=mocker.Mock(completions=mocker.Mock(create=mocker.Mock())))
    mockAzureOpenAI.chat.completions.create.side_effect = [ChatCompletion(
        id="foo",
        model="gpt-4",
        object="chat.completion",
        choices=[
            Choice(
                finish_reason=chat_completion_finish_reason,
                index=0,
                message=ChatCompletionMessage(
                    content=chat_completion_response,
                    role="assistant",
                    tool_calls=chat_completion_tool_calls,
                ),
            )
        ],
        created=int(datetime.now().timestamp())
    ),
        *chat_completion_side_effect
    ]

    return mockAzureOpenAI


def setup_mock_azure_openai(
        mocker: pytest_mock.MockerFixture,
        chat_completion_response: str,
        chat_completion_finish_reason: Literal["stop", "tool_calls"] = 'stop',
        chat_completion_tool_calls: List[ChatCompletionMessageToolCall] = []
) -> Mock:
    """Mock AzureOpenAI that returns the SAME completion on every call.

    Unlike the side_effect variant above, return_value never exhausts —
    required by test_for_max_run_count, which loops many completions.
    """
    mockAzureOpenAI: Mock | Mock = mocker.Mock(target=AzureOpenAI, chat=mocker.Mock(completions=mocker.Mock(create=mocker.Mock())))
    mockAzureOpenAI.chat.completions.create.return_value = ChatCompletion(
        id="foo",
        model="gpt-4",
        object="chat.completion",
        choices=[
            Choice(
                finish_reason=chat_completion_finish_reason,
                index=0,
                message=ChatCompletionMessage(
                    content=chat_completion_response,
                    role="assistant",
                    tool_calls=chat_completion_tool_calls,
                ),
            )
        ],
        created=int(datetime.now().timestamp())
    )

    return mockAzureOpenAI


def setup(
        mocker: pytest_mock.MockerFixture,
        mockAzureOpenAI: Mock) -> Smart_Agent:
    """Build a Smart_Agent with all collaborators mocked except the given
    AzureOpenAI client; the search tool returns an empty result list."""
    return Smart_Agent(
        logger=mocker.Mock(),
        client=mockAzureOpenAI,
        agent_configuration=mocker.Mock(tools=[]),
        search_vector_function=mocker.Mock(search=mocker.Mock(return_value=[])),
        fs=mocker.Mock(),
        history=mocker.Mock()
    )


def test_for_valid_response(mocker: pytest_mock.MockerFixture) -> None:
    """Test for a valid response from the smart agent"""
    chat_completion_response: str = "Assistant Response"
    smart_agent_prompt: str = "Hello World"
    mockAzureOpenAI = setup_mock_azure_openai(mocker=mocker, chat_completion_response=chat_completion_response)
    smart_agent: Smart_Agent = setup(mocker=mocker, mockAzureOpenAI=mockAzureOpenAI)
    smart_agent_response: AgentResponse = smart_agent.run(smart_agent_prompt)

    assert smart_agent_response.response == chat_completion_response


def test_for_no_user_input(mocker: pytest_mock.MockerFixture) -> None:
    """Test for no user input"""
    smart_agent_prompt = None
    mockAzureOpenAI = setup_mock_azure_openai(mocker=mocker, chat_completion_response="Assistant Response")
    smart_agent: Smart_Agent = setup(mocker=mocker, mockAzureOpenAI=mockAzureOpenAI)
    smart_agent_response: AgentResponse = smart_agent.run(user_input=smart_agent_prompt)

    # With no input the agent echoes its initial (greeting) message.
    assert smart_agent_response.response == smart_agent._conversation[1]["content"]


def test_if_conversation_is_not_none(mocker: pytest_mock.MockerFixture) -> None:
    """Test if conversation is not None"""
    conversation_item = {"role": "user", "content": "Initial Conversation Item"}
    mockAzureOpenAI = setup_mock_azure_openai(mocker=mocker, chat_completion_response="Assistant Response")
    smart_agent: Smart_Agent = setup(mocker=mocker, mockAzureOpenAI=mockAzureOpenAI)
    smart_agent_response: AgentResponse = smart_agent.run(user_input="Hello World", conversation=[conversation_item])

    # A caller-provided history must be adopted and preserved by the agent.
    assert conversation_item in smart_agent_response.conversation


def test_for_max_run_count(mocker: pytest_mock.MockerFixture) -> None:
    """Test for max run count"""
    # Every completion requests the same tool call, so the agent loops until
    # its per-question run limit trips and it gives up gracefully.
    chat_completion_tools: List[ChatCompletionMessageToolCall] = [
        ChatCompletionMessageToolCall(id="foo", type="function", function=Function(name="test", arguments="{}"))
    ]

    mockAzureOpenAI = setup_mock_azure_openai(
        mocker=mocker,
        chat_completion_response="Assistant Response",
        chat_completion_finish_reason="tool_calls",
        chat_completion_tool_calls=chat_completion_tools
    )

    smart_agent: Smart_Agent = setup(
        mocker=mocker,
        mockAzureOpenAI=mockAzureOpenAI
    )

    smart_agent_response: AgentResponse = smart_agent.run(user_input="Hello World")

    assert smart_agent_response.response == "I am unable to answer this question at the moment, please ask another question."


def test_for_tool_calls(mocker: pytest_mock.MockerFixture) -> None:
    """Test for tool calls"""
    # First completion requests the "search" tool; the second (side effect)
    # delivers the final assistant answer.
    chat_completion_response: str = "Assistant Response"
    tool_id: str = "foo"
    tool_name: str = "search"
    tool_message = {'tool_call_id': tool_id, 'role': 'tool', 'name': tool_name, 'content': []}
    chat_completion_side_effects: List[ChatCompletion] = [
        ChatCompletion(
            id=tool_id,
            model="gpt-4",
            object="chat.completion",
            choices=[
                Choice(
                    finish_reason="stop",
                    index=0,
                    message=ChatCompletionMessage(
                        content=chat_completion_response,
                        role="assistant",
                    ),
                )
            ],
            created=int(datetime.now().timestamp())
        )
    ]

    chat_completion_tools: List[ChatCompletionMessageToolCall] = [
        ChatCompletionMessageToolCall(
            id="foo",
            type="function",
            function=Function(
                name=tool_name,
                arguments="{\"args\": \"Hello World\", \"kwargs\": \"Hello World\"}"
            )
        )
    ]

    mockAzureOpenAI = setup_mock_azure_openai_with_side_effects(
        mocker=mocker,
        chat_completion_response="Assistant Response",
        chat_completion_finish_reason="tool_calls",
        chat_completion_tool_calls=chat_completion_tools,
        chat_completion_side_effect=chat_completion_side_effects
    )

    smart_agent: Smart_Agent = setup(
        mocker=mocker,
        mockAzureOpenAI=mockAzureOpenAI
    )

    smart_agent_response: AgentResponse = smart_agent.run(user_input="Hello World")

    assert smart_agent_response.response == chat_completion_response
    assert tool_message in smart_agent_response.conversation
34 | """ 35 | 36 | # arrange 37 | query: str = vector_document.page_content 38 | retriever: VectorStoreRetriever = vector_store.as_retriever( 39 | search_kwargs={"k": 1}, 40 | ) 41 | 42 | # act 43 | documents: List[Document] = retriever.invoke(input=query) 44 | 45 | # assert 46 | assert len(documents) == 1 47 | assert documents[0].page_content == query 48 | -------------------------------------------------------------------------------- /tests/tests.py: -------------------------------------------------------------------------------- 1 | """The main module for tests.""" 2 | --------------------------------------------------------------------------------