├── .editorconfig
├── .gitattributes
├── .github
│   └── dependabot.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── COMMUNITY.md
├── CONFIGURATION.md
├── CONTRIBUTING.md
├── DIARY.md
├── Directory.Build.props
├── Directory.Packages.props
├── DockerElasticsearchAndKibana.md
├── KernelMemoryElasticsearch.sln
├── KernelMemoryElasticsearch.sln.DotSettings
├── LICENSE
├── NUGET.md
├── README.md
├── SECURITY.md
├── TODO.md
├── code-analysis.props
├── content
│   └── images
│       ├── Connectors.jpg
│       ├── CreateIndices.png
│       ├── DataPage1.jpg
│       ├── DataPage2.jpg
│       ├── DataPageAllRows.jpg
│       ├── ESLogo.jpg
│       ├── FML-Logo-Round.gif
│       ├── FML-Logo-Round.png
│       ├── FML-Logo-Square.gif
│       ├── FML-Logo-Square.png
│       ├── FMLLogo.png
│       ├── Free Mind Labs logo.png
│       ├── Free Mind Labs.gif
│       ├── KnnQuery.jpg
│       ├── Mappings.jpg
│       ├── Pipelines.jpg
│       ├── RAG.jpg
│       ├── Solution.png
│       └── TestRunning.png
├── docker
│   ├── .env.example
│   ├── .gitattributes
│   ├── LICENSE
│   ├── README.md
│   ├── app
│   │   ├── dockerfile
│   │   ├── main.py
│   │   └── requirements.txt
│   ├── docker-compose.yml
│   ├── filebeat.yml
│   ├── images
│   │   ├── CACRT.png
│   │   ├── ComposeRunning.png
│   │   ├── DevConsole.png
│   │   ├── DockerDesktop.png
│   │   ├── DockerSolutionFolder.png
│   │   ├── ELKStack.png
│   │   ├── ESServer.png
│   │   ├── ElasticAgentMetrics.png
│   │   ├── EnvSample.png
│   │   ├── FinalAgentConfiguration.png
│   │   ├── Fleet.png
│   │   ├── GoodAgent.png
│   │   ├── InvalidAgent1.png
│   │   ├── InvalidAgent2.png
│   │   ├── InvalidAgent3.png
│   │   ├── SaveAndDeploy.png
│   │   └── WelcomePageKibana.png
│   ├── kibana.yml
│   ├── logstash.conf
│   ├── logstash_ingest_data
│   │   └── Air_Quality.csv
│   └── metricbeat.yml
├── icon.png
├── nuget-package.props
├── nuget.config
├── packages
│   └── README.md
├── src
│   └── ElasticsearchMemoryStorage
│       ├── ConfigurationException.cs
│       ├── ElasticsearchConfig.cs
│       ├── ElasticsearchConfigBuilder.cs
│       ├── ElasticsearchConfigExtensions.cs
│       ├── ElasticsearchMemory.cs
│       ├── ElasticsearchMemoryFilter.cs
│       ├── ElasticsearchMemoryRecord.cs
│       ├── ElasticsearchMemoryStorage.csproj
│       ├── ElasticsearchTag.cs
│       ├── Extensions
│       │   ├── KernelMemoryBuilderExtensions.cs
│       │   ├── MemoryFilterExtensions.cs
│       │   └── ServiceCollectionExtensions.cs
│       ├── IIndexNameHelper.cs
│       └── IndexNameHelper.cs
└── tests
    └── UnitTests
        ├── Data
        │   ├── file1-Wikipedia-Carbon.txt
        │   ├── file2-Wikipedia-Moon.txt
        │   ├── file3-lorem-ipsum.docx
        │   ├── file4-SK-Readme.pdf
        │   ├── file5-NASA-news.pdf
        │   └── file6-ANWC-image.jpg
        ├── DataStorageTests.cs
        ├── ElasticsearchTestBase.cs
        ├── IndexManagementTests.cs
        ├── IndexnameTests.cs
        ├── KernelMemoryTests.cs
        ├── SearchTests.cs
        ├── Startup.cs
        ├── TestsHelper.cs
        ├── UnitTests.csproj
        └── appSettings.json
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto-detect text files, ensure they use LF.
2 | * text=auto eol=lf working-tree-encoding=UTF-8
3 |
4 | # Bash scripts
5 | *.sh text eol=lf
6 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 |   - package-ecosystem: "" # See documentation for possible values
9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "weekly"
12 |       day: "sunday"
13 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dotnet/.config
2 | tmp/
3 | tmp-*/
4 |
5 | ## Ignore Visual Studio temporary files, build results, and
6 | ## files generated by popular Visual Studio add-ons.
7 | ##
8 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
9 |
10 | # User-specific files
11 | *.rsuser
12 | *.suo
13 | *.user
14 | *.userosscache
15 | *.sln.docstates
16 |
17 | # User-specific files (MonoDevelop/Xamarin Studio)
18 | *.userprefs
19 |
20 | # Mono auto generated files
21 | mono_crash.*
22 |
23 | # Build results
24 | [Dd]ebug/
25 | [Dd]ebugPublic/
26 | [Rr]elease/
27 | [Rr]eleases/
28 | x64/
29 | x86/
30 | [Ww][Ii][Nn]32/
31 | [Aa][Rr][Mm]/
32 | [Aa][Rr][Mm]64/
33 | bld/
34 | [Bb]in/
35 | [Oo]bj/
36 | [Ll]og/
37 | [Ll]ogs/
38 |
39 | # Visual Studio 2015/2017 cache/options directory
40 | .vs/
41 | # Uncomment if you have tasks that create the project's static files in wwwroot
42 | #wwwroot/
43 |
44 | # Visual Studio 2017 auto generated files
45 | Generated\ Files/
46 |
47 | # MSTest test Results
48 | [Tt]est[Rr]esult*/
49 | [Bb]uild[Ll]og.*
50 |
51 | # NUnit
52 | *.VisualState.xml
53 | TestResult.xml
54 | nunit-*.xml
55 |
56 | # Build Results of an ATL Project
57 | [Dd]ebugPS/
58 | [Rr]eleasePS/
59 | dlldata.c
60 |
61 | # Benchmark Results
62 | BenchmarkDotNet.Artifacts/
63 |
64 | # .NET Core
65 | project.lock.json
66 | project.fragment.lock.json
67 | artifacts/
68 |
69 | # ASP.NET Scaffolding
70 | ScaffoldingReadMe.txt
71 |
72 | # StyleCop
73 | StyleCopReport.xml
74 |
75 | # Files built by Visual Studio
76 | *_i.c
77 | *_p.c
78 | *_h.h
79 | *.ilk
80 | *.meta
81 | *.obj
82 | *.iobj
83 | *.pch
84 | *.pdb
85 | *.ipdb
86 | *.pgc
87 | *.pgd
88 | *.rsp
89 | *.sbr
90 | *.tlb
91 | *.tli
92 | *.tlh
93 | *.tmp
94 | *.tmp_proj
95 | *_wpftmp.csproj
96 | *.log
97 | *.tlog
98 | *.vspscc
99 | *.vssscc
100 | .builds
101 | *.pidb
102 | *.svclog
103 | *.scc
104 |
105 | # Chutzpah Test files
106 | _Chutzpah*
107 |
108 | # Visual C++ cache files
109 | ipch/
110 | *.aps
111 | *.ncb
112 | *.opendb
113 | *.opensdf
114 | *.sdf
115 | *.cachefile
116 | *.VC.db
117 | *.VC.VC.opendb
118 |
119 | # Visual Studio profiler
120 | *.psess
121 | *.vsp
122 | *.vspx
123 | *.sap
124 |
125 | # Visual Studio Trace Files
126 | *.e2e
127 |
128 | # TFS 2012 Local Workspace
129 | $tf/
130 |
131 | # Guidance Automation Toolkit
132 | *.gpState
133 |
134 | # ReSharper is a .NET coding add-in
135 | _ReSharper*/
136 | *.[Rr]e[Ss]harper
137 | *.DotSettings.user
138 |
139 | # TeamCity is a build add-in
140 | _TeamCity*
141 |
142 | # DotCover is a Code Coverage Tool
143 | *.dotCover
144 |
145 | # AxoCover is a Code Coverage Tool
146 | .axoCover/*
147 | !.axoCover/settings.json
148 |
149 | # Coverlet is a free, cross platform Code Coverage Tool
150 | coverage*.json
151 | coverage*.xml
152 | coverage*.info
153 |
154 | # Visual Studio code coverage results
155 | *.coverage
156 | *.coveragexml
157 |
158 | # NCrunch
159 | _NCrunch_*
160 | .*crunch*.local.xml
161 | nCrunchTemp_*
162 |
163 | # MightyMoose
164 | *.mm.*
165 | AutoTest.Net/
166 |
167 | # Web workbench (sass)
168 | .sass-cache/
169 |
170 | # Installshield output folder
171 | [Ee]xpress/
172 |
173 | # DocProject is a documentation generator add-in
174 | DocProject/buildhelp/
175 | DocProject/Help/*.HxT
176 | DocProject/Help/*.HxC
177 | DocProject/Help/*.hhc
178 | DocProject/Help/*.hhk
179 | DocProject/Help/*.hhp
180 | DocProject/Help/Html2
181 | DocProject/Help/html
182 |
183 | # Click-Once directory
184 | publish/
185 |
186 | # Publish Web Output
187 | *.[Pp]ublish.xml
188 | *.azurePubxml
189 | # Note: Comment the next line if you want to checkin your web deploy settings,
190 | # but database connection strings (with potential passwords) will be unencrypted
191 | *.pubxml
192 | *.publishproj
193 |
194 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
195 | # checkin your Azure Web App publish settings, but sensitive information contained
196 | # in these scripts will be unencrypted
197 | PublishScripts/
198 |
199 | # NuGet Packages
200 | *.nupkg
201 | # NuGet Symbol Packages
202 | *.snupkg
203 | # The packages folder can be ignored because of Package Restore
204 | **/[Pp]ackages/*
205 | # except build/, which is used as an MSBuild target.
206 | !**/[Pp]ackages/build/
207 | # Uncomment if necessary however generally it will be regenerated when needed
208 | #!**/[Pp]ackages/repositories.config
209 | # NuGet v3's project.json files produces more ignorable files
210 | *.nuget.props
211 | *.nuget.targets
212 |
213 | # Microsoft Azure Build Output
214 | csx/
215 | *.build.csdef
216 |
217 | # Microsoft Azure Emulator
218 | ecf/
219 | rcf/
220 |
221 | # Windows Store app package directories and files
222 | AppPackages/
223 | BundleArtifacts/
224 | Package.StoreAssociation.xml
225 | _pkginfo.txt
226 | *.appx
227 | *.appxbundle
228 | *.appxupload
229 |
230 | # Visual Studio cache files
231 | # files ending in .cache can be ignored
232 | *.[Cc]ache
233 | # but keep track of directories ending in .cache
234 | !?*.[Cc]ache/
235 |
236 | # Others
237 | ClientBin/
238 | ~$*
239 | *~
240 | *.dbmdl
241 | *.dbproj.schemaview
242 | *.jfm
243 | *.pfx
244 | *.publishsettings
245 | orleans.codegen.cs
246 |
247 | # Including strong name files can present a security risk
248 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
249 | #*.snk
250 |
251 | # Since there are multiple workflows, uncomment next line to ignore bower_components
252 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
253 | #bower_components/
254 |
255 | # RIA/Silverlight projects
256 | Generated_Code/
257 |
258 | # Backup & report files from converting an old project file
259 | # to a newer Visual Studio version. Backup files are not needed,
260 | # because we have git ;-)
261 | _UpgradeReport_Files/
262 | Backup*/
263 | UpgradeLog*.XML
264 | UpgradeLog*.htm
265 | ServiceFabricBackup/
266 | *.rptproj.bak
267 |
268 | # SQL Server files
269 | *.mdf
270 | *.ldf
271 | *.ndf
272 |
273 | # Business Intelligence projects
274 | *.rdl.data
275 | *.bim.layout
276 | *.bim_*.settings
277 | *.rptproj.rsuser
278 | *- [Bb]ackup.rdl
279 | *- [Bb]ackup ([0-9]).rdl
280 | *- [Bb]ackup ([0-9][0-9]).rdl
281 |
282 | # Microsoft Fakes
283 | FakesAssemblies/
284 |
285 | # GhostDoc plugin setting file
286 | *.GhostDoc.xml
287 |
288 | # Node.js Tools for Visual Studio
289 | .ntvs_analysis.dat
290 | node_modules/
291 |
292 | # Visual Studio 6 build log
293 | *.plg
294 |
295 | # Visual Studio 6 workspace options file
296 | *.opt
297 |
298 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
299 | *.vbw
300 |
301 | # Visual Studio 6 auto-generated project file (contains which files were open etc.)
302 | *.vbp
303 |
304 | # Visual Studio 6 workspace and project file (working project files containing files to include in project)
305 | *.dsw
306 | *.dsp
307 |
308 | # Visual Studio 6 technical files
309 | *.ncb
310 | *.aps
311 |
312 | # Visual Studio LightSwitch build output
313 | **/*.HTMLClient/GeneratedArtifacts
314 | **/*.DesktopClient/GeneratedArtifacts
315 | **/*.DesktopClient/ModelManifest.xml
316 | **/*.Server/GeneratedArtifacts
317 | **/*.Server/ModelManifest.xml
318 | _Pvt_Extensions
319 |
320 | # Paket dependency manager
321 | .paket/paket.exe
322 | paket-files/
323 |
324 | # FAKE - F# Make
325 | .fake/
326 |
327 | # CodeRush personal settings
328 | .cr/personal
329 |
330 | # Python Tools for Visual Studio (PTVS)
331 | __pycache__/
332 | *.pyc
333 |
334 | # Cake - Uncomment if you are using it
335 | # tools/**
336 | # !tools/packages.config
337 |
338 | # Tabs Studio
339 | *.tss
340 |
341 | # Telerik's JustMock configuration file
342 | *.jmconfig
343 |
344 | # BizTalk build output
345 | *.btp.cs
346 | *.btm.cs
347 | *.odx.cs
348 | *.xsd.cs
349 |
350 | # OpenCover UI analysis results
351 | OpenCover/
352 |
353 | # Azure Stream Analytics local run output
354 | ASALocalRun/
355 |
356 | # MSBuild Binary and Structured Log
357 | *.binlog
358 |
359 | # NVidia Nsight GPU debugger configuration file
360 | *.nvuser
361 |
362 | # MFractors (Xamarin productivity tool) working folder
363 | .mfractor/
364 |
365 | # Local History for Visual Studio
366 | .localhistory/
367 |
368 | # Visual Studio History (VSHistory) files
369 | .vshistory/
370 |
371 | # BeatPulse healthcheck temp database
372 | healthchecksdb
373 |
374 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
375 | MigrationBackup/
376 |
377 | # Ionide (cross platform F# VS Code tools) working folder
378 | .ionide/
379 |
380 | # Fody - auto-generated XML schema
381 | FodyWeavers.xsd
382 |
383 | # VS Code files for those working on multiple tools
384 | .vscode/*
385 | !.vscode/settings.json
386 | !.vscode/tasks.json
387 | !.vscode/launch.json
388 | !.vscode/extensions.json
389 | *.code-workspace
390 |
391 | # Local History for Visual Studio Code
392 | .history/
393 |
394 | # Windows Installer files from build outputs
395 | *.cab
396 | *.msi
397 | *.msix
398 | *.msm
399 | *.msp
400 |
401 | # JetBrains Rider
402 | *.sln.iml
403 | *.tmp
404 | *.log
405 | *.bck
406 | *.tgz
407 | *.tar
408 | *.zip
409 | *.cer
410 | *.crt
411 | *.key
412 | *.pem
413 |
414 | .env
415 | certs/
416 | launchSettings.json
417 | config.development.yaml
418 | *.development.config
419 | *.development.json
420 | .DS_Store
421 | .idea/
422 | node_modules/
423 | obj/
424 | bin/
425 | _dev/
426 | .dev/
427 | *.devis.*
428 | *.devis
429 | .vs/
430 | *.user
431 | **/.vscode/chrome
432 | **/.vscode/.ropeproject/objectdb
433 | *.pyc
434 | .ipynb_checkpoints
435 | .jython_cache/
436 | __pycache__/
437 | .mypy_cache/
438 | __pypackages__/
439 | .pdm.toml
440 | global.json
441 |
442 | # docfx
443 | **/DROP/
444 | **/TEMP/
445 | **/packages/
446 | **/bin/
447 | **/obj/
448 | _site
449 |
450 | # Yarn
451 | .yarn
452 | .yarnrc.yml
453 |
454 | # Python Environments
455 | .env
456 | .venv
457 | .myenv
458 | env/
459 | venv/
460 | myvenv/
461 | ENV/
462 |
463 | # Python dist
464 | dist/
465 |
466 | # Persistent storage
467 | data/qdrant
468 | data/chatstore*
469 |
470 | # Java build
471 | java/**/target
472 | java/.mvn/wrapper/maven-wrapper.jar
473 |
474 | # Java settings
475 | conf.properties
476 |
477 | # Playwright
478 | playwright-report/
479 |
480 | # Static Web App deployment config
481 | swa-cli.config.json
482 | **/copilot-chat-app/webapp/build
483 | **/copilot-chat-app/webapp/node_modules
484 | /content/.$ArticleDiagrams.drawio.bkp
485 | /content/.$ArticleDiagrams.drawio.dtmp
486 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | This project has adopted the code of conduct defined by the Contributor Covenant to clarify expected behavior in our community. For more information, see the [.NET Foundation Code of Conduct](https://dotnetfoundation.org/about/policies/code-of-conduct).
4 |
--------------------------------------------------------------------------------
/COMMUNITY.md:
--------------------------------------------------------------------------------
1 | # Welcome to the Semantic Kernel / Kernel Memory Community!
2 |
3 | Below are some ways that you can get involved.
4 |
5 | ## Engage on GitHub
6 |
7 | File issues, submit PRs, and share feedback and ideas about what you'd like to
8 | see from Kernel Memory. We do our best to respond to each submission.
9 |
10 | ## Public Semantic Kernel Community Office Hours
11 |
12 | We regularly have Community Office Hours that are open to the **public** to join.
13 |
14 | Add Semantic Kernel events to your calendar: download the
15 | [calendar.ics](https://aka.ms/sk-community-calendar) file.
16 |
17 | To keep topics organized, please submit what you'd like us to cover here:
18 | [https://forms.office.com/r/BbXFzmmFys](https://forms.office.com/r/BbXFzmmFys)
19 |
20 | If you are unable to make it live, all meetings will be recorded and posted online.
21 |
22 | ## Join the conversation on Discord
23 |
24 | We have a growing and active channel on Discord where you can get help, engage
25 | in lively discussion, and share what you've built with Kernel Memory and
26 | Semantic Kernel!
27 |
28 | Join our Discord:
29 | [https://aka.ms/SKDiscord](https://aka.ms/SKDiscord)
30 |
--------------------------------------------------------------------------------
/CONFIGURATION.md:
--------------------------------------------------------------------------------
1 | # Configuration
2 |
3 | The xUnit project UnitTests contains an [appSettings.json](tests/UnitTests/appSettings.json) file that lists all available options. The file reads as follows:
4 |
5 | ```json
6 | {
7 |   "Elasticsearch": {
8 |     "CertificateFingerPrint": "...SECRETS...",
9 |     "Endpoint": "http://localhost:9200",
10 |     "UserName": "...SECRETS...",
11 |     "Password": "...SECRETS..."
12 |   }
13 | }
14 | ```
15 | This file is meant to show the available options; it is not meant to store sensitive information.
16 | Modify it as necessary (e.g. by changing the Endpoint), but add the values for the certificate fingerprint and the password to user secrets.
17 |
18 | >*The class used to store configuration is [ElasticsearchConfig](/src/ElasticsearchMemoryStorage/ElasticsearchConfig.cs).*
19 |
20 | ## User Secrets
21 |
22 | First, notice how the UserSecretsId of the test project is set to the same value used by Semantic Kernel and Kernel Memory:
23 | ```
24 | 5ee045b0-aea3-4f08-8d31-32d1a6f8fed0
25 | ```
26 | This lets us use the **same secrets file** across SK, KM, and this project.
27 |
28 | ### How to add user secrets
29 |
30 | To add secrets either:
31 | 1. Open the secrets file in your IDE by right-clicking on the project name and selecting Manage User Secrets.
32 | - To read more about user secrets click [here](https://learn.microsoft.com/en-us/aspnet/core/security/app-secrets?view=aspnetcore-8.0&tabs=windows)
33 |
34 | 1. Add the secrets from the command line by running the following commands:
35 | ```
36 | > dotnet user-secrets set "Elasticsearch:CertificateFingerPrint" "...your value..."
37 | > dotnet user-secrets set "Elasticsearch:UserName" "...your value..."
38 | > dotnet user-secrets set "Elasticsearch:Password" "...your value..."
39 | ```
40 |
41 | This ultimately results in the following secrets.json additions:
42 | ```
43 | {
44 | [..]
45 | "Elasticsearch:CertificateFingerPrint": "...your value...",
46 | "Elasticsearch:UserName": "...your value...",
47 | "Elasticsearch:Password": "...your value...",
48 | }
49 | ```
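
For reference, here is a minimal sketch of how such a configuration could be loaded and bound in code. It assumes the `Microsoft.Extensions.Configuration.*` packages and that the properties of `ElasticsearchConfig` mirror the JSON keys shown above (the namespace used below is hypothetical); the actual configuration setup used by the tests lives in [Startup.cs](tests/UnitTests/Startup.cs).

```csharp
// Minimal sketch (not the project's actual startup code): load appSettings.json
// plus user secrets and bind the "Elasticsearch" section to ElasticsearchConfig.
using Microsoft.Extensions.Configuration;
using FreeMindLabs.KernelMemory.Elasticsearch; // hypothetical namespace

IConfiguration configuration = new ConfigurationBuilder()
    .AddJsonFile("appSettings.json", optional: false)
    .AddUserSecrets("5ee045b0-aea3-4f08-8d31-32d1a6f8fed0") // same id as SK/KM
    .AddEnvironmentVariables()
    .Build();

// Secrets such as "Elasticsearch:Password" override the placeholder values.
ElasticsearchConfig esConfig = configuration
    .GetSection("Elasticsearch")
    .Get<ElasticsearchConfig>()!;
```

Because user secrets are added after the JSON file, any key present in secrets.json (e.g. the password) takes precedence over the `...SECRETS...` placeholders.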
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to this project
2 |
3 | You can contribute to this project with issues and pull requests (PRs).
4 | Simply filing issues for problems you encounter is a great way to contribute.
5 | Contributing code is greatly appreciated.
6 |
7 | ## Reporting Issues
8 |
9 | We always welcome bug reports, API proposals and overall feedback. Here are a
10 | few tips on how you can make reporting your issue as effective as possible.
11 |
12 | ### Where to Report
13 |
14 | New issues can be reported in our
15 | [issues page](https://github.com/freemindlabsinc/FreeMindLabs.SemanticKernel/issues).
16 |
17 | Before filing a new issue, please search the list of issues to make sure it does
18 | not already exist.
19 |
20 | If you do find an existing issue for what you wanted to report, please include
21 | your own feedback in the discussion. Do consider upvoting (👍 reaction) the
22 | original report, as this helps us prioritize popular issues in our backlog.
23 |
24 | ### Writing a Good Bug Report
25 |
26 | Good bug reports make it easier for maintainers to verify and root cause the
27 | underlying problem.
28 | The better a bug report, the faster the problem will be resolved. Ideally, a bug
29 | report should contain the following information:
30 |
31 | - A high-level description of the problem.
32 | - A _minimal reproduction_, i.e. the smallest size of code/configuration
33 | required to reproduce the wrong behavior.
34 | - A description of the _expected behavior_, contrasted with the
35 | _actual behavior_ observed.
36 | - Information on the environment: OS/distribution, CPU architecture, SDK version,
37 | etc.
38 | - Additional information, e.g. Is it a regression from previous versions? Are
39 | there any known workarounds?
40 |
41 | ## Contributing Changes
42 |
43 | Project maintainers will merge accepted code changes from contributors.
44 |
45 | ### DOs and DON'Ts
46 |
47 | DOs:
48 |
49 | - **DO** follow the standard coding conventions
50 |
51 | - [.NET](https://learn.microsoft.com/dotnet/csharp/fundamentals/coding-style/coding-conventions)
52 | - [Python](https://pypi.org/project/black/)
53 | - [Typescript](https://typescript-eslint.io/rules/)/[React](https://github.com/jsx-eslint/eslint-plugin-react/tree/master/docs/rules)
54 |
55 | - **DO** give priority to the current style of the project or file you're changing
56 | if it diverges from the general guidelines.
57 | - **DO** include tests when adding new features. When fixing bugs, start with
58 | adding a test that highlights how the current behavior is broken.
59 | - **DO** keep the discussions focused. When a new or related topic comes up,
60 | it's often better to create a new issue than to sidetrack the discussion.
61 | - **DO** clearly state on an issue that you are going to take on implementing it.
62 | - **DO** blog and tweet (or whatever) about your contributions, frequently!
63 |
64 | DON'Ts:
65 |
66 | - **DON'T** surprise us with big pull requests. Instead, file an issue and start
67 | a discussion, so we can agree on a direction before you invest a large amount of time.
68 | - **DON'T** commit code that you didn't write. If you find code that you think is a good
69 | fit to add to Kernel Memory, file an issue and start a discussion before proceeding.
70 | - **DON'T** submit PRs that alter licensing related files or headers. If you believe
71 | there's a problem with them, file an issue, and we'll be happy to discuss it.
72 | - **DON'T** make new APIs without filing an issue and discussing with us first.
73 |
74 | ### Breaking Changes
75 |
76 | Contributions must maintain API signature and behavioral compatibility. Contributions
77 | that include breaking changes will be rejected. Please file an issue to discuss
78 | your idea or change if you believe that a breaking change is warranted.
79 |
80 | ### Suggested Workflow
81 |
82 | We use and recommend the following workflow:
83 |
84 | 1. Create an issue for your work.
85 | - You can skip this step for trivial changes.
86 | - Reuse an existing issue on the topic, if there is one.
87 | - Get agreement from the team and the community that your proposed change is
88 | a good one.
89 | - Clearly state that you are going to take on implementing it, if that's the case.
90 | You can request that the issue be assigned to you. Note: The issue filer and
91 | the implementer don't have to be the same person.
92 | 2. Create a personal fork of the repository on GitHub (if you don't already have one).
93 | 3. In your fork, create a branch off of main (`git checkout -b mybranch`).
94 | - Name the branch so that it clearly communicates your intentions, such as
95 | "issue-123" or "githubhandle-issue".
96 | 4. Make and commit your changes to your branch.
97 | 5. Add new tests corresponding to your change, if applicable.
98 | 6. Ensure that your code is formatted, the build is clean and all tests are passing.
99 | 7. Create a PR against the repository's **main** branch.
100 | - State in the description what issue or improvement your change is addressing.
101 | - Verify that all the Continuous Integration checks are passing.
102 | 8. Wait for feedback or approval of your changes from the code maintainers.
103 | 9. When area owners have signed off, and all checks are green, your PR will be merged.
104 |
105 | ### PR - CI Process
106 |
107 | The continuous integration (CI) system will automatically perform the required
108 | builds and run tests (including the ones you are expected to run) for PRs.
109 | Builds and test runs must be clean.
110 |
111 | If the CI build fails for any reason, the PR issue will be updated with a link
112 | that can be used to determine the cause of the failure.
113 |
--------------------------------------------------------------------------------
/DIARY.md:
--------------------------------------------------------------------------------
1 | ## Diary
2 |
3 | >A bunch of notes and thoughts about the project.
4 |
5 | :calendar: 12/20/2023
6 | 1. Added TODO.md file to the project
7 |
8 | :calendar: 12/19/2023
9 |
10 | 1. Version 0.4.0
11 | 1. Large number of changes inspired by the Hackathon and Devis Lucato.
12 | 1. See commits and PRs for details.
13 | 1. The issues with the hardcoded ```local``` NuGet repository should be fixed.
14 | 1. Added ```/docker``` folder with a Docker Compose file that runs the ELK stack very easily.
15 | 1. Added [installation instructions for Elastic Stack](/docker/README.md)
16 | 1. Renamed the repo
17 |
18 | :calendar: 12/05/2023
19 |
20 | 1. Working on SK Hackathon: Code Mapper project.
21 |
22 | :calendar: 12/04/2023
23 | 1. Version 0.3.0
24 | 1. Implemented most of the methods of IMemoryDb
25 | 1. Need to finish MemoryFilter implementation
26 | 1. General repo cleanup
27 |
28 | :calendar: 12/01/2023
29 | 1. Version 0.2.0
30 | 1. Added this DIARY .md file to the project.
31 | 1. Merged with the new KM nuget 0.15.231130.2-preview
32 | 1. Cleaned up the repo a lot after merging with the [kernel-memory-postgres repository](https://github.com/microsoft/kernel-memory-postgres).
33 | 1. Pages like LICENSE, README, etc. have been 'ported' from the same repository.
34 | 1. The analyzers are awesome. We essentially standardized to MS' conventions.
35 | 1. Changed editor .editorconfig to be for FML
36 | 1. Improved the configuration setup in UnitTests/Startup.cs
37 | 1. Determined how to better structure configuration options.
38 | 1. Created several extensions, including one to go from ElasticsearchConfig to ElasticsearchClientSettings
39 | 1. Moved code from the TestApplication into the UnitTest project
40 | 1. This is a better place for it.
41 |
--------------------------------------------------------------------------------
/Directory.Build.props:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | net7.0
6 | true
7 | true
8 | AllEnabledByDefault
9 | latest
10 | true
11 | 11
12 | enable
13 | disable
14 | LatestMajor
15 |
16 |
17 |
18 |
19 | disable
20 |
21 |
22 |
23 | true
24 | full
25 |
26 |
27 |
28 | portable
29 |
30 |
31 |
32 | $([System.IO.Path]::GetDirectoryName($([MSBuild]::GetPathOfFileAbove('.gitignore', '$(MSBuildThisFileDirectory)'))))
33 |
34 |
35 |
36 |
37 |
38 | <_Parameter1>false
39 |
40 |
41 |
--------------------------------------------------------------------------------
/Directory.Packages.props:
--------------------------------------------------------------------------------
1 |
2 |
3 | true
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/DockerElasticsearchAndKibana.md:
--------------------------------------------------------------------------------
1 | # How to install Elasticsearch and Kibana 8.x on Docker
2 |
3 | This article shows you how to install Elasticsearch and Kibana 8.x on Docker.
4 | The data will be persisted in a volume on the host machine, so it survives container restarts.
5 |
6 | These are the steps we will follow:
7 | 1. Make sure that the value of `vm.max_map_count` on the Docker host machine is high enough.
8 | 1. Create a Docker network called `elastic`.
9 | 1. We will use this network to connect Elasticsearch and Kibana.
10 | 1. We will then launch Elasticsearch using Docker.
11 | 1. When Elasticsearch runs for the first time it will output some security keys that we need to grab.
12 | 1. These keys will allow us to connect Kibana to Elasticsearch later.
13 | 1. We will then launch Kibana using Docker.
14 | 1. We will use the keys we grabbed from the logs of Elasticsearch to connect Kibana to Elasticsearch.
15 | 1. The installation should now be complete.
16 |
17 | To access Kibana, point your browser at http://localhost:5601.
18 | Kibana will, in turn, connect to Elasticsearch at http://localhost:9200.
19 |
20 | ## Prerequisites
21 |
22 | 1. WSL2 running properly, if on Windows
23 | 1. Docker running on Linux/WSL2
24 |
25 | ## Launch a terminal to configure the Docker host
26 |
27 | Launch a terminal and go to the '\docker' subfolder of this repo, which is where we will mount the volumes that will contain the data of Elasticsearch and Kibana. *We don't necessarily need to be in this folder to run all the commands, but it is good practice to keep all the files related to a project in the same folder.*
28 |
29 | If you are in a PowerShell terminal, just go to the ```\docker``` subfolder of this repository.
30 |
31 | ```powershell
32 | PS D:\> cd .\FreeMindLabs.KernelMemory.Elasticsearch\docker\
33 | ```
34 |
35 | Then launch WSL by typing ```wsl``` in the terminal
36 |
37 | ```powershell
38 | PS D:\FreeMindLabs.KernelMemory.Elasticsearch\docker> wsl
39 | ```
40 |
41 | You should now be in a Linux terminal similar to the following:
42 |
43 | ```bash
44 | sysadmin@OptimusPrime:/mnt/d/FreeMindLabs.KernelMemory.Elasticsearch/docker$
45 | ```
46 |
47 | ## How to Install Elasticsearch
48 |
49 | Before we can launch a Docker container running Elasticsearch, we need to configure two things:
50 |
51 | 1. Set a correct value for `vm.max_map_count` on the Docker host.
52 | 1. Create a Docker network called `elastic` that we will use to connect Elasticsearch and Kibana.
53 |
54 | ### Set vm.max_map_count on the Docker host
55 |
56 | > vm.max_map_count is a critical setting in Linux systems, particularly necessary for running Elasticsearch efficiently. It defines the maximum number of memory map areas a process can have.
57 | > The default vm.max_map_count value is typically too low for Elasticsearch, potentially leading to performance issues or even **preventing it from starting**. By increasing this value to at least 262144, as recommended by Elasticsearch's official documentation, you ensure that the Elasticsearch process has sufficient memory map areas for optimal performance and stability.
58 |
59 | To see the current value of `vm.max_map_count`, run the following command from WSL/Linux:
60 |
61 | ```bash
62 | sysctl vm.max_map_count
63 | ```
64 |
65 | If the value you read is less than `262144`, you need to increase it.
66 |
67 | You can set `vm.max_map_count` on the Docker host machine in two ways: temporarily or permanently.
68 |
69 | #### Temporarily (until the next reboot):
70 | Run the following command on your host machine (not inside the container):
71 |
72 | ```bash
73 | sudo sysctl -w vm.max_map_count=262144
74 | ```
75 |
76 | This command sets `vm.max_map_count` to `262144` temporarily.
77 |
78 | #### Permanently (recommended):
79 | To make this change permanent, you need to add it to your system's configuration file.
80 |
81 | 1. Edit the `/etc/sysctl.conf` file on your host machine:
82 |
83 | ```bash
84 | sudo nano /etc/sysctl.conf
85 | ```
86 |
87 | 2. Add the following line at the end of the file:
88 |
89 | ```
90 | vm.max_map_count=262144
91 | ```
92 |
93 | 3. Save and close the file.
94 |
95 | 4. To apply the changes without rebooting, run:
96 |
97 | ```bash
98 | sudo sysctl -p
99 | ```
100 |
101 | ### Create a Docker network called `elastic`
102 |
103 | To create a Docker network called `elastic`, run the following command from WSL/Linux:
104 |
105 | ```bash
106 | docker network create elastic
107 | ```
108 |
109 | The response should be something like this:
110 |
111 | ```bash
112 | 8a42de666bc5fdc5de1b9951eddc31da059eb2a13eb2c9eec879c7b0d9a0906b
113 | ```
114 |
115 | ### Launch Elasticsearch
116 |
117 | To launch Elasticsearch, run the following command from WSL/Linux:
118 |
119 | ```bash
120 | docker run -d --name elasticsearch-01 --net elastic -p 9200:9200 -e "discovery.type=single-node" -v $(pwd)/elasticsearch:/usr/share/elasticsearch/data -m 4g docker.elastic.co/elasticsearch/elasticsearch:8.3.3
121 | ```
122 | The response should be something like this:
123 |
124 | ```bash
125 | a4c20c2ecbec1fc0f90d72e481924928e4af49a16464928d92d0fedba784eb54
126 | ```
127 |
128 | Now we need to inspect the logs so we can grab the keys we need.
129 | Run the following command from WSL/Linux:
130 |
131 | ```bash
132 | docker logs elasticsearch-01 -f
133 | ```
134 |
135 | After startup completes (it may take a while), you should see output that includes the security keys mentioned earlier:
136 |
137 | ```bash
138 |
139 | ```
140 |
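Once you have grabbed the `elastic` password and the CA certificate fingerprint from those logs, a quick way to verify that Elasticsearch is reachable from .NET is a small ping, sketched below. This is only an illustration, not code from this repository: it assumes the `Elastic.Clients.Elasticsearch` NuGet package, and the placeholder values stand in for what the startup logs printed.

```csharp
// Illustrative connectivity check (not part of this repository's code).
// Replace the placeholders with the password and fingerprint printed in the
// Elasticsearch startup logs.
using Elastic.Clients.Elasticsearch;
using Elastic.Transport;

var settings = new ElasticsearchClientSettings(new Uri("https://localhost:9200"))
    .CertificateFingerprint("<fingerprint-from-startup-logs>")
    .Authentication(new BasicAuthentication("elastic", "<password-from-startup-logs>"));

var client = new ElasticsearchClient(settings);
var ping = await client.PingAsync();

Console.WriteLine(ping.IsValidResponse
    ? "Connected to Elasticsearch."
    : $"Could not connect: {ping.DebugInformation}");
```
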
141 |
142 | ## References
143 |
144 | 1. [How to install elasticsearch and kibana 8.x using Docker?](https://www.devopsschool.com/blog/how-to-install-elasticsearch-and-kibana-using-docker/)
145 | 1. [Install Elasticsearch with Docker](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html)
146 | 1. [Install Kibana with Docker](https://www.elastic.co/guide/en/kibana/current/docker.html)
147 | 1. [Deploying Elasticsearch and Kibana with Docker](https://quoeamaster.medium.com/deploying-elasticsearch-and-kibana-with-docker-86a4ac78d851)
148 | 1. [Getting started with the Elastic Stack and Docker Compose: Part 1](https://www.elastic.co/blog/getting-started-with-the-elastic-stack-and-docker-compose)
149 |
--------------------------------------------------------------------------------
/KernelMemoryElasticsearch.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.9.34310.174
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "root", "root", "{6EF76FD8-4C35-4370-8539-5DDF45357A50}"
7 | ProjectSection(SolutionItems) = preProject
8 | .editorconfig = .editorconfig
9 | .gitattributes = .gitattributes
10 | .gitignore = .gitignore
11 | code-analysis.props = code-analysis.props
12 | CODE_OF_CONDUCT.md = CODE_OF_CONDUCT.md
13 | CONFIGURATION.md = CONFIGURATION.md
14 | CONTRIBUTING.md = CONTRIBUTING.md
15 | Directory.Build.props = Directory.Build.props
16 | Directory.Packages.props = Directory.Packages.props
17 | icon.png = icon.png
18 | LICENSE = LICENSE
19 | nuget-package.props = nuget-package.props
20 | nuget.config = nuget.config
21 | NUGET.md = NUGET.md
22 | README.md = README.md
23 | SECURITY.md = SECURITY.md
24 | EndProjectSection
25 | EndProject
26 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "source", "source", "{98E1758C-113A-41F4-85A3-1C8EFFA6CEC2}"
27 | EndProject
28 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{A455710B-0D10-4200-AB39-DB5ECC457FAC}"
29 | EndProject
30 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "examples", "examples", "{9532FF95-6D14-43E7-B554-F5289C605172}"
31 | EndProject
32 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ElasticsearchMemoryStorage", "src\ElasticsearchMemoryStorage\ElasticsearchMemoryStorage.csproj", "{BE8D8957-8A6F-4879-BAAE-0462A118DFD3}"
33 | EndProject
34 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UnitTests", "tests\UnitTests\UnitTests.csproj", "{A615A571-2B3A-4C2B-9B1C-371BF87D8DBE}"
35 | EndProject
36 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docker", "docker", "{F8AB554F-C604-4098-B4C1-4CBAD790ADC0}"
37 | ProjectSection(SolutionItems) = preProject
38 | docker\.env = docker\.env
39 | docker\.env.example = docker\.env.example
40 | docker\docker-compose.yml = docker\docker-compose.yml
41 | docker\filebeat.yml = docker\filebeat.yml
42 | docker\kibana.yml = docker\kibana.yml
43 | docker\logstash.conf = docker\logstash.conf
44 | docker\metricbeat.yml = docker\metricbeat.yml
45 | docker\README.md = docker\README.md
46 | EndProjectSection
47 | EndProject
48 | Global
49 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
50 | Debug|Any CPU = Debug|Any CPU
51 | Release|Any CPU = Release|Any CPU
52 | EndGlobalSection
53 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
54 | {BE8D8957-8A6F-4879-BAAE-0462A118DFD3}.Debug|Any CPU.ActiveCfg = Release|Any CPU
55 | {BE8D8957-8A6F-4879-BAAE-0462A118DFD3}.Debug|Any CPU.Build.0 = Release|Any CPU
56 | {BE8D8957-8A6F-4879-BAAE-0462A118DFD3}.Release|Any CPU.ActiveCfg = Release|Any CPU
57 | {BE8D8957-8A6F-4879-BAAE-0462A118DFD3}.Release|Any CPU.Build.0 = Release|Any CPU
58 | {A615A571-2B3A-4C2B-9B1C-371BF87D8DBE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
59 | {A615A571-2B3A-4C2B-9B1C-371BF87D8DBE}.Debug|Any CPU.Build.0 = Debug|Any CPU
60 | {A615A571-2B3A-4C2B-9B1C-371BF87D8DBE}.Release|Any CPU.ActiveCfg = Release|Any CPU
61 | {A615A571-2B3A-4C2B-9B1C-371BF87D8DBE}.Release|Any CPU.Build.0 = Release|Any CPU
62 | EndGlobalSection
63 | GlobalSection(SolutionProperties) = preSolution
64 | HideSolutionNode = FALSE
65 | EndGlobalSection
66 | GlobalSection(NestedProjects) = preSolution
67 | {BE8D8957-8A6F-4879-BAAE-0462A118DFD3} = {98E1758C-113A-41F4-85A3-1C8EFFA6CEC2}
68 | {A615A571-2B3A-4C2B-9B1C-371BF87D8DBE} = {A455710B-0D10-4200-AB39-DB5ECC457FAC}
69 | EndGlobalSection
70 | GlobalSection(ExtensibilityGlobals) = postSolution
71 | SolutionGuid = {964BE41E-E834-4596-BFDB-5F9D5BA9F048}
72 | EndGlobalSection
73 | EndGlobal
74 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Free Mind Labs, Inc.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/NUGET.md:
--------------------------------------------------------------------------------
1 | # Kernel Memory with Elasticsearch
2 |
3 | [](https://github.com/microsoft/kernel-memory/blob/main/LICENSE)
4 |
5 | Use [Elasticsearch](https://www.elastic.co/) as vector storage for Microsoft [Kernel Memory](https://github.com/microsoft/semantic-memory).
6 |
7 | See our [GitHub repository](https://github.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch).
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ⚠️ The Elasticsearch connector for Kernel Memory has been incorporated into Microsoft Kernel Memory and this repository
2 | has been archived.
3 | You can find the latest source code [here](https://github.com/microsoft/kernel-memory/tree/main/extensions/Elasticsearch/Elasticsearch).
4 |
5 | # Kernel Memory with Elasticsearch
6 |
7 |
8 | Use [Elasticsearch](https://www.elastic.co/) as vector storage for Microsoft [Kernel Memory](https://github.com/microsoft/semantic-memory).
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | [](https://www.nuget.org/packages/Freemindlabs.KernelMemory.Elasticsearch) [](https://www.nuget.org/packages/Freemindlabs.KernelMemory.Elasticsearch) [](https://github.com/freemindlabsinc/FreeMindLabs.SemanticKernel/blob/main/LICENSE)
17 |
18 |
19 |
20 | ---
21 | **Kernel Memory** (KM) is a **multi-modal [AI Service](https://github.com/microsoft/kernel-memory/blob/main/service/Service/README.md)** specialized in the efficient indexing of datasets through custom continuous data hybrid pipelines, with support for **[Retrieval Augmented Generation](https://en.wikipedia.org/wiki/Prompt_engineering#Retrieval-augmented_generation)** (RAG), synthetic memory, prompt engineering, and custom semantic memory processing.
22 |
23 |
24 |
25 | Utilizing advanced embeddings and LLMs, the system enables Natural Language querying for obtaining answers from the indexed data, complete with citations and links to the original sources.
26 |
27 |
28 |
29 | ---
30 |
31 | This repository contains the **Elasticsearch adapter** that allows KM to use Elasticsearch as a vector database, thus allowing developers to perform [lexical and semantic search](https://www.elastic.co/search-labs/blog/articles/lexical-and-semantic-search-with-elasticsearch), in addition to [hybrid](https://opster.com/guides/elasticsearch/machine-learning/elasticsearch-hybrid-search/), keyword, and full-text search, on your *semantic content*.
32 |
33 | ## Pre-requisites
34 |
35 | 1. A running instance of Elasticsearch
36 |
37 | 1. You can install a **local instance** of Elasticsearch using Docker.
38 | To simplify the setup of a running instance of Elasticsearch we prepared the article [Installing the Elastic Stack using Docker Compose](/docker/README.md) that guides you through the process. *The following diagram shows what will be running once the installation is complete.*
39 |
40 |
41 |
42 |
43 | 2. Alternatively, you can use a **cloud** service like [Elastic Cloud](https://www.elastic.co/cloud/). The free tier is sufficient.
44 |
45 | ## Configuration
46 |
47 | The xUnit project UnitTests contains an [appSettings.json](tests/UnitTests/appSettings.json) file that lists all available options. The file reads as follows:
48 |
49 | ```json
50 | {
51 |   "OpenAI": {
52 |     "ApiKey": "...SECRETS...",
53 |     "EmbeddingModelId": "text-embedding-ada-002",
54 |     "ModelId": "text-davinci-003",
55 |     "ChatModelId": "gpt-3.5-turbo"
56 |   },
57 |   "Elasticsearch": {
58 |     "CertificateFingerPrint": "...SECRETS...",
59 |     "Endpoint": "https://localhost:9200",
60 |     "UserName": "...SECRETS...",
61 |     "Password": "...SECRETS..."
62 |   }
63 | }
64 | ```
65 |
66 | >*The class used to store configuration is [ElasticsearchConfig](/src/ElasticsearchMemoryStorage/ElasticsearchConfig.cs).*
67 |
68 | This file is meant to show the available options; it is not meant to store sensitive information such as ```ApiKey```, ```Password``` or ```CertificateFingerPrint```. Modify this file as necessary (e.g. by changing the Endpoint), but add the values for the certificate fingerprint and the password to user secrets.
69 |
70 | ### How to add user secrets
71 |
72 | To add secrets either:
73 | - Open the secrets file in your IDE by right-clicking on the project name and selecting Manage User Secrets.
74 | - To read more about user secrets click [here](https://learn.microsoft.com/en-us/aspnet/core/security/app-secrets?view=aspnetcore-8.0&tabs=windows)
75 |
76 | - Add the secrets from the command line by running the following commands:
77 | ```
78 | > dotnet user-secrets set "OpenAI:ApiKey" "...your Open AI API key..."
79 | > dotnet user-secrets set "Elasticsearch:CertificateFingerPrint" "...your value..."
80 | > dotnet user-secrets set "Elasticsearch:Password" "...your value..."
81 | ```
82 |
83 | This ultimately results in the following secrets.json additions:
84 | ```
85 | {
86 | [..]
87 | "OpenAI:ApiKey": "...your Open AI API key...",
88 | "Elasticsearch:CertificateFingerPrint": "...your value...",
89 | "Elasticsearch:Password": "...your value...",
90 | }
91 | ```
92 |
93 |
94 | ## The .NET Solution
95 |
96 | This is a screenshot of the solution.
97 | We highlighted some of the most important files for you to explore and look at.
98 |
99 |
100 |
101 |
102 |
103 | ---
104 |
105 | Here are some screenshots of the tests included in the project.
106 | This project tries to follow [TDD](https://www.coscreen.co/blog/tdd-in-c-guide/) and uses a test-first approach. The tests are meant to show how to use the library and to demonstrate the available features.
107 |
108 |
109 |
110 |
111 |
112 | Click [here](tests/UnitTests/DataStorageTests.cs) to see the source code of the test.
113 |
114 | *Always make sure to look at the output window to see details about the execution.* :eyes:
115 |
116 |
117 |
118 |
119 |
120 |
121 | Click [here](tests/UnitTests/IndexManagementTests.cs) to see the source code of the test.
122 |
123 | ## How to add the Elasticsearch adapter to your Kernel Memory project
124 |
125 | In order to add the Elasticsearch adapter to your project you first need to add a reference to the [Freemindlabs.KernelMemory.Elasticsearch](https://www.nuget.org/packages/Freemindlabs.KernelMemory.Elasticsearch) NuGet package.
126 |
127 | ```
128 | > dotnet add package Freemindlabs.KernelMemory.Elasticsearch
129 | ```
130 |
131 | Then you can choose one of the ```WithElasticsearch``` extension methods of the IKernelMemoryBuilder interface.
132 |
133 | ```csharp
134 | // From Program.cs of the Service project of the Kernel Memory repository. Line 86.
135 |
136 | [..]
137 | // Loads the Elasticsearch configuration
138 | var esConfig = config.GetServiceConfig(appBuilder.Configuration, "ElasticsearchVectorDb");
139 |
140 | // Inject memory client and its dependencies
141 | // Note: pass the current service collection to the builder, in order to start the pipeline handlers
142 | IKernelMemory memory = new KernelMemoryBuilder(appBuilder.Services)
143 | .FromAppSettings()
144 | // .With...() // in case you need to set something not already defined by `.FromAppSettings()`
145 | .WithElasticsearch(esConfig) // <--- this
146 | .Build();
147 |
148 | appBuilder.Services.AddSingleton(memory);
149 |
150 | // Build .NET web app as usual
151 | var app = appBuilder.Build();
152 | [..]
153 | ```
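
If you are not using the Service project's configuration helpers, a more minimal, serverless setup is also possible. The following is only a sketch: it assumes your OpenAI API key is at hand, that the ```ElasticsearchConfig``` properties mirror the appSettings.json keys shown above and are settable (the repository also provides an ```ElasticsearchConfigBuilder``` for this purpose), and uses the standard ```KernelMemoryBuilder``` serverless pattern.

```csharp
// Minimal sketch (serverless mode, not the Service project's code).
// Assumes ElasticsearchConfig's properties mirror the appSettings.json keys above.
var esConfig = new ElasticsearchConfig
{
    Endpoint = "https://localhost:9200",
    UserName = "elastic",
    Password = "...from user secrets...",
    CertificateFingerPrint = "...from user secrets..."
};

var memory = new KernelMemoryBuilder()
    .WithOpenAIDefaults("...your OpenAI API key...")
    .WithElasticsearch(esConfig)
    .Build<MemoryServerless>();

// Index one of the test documents shipped with the repository, then ask a question.
await memory.ImportDocumentAsync("tests/UnitTests/Data/file1-Wikipedia-Carbon.txt");
var answer = await memory.AskAsync("What is carbon?");
Console.WriteLine(answer.Result);
```
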
154 |
155 |
156 | ## Resources
157 |
158 | 1. :fire: [How to build a Kernel Memory connector and use Elasticsearch as a vector database - Part 1](/content/IMemoryDbArticle.md)
159 | 1. To be relocated and published officially on Microsoft's [devblogs for Semantic Kernel](https://devblogs.microsoft.com/semantic-kernel/).
160 |
161 | 1. [A Quick Introduction to Vector Search](https://opster.com/guides/opensearch/opensearch-machine-learning/introduction-to-vector-search/)
162 | 1. [Elasticsearch Hybrid Search](https://opster.com/guides/elasticsearch/machine-learning/elasticsearch-hybrid-search/)
163 |
164 | 1. Elastic's official docs on the client.
165 | 1. NEST 7.17: https://www.elastic.co/guide/en/elasticsearch/client/net-api/7.17/nest-getting-started.html
166 | 1. New client 8.9: https://www.elastic.co/guide/en/elasticsearch/client/net-api/8.9/introduction.html
167 | 1. This client is not yet feature complete.
168 | 1. Look here for details: https://www.elastic.co/guide/en/elasticsearch/client/net-api/current/release-notes-8.0.0.html
169 | 1. In addition, the docs are not up to date. For some features we still need to look at NEST's docs.
170 |
171 | 1. [Elasticsearch.net GitHub repository](https://github.com/elastic/elasticsearch-net)
172 |
173 | 1. Semantic Kernel/Memory-Kernel
174 | 1. [Introduction to Semantic Memory (feat. Devis Lucato) | Semantic Kernel](https://www.youtube.com/watch?v=5JYW_uAxwYM)
175 | 1. [11.29.2023 - Semantic Kernel Office Hours (US/Europe Region)](https://www.youtube.com/watch?v=JSca9mVUUJo)
176 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Security
4 |
5 | Microsoft takes the security of our software products and services seriously,
6 | which includes all source code repositories managed through our GitHub
7 | organizations, which include [Microsoft](https://github.com/microsoft),
8 | [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet),
9 | [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin),
10 | and [our GitHub organizations](https://opensource.microsoft.com/).
11 |
12 | If you believe you have found a security vulnerability in any Microsoft-owned
13 | repository that meets
14 | [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition),
15 | please report it to us as described below.
16 |
17 | ## Reporting Security Issues
18 |
19 | **Please do not report security vulnerabilities through public GitHub issues.**
20 |
21 | Instead, please report them to the Microsoft Security Response Center (MSRC) at
22 | [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
23 |
24 | If you prefer to submit without logging in, send email to
25 | [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your
26 | message with our PGP key; please download it from the
27 | [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
28 |
29 | You should receive a response within 24 hours. If for some reason you do not,
30 | please follow up via email to ensure we received your original message.
31 | Additional information can be found at
32 | [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
33 |
34 | Please include the requested information listed below (as much as you can
35 | provide) to help us better understand the nature and scope of the possible issue:
36 |
37 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
38 | * Full paths of source file(s) related to the manifestation of the issue
39 | * The location of the affected source code (tag/branch/commit or direct URL)
40 | * Any special configuration required to reproduce the issue
41 | * Step-by-step instructions to reproduce the issue
42 | * Proof-of-concept or exploit code (if possible)
43 | * Impact of the issue, including how an attacker might exploit the issue
44 |
45 | This information will help us triage your report more quickly.
46 |
47 | If you are reporting for a bug bounty, more complete reports can contribute to
48 | a higher bounty award. Please visit our
49 | [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page
50 | for more details about our active programs.
51 |
52 | ## Preferred Languages
53 |
54 | We prefer all communications to be in English.
55 |
56 | ## Policy
57 |
58 | Microsoft follows the principle of
59 | [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
60 |
61 |
62 |
--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
1 | # TODOs
2 |
3 | 1. Complete article about IMemoryConnector
4 | - Now that the repo is clean, add code examples to article (MOSTLY DONE)
5 |
6 | 1. Make properties of the mapping not nullable as per the postgres [code](https://github.com/microsoft/kernel-memory-postgres/blob/58df8fa4cee89add3ba6e49e00535aa1f7b43b02/PostgresMemoryStorage/Db/PostgresDbClient.cs#L142)
7 |
8 | 1. Make custom column available.
9 | 1. See [PostgresConfig](https://github.com/microsoft/kernel-memory-postgres/blob/main/PostgresMemoryStorage/PostgresConfig.cs)
10 |
11 | # Done
12 |
13 | 1. Add a new content (text) column to the ES mapping to index the content that is inside the Payload
14 | - Q: Look into Payload as it stores JSON like ```{"file": "blabla.txt", "text": "...the chunk's text...", "vector_provider": "xxxx", "vector_generator": "TODO", "last_update": "2023-12-05T16:23:19" }```
15 | - [See image here](https://github.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/blob/main/content/images/DataPage2.jpg)
16 |
17 | - See if I need to integrate more of Davis' thoughts into the article
--------------------------------------------------------------------------------
/code-analysis.props:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | all
5 | runtime; build; native; contentfiles; analyzers; buildtransitive
6 |
7 |
8 | all
9 | runtime; build; native; contentfiles; analyzers; buildtransitive
10 |
11 |
12 | all
13 | runtime; build; native; contentfiles; analyzers; buildtransitive
14 |
15 |
16 | all
17 | runtime; build; native; contentfiles; analyzers; buildtransitive
18 |
19 |
20 | all
21 | runtime; build; native; contentfiles; analyzers; buildtransitive
22 |
23 |
24 | all
25 | runtime; build; native; contentfiles; analyzers; buildtransitive
26 |
27 |
28 |
--------------------------------------------------------------------------------
/content/images/Connectors.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Connectors.jpg
--------------------------------------------------------------------------------
/content/images/CreateIndices.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/CreateIndices.png
--------------------------------------------------------------------------------
/content/images/DataPage1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/DataPage1.jpg
--------------------------------------------------------------------------------
/content/images/DataPage2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/DataPage2.jpg
--------------------------------------------------------------------------------
/content/images/DataPageAllRows.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/DataPageAllRows.jpg
--------------------------------------------------------------------------------
/content/images/ESLogo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/ESLogo.jpg
--------------------------------------------------------------------------------
/content/images/FML-Logo-Round.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/FML-Logo-Round.gif
--------------------------------------------------------------------------------
/content/images/FML-Logo-Round.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/FML-Logo-Round.png
--------------------------------------------------------------------------------
/content/images/FML-Logo-Square.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/FML-Logo-Square.gif
--------------------------------------------------------------------------------
/content/images/FML-Logo-Square.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/FML-Logo-Square.png
--------------------------------------------------------------------------------
/content/images/FMLLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/FMLLogo.png
--------------------------------------------------------------------------------
/content/images/Free Mind Labs logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Free Mind Labs logo.png
--------------------------------------------------------------------------------
/content/images/Free Mind Labs.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Free Mind Labs.gif
--------------------------------------------------------------------------------
/content/images/KnnQuery.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/KnnQuery.jpg
--------------------------------------------------------------------------------
/content/images/Mappings.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Mappings.jpg
--------------------------------------------------------------------------------
/content/images/Pipelines.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Pipelines.jpg
--------------------------------------------------------------------------------
/content/images/RAG.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/RAG.jpg
--------------------------------------------------------------------------------
/content/images/Solution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Solution.png
--------------------------------------------------------------------------------
/content/images/TestRunning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/TestRunning.png
--------------------------------------------------------------------------------
/docker/.env.example:
--------------------------------------------------------------------------------
1 | # Project namespace (defaults to the current folder name if not set)
2 | COMPOSE_PROJECT_NAME=es-cluster
3 |
4 | # Password for the 'elastic' user (at least 6 characters)
5 | ELASTIC_PASSWORD=changeme
6 |
7 | # Password for the 'kibana_system' user (at least 6 characters)
8 | KIBANA_PASSWORD=changeme
9 |
10 | # Version of Elastic products
11 | #https://www.elastic.co/downloads/past-releases#elasticsearch
12 | STACK_VERSION=8.8.2
13 |
14 | # Set the cluster name
15 | CLUSTER_NAME=docker-cluster
16 |
17 | # Set to 'basic' or 'trial' to automatically start the 30-day trial
18 | LICENSE=basic
19 | #LICENSE=trial
20 |
21 | # Port to expose Elasticsearch HTTP API to the host
22 | ES_PORT=9200
23 |
24 | # Port to expose Kibana to the host
25 | KIBANA_PORT=5601
26 |
27 | # Port to expose Fleet to the host
28 | FLEET_PORT=8220
29 |
30 | # Port to expose APM to the host
31 | APMSERVER_PORT=8200
32 |
33 | # APM Secret Token for POC environments only
34 | ELASTIC_APM_SECRET_TOKEN=supersecrettoken
35 |
36 | # Increase or decrease based on the available host memory (in bytes)
37 | ES_MEM_LIMIT=3073741824
38 | KB_MEM_LIMIT=1073741824
39 | LS_MEM_LIMIT=1073741824
40 |
41 | # SAMPLE Predefined Key only to be used in POC environments
42 | ENCRYPTION_KEY=c72d38b3a14956121ff2170e4030b4715513701fff43e5626eec58b04a30fae3
--------------------------------------------------------------------------------
/docker/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/docker/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
1 | # How to install the Elastic Stack using Docker Compose
2 |
3 | ## Prerequisites
4 |
5 | You need to have [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/) running on a Linux box.
6 |
7 | If you are on a Windows 10/11 machine, that Linux box can be a [WSL2](https://learn.microsoft.com/en-us/windows/wsl/about) instance.
8 |
9 | > *If you are not familiar with the procedure, see [this article :green_book:](https://www.windowscentral.com/how-install-wsl2-windows-10) to learn how to install WSL2 on Windows 10/11.*
10 |
11 | The remainder of this document shows how to install the Elastic Stack on Windows 11 using WSL2 and Docker Desktop running on it.
12 |
13 | ## The installation
14 |
15 | There are several ways to install and run the Elastic Stack on a development machine. We will install the Elastic Stack using Docker Compose, following the approach described in the articles of [Eddie Mitchell](https://www.elastic.co/blog/author/eddie-mitchell).
16 |
17 | The environment used for this installation is a Windows 11 machine with [WSL2](https://www.windowscentral.com/how-install-wsl2-windows-10) and [Docker Desktop](https://docs.docker.com/desktop/install/windows-install/) installed.
18 |
19 | The necessary files have been copied into the ```/docker``` folder so that we can run everything directly, without needing to clone Mitchell's repository, and so that we can alter the files as necessary in the future.
20 |
21 | The files we might need to access and change have also been grouped under the solution folder ```docker```:
22 |
23 |
24 |

25 |
26 |
27 | - **.env**: this file contains the environment variables that will be used by Docker Compose.
28 | 1. :warning: **The .env file needs to be created manually**, as it is not part of the repository. **Without this file, Docker Compose will not work.**
29 | 1. To create a valid .env file, copy the contents of the ```.env.example``` file into a new file named ```.env```, then update the values of the variables as needed (a quick way to do this is sketched right after this list).
30 |
31 | - **.env.example**: [this file](/.env.example) contains a complete example of all the options available.
32 | - It is not used by Docker Compose and should only be used as a reference when creating the .env file. This is what the file looks like:
33 |
34 |
35 |

36 |
37 |
38 |
39 | > :warning: The default username/password for Kibana and Elasticsearch is ```elastic```/```changeme```. You can change these values in your .env file.
40 |
41 | - **docker-compose.yml**: [this file](./docker-compose.yml) contains the configuration for Docker Compose. The compose file allows us to run the Elastic Stack on a single machine and gives us access to Elasticsearch, Kibana, Logstash, Filebeat, and Metricbeat.
42 |
43 | The remaining files let you configure options for the individual services. It's unlikely that you will have to change any of them at the beginning.
44 |
45 | - **filebeat.yml**: This file contains the configuration for Filebeat. It is used by the Filebeat container to collect and ship logs to Elasticsearch. *It is unlikely you will need to change this file.*
46 |
47 | - **kibana.yml**: This file contains the configuration for Kibana. It is used by the Kibana container to connect to Elasticsearch.
48 |
49 | - **logstash.conf**: This file contains the configuration for Logstash. It is used by the Logstash container to connect to Elasticsearch. *It is unlikely you will need to change this file.*
50 |
51 | - **metricbeat.yml**: This file contains the configuration for Metricbeat. It is used by the Metricbeat container to collect and ship metrics to Elasticsearch.
52 | *It is unlikely you will need to change this file.*
53 |
54 | - **README.md**: this file.
55 |
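For convenience, here is a minimal sketch of the .env creation step described in the list above, assuming a Linux/WSL2 shell in the `docker` directory of this repo; the passwords are placeholders that you should replace with your own values:

```bash
# Create the .env file from the provided example...
cp .env.example .env

# ...then set your own passwords (the values below are placeholders).
sed -i 's/^ELASTIC_PASSWORD=.*/ELASTIC_PASSWORD=my-elastic-password/' .env
sed -i 's/^KIBANA_PASSWORD=.*/KIBANA_PASSWORD=my-kibana-password/' .env
```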
56 |
57 | ## High level overview
58 |
59 | From a high-level perspective, the Elastic Stack we will launch is composed of the following components:
60 |
61 |
62 |

63 |
64 |
65 | - **Elasticsearch** (es01): This is the heart of the Elastic Stack, acting as a powerful and scalable search engine. It stores, indexes, and retrieves data, allowing for fast and efficient searching and analysis.
66 |
67 | - **Kibana** (kibana): It serves as the visualization layer in the Elastic Stack. It offers a user-friendly interface to visualize data stored in Elasticsearch. With Kibana, you can create and share dashboards, charts, and reports, making data analysis accessible and insightful, even for those new to data analytics.
68 |
69 | - **Logstash** (logstash01): A data processing pipeline that ingests, transforms, and sends data to Elasticsearch. Logstash allows you to collect data from various sources, process it with a wide range of filters, and enhance it before it gets indexed in Elasticsearch.
70 |
71 | - **Filebeat** (filebeat01): Acting as a lightweight log shipper, Filebeat forwards log data from multiple sources directly to Elasticsearch or Logstash. It simplifies data collection, is resource-efficient, and is ideal for collecting and aggregating log data in real time.
72 |
73 | - **Metricbeat** (metricbeat01): Similar to Filebeat but focused on metrics, Metricbeat collects various system and service metrics. It's essential for real-time monitoring of servers and services, providing valuable insights into their performance and health.
74 |
75 | ## The running environment
76 |
77 | Once you launch the Docker Compose file, you will have access to Kibana, from where you will do most of your work:
78 |
79 |
80 |

81 |

82 |
83 |
84 | The following sections will guide you through the installation process.
85 |
86 | ## The configuration files
87 |
88 | Make sure you created the **.env** file as explained above.
89 |
90 | ## Step 1/3: Ensure the vm.max_map_count setting is set to at least 262144
91 |
92 | As explained at the beginning of this document, in these instructions we are using Docker Desktop on top of WSL2. Elasticsearch will run in a container on the Linux host, not on Windows.
93 |
94 | When setting up Elasticsearch on Linux, it's essential to configure the `vm.max_map_count` kernel setting on the Linux host to at least `262144`. This setting is critical for Elasticsearch to start up and function.
95 |
96 | :warning: Once again: this change has to be made on the Linux machine running Docker, not inside the container nor on Windows.
97 |
98 | There are two ways to set `vm.max_map_count`:
99 |
100 | 1. Temporary:
101 | - This method is quick and useful for testing purposes. The setting can be changed temporarily by executing a command on your Docker host. It's an immediate change but won't persist after a system reboot. Here's how to do it:
102 |
103 | ```bash
104 | # Set vm.max_map_count temporarily
105 | $ sysctl -w vm.max_map_count=262144
106 | ```
107 |
108 | *This approach is ideal when you need to quickly set up Elasticsearch for short-term use or testing, without the need for the setting to persist after a reboot.*
109 |
110 | 1. Permanent (recommended):
111 | - For long-term use, especially in containerized environments like Docker, you'll want this setting to be permanent. This requires editing a system configuration file to ensure the setting persists across reboots and container restarts. Follow these steps:
112 |
113 | ```bash
114 | # Edit the sysctl configuration file for persistent changes
115 | $ echo 'vm.max_map_count=262144' >> /etc/sysctl.conf
116 |
117 | # Apply the changes without rebooting
118 | $ sysctl -p
119 | ```
120 |
121 | Additional information can be found [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/vm-max-map-count.html) and in Eddie Mitchell's [original article](https://www.elastic.co/blog/getting-started-with-the-elastic-stack-and-docker-compose-part-2).
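Whichever method you choose, you can verify the current value before launching the stack. This is just a quick check, run on the Linux/WSL2 host; if it prints a value lower than `262144`, Elasticsearch will likely fail to start:

```bash
# Print the current kernel setting on the Docker host.
sysctl vm.max_map_count
```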
122 |
123 | ## Step 2/3: Launch the docker compose file
124 |
125 | Launch a terminal and navigate to the `docker` directory of this repo.
126 | Then run the following command:
127 |
128 | ```bash
129 | $ docker-compose up
130 | ```
131 |
132 | Be prepared to wait a minute or two for the containers to start up.
133 | In the end your terminal should display something like this:
134 |
135 |
136 |

137 |
138 |
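As an optional variation (not required by these instructions), you can also start the stack detached and check on the containers from the command line; `es01` is the name of the Elasticsearch service defined in the compose file:

```bash
# Start the stack in the background.
docker-compose up -d

# List the containers and their health status.
docker-compose ps

# Follow the Elasticsearch logs (Ctrl+C to stop following).
docker-compose logs -f es01
```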
139 | Once the containers are up and running and have finished their initial setup, you will be able to access the Kibana UI at https://localhost:5601.
140 |
141 |

142 |
143 |
144 | The Elasticsearch API is available at https://localhost:9200.
145 |
146 |

147 |
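If you prefer the command line, you can also probe the Elasticsearch API with curl. This is only a sketch: it assumes you have already copied the CA certificate to `/tmp/ca.crt` as shown in Step 3 below (alternatively, pass `-k` to skip certificate verification) and that the password matches the `ELASTIC_PASSWORD` value in your .env file:

```bash
# Query the cluster root endpoint as the 'elastic' user.
curl --cacert /tmp/ca.crt -u elastic:changeme https://localhost:9200
```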
148 |
149 | And, from inside Docker Desktop, our Compose project should look like this:
150 |
151 |
152 |

153 |
154 |
155 | ## Step 3/3: Adjust the Settings of Elastic Agent
156 |
157 | Now that Elasticsearch and Kibana are running, we can apply the last configuration step: adjusting the settings of the Elastic Agent, which is currently not working as expected.
158 |
159 | To see the problem, click on 'Management' -> 'Fleet':
160 |
161 |

162 |
163 |
164 |
165 |
166 | In the Fleet management screen you should now see the following issue: the CPU and Memory readings are not being reported correctly. This is because, by default, our Elastic Agent is attempting to log data to a local Elasticsearch instance, which is not correct for our Docker environment.
167 |
168 |
169 |

170 |
171 |
172 | We will need to perform a couple of updates in the Fleet -> Settings UI in order to resolve this. Click on the 'Settings' tab and then the edit action (*green circle*):
173 |
174 |

175 |
176 |
177 | This should display the following. Notice the red circles.
178 |
179 |

180 |
181 |
182 | We now need to change three values:
183 | 1. **Hosts**:
184 | - Change the value http://elasticsearch:9200 to https://es01:9200
185 |
186 | 1. **The CA fingerprint**:
187 | - We'll need to get the CA fingerprint from the cluster, as explained in the next section.
188 | 1. **Advanced YAML configuration**:
189 | - We'll need to get the CA certificate from the cluster, as explained in the next section.
190 |
191 | ### How to get the CA certificate from the cluster?
192 |
193 | Run the following command to pull the CA certificate from the cluster:
194 |
195 | ```bash
196 | docker cp es-cluster-es01-1:/usr/share/elasticsearch/config/certs/ca/ca.crt /tmp/.
197 | ```
198 |
199 | >*Note: The container name in this command may differ, depending on the directory you are running the docker-compose.yml file from and on the COMPOSE_PROJECT_NAME variable specified in the .env file.*
200 |
201 | Next, we will need to get the fingerprint of the certificate. For this, we can use an OpenSSL command:
202 |
203 | ```bash
204 | openssl x509 -fingerprint -sha256 -noout -in /tmp/ca.crt | awk -F"=" {' print $2 '} | sed s/://g
205 | ```
206 |
207 | This will produce a value similar to:
208 |
209 | ```
210 | C8EEE11A0713CF5E3E49979A548F1D133DE0ED4A9263DA43AE039A883F94A726
211 | ```
212 |
213 | Finally, we need to get the whole certificate into YAML format. We can do this with a `cat` command or just by opening the cert in a text editor:
214 |
215 | ```bash
216 | cat /tmp/ca.crt
217 | ```
218 |
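The certificate text is what goes into the 'Advanced YAML configuration' box. Based on Eddie Mitchell's article, the resulting configuration should look roughly like the sketch below; paste the contents of your own ca.crt and keep the indentation:

```yaml
ssl:
  certificate_authorities:
    - |
      -----BEGIN CERTIFICATE-----
      (contents of /tmp/ca.crt go here, indented as shown)
      -----END CERTIFICATE-----
```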
219 |

220 |
221 |
222 | ### The correct settings
223 |
224 | The final settings should look like this (*ignore the fingerprint*):
225 |
226 |
227 |

228 |
229 |
230 | Don't forget to click “Save and Apply Settings” -> “Save and Deploy.”
231 |
232 |
233 |

234 |
235 |
236 | Your agent should now be running and reporting data to Elasticsearch correctly.
237 |
238 |
239 |

240 |
241 |
242 | And dashboards should work properly:
243 |
244 |
245 |

246 |
247 |
248 | ## Final considerations
249 |
250 | - These instructions have been tested on Windows, using WSL2 and Docker Desktop.
251 |
252 | ## Resources
253 |
254 | 1. [Getting started with the Elastic Stack and Docker Compose: Part 1](https://www.elastic.co/blog/getting-started-with-the-elastic-stack-and-docker-compose)
255 | 1. The GitHub repo for this article can be found [here](https://github.com/elkninja/elastic-stack-docker-part-one)
256 |
257 | 2. [Getting started with the Elastic Stack and Docker Compose: Part 2](https://www.elastic.co/blog/getting-started-with-the-elastic-stack-and-docker-compose-part-2)
258 | 1. The Github repo for this article can be found [here](https://github.com/elkninja/elastic-stack-docker-part-two)
259 |
260 | 1. [Install Elasticsearch with Docker](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html)
261 |
262 | 1. [Install Kibana with Docker](https://www.elastic.co/guide/en/kibana/current/docker.html)
263 |
--------------------------------------------------------------------------------
/docker/app/dockerfile:
--------------------------------------------------------------------------------
1 | # syntax=docker/dockerfile:1
2 |
3 | FROM python:3.9-slim-buster
4 |
5 | WORKDIR /app
6 |
7 | COPY requirements.txt requirements.txt
8 |
9 | RUN pip3 install -r requirements.txt
10 |
11 | COPY main.py main.py
12 |
13 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--log-level", "info", "--workers", "1"]
--------------------------------------------------------------------------------
/docker/app/main.py:
--------------------------------------------------------------------------------
1 | from elasticapm.contrib.starlette import ElasticAPM, make_apm_client
2 | from fastapi import FastAPI
3 | from nicegui import ui
4 | from typing import Callable
5 | import asyncio
6 | import functools
7 | import httpx as r
8 | #import psutil
9 |
10 | try:
11 | apm = make_apm_client({
12 | 'SERVICE_NAME': 'my_python_service',
13 | 'SECRET_TOKEN': 'supersecrettoken',
14 | # SERVER_URL must be set to "fleet-server" if running as a docker container.
15 | # if running as a local python script, then set the url to "LOCALHOST"
16 | 'SERVER_URL': 'http://fleet-server:8200',
17 | 'ENVIRONMENT': 'development'
18 | })
19 | except Exception as e:
20 | print('failed to create client')
21 |
22 | app = FastAPI()
23 |
24 | try:
25 | app.add_middleware(ElasticAPM, client=apm)
26 | except Exception as e:
27 | print('failed to add APM Middleware')
28 |
29 |
30 | @app.get("/custom_message/{message}")
31 | async def custom_message(message: str):
32 | apm.capture_message(f"Custom Message: {message}")
33 | return {"message": f"Custom Message: {message}"}
34 |
35 |
36 | @app.get("/error")
37 | async def throw_error():
38 | try:
39 | 1 / 0
40 | except Exception as e:
41 | apm.capture_exception()
42 | return {"message": "Failed Successfully :)"}
43 |
44 |
45 | def init(fastapi_app: FastAPI) -> None:
46 | @ui.page('/', title="APM Demo App")
47 | async def show():
48 | with ui.header(elevated=True).style('background-color: #3874c8').classes('items-center justify-between'):
49 | ui.markdown('### APM DEMO')
50 | ui.button(on_click=lambda: right_drawer.toggle(), icon='menu').props('flat color=white')
51 | with ui.right_drawer(fixed=False).style('background-color: #ebf1fa').props('bordered') as right_drawer:
52 | ui.chat_message('Hello Elastic Stack User!',
53 | name='APM Robot',
54 | stamp='now',
55 | avatar='https://robohash.org/apm_robot')
56 | ui.chat_message('This app is powered by NICEGUI and FastAPI with Elastic APM Instrumentation :)',
57 | name='APM Robot',
58 | stamp='now',
59 | avatar='https://robohash.org/apm_robot')
60 | ui.chat_message('Please click a button to trigger an APM event.',
61 | name='APM Robot',
62 | stamp='now',
63 | avatar='https://robohash.org/apm_robot')
64 | with ui.footer().style('background-color: #3874c8'):
65 | ui.label('APM DEMO PAGE')
66 |
67 | with ui.card():
68 | ui.label('Generate Error - Python')
69 | ui.button('Generate', on_click=python_error)
70 |
71 | with ui.card():
72 | ui.label('Generate Error - JS')
73 | ui.button('Generate', on_click=js_error)
74 |
75 | with ui.card():
76 | ui.label('Generate Custom Message')
77 | custom_message_text = ui.input(placeholder='Message')
78 | ui.button('Generate').on('click', handler=lambda: gen_custom_message(custom_message_text.value))
79 |
80 | ui.run_with(
81 | fastapi_app,
82 | storage_secret='supersecret', # NOTE setting a secret is optional but allows for persistent storage per user
83 | )
84 |
85 |
86 | async def io_bound(callback: Callable, *args: any, **kwargs: any):
87 | '''Makes a blocking function awaitable; pass function as first parameter and its arguments as the rest'''
88 | return await asyncio.get_event_loop().run_in_executor(None, functools.partial(callback, *args, **kwargs))
89 |
90 |
91 | async def python_error():
92 | try:
93 | res = await io_bound(r.get, 'http://localhost:8000/error')
94 | ui.notify(res.text)
95 | except Exception as e:
96 | apm.capture_exception()
97 | ui.notify(f'{e}')
98 |
99 |
100 | async def js_error():
101 | try:
102 | res = await ui.run_javascript('fetch("http://localhost:8000/error")')
103 | ui.notify(f'Message: Failed Successfully :)')
104 | except Exception as e:
105 | apm.capture_exception()
106 | ui.notify(f'{e}')
107 |
108 |
109 | async def gen_custom_message(text_message):
110 | try:
111 | res = await io_bound(r.get, 'http://localhost:8000/custom_message/' + str(text_message))
112 | ui.notify(res.text)
113 | except Exception as e:
114 | apm.capture_exception()
115 | ui.notify(f'{e}')
116 |
117 | init(app)
118 |
119 | try:
120 | apm.capture_message('App Loaded, Hello World!')
121 | except Exception as e:
122 | print('error: ' + str(e))  # str() avoids a TypeError when concatenating an Exception
123 |
124 | if __name__ == '__main__':
125 | print('Please start the app with the "uvicorn" command as shown in the start.sh script')
126 |
--------------------------------------------------------------------------------
/docker/app/requirements.txt:
--------------------------------------------------------------------------------
1 | elastic-apm==6.17.0
2 | fastapi==0.100.0
3 | httpx==0.24.1
4 | nicegui==1.3.2
5 | starlette==0.27.0
--------------------------------------------------------------------------------
/docker/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.8"
2 | volumes:
3 | certs:
4 | driver: local
5 | esdata01:
6 | driver: local
7 | kibanadata:
8 | driver: local
9 | metricbeatdata01:
10 | driver: local
11 | filebeatdata01:
12 | driver: local
13 | logstashdata01:
14 | driver: local
15 | fleetserverdata:
16 | driver: local
17 |
18 | networks:
19 | default:
20 | name: elastic
21 | external: false
22 |
23 | services:
24 | setup:
25 | image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
26 | volumes:
27 | - certs:/usr/share/elasticsearch/config/certs
28 | user: "0"
29 | command: >
30 | bash -c '
31 | if [ x${ELASTIC_PASSWORD} == x ]; then
32 | echo "Set the ELASTIC_PASSWORD environment variable in the .env file";
33 | exit 1;
34 | elif [ x${KIBANA_PASSWORD} == x ]; then
35 | echo "Set the KIBANA_PASSWORD environment variable in the .env file";
36 | exit 1;
37 | fi;
38 | if [ ! -f config/certs/ca.zip ]; then
39 | echo "Creating CA";
40 | bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip;
41 | unzip config/certs/ca.zip -d config/certs;
42 | fi;
43 | if [ ! -f config/certs/certs.zip ]; then
44 | echo "Creating certs";
45 | echo -ne \
46 | "instances:\n"\
47 | " - name: es01\n"\
48 | " dns:\n"\
49 | " - es01\n"\
50 | " - localhost\n"\
51 | " ip:\n"\
52 | " - 127.0.0.1\n"\
53 | " - name: kibana\n"\
54 | " dns:\n"\
55 | " - kibana\n"\
56 | " - localhost\n"\
57 | " ip:\n"\
58 | " - 127.0.0.1\n"\
59 | " - name: fleet-server\n"\
60 | " dns:\n"\
61 | " - fleet-server\n"\
62 | " - localhost\n"\
63 | " ip:\n"\
64 | " - 127.0.0.1\n"\
65 | > config/certs/instances.yml;
66 | bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key;
67 | unzip config/certs/certs.zip -d config/certs;
68 | fi;
69 | echo "Setting file permissions"
70 | chown -R root:root config/certs;
71 | find . -type d -exec chmod 750 \{\} \;;
72 | find . -type f -exec chmod 640 \{\} \;;
73 | echo "Waiting for Elasticsearch availability";
74 | until curl -s --cacert config/certs/ca/ca.crt https://es01:9200 | grep -q "missing authentication credentials"; do sleep 30; done;
75 | echo "Setting kibana_system password";
76 | until curl -s -X POST --cacert config/certs/ca/ca.crt -u "elastic:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" https://es01:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_PASSWORD}\"}" | grep -q "^{}"; do sleep 10; done;
77 | echo "All done!";
78 | '
79 | healthcheck:
80 | test: ["CMD-SHELL", "[ -f config/certs/es01/es01.crt ]"]
81 | interval: 1s
82 | timeout: 5s
83 | retries: 120
84 |
85 | es01:
86 | depends_on:
87 | setup:
88 | condition: service_healthy
89 | image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
90 | labels:
91 | co.elastic.logs/module: elasticsearch
92 | volumes:
93 | - certs:/usr/share/elasticsearch/config/certs
94 | - esdata01:/usr/share/elasticsearch/data
95 | ports:
96 | - ${ES_PORT}:9200
97 | environment:
98 | - node.name=es01
99 | - cluster.name=${CLUSTER_NAME}
100 | - discovery.type=single-node
101 | - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
102 | - bootstrap.memory_lock=true
103 | - xpack.security.enabled=true
104 | - xpack.security.http.ssl.enabled=true
105 | - xpack.security.http.ssl.key=certs/es01/es01.key
106 | - xpack.security.http.ssl.certificate=certs/es01/es01.crt
107 | - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt
108 | - xpack.security.transport.ssl.enabled=true
109 | - xpack.security.transport.ssl.key=certs/es01/es01.key
110 | - xpack.security.transport.ssl.certificate=certs/es01/es01.crt
111 | - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt
112 | - xpack.security.transport.ssl.verification_mode=certificate
113 | - xpack.license.self_generated.type=${LICENSE}
114 | mem_limit: ${ES_MEM_LIMIT}
115 | ulimits:
116 | memlock:
117 | soft: -1
118 | hard: -1
119 | healthcheck:
120 | test:
121 | [
122 | "CMD-SHELL",
123 | "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
124 | ]
125 | interval: 10s
126 | timeout: 10s
127 | retries: 120
128 |
129 | kibana:
130 | depends_on:
131 | es01:
132 | condition: service_healthy
133 | image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
134 | labels:
135 | co.elastic.logs/module: kibana
136 | volumes:
137 | - certs:/usr/share/kibana/config/certs
138 | - kibanadata:/usr/share/kibana/data
139 | - ./kibana.yml:/usr/share/kibana/config/kibana.yml:ro
140 | ports:
141 | - ${KIBANA_PORT}:5601
142 | environment:
143 | - SERVERNAME=kibana
144 | - ELASTICSEARCH_HOSTS=https://es01:9200
145 | - ELASTICSEARCH_USERNAME=kibana_system
146 | - ELASTICSEARCH_PASSWORD=${KIBANA_PASSWORD}
147 | - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt
148 | - XPACK_SECURITY_ENCRYPTIONKEY=${ENCRYPTION_KEY}
149 | - XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY=${ENCRYPTION_KEY}
150 | - XPACK_REPORTING_ENCRYPTIONKEY=${ENCRYPTION_KEY}
151 | - XPACK_REPORTING_KIBANASERVER_HOSTNAME=localhost
152 | - SERVER_SSL_ENABLED=true
153 | - SERVER_SSL_CERTIFICATE=config/certs/kibana/kibana.crt
154 | - SERVER_SSL_KEY=config/certs/kibana/kibana.key
155 | - SERVER_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt
156 | - ELASTIC_APM_SECRET_TOKEN=${ELASTIC_APM_SECRET_TOKEN}
157 | mem_limit: ${KB_MEM_LIMIT}
158 | healthcheck:
159 | test:
160 | [
161 | "CMD-SHELL",
162 | "curl -I -s --cacert config/certs/ca/ca.crt https://localhost:5601 | grep -q 'HTTP/1.1 302 Found'",
163 | ]
164 | interval: 10s
165 | timeout: 10s
166 | retries: 120
167 |
168 | metricbeat01:
169 | depends_on:
170 | es01:
171 | condition: service_healthy
172 | kibana:
173 | condition: service_healthy
174 | image: docker.elastic.co/beats/metricbeat:${STACK_VERSION}
175 | user: root
176 | volumes:
177 | - certs:/usr/share/metricbeat/certs
178 | - metricbeatdata01:/usr/share/metricbeat/data
179 | - "./metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro"
180 | - "/var/run/docker.sock:/var/run/docker.sock:ro"
181 | - "/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro"
182 | - "/proc:/hostfs/proc:ro"
183 | - "/:/hostfs:ro"
184 | environment:
185 | - ELASTIC_USER=elastic
186 | - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
187 | - ELASTIC_HOSTS=https://es01:9200
188 | - KIBANA_HOSTS=https://kibana:5601
189 | - LOGSTASH_HOSTS=http://logstash01:9600
190 | - CA_CERT=certs/ca/ca.crt
191 | - ES_CERT=certs/es01/es01.crt
192 | - ES_KEY=certs/es01/es01.key
193 | - KB_CERT=certs/kibana/kibana.crt
194 | - KB_KEY=certs/kibana/kibana.key
195 | command:
196 | -strict.perms=false
197 |
198 | filebeat01:
199 | depends_on:
200 | es01:
201 | condition: service_healthy
202 | image: docker.elastic.co/beats/filebeat:${STACK_VERSION}
203 | user: root
204 | volumes:
205 | - certs:/usr/share/filebeat/certs
206 | - filebeatdata01:/usr/share/filebeat/data
207 | - "./filebeat_ingest_data/:/usr/share/filebeat/ingest_data/"
208 | - "./filebeat.yml:/usr/share/filebeat/filebeat.yml:ro"
209 | - "/var/lib/docker/containers:/var/lib/docker/containers:ro"
210 | - "/var/run/docker.sock:/var/run/docker.sock:ro"
211 | environment:
212 | - ELASTIC_USER=elastic
213 | - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
214 | - ELASTIC_HOSTS=https://es01:9200
215 | - KIBANA_HOSTS=https://kibana:5601
216 | - LOGSTASH_HOSTS=http://logstash01:9600
217 | - CA_CERT=certs/ca/ca.crt
218 | command:
219 | -strict.perms=false
220 |
221 | logstash01:
222 | depends_on:
223 | es01:
224 | condition: service_healthy
225 | kibana:
226 | condition: service_healthy
227 | image: docker.elastic.co/logstash/logstash:${STACK_VERSION}
228 | labels:
229 | co.elastic.logs/module: logstash
230 | user: root
231 | volumes:
232 | - certs:/usr/share/logstash/certs
233 | - logstashdata01:/usr/share/logstash/data
234 | - "./logstash_ingest_data/:/usr/share/logstash/ingest_data/"
235 | - "./logstash.conf:/usr/share/logstash/pipeline/logstash.conf:ro"
236 | environment:
237 | - xpack.monitoring.enabled=false
238 | - ELASTIC_USER=elastic
239 | - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
240 | - ELASTIC_HOSTS=https://es01:9200
241 |
242 | fleet-server:
243 | depends_on:
244 | kibana:
245 | condition: service_healthy
246 | es01:
247 | condition: service_healthy
248 | image: docker.elastic.co/beats/elastic-agent:${STACK_VERSION}
249 | volumes:
250 | - certs:/certs
251 | - fleetserverdata:/usr/share/elastic-agent
252 | - "/var/lib/docker/containers:/var/lib/docker/containers:ro"
253 | - "/var/run/docker.sock:/var/run/docker.sock:ro"
254 | - "/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro"
255 | - "/proc:/hostfs/proc:ro"
256 | - "/:/hostfs:ro"
257 | ports:
258 | - ${FLEET_PORT}:8220
259 | - ${APMSERVER_PORT}:8200
260 | user: root
261 | environment:
262 | - SSL_CERTIFICATE_AUTHORITIES=/certs/ca/ca.crt
263 | - CERTIFICATE_AUTHORITIES=/certs/ca/ca.crt
264 | - FLEET_CA=/certs/ca/ca.crt
265 | - FLEET_ENROLL=1
266 | - FLEET_INSECURE=true
267 | - FLEET_SERVER_ELASTICSEARCH_CA=/certs/ca/ca.crt
268 | - FLEET_SERVER_ELASTICSEARCH_HOST=https://es01:9200
269 | - FLEET_SERVER_ELASTICSEARCH_INSECURE=true
270 | - FLEET_SERVER_ENABLE=1
271 | - FLEET_SERVER_CERT=/certs/fleet-server/fleet-server.crt
272 | - FLEET_SERVER_CERT_KEY=/certs/fleet-server/fleet-server.key
273 | - FLEET_SERVER_INSECURE_HTTP=true
274 | - FLEET_SERVER_POLICY_ID=fleet-server-policy
275 | - FLEET_URL=https://fleet-server:8220
276 | - KIBANA_FLEET_CA=/certs/ca/ca.crt
277 | - KIBANA_FLEET_SETUP=1
278 | - KIBANA_FLEET_USERNAME=elastic
279 | - KIBANA_FLEET_PASSWORD=${ELASTIC_PASSWORD}
280 | - KIBANA_HOST=https://kibana:5601
281 |
282 | webapp:
283 | build:
284 | context: app
285 | volumes:
286 | - "/var/lib/docker/containers:/var/lib/docker/containers:ro"
287 | - "/var/run/docker.sock:/var/run/docker.sock:ro"
288 | - "/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro"
289 | - "/proc:/hostfs/proc:ro"
290 | - "/:/hostfs:ro"
291 | ports:
292 | - 8000:8000
--------------------------------------------------------------------------------
/docker/filebeat.yml:
--------------------------------------------------------------------------------
1 | filebeat.inputs:
2 | - type: filestream
3 | id: default-filestream
4 | paths:
5 | - ingest_data/*.log
6 |
7 | filebeat.autodiscover:
8 | providers:
9 | - type: docker
10 | hints.enabled: true
11 |
12 | processors:
13 | - add_docker_metadata: ~
14 |
15 | setup.kibana:
16 | host: ${KIBANA_HOSTS}
17 | username: ${ELASTIC_USER}
18 | password: ${ELASTIC_PASSWORD}
19 |
20 | output.elasticsearch:
21 | hosts: ${ELASTIC_HOSTS}
22 | username: ${ELASTIC_USER}
23 | password: ${ELASTIC_PASSWORD}
24 | ssl:
25 | enabled: true
26 | certificate_authorities: ${CA_CERT}
27 |
--------------------------------------------------------------------------------
/docker/images/CACRT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/CACRT.png
--------------------------------------------------------------------------------
/docker/images/ComposeRunning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/ComposeRunning.png
--------------------------------------------------------------------------------
/docker/images/DevConsole.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/DevConsole.png
--------------------------------------------------------------------------------
/docker/images/DockerDesktop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/DockerDesktop.png
--------------------------------------------------------------------------------
/docker/images/DockerSolutionFolder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/DockerSolutionFolder.png
--------------------------------------------------------------------------------
/docker/images/ELKStack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/ELKStack.png
--------------------------------------------------------------------------------
/docker/images/ESServer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/ESServer.png
--------------------------------------------------------------------------------
/docker/images/ElasticAgentMetrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/ElasticAgentMetrics.png
--------------------------------------------------------------------------------
/docker/images/EnvSample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/EnvSample.png
--------------------------------------------------------------------------------
/docker/images/FinalAgentConfiguration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/FinalAgentConfiguration.png
--------------------------------------------------------------------------------
/docker/images/Fleet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/Fleet.png
--------------------------------------------------------------------------------
/docker/images/GoodAgent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/GoodAgent.png
--------------------------------------------------------------------------------
/docker/images/InvalidAgent1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/InvalidAgent1.png
--------------------------------------------------------------------------------
/docker/images/InvalidAgent2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/InvalidAgent2.png
--------------------------------------------------------------------------------
/docker/images/InvalidAgent3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/InvalidAgent3.png
--------------------------------------------------------------------------------
/docker/images/SaveAndDeploy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/SaveAndDeploy.png
--------------------------------------------------------------------------------
/docker/images/WelcomePageKibana.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/WelcomePageKibana.png
--------------------------------------------------------------------------------
/docker/kibana.yml:
--------------------------------------------------------------------------------
1 | elastic:
2 | apm:
3 | active: true
4 | serverUrl: "http://fleet-server:8200"
5 | secretToken: ${ELASTIC_APM_SECRET_TOKEN}
6 | server.host: "0.0.0.0"
7 | telemetry.enabled: "true"
8 | xpack.fleet.packages:
9 | - name: fleet_server
10 | version: latest
11 | - name: system
12 | version: latest
13 | - name: elastic_agent
14 | version: latest
15 | - name: apm
16 | version: latest
17 | xpack.fleet.agentPolicies:
18 | - name: Fleet-Server-Policy
19 | id: fleet-server-policy
20 | namespace: default
21 | monitoring_enabled:
22 | - logs
23 | - metrics
24 | package_policies:
25 | - name: fleet_server-1
26 | package:
27 | name: fleet_server
28 | - name: system-1
29 | package:
30 | name: system
31 | - name: elastic_agent-1
32 | package:
33 | name: elastic_agent
34 | - name: apm-1
35 | package:
36 | name: apm
37 | inputs:
38 | - type: apm
39 | enabled: true
40 | vars:
41 | - name: host
42 | value: 0.0.0.0:8200
43 | - name: secret_token
44 | value: ${ELASTIC_APM_SECRET_TOKEN}
--------------------------------------------------------------------------------
/docker/logstash.conf:
--------------------------------------------------------------------------------
1 | input {
2 | file {
3 | #https://www.elastic.co/guide/en/logstash/current/plugins-inputs-file.html
4 | #default is TAIL which assumes more data will come into the file.
5 | #change to mode => "read" if the file is a complete file. by default, the file will be removed once reading is complete -- backup your files if you need them.
6 | mode => "tail"
7 | path => "/usr/share/logstash/ingest_data/*"
8 | }
9 | }
10 |
11 | filter {
12 | }
13 |
14 | output {
15 | elasticsearch {
16 | index => "logstash-%{+YYYY.MM.dd}"
17 | hosts=> "${ELASTIC_HOSTS}"
18 | user=> "${ELASTIC_USER}"
19 | password=> "${ELASTIC_PASSWORD}"
20 | cacert=> "certs/ca/ca.crt"
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/docker/metricbeat.yml:
--------------------------------------------------------------------------------
1 | metricbeat.config.modules:
2 | path: ${path.config}/modules.d/*.yml
3 | reload.enabled: false
4 |
5 | metricbeat.modules:
6 | - module: elasticsearch
7 | xpack.enabled: true
8 | period: 10s
9 | hosts: ${ELASTIC_HOSTS}
10 | username: ${ELASTIC_USER}
11 | password: ${ELASTIC_PASSWORD}
12 | ssl:
13 | enabled: true
14 | certificate_authorities: ${CA_CERT}
15 |
16 | - module: logstash
17 | xpack.enabled: true
18 | period: 10s
19 | hosts: ${LOGSTASH_HOSTS}
20 |
21 | - module: kibana
22 | metricsets:
23 | - stats
24 | period: 10s
25 | hosts: ${KIBANA_HOSTS}
26 | username: ${ELASTIC_USER}
27 | password: ${ELASTIC_PASSWORD}
28 | xpack.enabled: true
29 | ssl:
30 | enabled: true
31 | certificate_authorities: ${CA_CERT}
32 |
33 | - module: docker
34 | metricsets:
35 | - "container"
36 | - "cpu"
37 | - "diskio"
38 | - "healthcheck"
39 | - "info"
40 | #- "image"
41 | - "memory"
42 | - "network"
43 | hosts: ["unix:///var/run/docker.sock"]
44 | period: 10s
45 | enabled: true
46 |
47 | processors:
48 | - add_host_metadata: ~
49 | - add_docker_metadata: ~
50 |
51 | output.elasticsearch:
52 | hosts: ${ELASTIC_HOSTS}
53 | username: ${ELASTIC_USER}
54 | password: ${ELASTIC_PASSWORD}
55 | ssl:
56 | enabled: true
57 | certificate_authorities: ${CA_CERT}
58 |
59 |
--------------------------------------------------------------------------------
/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/icon.png
--------------------------------------------------------------------------------
/nuget-package.props:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | 0.9.5
5 |
6 |
7 | Free Mind Labs and contributors
8 | Free Mind Labs, Inc.
9 | Free Mind Labs
10 | Kernel Memory adapter for Elasticsearch
11 | Elasticsearch connector for Microsoft Kernel Memory, to store and search memory using Elasticsearch vector indexing and Elasticsearch features.
12 | Copilot, Memory, RAG, Kernel Memory, Elasticsearch, AI, Artificial Intelligence, Embeddings, Vector DB, Vector Search, ETL
13 | $(AssemblyName)
14 |
15 |
16 | MIT
17 | © Free Mind Labs, Inc. All rights reserved.
18 | https://github.com/freemindlabsinc/FreeMindLabs.SemanticKernel
19 | https://github.com/freemindlabsinc/FreeMindLabs.SemanticKernel
20 | true
21 |
22 |
23 | icon.png
24 | icon.png
25 | NUGET.md
26 |
27 |
28 | true
29 | snupkg
30 |
31 |
32 | bin\$(Configuration)\$(TargetFramework)\$(AssemblyName).xml
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 | true
49 |
50 |
51 |
52 | false
53 |
54 |
55 |
--------------------------------------------------------------------------------
/nuget.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/packages/README.md:
--------------------------------------------------------------------------------
1 | # Package folder
2 |
3 | Do not remove this file
--------------------------------------------------------------------------------
/src/ElasticsearchMemoryStorage/ConfigurationException.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using Microsoft.KernelMemory;
4 |
5 | namespace FreeMindLabs.KernelMemory.Elasticsearch;
6 |
7 | /// <summary>
8 | /// Exception thrown when the Elasticsearch configuration is invalid in appSettings, secrets, etc.
9 | /// </summary>
10 | public class ConfigurationException : ElasticsearchException
11 | {
12 | /// <inheritdoc/>
13 | public ConfigurationException() { }
14 |
15 | /// <inheritdoc/>
16 | public ConfigurationException(string message) : base(message) { }
17 |
18 | /// <inheritdoc/>
19 | public ConfigurationException(string message, Exception? innerException) : base(message, innerException) { }
20 | }
21 |
22 | ///
23 | /// Base exception for all exceptions thrown by the Elasticsearch connector for Kernel Memory.
24 | ///
25 | public class ElasticsearchException : KernelMemoryException
26 | {
27 | ///
28 | public ElasticsearchException() { }
29 |
30 | ///
31 | public ElasticsearchException(string message) : base(message) { }
32 |
33 | ///
34 | public ElasticsearchException(string message, Exception? innerException) : base(message, innerException) { }
35 | }
36 |
37 | ///
38 | /// Exception thrown when an index name does not pass Elasticsearch validation.
39 | ///
40 | public class InvalidIndexNameException : ElasticsearchException
41 | {
42 | ///
43 |     public InvalidIndexNameException(string indexName, IEnumerable<string> errors, Exception? innerException = default)
44 | : base($"The given index name '{indexName}' is invalid. {string.Join(", ", errors)}", innerException)
45 | {
46 | this.IndexName = indexName;
47 | this.Errors = errors;
48 | }
49 |
50 | ///
51 | public InvalidIndexNameException(
52 |         (string IndexName, IEnumerable<string> Errors) conversionResult,
53 | Exception? innerException = default)
54 |
55 | => (this.IndexName, this.Errors) = conversionResult;
56 |
57 | ///
58 | /// The index name that failed validation.
59 | ///
60 | public string IndexName { get; }
61 |
62 | ///
63 | /// The list of errors that caused the validation to fail.
64 | ///
65 |     public IEnumerable<string> Errors { get; }
66 | }
67 |
--------------------------------------------------------------------------------
/src/ElasticsearchMemoryStorage/ElasticsearchConfig.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using Elastic.Clients.Elasticsearch.Mapping;
4 |
5 | namespace FreeMindLabs.KernelMemory.Elasticsearch;
6 |
7 | ///
8 | /// The configuration for the Elasticsearch connector.
9 | /// Use ElasticsearchConfigBuilder to instantiate and configure this class.
10 | ///
11 | public class ElasticsearchConfig
12 | {
13 | ///
14 | public ElasticsearchConfig()
15 | { }
16 |
17 | ///
18 | /// The certificate fingerprint for the Elasticsearch instance.
19 | /// See .
20 | ///
21 | public string CertificateFingerPrint { get; set; } = string.Empty;
22 |
23 | ///
24 | /// The Elasticsearch endpoint.
25 | ///
26 | public string Endpoint { get; set; } = string.Empty;
27 |
28 | ///
29 | /// The username used to connect to Elasticsearch.
30 | ///
31 | public string UserName { get; set; } = string.Empty;
32 |
33 | ///
34 | /// The password used to connect to Elasticsearch.
35 | ///
36 | public string Password { get; set; } = string.Empty;
37 |
38 | ///
39 |     /// The prefix to prepend to index names in Elasticsearch.
40 | ///
41 | public string IndexPrefix { get; set; } = string.Empty;
42 |
43 | ///
44 | /// The number of shards to use for the Elasticsearch index.
45 | ///
46 | public int? ShardCount { get; set; } = 1;
47 |
48 | ///
49 | /// The number of replicas to use for the Elasticsearch index.
50 | ///
51 | public int? ReplicaCount { get; set; } = 0;
52 |
53 | ///
54 | /// A delegate to configure the Elasticsearch index properties.
55 | ///
56 | public Action>? ConfigureProperties { get; internal set; }
57 | }
58 |
--------------------------------------------------------------------------------
/src/ElasticsearchMemoryStorage/ElasticsearchConfigBuilder.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using Microsoft.Extensions.Configuration;
4 |
5 | namespace FreeMindLabs.KernelMemory.Elasticsearch;
6 |
7 | ///
8 | /// The builder for ElasticsearchConfig.
9 | ///
10 | public class ElasticsearchConfigBuilder
11 | {
12 | ///
13 | /// The default Elasticsearch endpoint.
14 | ///
15 | public const string DefaultEndpoint = "https://localhost:9200";
16 |
17 | ///
18 | /// The default Elasticsearch username.
19 | ///
20 | public const string DefaultUserName = "elastic";
21 |
22 | ///
23 | /// The name of the section that will contain the configuration for Elasticsearch
24 | /// (e.g. appSettings.json, user secrets, etc.).
25 | ///
26 | public const string DefaultSettingsSection = "Elasticsearch";
27 |
28 | ///
29 |     /// The default prefix to prepend to index names in Elasticsearch.
30 | ///
31 | public const string DefaultIndexPrefix = "km.";
32 |
33 | private ElasticsearchConfig _config;
34 |
35 | ///
36 | /// The default constructor.
37 | ///
38 | public ElasticsearchConfigBuilder()
39 | {
40 | this._config = new ElasticsearchConfig();
41 | this.WithEndpoint(DefaultEndpoint)
42 | .WithIndexPrefix(DefaultIndexPrefix)
43 | .WithCertificateFingerPrint(string.Empty)
44 | .WithUserNameAndPassword(DefaultUserName, string.Empty);
45 | }
46 |
47 | ///
48 |     /// Sets the Elasticsearch endpoint to connect to.
49 | ///
50 | ///
51 | ///
52 | public ElasticsearchConfigBuilder WithEndpoint(string endpoint)
53 | {
54 | // TODO: validate URL
55 | this._config.Endpoint = endpoint;
56 | return this;
57 | }
58 |
59 | ///
60 | /// Sets the username and password used to connect to Elasticsearch.
61 | ///
62 | ///
63 | ///
64 | ///
65 | public ElasticsearchConfigBuilder WithUserNameAndPassword(string userName, string password)
66 | {
67 | this._config.UserName = userName;
68 | this._config.Password = password;
69 | return this;
70 | }
71 |
72 | ///
73 | /// Sets the certificate fingerprint used to communicate with Elasticsearch.
74 | /// See .
75 | ///
76 | ///
77 | ///
78 | public ElasticsearchConfigBuilder WithCertificateFingerPrint(string certificateFingerPrint)
79 | {
80 | this._config.CertificateFingerPrint = certificateFingerPrint;
81 | return this;
82 | }
83 |
84 | ///
85 |     /// Sets the prefix to prepend to index names in Elasticsearch.
86 | ///
87 | ///
88 | ///
89 | public ElasticsearchConfigBuilder WithIndexPrefix(string indexPrefix)
90 | {
91 | this._config.IndexPrefix = indexPrefix;
92 | return this;
93 | }
94 |
95 | ///
96 | /// Validates the Elasticsearch configuration.
97 | ///
98 | ///
99 | public ElasticsearchConfigBuilder Validate()
100 | {
101 | // TODO: improve this at some point
102 | const string Prefix = "Invalid Elasticsearch configuration: missing ";
103 |
104 | if (string.IsNullOrWhiteSpace(this._config.Endpoint))
105 | {
106 | throw new ConfigurationException(Prefix + $"{nameof(ElasticsearchConfig.Endpoint)}.");
107 | }
108 |
109 | if (string.IsNullOrWhiteSpace(this._config.UserName))
110 | {
111 | throw new ConfigurationException(Prefix + $"{nameof(ElasticsearchConfig.UserName)}.");
112 | }
113 |
114 | if (string.IsNullOrWhiteSpace(this._config.Password))
115 | {
116 | throw new ConfigurationException(Prefix + $"{nameof(ElasticsearchConfig.Password)}.");
117 | }
118 |
119 | if (string.IsNullOrWhiteSpace(this._config.CertificateFingerPrint))
120 | {
121 | throw new ConfigurationException(Prefix + $"{nameof(ElasticsearchConfig.CertificateFingerPrint)}");
122 | }
123 |
124 | return this;
125 | }
126 |
127 | ///
128 | /// Reads the Elasticsearch configuration from the Services section of KernelMemory's configuration.
129 | ///
130 | ///
131 | ///
132 | public ElasticsearchConfigBuilder WithConfiguration(IConfiguration configuration)
133 | {
134 | const string SectionPath = "KernelMemory:Services:Elasticsearch";
135 |
136 | var kmSvcEsSection = configuration.GetSection(SectionPath);
137 | if (!kmSvcEsSection.Exists())
138 | {
139 | throw new ConfigurationException($"Missing configuration section {SectionPath}.");
140 | }
141 |
142 |         this._config = new ElasticsearchConfig();
143 |
144 |         // Bind the Elasticsearch section onto the new configuration instance.
145 |         kmSvcEsSection.Bind(this._config);
146 |
147 | return this;
148 | }
149 |
150 | ///
151 | /// Sets the number of shards and replicas to use for the Elasticsearch index.
152 | ///
153 | ///
154 | ///
155 | ///
156 | public ElasticsearchConfigBuilder WithShardsAndReplicas(int shards, int replicas)
157 | {
158 | this._config.ShardCount = shards;
159 | this._config.ReplicaCount = replicas;
160 | return this;
161 | }
162 |
163 | ///
164 | /// Builds the ElasticsearchConfig.
165 | ///
166 | /// Indicates if validation should be skipped.
167 | ///
168 | public ElasticsearchConfig Build(bool skipValidation = false)
169 | {
170 | if (!skipValidation)
171 | {
172 | this.Validate();
173 | }
174 |
175 | return this._config;
176 | }
177 | }
178 |
--------------------------------------------------------------------------------
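Note: a minimal usage sketch of the builder above, producing an ElasticsearchConfig. The endpoint and index prefix match the documented defaults; the credentials and certificate fingerprint are placeholders to be supplied by your environment.

    using FreeMindLabs.KernelMemory.Elasticsearch;

    // Every value below except the default endpoint and prefix is a placeholder (assumption).
    ElasticsearchConfig config = new ElasticsearchConfigBuilder()
        .WithEndpoint("https://localhost:9200")                 // ElasticsearchConfigBuilder.DefaultEndpoint
        .WithUserNameAndPassword("elastic", "<your-password>")  // placeholder credentials
        .WithCertificateFingerPrint("<your-ca-fingerprint>")    // placeholder fingerprint
        .WithIndexPrefix("km.")                                 // ElasticsearchConfigBuilder.DefaultIndexPrefix
        .WithShardsAndReplicas(shards: 1, replicas: 0)
        .Build();                                               // runs Validate() unless skipValidation is passed

Alternatively, WithConfiguration(IConfiguration) binds the same settings from the KernelMemory:Services:Elasticsearch section.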
/src/ElasticsearchMemoryStorage/ElasticsearchConfigExtensions.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using Elastic.Clients.Elasticsearch;
4 | using Elastic.Transport;
5 |
6 | namespace FreeMindLabs.KernelMemory.Elasticsearch;
7 |
8 | ///
9 | /// Elasticsearch configuration extensions.
10 | ///
11 | public static class ElasticsearchConfigExtensions
12 | {
13 | ///
14 | /// Converts an ElasticsearchConfig to a ElasticsearchClientSettings that can be used
15 | /// to instantiate .
16 | ///
17 | public static ElasticsearchClientSettings ToElasticsearchClientSettings(this ElasticsearchConfig config)
18 | {
19 | ArgumentNullException.ThrowIfNull(config, nameof(config));
20 |
21 | // TODO: figure out the Dispose issue. It does not feel right.
22 | // See https://www.elastic.co/guide/en/elasticsearch/client/net-api/current/_options_on_elasticsearchclientsettings.html
23 | #pragma warning disable CA2000 // Dispose objects before losing scope
24 | return new ElasticsearchClientSettings(new Uri(config.Endpoint))
25 |
26 | // TODO: this needs to be more flexible.
27 | .Authentication(new BasicAuthentication(config.UserName, config.Password))
28 | .DisableDirectStreaming(true)
29 | // TODO: Not sure why I need this. Verify configuration maybe?
30 | .ServerCertificateValidationCallback((sender, certificate, chain, errors) => true)
31 | .CertificateFingerprint(config.CertificateFingerPrint)
32 | .ThrowExceptions(true) // Much easier to work with
33 | #if DEBUG
34 | .DisableDirectStreaming(true)
35 | #endif
36 | ;
37 | #pragma warning restore CA2000 // Dispose objects before losing scope
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
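Note: a short sketch of how this extension is meant to be used, mirroring the singleton registration in ServiceCollectionExtensions; 'config' is assumed to be an already-built ElasticsearchConfig.

    using Elastic.Clients.Elasticsearch;
    using FreeMindLabs.KernelMemory.Elasticsearch;

    // 'config' is an ElasticsearchConfig built elsewhere (assumption).
    ElasticsearchClient client = new(config.ToElasticsearchClientSettings());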
/src/ElasticsearchMemoryStorage/ElasticsearchMemory.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using System.Runtime.CompilerServices;
4 | using Elastic.Clients.Elasticsearch;
5 | using Elastic.Clients.Elasticsearch.Mapping;
6 | using Elastic.Clients.Elasticsearch.QueryDsl;
7 | using Microsoft.Extensions.Logging;
8 | using Microsoft.KernelMemory;
9 | using Microsoft.KernelMemory.AI;
10 | using Microsoft.KernelMemory.Diagnostics;
11 | using Microsoft.KernelMemory.MemoryStorage;
12 |
13 | namespace FreeMindLabs.KernelMemory.Elasticsearch;
14 |
15 | ///
16 | /// Elasticsearch connector for Kernel Memory.
17 | ///
18 | public class ElasticsearchMemory : IMemoryDb
19 | {
20 | private readonly ITextEmbeddingGenerator _embeddingGenerator;
21 | private readonly IIndexNameHelper _indexNameHelper;
22 | private readonly ElasticsearchConfig _config;
23 | private readonly ILogger _log;
24 | private readonly ElasticsearchClient _client;
25 |
26 | ///
27 | /// Create a new instance of Elasticsearch KM connector
28 | ///
29 | /// Elasticsearch configuration
30 | /// Elasticsearch client
31 | /// Application logger
32 | /// Embedding generator
33 | /// Index name helper
34 | public ElasticsearchMemory(
35 | ElasticsearchConfig config,
36 | ElasticsearchClient client,
37 | ITextEmbeddingGenerator embeddingGenerator,
38 | IIndexNameHelper indexNameHelper,
39 | ILogger? log = null)
40 | {
41 | this._embeddingGenerator = embeddingGenerator ?? throw new ArgumentNullException(nameof(embeddingGenerator));
42 | this._indexNameHelper = indexNameHelper ?? throw new ArgumentNullException(nameof(indexNameHelper));
43 | this._config = config ?? throw new ArgumentNullException(nameof(config));
44 |         this._client = client ?? throw new ArgumentNullException(nameof(client));
45 | this._log = log ?? DefaultLogger.Instance;
46 | }
47 |
48 | ///
49 | public async Task CreateIndexAsync(
50 | string index,
51 | int vectorSize,
52 | CancellationToken cancellationToken = default)
53 | {
54 | index = this._indexNameHelper.Convert(index);
55 |
56 | var existsResponse = await this._client.Indices.ExistsAsync(index, cancellationToken).ConfigureAwait(false);
57 | if (existsResponse.Exists)
58 | {
59 | this._log.LogTrace("{MethodName}: Index {Index} already exists.", nameof(CreateIndexAsync), index);
60 | return;
61 | }
62 |
63 | var createIdxResponse = await this._client.Indices.CreateAsync(index,
64 | cfg =>
65 | {
66 | cfg.Settings(setts =>
67 | {
68 | setts.NumberOfShards(this._config.ShardCount);
69 | setts.NumberOfReplicas(this._config.ReplicaCount);
70 | });
71 | },
72 | cancellationToken).ConfigureAwait(false);
73 |
74 | const int Dimensions = 1536; // TODO: make not hardcoded
75 |
76 | var np = new NestedProperty()
77 | {
78 | Properties = new Properties()
79 | {
80 | { ElasticsearchTag.NameField, new KeywordProperty() },
81 | { ElasticsearchTag.ValueField, new KeywordProperty() }
82 | }
83 | };
84 |
85 | var mapResponse = await this._client.Indices.PutMappingAsync(index, x => x
86 | .Properties(propDesc =>
87 | {
88 | propDesc.Keyword(x => x.Id);
89 | propDesc.Nested(ElasticsearchMemoryRecord.TagsField, np);
90 | propDesc.Text(x => x.Payload, pd => pd.Index(false));
91 | propDesc.Text(x => x.Content);
92 | propDesc.DenseVector(x => x.Vector, d => d.Index(true).Dims(Dimensions).Similarity("cosine"));
93 |
94 | this._config.ConfigureProperties?.Invoke(propDesc);
95 | }),
96 | cancellationToken).ConfigureAwait(false);
97 |
98 |         this._log.LogTrace("{MethodName}: Index {Index} created.", nameof(CreateIndexAsync), index);
99 | }
100 |
101 | ///
102 | public async Task> GetIndexesAsync(
103 | CancellationToken cancellationToken = default)
104 | {
105 | var resp = await this._client.Indices.GetAsync(this._config.IndexPrefix + "*", cancellationToken).ConfigureAwait(false);
106 |
107 | var names = resp.Indices
108 | .Select(x => x.Key.ToString().Replace(this._config.IndexPrefix, string.Empty, StringComparison.Ordinal))
109 | .ToHashSet(StringComparer.OrdinalIgnoreCase);
110 |
111 | this._log.LogTrace("{MethodName}: Returned {IndexCount} indices: {Indices}.", nameof(GetIndexesAsync), names.Count, string.Join(", ", names));
112 |
113 | return names;
114 | }
115 |
116 | ///
117 | public async Task DeleteIndexAsync(
118 | string index,
119 | CancellationToken cancellationToken = default)
120 | {
121 | index = this._indexNameHelper.Convert(index);
122 |
123 | var delResponse = await this._client.Indices.DeleteAsync(
124 | index,
125 | cancellationToken).ConfigureAwait(false);
126 |
127 | if (delResponse.IsSuccess())
128 | {
129 | this._log.LogTrace("{MethodName}: Index {Index} deleted.", nameof(DeleteIndexAsync), index);
130 | }
131 | else
132 | {
133 | this._log.LogWarning("{MethodName}: Index {Index} delete failed.", nameof(DeleteIndexAsync), index);
134 | }
135 | }
136 |
137 | ///
138 | public async Task DeleteAsync(
139 | string index,
140 | MemoryRecord record,
141 | CancellationToken cancellationToken = default)
142 | {
143 | index = this._indexNameHelper.Convert(index);
144 |
145 | record = record ?? throw new ArgumentNullException(nameof(record));
146 |
147 | var delResponse = await this._client.DeleteAsync(
148 | index,
149 | record.Id,
150 | (delReq) =>
151 | {
152 | delReq.Refresh(Refresh.WaitFor);
153 | },
154 | cancellationToken)
155 | .ConfigureAwait(false);
156 |
157 | if (delResponse.IsSuccess())
158 | {
159 | this._log.LogTrace("{MethodName}: Record {RecordId} deleted.", nameof(DeleteAsync), record.Id);
160 | }
161 | else
162 | {
163 | this._log.LogWarning("{MethodName}: Record {RecordId} delete failed.", nameof(DeleteAsync), record.Id);
164 | }
165 | }
166 |
167 | ///
168 | public async Task UpsertAsync(
169 | string index,
170 | MemoryRecord record,
171 | CancellationToken cancellationToken = default)
172 | {
173 | index = this._indexNameHelper.Convert(index);
174 |
175 | var memRec = ElasticsearchMemoryRecord.FromMemoryRecord(record);
176 |
177 | var response = await this._client.UpdateAsync(
178 | index,
179 | memRec.Id,
180 | (updateReq) =>
181 | {
182 | updateReq.Refresh(Refresh.WaitFor);
183 |
184 | var memRec2 = memRec;
185 | updateReq.Doc(memRec2);
186 | updateReq.DocAsUpsert(true);
187 | },
188 | cancellationToken)
189 | .ConfigureAwait(false);
190 |
191 | if (response.IsSuccess())
192 | {
193 | this._log.LogTrace("{MethodName}: Record {RecordId} upserted.", nameof(UpsertAsync), memRec.Id);
194 | }
195 | else
196 | {
197 | this._log.LogError("{MethodName}: Record {RecordId} upsert failed.", nameof(UpsertAsync), memRec.Id);
198 | }
199 |
200 | return response.Id;
201 | }
202 |
203 | ///
204 | public async IAsyncEnumerable<(MemoryRecord, double)> GetSimilarListAsync(
205 | string index,
206 | string text,
207 | ICollection? filters = null,
208 | double minRelevance = 0, int limit = 1, bool withEmbeddings = false, [EnumeratorCancellation] CancellationToken cancellationToken = default)
209 | {
210 | if (limit < 0)
211 | {
212 | limit = 10;
213 | }
214 |
215 | index = this._indexNameHelper.Convert(index);
216 |
217 | this._log.LogTrace("{MethodName}: Searching for '{Text}' on index '{IndexName}' with filters {Filters}. {MinRelevance} {Limit} {WithEmbeddings}",
218 | nameof(GetSimilarListAsync), text, index, filters.ToDebugString(), minRelevance, limit, withEmbeddings);
219 |
220 | Embedding qembed = await this._embeddingGenerator.GenerateEmbeddingAsync(text, cancellationToken).ConfigureAwait(false);
221 | var coll = qembed.Data.ToArray();
222 |
223 | var resp = await this._client.SearchAsync(s =>
224 | s.Index(index)
225 | .Knn(qd =>
226 | {
227 | qd.k(limit)
228 | .Filter(q => this.ConvertTagFilters(q, filters))
229 | .NumCandidates(limit + 100)
230 | .Field(x => x.Vector)
231 | .QueryVector(coll);
232 | }),
233 | cancellationToken)
234 | .ConfigureAwait(false);
235 |
236 | if ((resp.HitsMetadata is null) || (resp.HitsMetadata.Hits is null))
237 | {
238 | this._log.LogWarning("The search returned a null result. Should retry?");
239 | yield break;
240 | }
241 |
242 | foreach (var hit in resp.HitsMetadata.Hits)
243 | {
244 | if (hit?.Source == null)
245 | {
246 | continue;
247 | }
248 |
249 | this._log.LogTrace("{MethodName} Hit: {HitScore}, {HitId}", nameof(GetSimilarListAsync), hit.Score, hit.Id);
250 | yield return (hit.Source!.ToMemoryRecord(), hit.Score ?? 0);
251 | }
252 | }
253 |
254 | ///
255 | public async IAsyncEnumerable GetListAsync(
256 | string index,
257 | ICollection? filters = null,
258 | int limit = 1,
259 | bool withEmbeddings = false,
260 | [EnumeratorCancellation]
261 | CancellationToken cancellationToken = default)
262 | {
263 | this._log.LogTrace("{MethodName}: querying index '{IndexName}' with filters {Filters}. {Limit} {WithEmbeddings}",
264 | nameof(GetListAsync), index, filters.ToDebugString(), limit, withEmbeddings);
265 |
266 | if (limit < 0)
267 | {
268 | limit = 10;
269 | }
270 |
271 | index = this._indexNameHelper.Convert(index);
272 |
273 | var resp = await this._client.SearchAsync(s =>
274 | s.Index(index)
275 | .Size(limit)
276 | .Query(qd =>
277 | {
278 | this.ConvertTagFilters(qd, filters);
279 | }),
280 | cancellationToken)
281 | .ConfigureAwait(false);
282 |
283 | if ((resp.HitsMetadata is null) || (resp.HitsMetadata.Hits is null))
284 | {
285 | yield break;
286 | }
287 |
288 | foreach (var hit in resp.Hits)
289 | {
290 | if (hit?.Source == null)
291 | {
292 | continue;
293 | }
294 |
295 | this._log.LogTrace("{MethodName} Hit: {HitScore}, {HitId}", nameof(GetListAsync), hit.Score, hit.Id);
296 | yield return hit.Source!.ToMemoryRecord();
297 | }
298 | }
299 |
300 | //private string ConvertIndexName(string index) => ESIndexName.Convert(this._config.IndexPrefix + index);
301 |
302 | private QueryDescriptor ConvertTagFilters(
303 | QueryDescriptor qd,
304 | ICollection? filters = null)
305 | {
306 | if ((filters == null) || (filters.Count == 0))
307 | {
308 | qd.MatchAll();
309 | return qd;
310 | }
311 |
312 | filters = filters.Where(f => f.Keys.Count > 0)
313 | .ToList(); // Remove empty filters
314 |
315 | if (filters.Count == 0)
316 | {
317 | qd.MatchAll();
318 | return qd;
319 | }
320 |
321 | foreach (MemoryFilter filter in filters)
322 | {
323 | List all = new();
324 |
325 | // Each tag collection is an element of a List>>
326 | foreach (var tagName in filter.Keys)
327 | {
328 | List tagValues = filter[tagName];
329 | List terms = tagValues.Select(x => (FieldValue)(x ?? FieldValue.Null))
330 | .ToList();
331 | // ----------------
332 | Query newTagQuery = new TermQuery(ElasticsearchMemoryRecord.Tags_Name) { Value = tagName };
333 | newTagQuery &= new TermsQuery()
334 | {
335 | Field = ElasticsearchMemoryRecord.Tags_Value,
336 | Terms = new TermsQueryField(terms)
337 | };
338 | var nestedQd = new NestedQuery();
339 | nestedQd.Path = ElasticsearchMemoryRecord.TagsField;
340 | nestedQd.Query = newTagQuery;
341 |
342 | all.Add(nestedQd);
343 | qd.Bool(bq => bq.Must(all.ToArray()));
344 | }
345 | }
346 |
347 | // ---------------------
348 |
349 | //qd.Nested(nqd =>
350 | //{
351 | // nqd.Path(ElasticsearchMemoryRecord.TagsField);
352 |
353 | // nqd.Query(nq =>
354 | // {
355 | // // Each filter is a tag collection.
356 | // foreach (MemoryFilter filter in filters)
357 | // {
358 | // List all = new();
359 |
360 | // // Each tag collection is an element of a List>>
361 | // foreach (var tagName in filter.Keys)
362 | // {
363 | // List tagValues = filter[tagName];
364 | // List terms = tagValues.Select(x => (FieldValue)(x ?? FieldValue.Null))
365 | // .ToList();
366 | // // ----------------
367 |
368 | // Query newTagQuery = new TermQuery(ElasticsearchMemoryRecord.Tags_Name) { Value = tagName };
369 | // newTagQuery &= new TermsQuery() {
370 | // Field = ElasticsearchMemoryRecord.Tags_Value,
371 | // Terms = new TermsQueryField(terms)
372 | // };
373 |
374 | // all.Add(newTagQuery);
375 | // }
376 |
377 | // nq.Bool(bq => bq.Must(all.ToArray()));
378 | // }
379 | // });
380 | //});
381 |
382 | return qd;
383 | }
384 | }
385 |
--------------------------------------------------------------------------------
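Note: a sketch of using the connector directly through IMemoryDb. It assumes an ElasticsearchConfig ('config') and an ITextEmbeddingGenerator ('embeddingGenerator') are already available; in a real application the class is normally resolved from dependency injection instead.

    using Elastic.Clients.Elasticsearch;
    using FreeMindLabs.KernelMemory.Elasticsearch;
    using Microsoft.KernelMemory.MemoryStorage;

    // 'config' and 'embeddingGenerator' are assumed to exist in the calling code.
    IMemoryDb memory = new ElasticsearchMemory(
        config,
        new ElasticsearchClient(config.ToElasticsearchClientSettings()),
        embeddingGenerator,
        new IndexNameHelper(config));

    // Indices are not created implicitly; create one before upserting or querying.
    await memory.CreateIndexAsync("example", vectorSize: 1536);

    await foreach (var (record, score) in memory.GetSimilarListAsync("example", "carbon", limit: 3))
    {
        Console.WriteLine($"{score:0.000} {record.Id}");
    }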
/src/ElasticsearchMemoryStorage/ElasticsearchMemoryFilter.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using Microsoft.KernelMemory;
4 |
5 | namespace FreeMindLabs.KernelMemory.Elasticsearch;
6 |
7 | ///
8 | /// Extended filtering options available when using Elasticsearch
9 | ///
10 | public class ElasticsearchMemoryFilter : MemoryFilter
11 | {
12 | // ...
13 | }
14 |
--------------------------------------------------------------------------------
/src/ElasticsearchMemoryStorage/ElasticsearchMemoryRecord.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using System.Text.Json;
4 | using System.Text.Json.Serialization;
5 | using Microsoft.KernelMemory;
6 | using Microsoft.KernelMemory.MemoryStorage;
7 |
8 | namespace FreeMindLabs.KernelMemory.Elasticsearch;
9 |
10 | ///
11 | /// Elasticsearch record.
12 | ///
13 | public sealed class ElasticsearchMemoryRecord
14 | {
15 | internal const string IdField = "id";
16 | internal const string EmbeddingField = "embedding";
17 |
18 | ///
19 | public const string TagsField = "tags";
20 | ///
21 | internal static readonly string Tags_Name = TagsField + "." + nameof(ElasticsearchTag.Name).ToLower();
22 | ///
23 | internal static readonly string Tags_Value = TagsField + "." + nameof(ElasticsearchTag.Value).ToLower();
24 |
25 |
26 | private const string PayloadField = "payload";
27 | private const string ContentField = "content";
28 |
29 | private static readonly JsonSerializerOptions s_jsonOptions = new()
30 | {
31 | AllowTrailingCommas = true,
32 | MaxDepth = 10,
33 | PropertyNameCaseInsensitive = true,
34 | ReadCommentHandling = JsonCommentHandling.Disallow,
35 | WriteIndented = false
36 | };
37 |
38 | ///
39 |     /// The unique identifier of the record.
40 | ///
41 | [JsonPropertyName(IdField)]
42 | public string Id { get; set; } = string.Empty;
43 |
44 | ///
45 |     /// The tags associated with the record, stored as name/value pairs.
46 | ///
47 | [JsonPropertyName(TagsField)]
48 | public List Tags { get; set; } = new();
49 |
50 | ///
51 |     /// The record payload, serialized as JSON.
52 | ///
53 | [JsonPropertyName(PayloadField)]
54 | public string Payload { get; set; } = string.Empty;
55 |
56 | ///
57 |     /// The text content of the record.
58 | ///
59 | [JsonPropertyName(ContentField)]
60 | public string Content { get; set; } = string.Empty;
61 |
62 | ///
63 |     /// The embedding vector of the record.
64 | ///
65 | [JsonPropertyName(EmbeddingField)]
66 | [JsonConverter(typeof(Embedding.JsonConverter))]
67 | public Embedding Vector { get; set; } = new();
68 |
69 | ///
70 |     /// Converts this Elasticsearch record to a Kernel Memory record.
71 | ///
72 | public MemoryRecord ToMemoryRecord(bool withEmbedding = true)
73 | {
74 | MemoryRecord result = new()
75 | {
76 | Id = this.Id,
77 | Payload = JsonSerializer.Deserialize>(this.Payload, s_jsonOptions)
78 | ?? new Dictionary()
79 | };
80 | // TODO: remove magic string
81 | result.Payload["text"] = this.Content;
82 |
83 | if (withEmbedding)
84 | {
85 | result.Vector = this.Vector;
86 | }
87 |
88 | foreach (var tag in this.Tags)
89 | {
90 | result.Tags.Add(tag.Name, tag.Value);
91 | }
92 |
93 | return result;
94 | }
95 |
96 | ///
97 |     /// Creates an Elasticsearch record from a Kernel Memory record.
98 | ///
99 | ///
100 | ///
101 | public static ElasticsearchMemoryRecord FromMemoryRecord(MemoryRecord record)
102 | {
103 | ArgumentNullException.ThrowIfNull(record);
104 |
105 | // TODO: remove magic strings
106 | string content = record.Payload["text"]?.ToString() ?? string.Empty;
107 | string documentId = record.Tags["__document_id"][0] ?? string.Empty;
108 | string filePart = record.Tags["__file_part"][0] ?? string.Empty;
109 | string betterId = $"{documentId}|{filePart}";
110 |
111 | record.Payload.Remove("text"); // We move the text to the content field. No need to index twice.
112 |
113 | ElasticsearchMemoryRecord result = new()
114 | {
115 | Id = record.Id,
116 | Vector = record.Vector,
117 | Payload = JsonSerializer.Serialize(record.Payload, s_jsonOptions),
118 | Content = content
119 | };
120 |
121 | foreach (var tag in record.Tags)
122 | {
123 | if ((tag.Value == null) || (tag.Value.Count == 0))
124 | {
125 | // Key only, with no values
126 | result.Tags.Add(new ElasticsearchTag(name: tag.Key));
127 | continue;
128 | }
129 |
130 | foreach (var value in tag.Value)
131 | {
132 | // Key with one or more values
133 | result.Tags.Add(new ElasticsearchTag(name: tag.Key, value: value));
134 | }
135 | }
136 |
137 | return result;
138 | }
139 | }
140 |
--------------------------------------------------------------------------------
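Note: an illustrative round trip through the mapping above. All identifiers and payload values are made up; FromMemoryRecord requires the __document_id and __file_part tags and moves the "text" payload entry into Content. The embedding is omitted for brevity.

    using FreeMindLabs.KernelMemory.Elasticsearch;
    using Microsoft.KernelMemory;
    using Microsoft.KernelMemory.MemoryStorage;

    var record = new MemoryRecord
    {
        Id = "d=doc-001//p=part-01",                  // made-up identifier
        Payload = new Dictionary<string, object>
        {
            { "text", "The quick brown fox." },
            { "file", "sample.txt" }
        },
        Tags = new TagCollection
        {
            { "__document_id", "doc-001" },           // required by FromMemoryRecord
            { "__file_part", "part-01" }              // required by FromMemoryRecord
        }
    };

    // "text" is lifted out of the payload and stored in Content; tags become (name, value) pairs.
    ElasticsearchMemoryRecord esRecord = ElasticsearchMemoryRecord.FromMemoryRecord(record);

    // Converts back, re-adding Content under the "text" payload key.
    MemoryRecord roundTripped = esRecord.ToMemoryRecord(withEmbedding: false);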
/src/ElasticsearchMemoryStorage/ElasticsearchMemoryStorage.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net6.0
5 | LatestMajor
6 | FreeMindLabs.KernelMemory.Elasticsearch
7 | FreeMindLabs.KernelMemory.Elasticsearch
8 | true
9 | FreeMindLabs.KernelMemory.Elasticsearch
10 | CA1724,NU5104,CA1304,CA1311,IDE0130
11 | 750ea0f7-073f-44fb-b791-08ce5fd978a2
12 | enable
13 | portable
14 |
15 | true
16 |
17 | true
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/src/ElasticsearchMemoryStorage/ElasticsearchTag.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using System.Text.Json.Serialization;
4 |
5 | namespace FreeMindLabs.KernelMemory.Elasticsearch;
6 |
7 | ///
8 | /// An Elasticsearch tag.
9 | ///
10 | public class ElasticsearchTag
11 | {
12 | ///
13 | public const string NameField = "name";
14 |
15 | ///
16 | public const string ValueField = "value";
17 |
18 | ///
19 | /// Instantiates a new instance of .
20 | ///
21 | ///
22 | ///
23 | ///
24 | public ElasticsearchTag(string name, string? value = default)
25 | {
26 | this.Name = name ?? throw new ArgumentNullException(nameof(name));
27 | this.Value = value;
28 | }
29 |
30 | ///
31 | /// The name of this tag.
32 | ///
33 | [JsonPropertyName(NameField)]
34 | public string Name { get; set; } = string.Empty;
35 |
36 | ///
37 | /// The value of this tag.
38 | ///
39 | [JsonPropertyName(ValueField)]
40 | public string? Value { get; set; }
41 |
42 | ///
43 | public override string ToString()
44 | {
45 | return $"{this.Name}={this.Value}";
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/ElasticsearchMemoryStorage/Extensions/KernelMemoryBuilderExtensions.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using FreeMindLabs.KernelMemory.Elasticsearch;
4 | using Microsoft.Extensions.DependencyInjection;
5 |
6 | namespace Microsoft.KernelMemory;
7 |
8 | ///
9 | /// Extensions for KernelMemoryBuilder
10 | ///
11 | public static partial class KernelMemoryBuilderExtensions
12 | {
13 | ///
14 | /// Kernel Memory Builder extension method to add the Elasticsearch memory connector.
15 | ///
16 | /// The IKernelMemoryBuilder instance
17 | /// The Elasticsearch configuration
18 | public static IKernelMemoryBuilder WithElasticsearch(this IKernelMemoryBuilder builder,
19 | ElasticsearchConfig configuration)
20 | {
21 | builder.Services.AddElasticsearchAsVectorDb(configuration);
22 |
23 | return builder;
24 | }
25 |
26 | ///
27 | /// Extension method to add the Elasticsearch memory connector.
28 | ///
29 | ///
30 | ///
31 | ///
32 | public static IKernelMemoryBuilder WithElasticsearch(this IKernelMemoryBuilder builder,
33 | Action configure)
34 | {
35 | ArgumentNullException.ThrowIfNull(configure, nameof(configure));
36 |
37 | var cfg = new ElasticsearchConfigBuilder();
38 | configure(cfg);
39 |
40 | builder.Services.AddElasticsearchAsVectorDb(cfg.Build());
41 | return builder;
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
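Note: a sketch of wiring the connector into Kernel Memory through the delegate overload above. 'builder' is assumed to be an IKernelMemoryBuilder created elsewhere by the host application; credential values are placeholders.

    using Microsoft.KernelMemory;

    // 'builder' is an IKernelMemoryBuilder obtained elsewhere (assumption).
    builder.WithElasticsearch(es => es
        .WithEndpoint("https://localhost:9200")
        .WithUserNameAndPassword("elastic", "<your-password>")
        .WithCertificateFingerPrint("<your-ca-fingerprint>"));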
/src/ElasticsearchMemoryStorage/Extensions/MemoryFilterExtensions.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | namespace Microsoft.KernelMemory;
4 |
5 | ///
6 | /// Extensions methods for MemoryFilter.
7 | ///
8 | public static class MemoryFilterExtensions
9 | {
10 | ///
11 | /// Displays the MemoryFilter in a human-readable format.
12 | ///
13 | ///
14 | ///
15 | public static string ToDebugString(this MemoryFilter? filter)
16 | {
17 | if (filter == null)
18 | {
19 | return string.Empty;
20 | }
21 |
22 | // Prints all the tags in the record
23 | var tags = filter.Select(x => $"({x.Key}={string.Join("|", x.Value)})");
24 | return string.Join(" & ", tags);
25 | }
26 |
27 | ///
28 | /// Displays the MemoryFilter(s) in a human-readable format.
29 | ///
30 | ///
31 | ///
32 | public static string ToDebugString(this IEnumerable? filters)
33 | {
34 | if (filters == null)
35 | {
36 | return string.Empty;
37 | }
38 |
39 | // Prints all the tags in the record
40 | var tags = filters.Select(x => x.ToDebugString());
41 | return string.Join(" & ", tags);
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
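Note: a small sketch of the output format produced by the extensions above. It assumes the standard Kernel Memory MemoryFilter.ByTag(name, value) method, which is not part of this repository.

    using Microsoft.KernelMemory;

    // ByTag is assumed from Kernel Memory's MemoryFilter API.
    var filter = new MemoryFilter().ByTag("user", "alice").ByTag("type", "news");

    Console.WriteLine(filter.ToDebugString());   // prints: (user=alice) & (type=news)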
/src/ElasticsearchMemoryStorage/Extensions/ServiceCollectionExtensions.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using Elastic.Clients.Elasticsearch;
4 | using FreeMindLabs.KernelMemory.Elasticsearch;
5 | using Microsoft.KernelMemory.MemoryStorage;
6 |
7 | namespace Microsoft.Extensions.DependencyInjection;
8 |
9 | ///
10 | /// Extensions for KernelMemoryBuilder and generic DI
11 | ///
12 | public static partial class ServiceCollectionExtensions
13 | {
14 | ///
15 | /// Inject Elasticsearch as the default implementation of IMemoryDb
16 | ///
17 | public static IServiceCollection AddElasticsearchAsVectorDb(this IServiceCollection services,
18 | ElasticsearchConfig esConfig)
19 | {
20 | ArgumentNullException.ThrowIfNull(esConfig, nameof(esConfig));
21 |
22 | // The ElasticsearchClient type is thread-safe and can be shared and
23 | // reused across multiple threads in consuming applications.
24 | // See https://www.elastic.co/guide/en/elasticsearch/client/net-api/current/recommendations.html
25 | services.AddSingleton(sp =>
26 | {
27 | var esConfig = sp.GetRequiredService();
28 | return new ElasticsearchClient(esConfig.ToElasticsearchClientSettings());
29 | });
30 |
31 | return services
32 | .AddSingleton()
33 | .AddSingleton(esConfig)
34 | .AddSingleton();
35 | }
36 |
37 | ///
38 | /// Inject Elasticsearch as the default implementation of IMemoryDb
39 | ///
40 | public static IServiceCollection AddElasticsearchAsVectorDb(this IServiceCollection services,
41 | Action configure)
42 | {
43 | ArgumentNullException.ThrowIfNull(configure, nameof(configure));
44 |
45 | var cfg = new ElasticsearchConfigBuilder();
46 | configure(cfg);
47 |
48 | return services.AddElasticsearchAsVectorDb(cfg.Build());
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
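Note: a sketch of registering the connector in a plain service collection using the delegate overload above. The connector also needs an ITextEmbeddingGenerator, which these extensions do not register.

    using FreeMindLabs.KernelMemory.Elasticsearch;
    using Microsoft.Extensions.DependencyInjection;

    var services = new ServiceCollection();

    services.AddElasticsearchAsVectorDb(es => es
        .WithEndpoint("https://localhost:9200")
        .WithUserNameAndPassword("elastic", "<your-password>")
        .WithCertificateFingerPrint("<your-ca-fingerprint>"));

    // An ITextEmbeddingGenerator must be registered separately by the host application (not shown).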
/src/ElasticsearchMemoryStorage/IIndexNameHelper.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | namespace FreeMindLabs.KernelMemory.Elasticsearch;
4 |
5 | ///
6 | /// A utility class to help with Elasticsearch index names.
7 | /// It applies the configured index prefix and Elasticsearch's index naming rules (lowercase, allowed characters, length limits).
8 | ///
9 | public interface IIndexNameHelper
10 | {
11 | ///
12 | /// Attempts to convert the given index name to a valid Elasticsearch index name.
13 | ///
14 | /// The index name to convert.
15 | /// The result of the conversion. The result includes the converted index name if the conversion succeeded, or a list of errors if the conversion failed.
16 | /// A structure containing the actual index name or a list of errors if the conversion failed.
17 | ///
18 |     public bool TryConvert(string indexName, out (string ActualIndexName, IEnumerable<string> Errors) result);
19 |
20 | ///
21 | /// Converts the given index name to a valid Elasticsearch index name.
22 | /// It throws an exception if the conversion fails.
23 | ///
24 | /// The index name to convert.
25 | /// The converted index name.
26 | public string Convert(string indexName);
27 | }
28 |
--------------------------------------------------------------------------------
/src/ElasticsearchMemoryStorage/IndexNameHelper.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | namespace FreeMindLabs.KernelMemory.Elasticsearch;
4 |
5 | ///
6 | public class IndexNameHelper : IIndexNameHelper
7 | {
8 | ///
9 | public IndexNameHelper(ElasticsearchConfig config)
10 | {
11 | this.IndexPrefix = config.IndexPrefix;
12 | }
13 |
14 | ///
15 | /// The prefix to use for all index names.
16 | ///
17 | public string IndexPrefix { get; }
18 |
19 | ///
20 |     public bool TryConvert(string indexName, out (string ActualIndexName, IEnumerable<string> Errors) result)
21 | {
22 | // Convert to lowercase and replace underscores with hyphens to
23 | // have a consistent behavior with other storage types supported by Kernel Memory. (see #18)
24 | indexName = (this.IndexPrefix + indexName)
25 | .Replace("_", "-", StringComparison.Ordinal)
26 | .Trim()
27 | .ToLower();
28 |
29 | // Check for null or whitespace
30 | if (string.IsNullOrWhiteSpace(indexName))
31 | {
32 |             result = ("default", Array.Empty<string>());
33 | return true;
34 | }
35 |
36 |         var errors = new List<string>();
37 |
38 | // Check for invalid start characters
39 | if (indexName.StartsWith('-') || indexName.StartsWith('_'))
40 | {
41 | errors.Add("An index name cannot start with a hyphen (-) or underscore (_).");
42 | }
43 |
44 | // Check for invalid characters
45 | if (indexName.Any(x => !char.IsLetterOrDigit(x) && x != '-'))
46 | {
47 | errors.Add("An index name can only contain letters, digits, and hyphens (-).");
48 | }
49 |
50 | // Check for length (max 255 bytes)
51 | if (System.Text.Encoding.UTF8.GetByteCount(indexName) > 255)
52 | {
53 | errors.Add("An index name cannot be longer than 255 bytes.");
54 | }
55 |
56 | // Avoid names that are dot-only or dot and numbers
57 | if (indexName.All(c => c == '.' || char.IsDigit(c)))
58 | {
59 | errors.Add("Index name cannot be only dots or dots and numbers.");
60 | }
61 |
62 | if (errors.Count > 0)
63 | {
64 | result = (string.Empty, errors);
65 | return false;
66 | }
67 |
68 |         result = (indexName, Array.Empty<string>());
69 | return true;
70 | }
71 |
72 | ///
73 | public string Convert(string indexName)
74 | {
75 | if (!this.TryConvert(indexName, out var result))
76 | {
77 | throw new InvalidIndexNameException(result);
78 | }
79 |
80 | return result.ActualIndexName;
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
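Note: a small sketch of the conversion rules implemented above, using an empty index prefix purely to keep the output simple (the configured prefix is prepended before validation).

    using FreeMindLabs.KernelMemory.Elasticsearch;

    var helper = new IndexNameHelper(new ElasticsearchConfig { IndexPrefix = string.Empty });

    if (helper.TryConvert("My_Index", out var result))
    {
        Console.WriteLine(result.ActualIndexName);   // "my-index": lowercased, '_' replaced with '-'
    }

    // Names with characters other than letters, digits, and '-' fail validation.
    if (!helper.TryConvert("bad name!", out var invalid))
    {
        Console.WriteLine(string.Join("; ", invalid.Errors));
    }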
/tests/UnitTests/Data/file1-Wikipedia-Carbon.txt:
--------------------------------------------------------------------------------
1 | Carbon (from Latin carbo 'coal') is a chemical element with the symbol C and atomic number 6. It is nonmetallic and tetravalent—its atom making four electrons available to form covalent chemical bonds. It belongs to group 14 of the periodic table.[14] Carbon makes up about 0.025 percent of Earth's crust.[15] Three isotopes occur naturally, 12C and 13C being stable, while 14C is a radionuclide, decaying with a half-life of about 5,730 years.[16] Carbon is one of the few elements known since antiquity.[17]
2 |
3 | Carbon is the 15th most abundant element in the Earth's crust, and the fourth most abundant element in the universe by mass after hydrogen, helium, and oxygen. Carbon's abundance, its unique diversity of organic compounds, and its unusual ability to form polymers at the temperatures commonly encountered on Earth, enables this element to serve as a common element of all known life. It is the second most abundant element in the human body by mass (about 18.5%) after oxygen.[18]
4 |
5 | The atoms of carbon can bond together in diverse ways, resulting in various allotropes of carbon. Well-known allotropes include graphite, diamond, amorphous carbon, and fullerenes. The physical properties of carbon vary widely with the allotropic form. For example, graphite is opaque and black, while diamond is highly transparent. Graphite is soft enough to form a streak on paper (hence its name, from the Greek verb "γράφειν" which means "to write"), while diamond is the hardest naturally occurring material known. Graphite is a good electrical conductor while diamond has a low electrical conductivity. Under normal conditions, diamond, carbon nanotubes, and graphene have the highest thermal conductivities of all known materials. All carbon allotropes are solids under normal conditions, with graphite being the most thermodynamically stable form at standard temperature and pressure. They are chemically resistant and require high temperature to react even with oxygen.
6 |
7 | The most common oxidation state of carbon in inorganic compounds is +4, while +2 is found in carbon monoxide and transition metal carbonyl complexes. The largest sources of inorganic carbon are limestones, dolomites and carbon dioxide, but significant quantities occur in organic deposits of coal, peat, oil, and methane clathrates. Carbon forms a vast number of compounds, with about two hundred million having been described and indexed;[19] and yet that number is but a fraction of the number of theoretically possible compounds under standard conditions.
8 |
9 | The allotropes of carbon include graphite, one of the softest known substances, and diamond, the hardest naturally occurring substance. It bonds readily with other small atoms, including other carbon atoms, and is capable of forming multiple stable covalent bonds with suitable multivalent atoms. Carbon is a component element in the large majority of all chemical compounds, with about two hundred million examples having been described in the published chemical literature.[19] Carbon also has the highest sublimation point of all elements. At atmospheric pressure it has no melting point, as its triple point is at 10.8 ± 0.2 megapascals (106.6 ± 2.0 atm; 1,566 ± 29 psi) and 4,600 ± 300 K (4,330 ± 300 °C; 7,820 ± 540 °F),[3][4] so it sublimes at about 3,900 K (3,630 °C; 6,560 °F).[21][22] Graphite is much more reactive than diamond at standard conditions, despite being more thermodynamically stable, as its delocalised pi system is much more vulnerable to attack. For example, graphite can be oxidised by hot concentrated nitric acid at standard conditions to mellitic acid, C6(CO2H)6, which preserves the hexagonal units of graphite while breaking up the larger structure.[23]
10 |
11 | Carbon sublimes in a carbon arc, which has a temperature of about 5800 K (5,530 °C or 9,980 °F). Thus, irrespective of its allotropic form, carbon remains solid at higher temperatures than the highest-melting-point metals such as tungsten or rhenium. Although thermodynamically prone to oxidation, carbon resists oxidation more effectively than elements such as iron and copper, which are weaker reducing agents at room temperature.
12 |
13 | Carbon is the sixth element, with a ground-state electron configuration of 1s22s22p2, of which the four outer electrons are valence electrons. Its first four ionisation energies, 1086.5, 2352.6, 4620.5 and 6222.7 kJ/mol, are much higher than those of the heavier group-14 elements. The electronegativity of carbon is 2.5, significantly higher than the heavier group-14 elements (1.8–1.9), but close to most of the nearby nonmetals, as well as some of the second- and third-row transition metals. Carbon's covalent radii are normally taken as 77.2 pm (C−C), 66.7 pm (C=C) and 60.3 pm (C≡C), although these may vary depending on coordination number and what the carbon is bonded to. In general, covalent radius decreases with lower coordination number and higher bond order.[24]
14 |
15 | Carbon-based compounds form the basis of all known life on Earth, and the carbon-nitrogen-oxygen cycle provides a small portion of the energy produced by the Sun, and most of the energy in larger stars (e.g. Sirius). Although it forms an extraordinary variety of compounds, most forms of carbon are comparatively unreactive under normal conditions. At standard temperature and pressure, it resists all but the strongest oxidizers. It does not react with sulfuric acid, hydrochloric acid, chlorine or any alkalis. At elevated temperatures, carbon reacts with oxygen to form carbon oxides and will rob oxygen from metal oxides to leave the elemental metal. This exothermic reaction is used in the iron and steel industry to smelt iron and to control the carbon content of steel:
16 |
17 | Fe
18 | 3O
19 | 4 + 4 C(s) + 2 O
20 | 2 → 3 Fe(s) + 4 CO
21 | 2(g).
22 |
23 | Carbon reacts with sulfur to form carbon disulfide, and it reacts with steam in the coal-gas reaction used in coal gasification:
24 |
25 | C(s) + H2O(g) → CO(g) + H2(g).
26 |
27 | Carbon combines with some metals at high temperatures to form metallic carbides, such as the iron carbide cementite in steel and tungsten carbide, widely used as an abrasive and for making hard tips for cutting tools.
28 |
29 |
--------------------------------------------------------------------------------
/tests/UnitTests/Data/file2-Wikipedia-Moon.txt:
--------------------------------------------------------------------------------
1 | The Moon is Earth's only natural satellite. Its diameter is about one-quarter of Earth's (comparable to the width of Australia),[17] making it the fifth largest satellite in the Solar System and the largest and most massive relative to its parent planet. It is larger than all known dwarf planets in the Solar System.[18] The Moon is a planetary-mass object with a differentiated rocky body, making it a satellite planet under the geophysical definitions of the term. It lacks any significant atmosphere, hydrosphere, or magnetic field. Its surface gravity is about one-sixth of Earth's at 0.1654 g—Jupiter's moon Io is the only satellite in the Solar System known to have a higher surface gravity and density.
2 |
3 | The Moon orbits Earth at an average distance of 384,400 km (238,900 mi), or about 30 times Earth's diameter. Its gravitational influence is the main driver of Earth's tides and very slowly lengthens Earth's day. The Moon's orbit around Earth has a sidereal period of 27.3 days. During each synodic period of 29.5 days, the amount of the Moon's Earth-facing surface that is illuminated by the Sun varies from none up to nearly 100%, resulting in lunar phases that form the basis for the months of a lunar calendar.[19] The Moon is tidally locked to Earth, which means that the length of a full rotation of the Moon on its own axis causes its same side (the near side) to always face Earth, and the somewhat longer lunar day is the same as the synodic period. Due to cyclical shifts in perspective (libration), 59% of the lunar surface is visible from Earth.
--------------------------------------------------------------------------------
/tests/UnitTests/Data/file3-lorem-ipsum.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/tests/UnitTests/Data/file3-lorem-ipsum.docx
--------------------------------------------------------------------------------
/tests/UnitTests/Data/file4-SK-Readme.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/tests/UnitTests/Data/file4-SK-Readme.pdf
--------------------------------------------------------------------------------
/tests/UnitTests/Data/file5-NASA-news.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/tests/UnitTests/Data/file5-NASA-news.pdf
--------------------------------------------------------------------------------
/tests/UnitTests/Data/file6-ANWC-image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/tests/UnitTests/Data/file6-ANWC-image.jpg
--------------------------------------------------------------------------------
/tests/UnitTests/DataStorageTests.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 | using System.Globalization;
3 | using Elastic.Clients.Elasticsearch;
4 | using FreeMindLabs.KernelMemory.Elasticsearch;
5 | using Microsoft.KernelMemory;
6 | using Microsoft.KernelMemory.AI;
7 | using Microsoft.KernelMemory.DataFormats.Text;
8 | using Microsoft.KernelMemory.MemoryStorage;
9 | using Xunit;
10 | using Xunit.Abstractions;
11 |
12 | namespace UnitTests;
13 |
14 | public class DataStorageTests : ElasticsearchTestBase
15 | {
16 | public DataStorageTests(ITestOutputHelper output, IMemoryDb memoryDb, ITextEmbeddingGenerator textEmbeddingGenerator, ElasticsearchClient client,
17 | IIndexNameHelper indexNameHelper)
18 | : base(output, client, indexNameHelper)
19 | {
20 | this.MemoryDb = memoryDb ?? throw new ArgumentNullException(nameof(memoryDb));
21 | this.TextEmbeddingGenerator = textEmbeddingGenerator ?? throw new ArgumentNullException(nameof(textEmbeddingGenerator));
22 | }
23 |
24 | public IMemoryDb MemoryDb { get; }
25 | public ITextEmbeddingGenerator TextEmbeddingGenerator { get; }
26 |
27 | [Fact]
28 | public async Task CanUpsertOneTextDocumentAndDeleteAsync()
29 | {
30 | // We upsert the file
31 | var docIds = await DataStorageTests.UpsertTextFilesAsync(
32 | memoryDb: this.MemoryDb,
33 | textEmbeddingGenerator: this.TextEmbeddingGenerator,
34 | output: this.Output,
35 | indexName: nameof(CanUpsertOneTextDocumentAndDeleteAsync),
36 | fileNames: new[]
37 | {
38 | "Data/file1-Wikipedia-Carbon.txt"
39 | }).ConfigureAwait(false);
40 |
41 | // Deletes the document
42 | var deletes = docIds.Select(id => new MemoryRecord()
43 | {
44 | Id = id
45 | });
46 |
47 | foreach (var deleteRec in deletes)
48 | {
49 | await this.MemoryDb.DeleteAsync(nameof(CanUpsertOneTextDocumentAndDeleteAsync), deleteRec)
50 | .ConfigureAwait(false);
51 | }
52 |
53 |         // Verifies that the documents are gone
54 | var indexName = this.IndexNameHelper.Convert(nameof(CanUpsertOneTextDocumentAndDeleteAsync));
55 | var res = await this.Client.CountAsync(r => r.Index(indexName))
56 | .ConfigureAwait(false);
57 | Assert.Equal(0, res.Count);
58 | }
59 |
60 | [Fact]
61 | public async Task CanUpsertTwoTextFilesAndGetSimilarListAsync()
62 | {
63 | await DataStorageTests.UpsertTextFilesAsync(
64 | memoryDb: this.MemoryDb,
65 | textEmbeddingGenerator: this.TextEmbeddingGenerator,
66 | output: this.Output,
67 | indexName: nameof(CanUpsertTwoTextFilesAndGetSimilarListAsync),
68 | fileNames: new[]
69 | {
70 | "Data/file1-Wikipedia-Carbon.txt",
71 | "Data/file2-Wikipedia-Moon.txt"
72 | }).ConfigureAwait(false);
73 |
74 | // Gets documents that are similar to the word "carbon" .
75 | var foundSomething = false;
76 |
77 | var textToMatch = "carbon";
78 | await foreach (var result in this.MemoryDb.GetSimilarListAsync(
79 | index: nameof(CanUpsertTwoTextFilesAndGetSimilarListAsync),
80 | text: textToMatch,
81 | limit: 1))
82 | {
83 | this.Output.WriteLine($"Found a document matching '{textToMatch}': {result.Item1.Payload["file"]}.");
84 |             foundSomething = true;
85 |         }
86 |
87 | Assert.True(foundSomething, "It should have found something...");
88 | }
89 |
90 | public static string GuidWithoutDashes() => Guid.NewGuid().ToString().Replace("-", "", StringComparison.OrdinalIgnoreCase).ToLower(CultureInfo.CurrentCulture);
91 |
92 | public static async Task> UpsertTextFilesAsync(
93 | IMemoryDb memoryDb,
94 | ITextEmbeddingGenerator textEmbeddingGenerator,
95 | ITestOutputHelper output,
96 | string indexName,
97 | IEnumerable fileNames)
98 | {
99 | ArgumentNullException.ThrowIfNull(memoryDb);
100 | ArgumentNullException.ThrowIfNull(textEmbeddingGenerator);
101 | ArgumentNullException.ThrowIfNull(output);
102 | ArgumentNullException.ThrowIfNull(indexName);
103 | ArgumentNullException.ThrowIfNull(fileNames);
104 |
105 | // IMemoryDb does not create the index automatically.
106 | await memoryDb.CreateIndexAsync(indexName, 1536)
107 | .ConfigureAwait(false);
108 |
109 | var results = new List();
110 | foreach (var fileName in fileNames)
111 | {
112 | // Reads the text from the file
113 | string fullText = await File.ReadAllTextAsync(fileName)
114 | .ConfigureAwait(false);
115 |
116 | // Splits the text into lines of up to 1000 tokens each
117 | var lines = TextChunker.SplitPlainTextLines(fullText,
118 | maxTokensPerLine: 1000,
119 | tokenCounter: null);
120 |
121 | // Splits the line into paragraphs
122 | var paragraphs = TextChunker.SplitPlainTextParagraphs(lines,
123 | maxTokensPerParagraph: 1000,
124 | overlapTokens: 100);
125 |
126 | output.WriteLine($"File '{fileName}' contains {paragraphs.Count} paragraphs.");
127 |
128 | // Indexes each paragraph as a separate document
129 | var paraIdx = 0;
130 | var documentId = GuidWithoutDashes() + GuidWithoutDashes();
131 | var fileId = GuidWithoutDashes();
132 |
133 | foreach (var paragraph in paragraphs)
134 | {
135 | var embedding = await textEmbeddingGenerator.GenerateEmbeddingAsync(paragraph)
136 | .ConfigureAwait(false);
137 |
138 | output.WriteLine($"Indexed paragraph {++paraIdx}/{paragraphs.Count}. {paragraph.Length} characters.");
139 |
140 | var filePartId = GuidWithoutDashes();
141 |
142 | var esId = $"d={documentId}//p={filePartId}";
143 |
144 | var mrec = new MemoryRecord()
145 | {
146 | Id = esId,
147 | Payload = new Dictionary()
148 | {
149 | { "file", fileName },
150 | { "text", paragraph },
151 | { "vector_provider", textEmbeddingGenerator.GetType().Name },
152 | { "vector_generator", "TODO" },
153 | { "last_update", DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss") },
154 | { "text_embedding_generator", textEmbeddingGenerator.GetType().Name }
155 | },
156 | Tags = new TagCollection()
157 | {
158 | { "__document_id", documentId },
159 | { "__file_type", "text/plain" },
160 | { "__file_id", fileId },
161 | { "__file_part", filePartId }
162 |
163 | },
164 | Vector = embedding
165 | };
166 |
167 | var res = await memoryDb.UpsertAsync(indexName, mrec)
168 | .ConfigureAwait(false);
169 |
170 | results.Add(res);
171 | }
172 |
173 | output.WriteLine("");
174 | }
175 |
176 | return results;
177 | }
178 | }
179 |
180 |
--------------------------------------------------------------------------------
/tests/UnitTests/ElasticsearchTestBase.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 | using Elastic.Clients.Elasticsearch;
3 | using FreeMindLabs.KernelMemory.Elasticsearch;
4 | using Xunit;
5 | using Xunit.Abstractions;
6 |
7 | namespace UnitTests;
8 |
9 | ///
10 | /// A simple base class for Elasticsearch tests.
11 | /// It ensures that all indices created by the test methods of the derived class are
12 | /// deleted before and after the tests. This ensures that Elasticsearch is left in a clean state
13 | /// or that subsequent tests don't fail because of left-over indices.
14 | ///
15 | public abstract class ElasticsearchTestBase : IAsyncLifetime
16 | {
17 | protected ElasticsearchTestBase(ITestOutputHelper output, ElasticsearchClient client, IIndexNameHelper indexNameHelper)
18 | {
19 | this.Output = output ?? throw new ArgumentNullException(nameof(output));
20 | this.Client = client ?? throw new ArgumentNullException(nameof(client));
21 | this.IndexNameHelper = indexNameHelper ?? throw new ArgumentNullException(nameof(indexNameHelper));
22 | }
23 |
24 | public ITestOutputHelper Output { get; }
25 | public ElasticsearchClient Client { get; }
26 | public IIndexNameHelper IndexNameHelper { get; }
27 |
28 | public async Task InitializeAsync()
29 | {
30 | // Within a single test class, the tests are executed sequentially by default so
31 | // there is no chance for a method to finish and delete indices of other methods before the next
32 | // method starts executing.
33 | //var delIndexResponse = await this.Client.Indices.DeleteAsync(indices: this.con)
34 | // .ConfigureAwait(false);
35 |
36 | var indicesFound = await this.Client.DeleteIndicesOfTestAsync(this.GetType(), this.IndexNameHelper).ConfigureAwait(false);
37 |
38 | if (indicesFound.Any())
39 | {
40 | this.Output.WriteLine($"Deleted left-over test indices: {string.Join(", ", indicesFound)}");
41 | this.Output.WriteLine("");
42 | }
43 | }
44 |
45 | public async Task DisposeAsync()
46 | {
47 | var indicesFound = await this.Client.DeleteIndicesOfTestAsync(this.GetType(), this.IndexNameHelper).ConfigureAwait(false);
48 |
49 | if (indicesFound.Any())
50 | {
51 | this.Output.WriteLine($"Deleted test indices: {string.Join(", ", indicesFound)}");
52 | this.Output.WriteLine("");
53 | }
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/tests/UnitTests/IndexManagementTests.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 | using Elastic.Clients.Elasticsearch;
3 | using FreeMindLabs.KernelMemory.Elasticsearch;
4 | using Microsoft.KernelMemory.MemoryStorage;
5 | using Xunit;
6 | using Xunit.Abstractions;
7 |
8 | namespace UnitTests;
9 |
10 | public class IndexManagementTests : ElasticsearchTestBase
11 | {
12 | public IndexManagementTests(ITestOutputHelper output, IMemoryDb memoryDb, ElasticsearchClient client, IIndexNameHelper indexNameHelper)
13 | : base(output, client, indexNameHelper)
14 | {
15 | this.MemoryDb = memoryDb ?? throw new ArgumentNullException(nameof(memoryDb));
16 | }
17 |
18 | public IMemoryDb MemoryDb { get; }
19 |
20 | [Fact]
21 | public async Task CanCreateAndDeleteIndexAsync()
22 | {
23 | var indexName = nameof(CanCreateAndDeleteIndexAsync);
24 | var vectorSize = 1536;
25 |
26 | // Creates the index using IMemoryDb
27 | await this.MemoryDb.CreateIndexAsync(indexName, vectorSize)
28 | .ConfigureAwait(false);
29 |
30 | // Verifies the index is created using the ES client
31 | var actualIndexName = this.IndexNameHelper.Convert(nameof(CanCreateAndDeleteIndexAsync));
32 | var resp = await this.Client.Indices.ExistsAsync(actualIndexName)
33 | .ConfigureAwait(false);
34 | Assert.True(resp.Exists);
35 | this.Output.WriteLine($"The index '{actualIndexName}' was created successfully.");
36 |
37 | // Deletes the index
38 | await this.MemoryDb.DeleteIndexAsync(indexName)
39 | .ConfigureAwait(false);
40 |
41 | // Verifies the index is deleted using the ES client
42 | resp = await this.Client.Indices.ExistsAsync(actualIndexName)
43 | .ConfigureAwait(false);
44 | Assert.False(resp.Exists);
45 | this.Output.WriteLine($"The index '{actualIndexName}' was deleted successfully.");
46 | }
47 |
48 | [Fact]
49 | public async Task CanGetIndicesAsync()
50 | {
51 | var indexNames = new[]
52 | {
53 | this.IndexNameHelper.Convert(nameof(CanGetIndicesAsync) + "-First"),
54 | this.IndexNameHelper.Convert(nameof(CanGetIndicesAsync) + "-Second")
55 | };
56 |
57 | // Creates the indices using IMemoryDb
58 | foreach (var indexName in indexNames)
59 | {
60 | await this.MemoryDb.CreateIndexAsync(indexName, 1536)
61 | .ConfigureAwait(false);
62 | }
63 |
64 | // Verifies the indices are returned
65 | var indices = await this.MemoryDb.GetIndexesAsync()
66 | .ConfigureAwait(false);
67 |
68 | Assert.True(indexNames.All(name => indices.Contains(name)));
69 |
70 | // Cleans up
71 | foreach (var indexName in indexNames)
72 | {
73 | await this.MemoryDb.DeleteIndexAsync(indexName)
74 | .ConfigureAwait(false);
75 | }
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/tests/UnitTests/IndexnameTests.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 | using FreeMindLabs.KernelMemory.Elasticsearch;
3 | using Xunit;
4 | using Xunit.Abstractions;
5 |
6 | namespace UnitTests;
7 |
8 | public class IndexnameTests
9 | {
10 | private readonly ITestOutputHelper _output;
11 | private readonly IIndexNameHelper _indexNameHelper;
12 |
13 | public IndexnameTests(ITestOutputHelper output, IIndexNameHelper indexNameHelper)
14 | {
15 | this._output = output ?? throw new ArgumentNullException(nameof(output));
16 | this._indexNameHelper = indexNameHelper ?? throw new ArgumentNullException(nameof(indexNameHelper));
17 | }
18 |
19 | [Theory]
20 | [InlineData("")] // default index
21 | [InlineData("nondefault")]
22 | [InlineData("WithUppercase")]
23 | [InlineData("With-Dashes")]
24 | [InlineData("123numberfirst")]
25 | public void GoodIndexNamesAreAccepted(string indexName)
26 | {
27 | Assert.True(this._indexNameHelper.TryConvert(indexName, out var convResult));
28 | Assert.Empty(convResult.Errors);
29 |
30 | this._output.WriteLine($"The index name '{indexName}' will be translated to '{convResult.ActualIndexName}'.");
31 | }
32 |
33 | [Theory]
34 | // An index name cannot start with a hyphen (-) or underscore (_).
35 | //[InlineData("-test", 1)]
36 | //[InlineData("test_", 1)]
37 | // An index name can only contain letters, digits, and hyphens (-).
38 | [InlineData("test space", 1)]
39 | [InlineData("test/slash", 1)]
40 | [InlineData("test\\backslash", 1)]
41 | [InlineData("test.dot", 1)]
42 | [InlineData("test:colon", 1)]
43 | [InlineData("test*asterisk", 1)]
44 | [InlineData("testgreater", 1)]
46 | [InlineData("test|pipe", 1)]
47 | [InlineData("test?question", 1)]
48 | [InlineData("test\"quote", 1)]
49 | [InlineData("test'quote", 1)]
50 | [InlineData("test`backtick", 1)]
51 | [InlineData("test~tilde", 1)]
52 | [InlineData("test!exclamation", 1)]
53 | // Avoid names that are dot-only or dot and numbers
54 | // Multi error
55 | [InlineData(".", 1)]
56 | [InlineData("..", 1)]
57 | [InlineData("1.2.3", 1)]
58 | //[InlineData("_test", 1)]
59 |
60 | public void BadIndexNamesAreRejected(string indexName, int errorCount)
61 | {
62 | // Creates the index using IMemoryDb
63 | var exception = Assert.Throws(() =>
64 | {
65 | this._indexNameHelper.Convert(indexName);
66 | });
67 |
68 | this._output.WriteLine(
69 | $"The index name '{indexName}' had the following errors:\n{string.Join("\n", exception.Errors)}" +
70 | $"" +
71 | $"The expected number of errors was {errorCount}.");
72 |
73 | Assert.True(errorCount == exception.Errors.Count(), $"The number of errors expected is different from the number of errors found.");
74 | }
75 |
76 | [Fact]
77 | public void IndexNameCannotBeLongerThan255Bytes()
78 | {
79 | var indexName = new string('a', 256);
80 | var exception = Assert.Throws(() =>
81 | {
82 | this._indexNameHelper.Convert(indexName);
83 | });
84 |
85 | Assert.Equal(1, exception.Errors.Count());
86 | }
87 | }
88 |
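As a rough sketch of the rules these tests exercise (this is not the library's IndexNameHelper implementation, only an illustration of the constraints stated in the InlineData comments: letters, digits and hyphens only, no dot-only or dots-and-numbers names, a 255-byte limit, and uppercase names translated rather than rejected):

    // Illustrative only; the real rules live in FreeMindLabs.KernelMemory.Elasticsearch.IndexNameHelper.
    using System.Text;

    internal static class IndexNameRulesSketch
    {
        public static IReadOnlyList<string> Validate(string indexName)
        {
            var errors = new List<string>();
            var candidate = indexName.ToLowerInvariant();   // uppercase names are translated, not rejected

            if (Encoding.UTF8.GetByteCount(candidate) > 255)
            {
                errors.Add("An index name cannot be longer than 255 bytes.");
            }

            if (candidate.Length > 0 && candidate.All(c => c == '.' || char.IsDigit(c)) && candidate.Contains('.'))
            {
                errors.Add("Avoid names that are dot-only or dots and numbers.");
            }
            else if (!candidate.All(c => char.IsLetterOrDigit(c) || c == '-'))
            {
                errors.Add("An index name can only contain letters, digits, and hyphens (-).");
            }

            return errors;
        }
    }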
--------------------------------------------------------------------------------
/tests/UnitTests/KernelMemoryTests.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 | using Elastic.Clients.Elasticsearch;
3 | using FreeMindLabs.KernelMemory.Elasticsearch;
4 | using Microsoft.KernelMemory;
5 | using Xunit;
6 | using Xunit.Abstractions;
7 |
8 | namespace UnitTests;
9 | public class KernelMemoryTests : ElasticsearchTestBase
10 | {
11 | private const string NoAnswer = "INFO NOT FOUND";
12 |
13 | public KernelMemoryTests(ITestOutputHelper output, IKernelMemory kernelMemory, ElasticsearchClient client, IIndexNameHelper indexNameHelper)
14 | : base(output, client, indexNameHelper)
15 | {
16 | this.KernelMemory = kernelMemory ?? throw new ArgumentNullException(nameof(kernelMemory));
17 | }
18 |
19 | public IKernelMemory KernelMemory { get; }
20 |
21 | private const string NotFound = "INFO NOT FOUND";
22 |
23 | [Fact]
24 | [System.Diagnostics.CodeAnalysis.SuppressMessage("Reliability", "CA2007:Consider calling ConfigureAwait on the awaited task", Justification = "")]
25 | public async Task ItSupportsMultipleFiltersAsync()
26 | {
27 | // This is an adaptation of the same test in Elasticsearch.FunctionalTests
28 |
29 | string indexName = nameof(ItSupportsMultipleFiltersAsync);
30 | this.Output.WriteLine($"Index name: {indexName}");
31 |
32 | const string Id = "ItSupportsMultipleFilters-file1-NASA-news.pdf";
33 | const string Found = "spacecraft";
34 |
35 | this.Output.WriteLine("Uploading document");
36 | await this.KernelMemory.ImportDocumentAsync(
37 | new Document(Id)
38 | .AddFile("data/file5-NASA-news.pdf")
39 | .AddTag("type", "news")
40 | .AddTag("user", "admin")
41 | .AddTag("user", "owner"),
42 | index: indexName,
43 | steps: Constants.PipelineWithoutSummary);
44 |
45 | while (!await this.KernelMemory.IsDocumentReadyAsync(documentId: Id, index: indexName))
46 | {
47 | this.Output.WriteLine("Waiting for memory ingestion to complete...");
48 | await Task.Delay(TimeSpan.FromSeconds(2));
49 | }
50 |
51 | // Multiple filters: unknown users cannot see the memory
52 | var answer = await this.KernelMemory.AskAsync("What is Orion?", filters: new List<MemoryFilter>
53 | {
54 | MemoryFilters.ByTag("user", "someone1"),
55 | MemoryFilters.ByTag("user", "someone2"),
56 | }, index: indexName);
57 | this.Output.WriteLine(answer.Result);
58 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase);
59 |
60 | // Multiple filters: unknown users cannot see the memory even if the type is correct (testing AND logic)
61 | answer = await this.KernelMemory.AskAsync("What is Orion?", filters: new List<MemoryFilter>
62 | {
63 | MemoryFilters.ByTag("user", "someone1").ByTag("type", "news"),
64 | MemoryFilters.ByTag("user", "someone2").ByTag("type", "news"),
65 | }, index: indexName);
66 | this.Output.WriteLine(answer.Result);
67 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase);
68 |
69 | // Multiple filters: AND + OR logic works
70 | answer = await this.KernelMemory.AskAsync("What is Orion?", filters: new List<MemoryFilter>
71 | {
72 | MemoryFilters.ByTag("user", "someone1").ByTag("type", "news"),
73 | MemoryFilters.ByTag("user", "admin").ByTag("type", "fact"),
74 | }, index: indexName);
75 | this.Output.WriteLine(answer.Result);
76 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase);
77 |
78 | // Multiple filters: OR logic works
79 | answer = await this.KernelMemory.AskAsync("What is Orion?", filters: new List<MemoryFilter>
80 | {
81 | MemoryFilters.ByTag("user", "someone1"),
82 | MemoryFilters.ByTag("user", "admin"),
83 | }, index: indexName);
84 | this.Output.WriteLine(answer.Result);
85 | Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase);
86 |
87 | // Multiple filters: OR logic works
88 | answer = await this.KernelMemory.AskAsync("What is Orion?", filters: new List<MemoryFilter>
89 | {
90 | MemoryFilters.ByTag("user", "someone1").ByTag("type", "news"),
91 | MemoryFilters.ByTag("user", "admin").ByTag("type", "news"),
92 | }, index: indexName);
93 | this.Output.WriteLine(answer.Result);
94 | Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase);
95 |
96 | await this.KernelMemory.DeleteDocumentAsync(Id, index: indexName);
97 |
98 | this.Output.WriteLine("Deleting index");
99 | await this.KernelMemory.DeleteIndexAsync(indexName);
100 | }
101 |
102 | [Fact]
103 | public async Task ItSupportsTagsAsync()
104 | {
105 | // This is an adaptation of the same test in Elasticsearch.FunctionalTests
106 |
107 | // Arrange
108 | const string Id = "ItSupportTags-file1-NASA-news.pdf";
109 | await this.KernelMemory.ImportDocumentAsync(
110 | "data/file5-NASA-news.pdf",
111 | documentId: Id,
112 | tags: new TagCollection
113 | {
114 | { "type", "news" },
115 | { "type", "test" },
116 | { "ext", "pdf" }
117 | },
118 | steps: Constants.PipelineWithoutSummary).ConfigureAwait(false);
119 |
120 | while (!await this.KernelMemory.IsDocumentReadyAsync(documentId: Id).ConfigureAwait(false))
121 | {
122 | this.Output.WriteLine("Waiting for memory ingestion to complete...");
123 | await Task.Delay(TimeSpan.FromSeconds(2)).ConfigureAwait(false);
124 | }
125 |
126 | // Act
127 | var defaultRetries = 0;// withRetries ? 4 : 0;
128 |
129 | var retries = defaultRetries;
130 | var answer1 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "news")).ConfigureAwait(false);
131 | this.Output.WriteLine("answer1: " + answer1.Result);
132 | while (retries-- > 0 && !answer1.Result.Contains("spacecraft"))
133 | {
134 | await Task.Delay(TimeSpan.FromSeconds(1)).ConfigureAwait(false);
135 | answer1 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "news")).ConfigureAwait(false);
136 | this.Output.WriteLine("answer1: " + answer1.Result);
137 | }
138 |
139 | retries = defaultRetries;
140 | var answer2 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "test")).ConfigureAwait(false);
141 | this.Output.WriteLine("answer2: " + answer2.Result);
142 | while (retries-- > 0 && !answer2.Result.Contains("spacecraft"))
143 | {
144 | await Task.Delay(TimeSpan.FromSeconds(1)).ConfigureAwait(false);
145 | answer2 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "test")).ConfigureAwait(false);
146 | this.Output.WriteLine("answer2: " + answer2.Result);
147 | }
148 |
149 | retries = defaultRetries;
150 | var answer3 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("ext", "pdf")).ConfigureAwait(false);
151 | this.Output.WriteLine("answer3: " + answer3.Result);
152 | while (retries-- > 0 && !answer3.Result.Contains("spacecraft"))
153 | {
154 | await Task.Delay(TimeSpan.FromSeconds(1)).ConfigureAwait(false);
155 | answer3 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("ext", "pdf")).ConfigureAwait(false);
156 | this.Output.WriteLine("answer3: " + answer3.Result);
157 | }
158 |
159 | var answer4 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("foo", "bar")).ConfigureAwait(false);
160 | this.Output.WriteLine(answer4.Result);
161 |
162 | // Assert
163 | Assert.Contains("spacecraft", answer1.Result, StringComparison.OrdinalIgnoreCase);
164 | Assert.Contains("spacecraft", answer2.Result, StringComparison.OrdinalIgnoreCase);
165 | Assert.Contains("spacecraft", answer3.Result, StringComparison.OrdinalIgnoreCase);
166 | Assert.Contains("NOT FOUND", answer4.Result, StringComparison.OrdinalIgnoreCase);
167 | }
168 |
169 | [Fact]
170 | public async Task ItSupportsASingleFilterAsync()
171 | {
172 | // This is an adaptation of the same test in Elasticsearch.FunctionalTests
173 |
174 | string indexName = nameof(ItSupportsASingleFilterAsync);
175 | const string Id = "ItSupportsASingleFilter-file1-NASA-news.pdf";
176 | const string Found = "spacecraft";
177 |
178 | this.Output.WriteLine("Uploading document");
179 | await this.KernelMemory.ImportDocumentAsync(
180 | new Document(Id)
181 | .AddFile("data/file5-NASA-news.pdf")
182 | .AddTag("type", "news")
183 | .AddTag("user", "admin")
184 | .AddTag("user", "owner"),
185 | index: indexName,
186 | steps: Constants.PipelineWithoutSummary).ConfigureAwait(false);
187 |
188 | while (!await this.KernelMemory.IsDocumentReadyAsync(documentId: Id, index: indexName).ConfigureAwait(false))
189 | {
190 | this.Output.WriteLine("Waiting for memory ingestion to complete...");
191 | await Task.Delay(TimeSpan.FromSeconds(2)).ConfigureAwait(false);
192 | }
193 |
194 | //await Task.Delay(TimeSpan.FromSeconds(4)).ConfigureAwait(false);
195 |
196 | MemoryAnswer answer;
197 | // Simple filter: unknown user cannot see the memory
198 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("user", "someone"), index: indexName).ConfigureAwait(false);
199 | this.Output.WriteLine(answer.Result);
200 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase);
201 |
202 | // Simple filter: test AND logic: valid type + invalid user
203 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "news").ByTag("user", "someone"), index: indexName).ConfigureAwait(false);
204 | this.Output.WriteLine(answer.Result);
205 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase);
206 |
207 | // Simple filter: test AND logic: invalid type + valid user
208 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "fact").ByTag("user", "owner"), index: indexName).ConfigureAwait(false);
209 | this.Output.WriteLine(answer.Result);
210 | //Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase);
211 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase);
212 |
213 | // Simple filter: known user can see the memory
214 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("user", "admin"), index: indexName).ConfigureAwait(false);
215 | this.Output.WriteLine(answer.Result);
216 | Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase);
217 |
218 | // Simple filter: known user can see the memory
219 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("user", "owner"), index: indexName).ConfigureAwait(false);
220 | this.Output.WriteLine(answer.Result);
221 | Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase);
222 |
223 | // Simple filter: test AND logic with correct values
224 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "news").ByTag("user", "owner"), index: indexName).ConfigureAwait(false);
225 | this.Output.WriteLine(answer.Result);
226 | Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase);
227 |
228 | this.Output.WriteLine("Deleting memories extracted from the document");
229 | await this.KernelMemory.DeleteDocumentAsync(Id, index: indexName).ConfigureAwait(false);
230 |
231 | this.Output.WriteLine("Deleting index");
232 | await this.KernelMemory.DeleteIndexAsync(indexName).ConfigureAwait(false);
233 | }
234 |
235 | [Fact]
236 | public async Task CanImportOneDocumentAndAskAsync()
237 | {
238 | var indexName = nameof(CanImportOneDocumentAndAskAsync);
239 |
240 | // Imports a document into the index
241 | var id = await this.KernelMemory.ImportDocumentAsync(
242 | filePath: "Data/file1-Wikipedia-Carbon.txt",
243 | documentId: "doc001",
244 | tags: new TagCollection
245 | {
246 | { "indexedOn", DateTime.UtcNow.ToString("yyyy-MM-dd'T'HH:mm:ss.fffzzz") }
247 | },
248 | index: indexName)
249 | .ConfigureAwait(false);
250 |
251 | this.Output.WriteLine($"Indexed document with id '{id}'.");
252 |
253 | // Waits for the documents to be saved
254 | var actualIndexName = this.IndexNameHelper.Convert(indexName);
255 | //await this.Client.WaitForDocumentsAsync(actualIndexName, expectedDocuments: 2)
256 | // .ConfigureAwait(false);
257 |
258 | // Asks a question on the data we just inserted
259 | MemoryAnswer? answer = await this.TryToGetTopAnswerAsync(indexName, "What can carbon bond to?")
260 | .ConfigureAwait(false);
261 | this.PrintAnswerOfDocument(answer, "doc001");
262 | }
263 |
264 | [Fact]
265 | public async Task CanImportTwoDocumentsAndAskAsync()
266 | {
267 | var indexName = nameof(CanImportTwoDocumentsAndAskAsync);
268 |
269 | // Proceeds
270 | var docId = await this.KernelMemory.ImportDocumentAsync(
271 | "Data/file1-Wikipedia-Carbon.txt",
272 | index: indexName,
273 | documentId: "doc001").ConfigureAwait(false);
274 |
275 | this.Output.WriteLine($"Indexed {docId}");
276 |
277 | docId = await this.KernelMemory.ImportDocumentAsync(
278 | new Document("doc002")
279 | .AddFiles(new[] {
280 | "Data/file2-Wikipedia-Moon.txt",
281 | "Data/file3-lorem-ipsum.docx",
282 | "Data/file4-SK-Readme.pdf" })
283 | .AddTag("user", "Blake"),
284 | index: indexName)
285 | .ConfigureAwait(false);
286 |
287 | this.Output.WriteLine($"Indexed {docId}");
288 |
289 | docId = await this.KernelMemory.ImportDocumentAsync(new Document("doc003")
290 | .AddFile("Data/file5-NASA-news.pdf")
291 | .AddTag("user", "Taylor")
292 | .AddTag("collection", "meetings")
293 | .AddTag("collection", "NASA")
294 | .AddTag("collection", "space")
295 | .AddTag("type", "news"),
296 | index: indexName)
297 | .ConfigureAwait(false);
298 |
299 | this.Output.WriteLine($"Indexed {docId}");
300 |
301 | // Waits for the documents to be saved
302 | var actualIndexName = this.IndexNameHelper.Convert(indexName);
303 | //await this.Client.WaitForDocumentsAsync(actualIndexName, expectedDocuments: 10)
304 | // .ConfigureAwait(false);
305 |
306 | // This should return a citation to doc001
307 | var answer = await this.KernelMemory.AskAsync("What's E = m*c^2?", indexName)
308 | .ConfigureAwait(false);
309 |
310 | this.PrintAnswerOfDocument(answer, "doc001");
311 |
312 | // This should return a citation to doc002
313 | answer = await this.KernelMemory.AskAsync("What's Semantic Kernel?", indexName)
314 | .ConfigureAwait(false);
315 |
316 | this.PrintAnswerOfDocument(answer, "doc002");
317 | }
318 |
319 | private void PrintAnswerOfDocument(MemoryAnswer? answer, string expectedDocumentId)
320 | {
321 | ArgumentNullException.ThrowIfNull(answer);
322 |
323 | this.Output.WriteLine($"Question: {answer.Question}");
324 | this.Output.WriteLine($"Answer: {answer.Result}");
325 |
326 | var foundDocumentReference = false;
327 | foreach (var citation in answer.RelevantSources)
328 | {
329 | this.Output.WriteLine($" - {citation.SourceName} - {citation.Link} [{citation.Partitions.First().LastUpdate:D}]");
330 |
331 | if (citation.DocumentId == expectedDocumentId)
332 | {
333 | foundDocumentReference = true;
334 | }
335 | }
336 |
337 | if (!foundDocumentReference)
338 | {
339 | throw new InvalidOperationException($"It should have found a citation to document '{expectedDocumentId}'.");
340 | }
341 | }
342 |
343 | private async Task<MemoryAnswer?> TryToGetTopAnswerAsync(string indexName, string question)
344 | {
345 | MemoryAnswer? answer = null;
346 |
347 | // We need to wait a bit for the indexing to complete, so this is why we retry a few times with a delay.
348 | // TODO: add Polly.
349 | for (int i = 0; i < 3; i++)
350 | {
351 | answer = await this.KernelMemory.AskAsync(
352 | question: question,
353 | index: indexName,
354 | filter: null,
355 | filters: null,
356 | minRelevance: 0)
357 | .ConfigureAwait(false);
358 |
359 | if (answer.Result != NoAnswer)
360 | {
361 | break;
362 | }
363 |
364 | await Task.Delay(500)
365 | .ConfigureAwait(false);
366 | }
367 |
368 | return answer;
369 | }
370 | }
371 |
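The hand-rolled retry loop in TryToGetTopAnswerAsync carries a "TODO: add Polly" note. One possible shape for that, assuming the Polly package were added to the test project (it is not referenced today), with the same question/indexName locals and NoAnswer constant used above:

    // Hypothetical sketch only; Polly is not currently a dependency of UnitTests.
    using Polly;

    var retryOnNoAnswer = Policy
        .HandleResult<MemoryAnswer>(a => a.Result == NoAnswer)                   // retry while KM reports "INFO NOT FOUND"
        .WaitAndRetryAsync(retryCount: 3, _ => TimeSpan.FromMilliseconds(500));  // same delay/attempts as the loop above

    MemoryAnswer answer = await retryOnNoAnswer.ExecuteAsync(() =>
        this.KernelMemory.AskAsync(question: question, index: indexName, minRelevance: 0));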
--------------------------------------------------------------------------------
/tests/UnitTests/SearchTests.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 | using Elastic.Clients.Elasticsearch;
3 | using FreeMindLabs.KernelMemory.Elasticsearch;
4 | using Microsoft.KernelMemory;
5 | using Microsoft.KernelMemory.AI;
6 | using Microsoft.KernelMemory.MemoryStorage;
7 | using Xunit;
8 | using Xunit.Abstractions;
9 |
10 | namespace UnitTests;
11 |
12 | public class SearchTests : ElasticsearchTestBase
13 | {
14 | public SearchTests(ITestOutputHelper output, IMemoryDb memoryDb, ITextEmbeddingGenerator textEmbeddingGenerator, ElasticsearchClient client, IIndexNameHelper indexNameHelper)
15 | : base(output, client, indexNameHelper)
16 | {
17 | this.MemoryDb = memoryDb ?? throw new ArgumentNullException(nameof(memoryDb));
18 | this.TextEmbeddingGenerator = textEmbeddingGenerator ?? throw new ArgumentNullException(nameof(textEmbeddingGenerator));
19 | }
20 |
21 | public IMemoryDb MemoryDb { get; }
22 | public ITextEmbeddingGenerator TextEmbeddingGenerator { get; }
23 |
24 | [Fact]
25 | public async Task CanGetListWithTagsAsync()
26 | {
27 | const int ExpectedTotalParagraphs = 4;
28 |
29 | // We upsert the files
30 | var docIds = await DataStorageTests.UpsertTextFilesAsync(
31 | memoryDb: this.MemoryDb,
32 | textEmbeddingGenerator: this.TextEmbeddingGenerator,
33 | output: this.Output,
34 | indexName: nameof(CanGetListWithTagsAsync),
35 | fileNames: new[]
36 | {
37 | "Data/file1-Wikipedia-Carbon.txt",
38 | "Data/file2-Wikipedia-Moon.txt"
39 | })
40 | .ConfigureAwait(false);
41 |
42 | // docIds is a list of values like "d=3ed7b0787d484496ab25d50b2a887f8cf63193954fc844689116766434c11887//p=b84ee5e4841c4ab2877e30293752f7cc"
43 | Assert.Equal(expected: ExpectedTotalParagraphs, actual: docIds.Count());
44 | docIds = docIds.Select(x => x.Split("//")[0].Split("=")[1]).Distinct().ToList();
45 |
46 | this.Output.WriteLine($"Indexed returned the following ids:\n{string.Join("\n", docIds)}");
47 |
48 | var expectedDocs = docIds.Count();
49 |
50 | // Gets the documents matching the filter (text/plain files with the document ids we just upserted).
51 | var filter = new MemoryFilter();
52 | filter.Add("__file_type", "text/plain");
53 | filter.Add("__document_id", docIds.Select(x => (string?)x).ToList());
54 |
55 | var idx = 0;
56 | this.Output.WriteLine($"Filter: {filter.ToDebugString()}.\n");
57 |
58 | await foreach (var result in this.MemoryDb.GetListAsync(
59 | index: nameof(CanGetListWithTagsAsync),
60 | filters: new[] { filter },
61 | limit: 100,
62 | withEmbeddings: false))
63 | {
64 | var fileName = result.Payload["file"];
65 | this.Output.WriteLine($"Match #{idx++}: {fileName}");
66 | };
67 |
68 | Assert.Equal(expected: ExpectedTotalParagraphs, actual: idx);
69 | }
70 |
71 | [Fact]
72 | public async Task CanGetListWithEmptyFiltersAsync()
73 | {
74 | await foreach (var result in this.MemoryDb.GetListAsync(
75 | index: nameof(CanGetListWithTagsAsync),
76 | filters: new[] { new MemoryFilter() }, // <-- KM has a test to make sure this works.
77 | limit: 100,
78 | withEmbeddings: false))
79 | { };
80 |
81 | // If it gets here, the test passed.
82 | }
83 | }
84 |
85 |
--------------------------------------------------------------------------------
/tests/UnitTests/Startup.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using System.Reflection;
4 | using Microsoft.Extensions.Configuration;
5 | using Microsoft.Extensions.DependencyInjection;
6 | using Microsoft.KernelMemory;
7 | using Microsoft.KernelMemory.ContentStorage.DevTools;
8 | using Microsoft.KernelMemory.FileSystem.DevTools;
9 |
10 | namespace UnitTests;
11 |
12 | /// <summary>
13 | /// Sets up dependency injection for unit tests.
14 | /// </summary>
15 | public class Startup
16 | {
17 | private readonly IConfiguration _configuration;
18 |
19 | public Startup()
20 | {
21 | // We read from the local appSettings.json and the same user secrets
22 | // as the Microsoft Semantic Kernel team.
23 | this._configuration = new ConfigurationBuilder()
24 | .AddJsonFile("appsettings.json", optional: true, reloadOnChange: true)
25 | .AddUserSecrets(Assembly.GetExecutingAssembly()) // Same secrets as SK and KM :smile:
26 | .Build();
27 | }
28 |
29 | public void ConfigureServices(IServiceCollection services)
30 | {
31 | // We use the same OpenAI API key as in Kernel Memory.
32 | const string OpenAIKeyPath = "KernelMemory:Services:OpenAI:APIKey";
33 |
34 | // TODO: Only OpenAI is supported for now. Make this more flexible.
35 | var openApiKey = this._configuration[OpenAIKeyPath] ?? throw new ArgumentException($"OpenAI API key is required. [path: {OpenAIKeyPath}]");
36 |
37 | // Kernel Memory with Elasticsearch
38 | IKernelMemoryBuilder kmBldr = new KernelMemoryBuilder(services)
39 | .WithSimpleFileStorage(new SimpleFileStorageConfig()
40 | {
41 | Directory = "ContentStorage",
42 | StorageType = FileSystemTypes.Volatile
43 | })
44 | .WithElasticsearch(esBldr =>
45 | {
46 | esBldr.WithConfiguration(this._configuration);
47 |
48 | // Alternatively we can use the other builder methods:
49 | //esBldr.WithEndpoint(ElasticsearchConfigBuilder.DefaultEndpoint)
50 | // .WithShardsAndReplicas(1, 0)
51 | // .WithIndexPrefix(ElasticsearchConfigBuilder.DefaultIndexPrefix)
52 | // .WithCertificateFingerPrint("...")
53 | // .WithUserNameAndPassword(ElasticsearchConfigBuilder.DefaultUserName, "...")
54 | // .WithIndexPrefix("km-");
55 |
56 | })
57 | .WithOpenAIDefaults(apiKey: openApiKey);
58 |
59 | var kernelMemory = kmBldr.Build();
60 |
61 | services.AddSingleton(kernelMemory);
62 | }
63 | }
64 |
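The commented-out builder calls above show a configuration-free alternative to WithConfiguration. Spelled out as a sketch (endpoint, fingerprint and password values are placeholders, not real settings):

    // Placeholder values; the tests actually read these from appSettings.json plus user secrets.
    IKernelMemoryBuilder kmBldr = new KernelMemoryBuilder(services)
        .WithElasticsearch(esBldr =>
        {
            esBldr.WithEndpoint("https://localhost:9200")
                  .WithCertificateFingerPrint("<fingerprint>")
                  .WithUserNameAndPassword("elastic", "<password>")
                  .WithIndexPrefix("km-")
                  .WithShardsAndReplicas(1, 0);
        })
        .WithOpenAIDefaults(apiKey: openApiKey);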
--------------------------------------------------------------------------------
/tests/UnitTests/TestsHelper.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved.
2 |
3 | using System.Reflection;
4 | using Elastic.Clients.Elasticsearch;
5 | using FreeMindLabs.KernelMemory.Elasticsearch;
6 |
7 | namespace UnitTests;
8 |
9 | /// <summary>
10 | /// Extension methods for tests on Elasticsearch.
11 | /// </summary>
12 | internal static class TestsHelper
13 | {
14 | /// <summary>
15 | /// Deletes all indices that are created by all test methods of the given class.
16 | /// Indices must have the same name as a test method to be automatically deleted.
17 | /// </summary>
18 | public static async Task<IEnumerable<string>> DeleteIndicesOfTestAsync(this ElasticsearchClient client, Type unitTestType, IIndexNameHelper indexNameHelper)
19 | {
20 | ArgumentNullException.ThrowIfNull(client);
21 | ArgumentNullException.ThrowIfNull(unitTestType);
22 | ArgumentNullException.ThrowIfNull(indexNameHelper);
23 |
24 | // Iterates through all the test method names of the class and deletes the index with the same name.
25 | var methods = unitTestType.GetMethods(BindingFlags.Public | BindingFlags.Instance)
26 | .Where(m =>
27 | (m.GetCustomAttribute<Xunit.FactAttribute>() != null)
28 | ||
29 | (m.GetCustomAttribute<Xunit.TheoryAttribute>() != null)
30 | )
31 | .ToArray();
32 | if (methods.Length == 0)
33 | {
34 | throw new ArgumentException($"No public test methods found in class '{unitTestType.Name}'.");
35 | }
36 |
37 | var result = new List<string>();
38 | foreach (var method in methods)
39 | {
40 | var indexName = indexNameHelper.Convert(method.Name);
41 | var delResp = await client.Indices.DeleteAsync(indices: indexName)
42 | .ConfigureAwait(false);
43 |
44 | if (delResp.IsSuccess())
45 | {
46 | result.Add(indexName);
47 | }
48 | }
49 |
50 | return result;
51 | }
52 |
53 | /////
54 | ///// Queries the given index for documents until the expected number of documents is found
55 | ///// or the max number of retries is reached.
56 | ///// It throws an exception if the expected number of documents is not found.
57 | /////
58 | //public static async Task WaitForDocumentsAsync(this ElasticsearchClient client, string realIndexName, int expectedDocuments, int maxRetries = 3, int msDelay = 500)
59 | //{
60 | // ArgumentNullException.ThrowIfNull(client);
61 | // ArgumentNullException.ThrowIfNull(realIndexName);
62 |
63 | // return;
64 |
65 | // var foundCount = 0;
66 | // for (int i = 0; i < maxRetries; i++)
67 | // {
68 | // // We search for all documents
69 | // var results = await client
70 | // .SearchAsync(sr =>
71 | // {
72 | // sr.Index(realIndexName)
73 | // .Query(q => q.MatchAll());
74 | // })
75 | // .ConfigureAwait(false);
76 |
77 | // foundCount = results?.HitsMetadata?.Hits?.Count ?? 0;
78 |
79 | // // If we found all documents, we can return
80 | // if ((expectedDocuments == 0) && (foundCount == 0))
81 | // {
82 | // return;
83 | // }
84 | // else if (foundCount >= expectedDocuments)
85 | // {
86 | // return;
87 | // }
88 |
89 | // await Task.Delay(msDelay).ConfigureAwait(false);
90 | // }
91 |
92 | // throw new InvalidOperationException($"It should have inserted {expectedDocuments} documents but only {foundCount}...");
93 | //}
94 | }
95 |
--------------------------------------------------------------------------------
/tests/UnitTests/UnitTests.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | true
5 | enable
6 | true
7 | CS1591,CA1861,CA1305,CA1307
8 | 5ee045b0-aea3-4f08-8d31-32d1a6f8fed0
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | all
18 | runtime; build; native; contentfiles; analyzers; buildtransitive
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 | all
28 | runtime; build; native; contentfiles; analyzers; buildtransitive
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 | Always
41 |
42 |
43 | PreserveNewest
44 |
45 |
46 | PreserveNewest
47 |
48 |
49 | PreserveNewest
50 |
51 |
52 | PreserveNewest
53 |
54 |
55 | PreserveNewest
56 |
57 |
58 | PreserveNewest
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/tests/UnitTests/appSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "KernelMemory": {
3 | "Services": {
4 | "OpenAI": {
5 | "TextModel": "gpt-3.5-turbo",
6 | "EmbeddingModel": "text-embedding-ada-002",
7 | "APIKey": "...SECRETS...",
8 | "OrgId": "",
9 | "MaxRetries": 10
10 | },
11 | "Elasticsearch": {
12 | "Endpoint": "https://localhost:9200",
13 | "CertificateFingerPrint": "...SECRETS...",
14 | "Username": "elastic",
15 | "Password": "...SECRETS...",
16 | "IndexPrefix": "kmtests-"
17 | }
18 | }
19 | },
20 | "Logging": {
21 | "LogLevel": {
22 | "Default": "Trace"
23 | }
24 | },
25 | "AllowedHosts": "*"
26 | }
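In Startup.cs the Elasticsearch block above is consumed through esBldr.WithConfiguration(...). Reading the same values by hand would look roughly like this (the section path is assumed from the JSON layout shown here; real secrets come from user secrets, not from this file):

    // Sketch only.
    var esSection = configuration.GetSection("KernelMemory:Services:Elasticsearch");
    var endpoint    = esSection["Endpoint"];      // "https://localhost:9200"
    var userName    = esSection["Username"];      // "elastic"
    var indexPrefix = esSection["IndexPrefix"];   // "kmtests-"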
--------------------------------------------------------------------------------