├── .editorconfig ├── .gitattributes ├── .github └── dependabot.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── COMMUNITY.md ├── CONFIGURATION.md ├── CONTRIBUTING.md ├── DIARY.md ├── Directory.Build.props ├── Directory.Packages.props ├── DockerElasticsearchAndKibana.md ├── KernelMemoryElasticsearch.sln ├── KernelMemoryElasticsearch.sln.DotSettings ├── LICENSE ├── NUGET.md ├── README.md ├── SECURITY.md ├── TODO.md ├── code-analysis.props ├── content └── images │ ├── Connectors.jpg │ ├── CreateIndices.png │ ├── DataPage1.jpg │ ├── DataPage2.jpg │ ├── DataPageAllRows.jpg │ ├── ESLogo.jpg │ ├── FML-Logo-Round.gif │ ├── FML-Logo-Round.png │ ├── FML-Logo-Square.gif │ ├── FML-Logo-Square.png │ ├── FMLLogo.png │ ├── Free Mind Labs logo.png │ ├── Free Mind Labs.gif │ ├── KnnQuery.jpg │ ├── Mappings.jpg │ ├── Pipelines.jpg │ ├── RAG.jpg │ ├── Solution.png │ └── TestRunning.png ├── docker ├── .env.example ├── .gitattributes ├── LICENSE ├── README.md ├── app │ ├── dockerfile │ ├── main.py │ └── requirements.txt ├── docker-compose.yml ├── filebeat.yml ├── images │ ├── CACRT.png │ ├── ComposeRunning.png │ ├── DevConsole.png │ ├── DockerDesktop.png │ ├── DockerSolutionFolder.png │ ├── ELKStack.png │ ├── ESServer.png │ ├── ElasticAgentMetrics.png │ ├── EnvSample.png │ ├── FinalAgentConfiguration.png │ ├── Fleet.png │ ├── GoodAgent.png │ ├── InvalidAgent1.png │ ├── InvalidAgent2.png │ ├── InvalidAgent3.png │ ├── SaveAndDeploy.png │ └── WelcomePageKibana.png ├── kibana.yml ├── logstash.conf ├── logstash_ingest_data │ └── Air_Quality.csv └── metricbeat.yml ├── icon.png ├── nuget-package.props ├── nuget.config ├── packages └── README.md ├── src └── ElasticsearchMemoryStorage │ ├── ConfigurationException.cs │ ├── ElasticsearchConfig.cs │ ├── ElasticsearchConfigBuilder.cs │ ├── ElasticsearchConfigExtensions.cs │ ├── ElasticsearchMemory.cs │ ├── ElasticsearchMemoryFilter.cs │ ├── ElasticsearchMemoryRecord.cs │ ├── ElasticsearchMemoryStorage.csproj │ ├── ElasticsearchTag.cs │ ├── Extensions 
│ ├── KernelMemoryBuilderExtensions.cs │ ├── MemoryFilterExtensions.cs │ └── ServiceCollectionExtensions.cs │ ├── IIndexNameHelper.cs │ └── IndexNameHelper.cs └── tests └── UnitTests ├── Data ├── file1-Wikipedia-Carbon.txt ├── file2-Wikipedia-Moon.txt ├── file3-lorem-ipsum.docx ├── file4-SK-Readme.pdf ├── file5-NASA-news.pdf └── file6-ANWC-image.jpg ├── DataStorageTests.cs ├── ElasticsearchTestBase.cs ├── IndexManagementTests.cs ├── IndexnameTests.cs ├── KernelMemoryTests.cs ├── SearchTests.cs ├── Startup.cs ├── TestsHelper.cs ├── UnitTests.csproj └── appSettings.json /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto-detect text files, ensure they use LF. 2 | * text=auto eol=lf working-tree-encoding=UTF-8 3 | 4 | # Bash scripts 5 | *.sh text eol=lf 6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | day: "sunday" 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dotnet/.config 2 | tmp/ 3 | tmp-*/ 4 | 5 | ## Ignore Visual Studio temporary files, build results, and 6 | ## files generated by popular Visual Studio add-ons. 
7 | ## 8 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 9 | 10 | # User-specific files 11 | *.rsuser 12 | *.suo 13 | *.user 14 | *.userosscache 15 | *.sln.docstates 16 | 17 | # User-specific files (MonoDevelop/Xamarin Studio) 18 | *.userprefs 19 | 20 | # Mono auto generated files 21 | mono_crash.* 22 | 23 | # Build results 24 | [Dd]ebug/ 25 | [Dd]ebugPublic/ 26 | [Rr]elease/ 27 | [Rr]eleases/ 28 | x64/ 29 | x86/ 30 | [Ww][Ii][Nn]32/ 31 | [Aa][Rr][Mm]/ 32 | [Aa][Rr][Mm]64/ 33 | bld/ 34 | [Bb]in/ 35 | [Oo]bj/ 36 | [Ll]og/ 37 | [Ll]ogs/ 38 | 39 | # Visual Studio 2015/2017 cache/options directory 40 | .vs/ 41 | # Uncomment if you have tasks that create the project's static files in wwwroot 42 | #wwwroot/ 43 | 44 | # Visual Studio 2017 auto generated files 45 | Generated\ Files/ 46 | 47 | # MSTest test Results 48 | [Tt]est[Rr]esult*/ 49 | [Bb]uild[Ll]og.* 50 | 51 | # NUnit 52 | *.VisualState.xml 53 | TestResult.xml 54 | nunit-*.xml 55 | 56 | # Build Results of an ATL Project 57 | [Dd]ebugPS/ 58 | [Rr]eleasePS/ 59 | dlldata.c 60 | 61 | # Benchmark Results 62 | BenchmarkDotNet.Artifacts/ 63 | 64 | # .NET Core 65 | project.lock.json 66 | project.fragment.lock.json 67 | artifacts/ 68 | 69 | # ASP.NET Scaffolding 70 | ScaffoldingReadMe.txt 71 | 72 | # StyleCop 73 | StyleCopReport.xml 74 | 75 | # Files built by Visual Studio 76 | *_i.c 77 | *_p.c 78 | *_h.h 79 | *.ilk 80 | *.meta 81 | *.obj 82 | *.iobj 83 | *.pch 84 | *.pdb 85 | *.ipdb 86 | *.pgc 87 | *.pgd 88 | *.rsp 89 | *.sbr 90 | *.tlb 91 | *.tli 92 | *.tlh 93 | *.tmp 94 | *.tmp_proj 95 | *_wpftmp.csproj 96 | *.log 97 | *.tlog 98 | *.vspscc 99 | *.vssscc 100 | .builds 101 | *.pidb 102 | *.svclog 103 | *.scc 104 | 105 | # Chutzpah Test files 106 | _Chutzpah* 107 | 108 | # Visual C++ cache files 109 | ipch/ 110 | *.aps 111 | *.ncb 112 | *.opendb 113 | *.opensdf 114 | *.sdf 115 | *.cachefile 116 | *.VC.db 117 | *.VC.VC.opendb 118 | 119 | # Visual Studio profiler 120 | *.psess 121 | 
*.vsp 122 | *.vspx 123 | *.sap 124 | 125 | # Visual Studio Trace Files 126 | *.e2e 127 | 128 | # TFS 2012 Local Workspace 129 | $tf/ 130 | 131 | # Guidance Automation Toolkit 132 | *.gpState 133 | 134 | # ReSharper is a .NET coding add-in 135 | _ReSharper*/ 136 | *.[Rr]e[Ss]harper 137 | *.DotSettings.user 138 | 139 | # TeamCity is a build add-in 140 | _TeamCity* 141 | 142 | # DotCover is a Code Coverage Tool 143 | *.dotCover 144 | 145 | # AxoCover is a Code Coverage Tool 146 | .axoCover/* 147 | !.axoCover/settings.json 148 | 149 | # Coverlet is a free, cross platform Code Coverage Tool 150 | coverage*.json 151 | coverage*.xml 152 | coverage*.info 153 | 154 | # Visual Studio code coverage results 155 | *.coverage 156 | *.coveragexml 157 | 158 | # NCrunch 159 | _NCrunch_* 160 | .*crunch*.local.xml 161 | nCrunchTemp_* 162 | 163 | # MightyMoose 164 | *.mm.* 165 | AutoTest.Net/ 166 | 167 | # Web workbench (sass) 168 | .sass-cache/ 169 | 170 | # Installshield output folder 171 | [Ee]xpress/ 172 | 173 | # DocProject is a documentation generator add-in 174 | DocProject/buildhelp/ 175 | DocProject/Help/*.HxT 176 | DocProject/Help/*.HxC 177 | DocProject/Help/*.hhc 178 | DocProject/Help/*.hhk 179 | DocProject/Help/*.hhp 180 | DocProject/Help/Html2 181 | DocProject/Help/html 182 | 183 | # Click-Once directory 184 | publish/ 185 | 186 | # Publish Web Output 187 | *.[Pp]ublish.xml 188 | *.azurePubxml 189 | # Note: Comment the next line if you want to checkin your web deploy settings, 190 | # but database connection strings (with potential passwords) will be unencrypted 191 | *.pubxml 192 | *.publishproj 193 | 194 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 195 | # checkin your Azure Web App publish settings, but sensitive information contained 196 | # in these scripts will be unencrypted 197 | PublishScripts/ 198 | 199 | # NuGet Packages 200 | *.nupkg 201 | # NuGet Symbol Packages 202 | *.snupkg 203 | # The packages folder can be ignored because of Package Restore 204 | **/[Pp]ackages/* 205 | # except build/, which is used as an MSBuild target. 206 | !**/[Pp]ackages/build/ 207 | # Uncomment if necessary however generally it will be regenerated when needed 208 | #!**/[Pp]ackages/repositories.config 209 | # NuGet v3's project.json files produces more ignorable files 210 | *.nuget.props 211 | *.nuget.targets 212 | 213 | # Microsoft Azure Build Output 214 | csx/ 215 | *.build.csdef 216 | 217 | # Microsoft Azure Emulator 218 | ecf/ 219 | rcf/ 220 | 221 | # Windows Store app package directories and files 222 | AppPackages/ 223 | BundleArtifacts/ 224 | Package.StoreAssociation.xml 225 | _pkginfo.txt 226 | *.appx 227 | *.appxbundle 228 | *.appxupload 229 | 230 | # Visual Studio cache files 231 | # files ending in .cache can be ignored 232 | *.[Cc]ache 233 | # but keep track of directories ending in .cache 234 | !?*.[Cc]ache/ 235 | 236 | # Others 237 | ClientBin/ 238 | ~$* 239 | *~ 240 | *.dbmdl 241 | *.dbproj.schemaview 242 | *.jfm 243 | *.pfx 244 | *.publishsettings 245 | orleans.codegen.cs 246 | 247 | # Including strong name files can present a security risk 248 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 249 | #*.snk 250 | 251 | # Since there are multiple workflows, uncomment next line to ignore bower_components 252 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 253 | #bower_components/ 254 | 255 | # RIA/Silverlight projects 256 | Generated_Code/ 257 | 258 | # Backup & report files from converting an old project file 259 | # to a newer Visual Studio version. 
Backup files are not needed, 260 | # because we have git ;-) 261 | _UpgradeReport_Files/ 262 | Backup*/ 263 | UpgradeLog*.XML 264 | UpgradeLog*.htm 265 | ServiceFabricBackup/ 266 | *.rptproj.bak 267 | 268 | # SQL Server files 269 | *.mdf 270 | *.ldf 271 | *.ndf 272 | 273 | # Business Intelligence projects 274 | *.rdl.data 275 | *.bim.layout 276 | *.bim_*.settings 277 | *.rptproj.rsuser 278 | *- [Bb]ackup.rdl 279 | *- [Bb]ackup ([0-9]).rdl 280 | *- [Bb]ackup ([0-9][0-9]).rdl 281 | 282 | # Microsoft Fakes 283 | FakesAssemblies/ 284 | 285 | # GhostDoc plugin setting file 286 | *.GhostDoc.xml 287 | 288 | # Node.js Tools for Visual Studio 289 | .ntvs_analysis.dat 290 | node_modules/ 291 | 292 | # Visual Studio 6 build log 293 | *.plg 294 | 295 | # Visual Studio 6 workspace options file 296 | *.opt 297 | 298 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 299 | *.vbw 300 | 301 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
302 | *.vbp 303 | 304 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 305 | *.dsw 306 | *.dsp 307 | 308 | # Visual Studio 6 technical files 309 | *.ncb 310 | *.aps 311 | 312 | # Visual Studio LightSwitch build output 313 | **/*.HTMLClient/GeneratedArtifacts 314 | **/*.DesktopClient/GeneratedArtifacts 315 | **/*.DesktopClient/ModelManifest.xml 316 | **/*.Server/GeneratedArtifacts 317 | **/*.Server/ModelManifest.xml 318 | _Pvt_Extensions 319 | 320 | # Paket dependency manager 321 | .paket/paket.exe 322 | paket-files/ 323 | 324 | # FAKE - F# Make 325 | .fake/ 326 | 327 | # CodeRush personal settings 328 | .cr/personal 329 | 330 | # Python Tools for Visual Studio (PTVS) 331 | __pycache__/ 332 | *.pyc 333 | 334 | # Cake - Uncomment if you are using it 335 | # tools/** 336 | # !tools/packages.config 337 | 338 | # Tabs Studio 339 | *.tss 340 | 341 | # Telerik's JustMock configuration file 342 | *.jmconfig 343 | 344 | # BizTalk build output 345 | *.btp.cs 346 | *.btm.cs 347 | *.odx.cs 348 | *.xsd.cs 349 | 350 | # OpenCover UI analysis results 351 | OpenCover/ 352 | 353 | # Azure Stream Analytics local run output 354 | ASALocalRun/ 355 | 356 | # MSBuild Binary and Structured Log 357 | *.binlog 358 | 359 | # NVidia Nsight GPU debugger configuration file 360 | *.nvuser 361 | 362 | # MFractors (Xamarin productivity tool) working folder 363 | .mfractor/ 364 | 365 | # Local History for Visual Studio 366 | .localhistory/ 367 | 368 | # Visual Studio History (VSHistory) files 369 | .vshistory/ 370 | 371 | # BeatPulse healthcheck temp database 372 | healthchecksdb 373 | 374 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 375 | MigrationBackup/ 376 | 377 | # Ionide (cross platform F# VS Code tools) working folder 378 | .ionide/ 379 | 380 | # Fody - auto-generated XML schema 381 | FodyWeavers.xsd 382 | 383 | # VS Code files for those working on multiple tools 384 | .vscode/* 385 | 
!.vscode/settings.json 386 | !.vscode/tasks.json 387 | !.vscode/launch.json 388 | !.vscode/extensions.json 389 | *.code-workspace 390 | 391 | # Local History for Visual Studio Code 392 | .history/ 393 | 394 | # Windows Installer files from build outputs 395 | *.cab 396 | *.msi 397 | *.msix 398 | *.msm 399 | *.msp 400 | 401 | # JetBrains Rider 402 | *.sln.iml 403 | *.tmp 404 | *.log 405 | *.bck 406 | *.tgz 407 | *.tar 408 | *.zip 409 | *.cer 410 | *.crt 411 | *.key 412 | *.pem 413 | 414 | .env 415 | certs/ 416 | launchSettings.json 417 | config.development.yaml 418 | *.development.config 419 | *.development.json 420 | .DS_Store 421 | .idea/ 422 | node_modules/ 423 | obj/ 424 | bin/ 425 | _dev/ 426 | .dev/ 427 | *.devis.* 428 | *.devis 429 | .vs/ 430 | *.user 431 | **/.vscode/chrome 432 | **/.vscode/.ropeproject/objectdb 433 | *.pyc 434 | .ipynb_checkpoints 435 | .jython_cache/ 436 | __pycache__/ 437 | .mypy_cache/ 438 | __pypackages__/ 439 | .pdm.toml 440 | global.json 441 | 442 | # docfx 443 | **/DROP/ 444 | **/TEMP/ 445 | **/packages/ 446 | **/bin/ 447 | **/obj/ 448 | _site 449 | 450 | # Yarn 451 | .yarn 452 | .yarnrc.yml 453 | 454 | # Python Environments 455 | .env 456 | .venv 457 | .myenv 458 | env/ 459 | venv/ 460 | myvenv/ 461 | ENV/ 462 | 463 | # Python dist 464 | dist/ 465 | 466 | # Persistent storage 467 | data/qdrant 468 | data/chatstore* 469 | 470 | # Java build 471 | java/**/target 472 | java/.mvn/wrapper/maven-wrapper.jar 473 | 474 | # Java settings 475 | conf.properties 476 | 477 | # Playwright 478 | playwright-report/ 479 | 480 | # Static Web App deployment config 481 | swa-cli.config.json 482 | **/copilot-chat-app/webapp/build 483 | **/copilot-chat-app/webapp/node_modules 484 | /content/.$ArticleDiagrams.drawio.bkp 485 | /content/.$ArticleDiagrams.drawio.dtmp 486 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | #
Code of Conduct 2 | 3 | This project has adopted the code of conduct defined by the Contributor Covenant to clarify expected behavior in our community. For more information, see the [.NET Foundation Code of Conduct](https://dotnetfoundation.org/about/policies/code-of-conduct). 4 | -------------------------------------------------------------------------------- /COMMUNITY.md: -------------------------------------------------------------------------------- 1 | # Welcome to the Semantic Kernel / Kernel Memory Community! 2 | 3 | Below are some ways that you can get involved. 4 | 5 | ## Engage on Github 6 | 7 | File issues, submit PRs, and provide feedback and ideas to what you'd like to 8 | see from the Kernel Memory. We do our best to respond to each submission. 9 | 10 | ## Public Semantic Kernel Community Office Hours 11 | 12 | We regularly have Community Office Hours that are open to the **public** to join. 13 | 14 | Add Semantic Kernel events to your calendar: download the 15 | [calendar.ics](https://aka.ms/sk-community-calendar) file. 16 | 17 | To keep topics organized, please submit what you'd like us to cover here: 18 | [https://forms.office.com/r/BbXFzmmFys](https://forms.office.com/r/BbXFzmmFys) 19 | 20 | If you are unable to make it live, all meetings will be recorded and posted online. 21 | 22 | ## Join the conversation on Discord 23 | 24 | We have a growing and active channel on Discord where you can get help, engage 25 | in lively discussion, and share what you've built with Kernel Memory and 26 | Semantic Kernel! 27 | 28 | Join our Discord: 29 | [https://aka.ms/SKDiscord](https://aka.ms/SKDiscord) 30 | -------------------------------------------------------------------------------- /CONFIGURATION.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | The xUnit project UnitTests contains an [appSettings.json](tests/UnitTests/appSettings.json) file that lists all available options. 
The file reads as follows: 4 | 5 | ``` 6 | { 7 | "Elasticsearch": { 8 | "CertificateFingerPrint": "...SECRETS...", 9 | "Endpoint": "http://localhost:9200", 10 | "UserName": "...SECRETS...", 11 | "Password": "...SECRETS..." 12 | } 13 | } 14 | ``` 15 | This file is supposed to show the available options but it is not meant to store sensitive information. 16 | Modify it as necessary (e.g. by changing the Endpoint), but add the values for the certificate fingerprint and the password in user secrets. 17 | 18 | >*The class used to store configuration is [ElasticsearchConfig](/src/ElasticsearchMemoryStorage/ElasticsearchConfig.cs).* 19 | 20 | ## User Secrets 21 | 22 | First, notice how the UserSecretsId of the test project is set to the same value as Semantic Kernel and Kernel Memory: 23 | ``` 24 | 5ee045b0-aea3-4f08-8d31-32d1a6f8fed0 25 | ``` 26 | By virtue of doing this we can use the **same secrets file for all the projects** in SK, KM and these projects. 27 | 28 | ### How to add user secrets 29 | 30 | To add secrets either: 31 | 1. Open the secrets file in your IDE by right-clicking on the project name and selecting Manage User Secrets. 32 | - To read more about user secrets click [here](https://learn.microsoft.com/en-us/aspnet/core/security/app-secrets?view=aspnetcore-8.0&tabs=windows) 33 | 34 | 1. Add the secrets from the command line by running the following commands: 35 | ``` 36 | > dotnet user-secrets set "Elasticsearch:CertificateFingerPrint" "...your value..." 37 | > dotnet user-secrets set "Elasticsearch:UserName" "...your value..." 38 | > dotnet user-secrets set "Elasticsearch:Password" "...your value..." 39 | ``` 40 | 41 | This ultimately results in the following secrets.json additions: 42 | ``` 43 | { 44 | [..]
45 | "Elasticsearch:CertificateFingerPrint": "...your value...", 46 | "Elasticsearch:UserName": "...your value...", 47 | "Elasticsearch:Password": "...your value...", 48 | } 49 | ``` -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to this project 2 | 3 | You can contribute to this project with issues and pull requests (PRs). 4 | Simply filing issues for problems you encounter is a great way to contribute. 5 | Contributing code is greatly appreciated. 6 | 7 | ## Reporting Issues 8 | 9 | We always welcome bug reports, API proposals and overall feedback. Here are a 10 | few tips on how you can make reporting your issue as effective as possible. 11 | 12 | ### Where to Report 13 | 14 | New issues can be reported in our 15 | [issues page](https://github.com/freemindlabsinc/FreeMindLabs.SemanticKernel/issues). 16 | 17 | Before filing a new issue, please search the list of issues to make sure it does 18 | not already exist. 19 | 20 | If you do find an existing issue for what you wanted to report, please include 21 | your own feedback in the discussion. Do consider upvoting (👍 reaction) the 22 | original report, as this helps us prioritize popular issues in our backlog. 23 | 24 | ### Writing a Good Bug Report 25 | 26 | Good bug reports make it easier for maintainers to verify and root cause the 27 | underlying problem. 28 | The better a bug report, the faster the problem will be resolved. Ideally, a bug 29 | report should contain the following information: 30 | 31 | - A high-level description of the problem. 32 | - A _minimal reproduction_, i.e. the smallest size of code/configuration 33 | required to reproduce the wrong behavior. 34 | - A description of the _expected behavior_, contrasted with the 35 | _actual behavior_ observed. 36 | - Information on the environment: OS/distribution, CPU architecture, SDK version, 37 | etc. 
38 | - Additional information, e.g. Is it a regression from previous versions? Are 39 | there any known workarounds? 40 | 41 | ## Contributing Changes 42 | 43 | Project maintainers will merge accepted code changes from contributors. 44 | 45 | ### DOs and DON'Ts 46 | 47 | DO's: 48 | 49 | - **DO** follow the standard coding conventions 50 | 51 | - [.NET](https://learn.microsoft.com/dotnet/csharp/fundamentals/coding-style/coding-conventions) 52 | - [Python](https://pypi.org/project/black/) 53 | - [Typescript](https://typescript-eslint.io/rules/)/[React](https://github.com/jsx-eslint/eslint-plugin-react/tree/master/docs/rules) 54 | 55 | - **DO** give priority to the current style of the project or file you're changing 56 | if it diverges from the general guidelines. 57 | - **DO** include tests when adding new features. When fixing bugs, start with 58 | adding a test that highlights how the current behavior is broken. 59 | - **DO** keep the discussions focused. When a new or related topic comes up 60 | it's often better to create new issue than to side track the discussion. 61 | - **DO** clearly state on an issue that you are going to take on implementing it. 62 | - **DO** blog and tweet (or whatever) about your contributions, frequently! 63 | 64 | DON'Ts: 65 | 66 | - **DON'T** surprise us with big pull requests. Instead, file an issue and start 67 | a discussion, so we can agree on a direction before you invest a large amount of time. 68 | - **DON'T** commit code that you didn't write. If you find code that you think is a good 69 | fit to add to Kernel Memory, file an issue and start a discussion before proceeding. 70 | - **DON'T** submit PRs that alter licensing related files or headers. If you believe 71 | there's a problem with them, file an issue, and we'll be happy to discuss it. 72 | - **DON'T** make new APIs without filing an issue and discussing with us first. 
73 | 74 | ### Breaking Changes 75 | 76 | Contributions must maintain API signature and behavioral compatibility. Contributions 77 | that include breaking changes will be rejected. Please file an issue to discuss 78 | your idea or change if you believe that a breaking change is warranted. 79 | 80 | ### Suggested Workflow 81 | 82 | We use and recommend the following workflow: 83 | 84 | 1. Create an issue for your work. 85 | - You can skip this step for trivial changes. 86 | - Reuse an existing issue on the topic, if there is one. 87 | - Get agreement from the team and the community that your proposed change is 88 | a good one. 89 | - Clearly state that you are going to take on implementing it, if that's the case. 90 | You can request that the issue be assigned to you. Note: The issue filer and 91 | the implementer don't have to be the same person. 92 | 2. Create a personal fork of the repository on GitHub (if you don't already have one). 93 | 3. In your fork, create a branch off of main (`git checkout -b mybranch`). 94 | - Name the branch so that it clearly communicates your intentions, such as 95 | "issue-123" or "githubhandle-issue". 96 | 4. Make and commit your changes to your branch. 97 | 5. Add new tests corresponding to your change, if applicable. 98 | 6. Ensure that your code is formatted, the build is clean and all tests are passing. 99 | 7. Create a PR against the repository's **main** branch. 100 | - State in the description what issue or improvement your change is addressing. 101 | - Verify that all the Continuous Integration checks are passing. 102 | 8. Wait for feedback or approval of your changes from the code maintainers. 103 | 9. When area owners have signed off, and all checks are green, your PR will be merged. 104 | 105 | ### PR - CI Process 106 | 107 | The continuous integration (CI) system will automatically perform the required 108 | builds and run tests (including the ones you are expected to run) for PRs. 
109 | Builds and test runs must be clean. 110 | 111 | If the CI build fails for any reason, the PR issue will be updated with a link 112 | that can be used to determine the cause of the failure. 113 | -------------------------------------------------------------------------------- /DIARY.md: -------------------------------------------------------------------------------- 1 | ## Diary 2 | 3 | >A bunch of notes and thoughts about the project. 4 | 5 | :calendar: 12/20/2023 6 | 1. Added TODO.md file to the project 7 | 8 | :calendar: 12/19/2023 9 | 10 | 1. Version 0.4.0 11 | 1. Large number of changes inspired by Hackathon and Devis Lucato. 12 | 1. See commits and PRs for details. 13 | 1. The issues with the hardcoded local NuGet repository ```local``` should be fixed. 14 | 1. Added ```/docker``` folder with a Docker Compose file that runs the ELK stack very easily. 15 | 1. Added [installation instructions for Elastic Stack](/docker/README.md) 16 | 1. Renamed the repo 17 | 18 | :calendar: 12/05/2023 19 | 20 | 1. Working on SK Hackathon: Code Mapper project. 21 | 22 | :calendar: 12/04/2023 23 | 1. Version 0.3.0 24 | 1. Implemented most of the methods of IMemoryDb 25 | 1. Need to finish MemoryFilter implementation 26 | 1. General repo cleanup 27 | 28 | :calendar: 12/01/2023 29 | 1. Version 0.2.0 30 | 1. Added this DIARY.md file to the project. 31 | 1. Merged with the new KM nuget 0.15.231130.2-preview 32 | 1. Cleaned up the repo a lot after merging with the [kernel-memory-postgres repository](https://github.com/microsoft/kernel-memory-postgres). 33 | 1. Pages like LICENSE, README, etc. have been 'ported' from the same repository. 34 | 1. The analyzers are awesome. We essentially standardized to MS' conventions. 35 | 1. Changed editor .editorconfig to be for FML 36 | 1. Improved the configuration setup in UnitTests/Startup.cs 37 | 1. Determined how to better structure configuration options. 38 | 1.
Created several extensions, including one to go from ElasticsearchConfig to ElasticsearchClientSettings 39 | 1. Removed code from the TestApplication into the UnitTest project 40 | 1. This is a better place for it. 41 | -------------------------------------------------------------------------------- /Directory.Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | net7.0 6 | true 7 | true 8 | AllEnabledByDefault 9 | latest 10 | true 11 | 11 12 | enable 13 | disable 14 | LatestMajor 15 | 16 | 17 | 18 | 19 | disable 20 | 21 | 22 | 23 | true 24 | full 25 | 26 | 27 | 28 | portable 29 | 30 | 31 | 32 | $([System.IO.Path]::GetDirectoryName($([MSBuild]::GetPathOfFileAbove('.gitignore', '$(MSBuildThisFileDirectory)')))) 33 | 34 | 35 | 36 | 37 | 38 | <_Parameter1>false 39 | 40 | 41 | -------------------------------------------------------------------------------- /Directory.Packages.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | true 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /DockerElasticsearchAndKibana.md: -------------------------------------------------------------------------------- 1 | # How to install a running version of Elasticsearch and Kibana 8.x running on Docker 2 | 3 | This article will show you how to install a running version of Elasticsearch and Kibana 8.x running on Docker. 4 | The data will be persisted in a volume on the host machine, so it can survive container restart. 5 | 6 | These are the steps we will follow: 7 | 1. Make sure that the value of `vm.max_map_count` on the Docker host machine is high enough. 8 | 1. Create a Docker network called `elastic`. 9 | 1. We will use this network to connect Elasticsearch and Kibana. 10 | 1. 
We will then launch Elasticsearch using Docker. 11 | 1. When Elasticsearch runs for the first time it will output some security keys that we need to grab. 12 | 1. Such keys will allow us to later connect Kibana to Elasticsearch. 13 | 1. We will then launch Kibana using Docker. 14 | 1. We will use the keys we grabbed from the logs of Elasticsearch to connect Kibana to Elasticsearch. 15 | 1. The installation should now be complete. 16 | 17 | To access Kibana you should be able to point your browser at http://localhost:5601. 18 | Kibana will, in turn, connect to Elasticsearch at http://localhost:9200. 19 | 20 | ## Prerequisites 21 | 22 | 1. WSL2 running properly, if on Windows 23 | 1. Docker running on Linux/WSL2 24 | 25 | ## Launch a terminal to configure the Docker host 26 | 27 | Launch a terminal and go to the '\docker' subfolder of this repo, which is where we will mount the volumes that will contain the data of Elasticsearch and Kibana. *We don't necessarily need to be in this folder to run all the commands, but it is a good practice to keep all the files related to a project in the same folder.* 28 | 29 | If you are in a PowerShell terminal, just go to the ```\docker``` subfolder of this repository. 30 | 31 | ```powershell 32 | PS D:\> cd .\FreeMindLabs.KernelMemory.Elasticsearch\docker\ 33 | ``` 34 | 35 | Then launch WSL by typing ```wsl``` in the terminal. 36 | 37 | ```powershell 38 | PS D:\FreeMindLabs.KernelMemory.Elasticsearch\docker> wsl 39 | ``` 40 | 41 | You should now be in a Linux terminal similar to the following: 42 | 43 | ```bash 44 | sysadmin@OptimusPrime:/mnt/d/FreeMindLabs.KernelMemory.Elasticsearch/docker$ 45 | ``` 46 | 47 | ## How to Install Elasticsearch 48 | 49 | Before we can launch a Docker container running Elasticsearch, we need to configure two things: 50 | 51 | 1. Set a correct value for `vm.max_map_count` on the Docker host. 52 | 1. Create a Docker network called `elastic` that we will use to connect Elasticsearch and Kibana.
53 | 54 | ### Set vm.max_map_count on the Docker host 55 | 56 | > vm.max_map_count is a critical setting in Linux systems, particularly necessary for running Elasticsearch efficiently. It defines the maximum number of memory map areas a process can have. 57 | > The default vm.max_map_count value is typically too low for Elasticsearch, potentially leading to performance issues or even **preventing it from starting**. By increasing this value to at least 262144, as recommended by Elasticsearch's official documentation, you ensure that the Elasticsearch process has sufficient memory map areas for optimal performance and stability. 58 | 59 | To see the current value of `vm.max_map_count`, run the following command from WSL/Linux: 60 | 61 | ```bash 62 | sysctl vm.max_map_count 63 | ``` 64 | 65 | If the value you read is less than `262144`, you need to increase it. 66 | 67 | You can set `vm.max_map_count` on the Docker host machine in two ways: temporarily or permanently. 68 | 69 | #### Temporarily (until the next reboot): 70 | Run the following command on your host machine (not inside the container): 71 | 72 | ```bash 73 | sudo sysctl -w vm.max_map_count=262144 74 | ``` 75 | 76 | This command sets `vm.max_map_count` to `262144` temporarily. 77 | 78 | #### Permanently (recommended): 79 | To make this change permanent, you need to add it to your system's configuration file. 80 | 81 | 1. Edit the `/etc/sysctl.conf` file on your host machine: 82 | 83 | ```bash 84 | sudo nano /etc/sysctl.conf 85 | ``` 86 | 87 | 2. Add the following line at the end of the file: 88 | 89 | ``` 90 | vm.max_map_count=262144 91 | ``` 92 | 93 | 3. Save and close the file. 94 | 95 | 4. 
To apply the changes without rebooting, run: 96 | 97 | ```bash 98 | sudo sysctl -p 99 | ``` 100 | 101 | ### Create a Docker network called `elastic` 102 | 103 | To create a Docker network called `elastic`, run the following command from WSL/Linux: 104 | 105 | ```bash 106 | docker network create elastic 107 | ``` 108 | 109 | The response should be something like this: 110 | 111 | ```bash 112 | 8a42de666bc5fdc5de1b9951eddc31da059eb2a13eb2c9eec879c7b0d9a0906b 113 | ``` 114 | 115 | ### Launch Elasticsearch 116 | 117 | To launch Elasticsearch, run the following command from WSL/Linux: 118 | 119 | ```bash 120 | docker run -d --name elasticsearch-01 --net elastic -p 9200:9200 -e "discovery.type=single-node" -v $(pwd)/elasticsearch:/usr/share/elasticsearch/data -m 4g docker.elastic.co/elasticsearch/elasticsearch:8.3.3 121 | ``` 122 | The response should be something like this: 123 | 124 | ```bash 125 | a4c20c2ecbec1fc0f90d72e481924928e4af49a16464928d92d0fedba784eb54 126 | ``` 127 | 128 | Now we need to inspect the logs so we can grab the keys we need. 129 | Run the following command from WSL/Linux: 130 | 131 | ```bash 132 | docker logs elasticsearch-01 -f 133 | ``` 134 | 135 | At some point, after everything takes the time it needs, you should see the following: 136 | 137 | ```bash 138 | 139 | ``` 140 | 141 | 142 | ## References 143 | 144 | 1. [How to install elasticsearch and kibana 8.x using Docker?](https://www.devopsschool.com/blog/how-to-install-elasticsearch-and-kibana-using-docker/) 145 | 1. [Install Elasticsearch with Docker](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html) 146 | 1. [Install Kibana with Docker](https://www.elastic.co/guide/en/kibana/current/docker.html) 147 | 1. [Deploying Elasticsearch and Kibana with Docker](https://quoeamaster.medium.com/deploying-elasticsearch-and-kibana-with-docker-86a4ac78d851) 148 | 1. 
(Getting started with the Elastic Stack and Docker Compose: Part 1)[https://www.elastic.co/blog/getting-started-with-the-elastic-stack-and-docker-compose] 149 | 1. -------------------------------------------------------------------------------- /KernelMemoryElasticsearch.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.9.34310.174 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "root", "root", "{6EF76FD8-4C35-4370-8539-5DDF45357A50}" 7 | ProjectSection(SolutionItems) = preProject 8 | .editorconfig = .editorconfig 9 | .gitattributes = .gitattributes 10 | .gitignore = .gitignore 11 | code-analysis.props = code-analysis.props 12 | CODE_OF_CONDUCT.md = CODE_OF_CONDUCT.md 13 | CONFIGURATION.md = CONFIGURATION.md 14 | CONTRIBUTING.md = CONTRIBUTING.md 15 | Directory.Build.props = Directory.Build.props 16 | Directory.Packages.props = Directory.Packages.props 17 | icon.png = icon.png 18 | LICENSE = LICENSE 19 | nuget-package.props = nuget-package.props 20 | nuget.config = nuget.config 21 | NUGET.md = NUGET.md 22 | README.md = README.md 23 | SECURITY.md = SECURITY.md 24 | EndProjectSection 25 | EndProject 26 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "source", "source", "{98E1758C-113A-41F4-85A3-1C8EFFA6CEC2}" 27 | EndProject 28 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{A455710B-0D10-4200-AB39-DB5ECC457FAC}" 29 | EndProject 30 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "examples", "examples", "{9532FF95-6D14-43E7-B554-F5289C605172}" 31 | EndProject 32 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ElasticsearchMemoryStorage", "src\ElasticsearchMemoryStorage\ElasticsearchMemoryStorage.csproj", "{BE8D8957-8A6F-4879-BAAE-0462A118DFD3}" 33 | EndProject 34 | 
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UnitTests", "tests\UnitTests\UnitTests.csproj", "{A615A571-2B3A-4C2B-9B1C-371BF87D8DBE}" 35 | EndProject 36 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docker", "docker", "{F8AB554F-C604-4098-B4C1-4CBAD790ADC0}" 37 | ProjectSection(SolutionItems) = preProject 38 | docker\.env = docker\.env 39 | docker\.env.example = docker\.env.example 40 | docker\docker-compose.yml = docker\docker-compose.yml 41 | docker\filebeat.yml = docker\filebeat.yml 42 | docker\kibana.yml = docker\kibana.yml 43 | docker\logstash.conf = docker\logstash.conf 44 | docker\metricbeat.yml = docker\metricbeat.yml 45 | docker\README.md = docker\README.md 46 | EndProjectSection 47 | EndProject 48 | Global 49 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 50 | Debug|Any CPU = Debug|Any CPU 51 | Release|Any CPU = Release|Any CPU 52 | EndGlobalSection 53 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 54 | {BE8D8957-8A6F-4879-BAAE-0462A118DFD3}.Debug|Any CPU.ActiveCfg = Release|Any CPU 55 | {BE8D8957-8A6F-4879-BAAE-0462A118DFD3}.Debug|Any CPU.Build.0 = Release|Any CPU 56 | {BE8D8957-8A6F-4879-BAAE-0462A118DFD3}.Release|Any CPU.ActiveCfg = Release|Any CPU 57 | {BE8D8957-8A6F-4879-BAAE-0462A118DFD3}.Release|Any CPU.Build.0 = Release|Any CPU 58 | {A615A571-2B3A-4C2B-9B1C-371BF87D8DBE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 59 | {A615A571-2B3A-4C2B-9B1C-371BF87D8DBE}.Debug|Any CPU.Build.0 = Debug|Any CPU 60 | {A615A571-2B3A-4C2B-9B1C-371BF87D8DBE}.Release|Any CPU.ActiveCfg = Release|Any CPU 61 | {A615A571-2B3A-4C2B-9B1C-371BF87D8DBE}.Release|Any CPU.Build.0 = Release|Any CPU 62 | EndGlobalSection 63 | GlobalSection(SolutionProperties) = preSolution 64 | HideSolutionNode = FALSE 65 | EndGlobalSection 66 | GlobalSection(NestedProjects) = preSolution 67 | {BE8D8957-8A6F-4879-BAAE-0462A118DFD3} = {98E1758C-113A-41F4-85A3-1C8EFFA6CEC2} 68 | {A615A571-2B3A-4C2B-9B1C-371BF87D8DBE} = {A455710B-0D10-4200-AB39-DB5ECC457FAC} 
69 | EndGlobalSection 70 | GlobalSection(ExtensibilityGlobals) = postSolution 71 | SolutionGuid = {964BE41E-E834-4596-BFDB-5F9D5BA9F048} 72 | EndGlobalSection 73 | EndGlobal 74 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Free Mind Labs, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /NUGET.md: -------------------------------------------------------------------------------- 1 | # Kernel Memory with Elasticsearch 2 | 3 | [![License: MIT](https://img.shields.io/github/license/microsoft/kernel-memory)](https://github.com/microsoft/kernel-memory/blob/main/LICENSE) 4 | 5 | Use [Elasticsearch](https://www.elastic.co/) as vector storage for Microsoft [Kernel Memory](https://github.com/microsoft/semantic-memory). 6 | 7 | See our [Github repository](https://github.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ⚠️ The Elasticsearch connector for Kernel Memory has been incorporated in Microsoft Kernel Memory and this repository 2 | has been archived. 3 | You can find the latest source code [here](https://github.com/microsoft/kernel-memory/tree/main/extensions/Elasticsearch/Elasticsearch). 4 | 5 | # Kernel Memory with Elasticsearch 6 | 7 | 8 | Use [Elasticsearch](https://www.elastic.co/) as vector storage for Microsoft [Kernel Memory](https://github.com/microsoft/semantic-memory). 9 | 10 |
11 | 12 |
13 | 14 |
15 | 16 | [![NuGet](https://img.shields.io/nuget/v/Freemindlabs.KernelMemory.Elasticsearch.svg)](https://www.nuget.org/packages/Freemindlabs.KernelMemory.Elasticsearch) [![NuGet](https://img.shields.io/nuget/dt/Freemindlabs.KernelMemory.Elasticsearch.svg)](https://www.nuget.org/packages/Freemindlabs.KernelMemory.Elasticsearch) [![License: MIT](https://img.shields.io/github/license/microsoft/kernel-memory)](https://github.com/freemindlabsinc/FreeMindLabs.SemanticKernel/blob/main/LICENSE) 17 | 18 |
19 | 20 | --- 21 | **Kernel Memory** (KM) is a **multi-modal [AI Service](https://github.com/microsoft/kernel-memory/blob/main/service/Service/README.md)** specialized in the efficient indexing of datasets through custom continuous data hybrid pipelines, with support for **[Retrieval Augmented Generation](https://en.wikipedia.org/wiki/Prompt_engineering#Retrieval-augmented_generation)** (RAG), synthetic memory, prompt engineering, and custom semantic memory processing. 22 | 23 | 24 | 25 | Utilizing advanced embeddings and LLMs, the system enables Natural Language querying for obtaining answers from the indexed data, complete with citations and links to the original sources. 26 | 27 | 28 | 29 | --- 30 | 31 | This repository contains the **Elasticsearch adapter** that allows KM to use Elasticsearch as vector database, thus allowing developers to perform [lexical and semantic search](https://www.elastic.co/search-labs/blog/articles/lexical-and-semantic-search-with-elasticsearch), in addition to [hybrid](https://opster.com/guides/elasticsearch/machine-learning/elasticsearch-hybrid-search/), keyword and full-text search on your *semantic content*. 32 | 33 | ## Pre-requisites 34 | 35 | 1. A running instance of Elasticsearch 36 | 37 | 1. You can install a **local instance** of Elasticsearch using Docker. 38 | To simplify the setup of a running instance of Elasticsearch we prepared the article [Installing the Elastic Stack using Docker Compose](/docker/README.md) that guides you through the process. *The following diagram shows what will be running once the installation is complete.* 39 |
40 | 41 |
42 | 43 | 2. Alternatively you can use a **cloud** service like [Elastic Cloud](https://www.elastic.co/cloud/). The free tier is enough. 44 | 45 | ## Configuration 46 | 47 | The xUnit project UnitTests contains an [appSettings.json](tests/UnitTests/appSettings.json) file that lists all available options. The file reads as follows: 48 | 49 | ``` 50 | { 51 | "OpenAI": { 52 | "ApiKey": "...SECRETS...", 53 | "EmbeddingModelId": "text-embedding-ada-002", 54 | "ModelId": "text-davinci-003", 55 | "ChatModelId": "gpt-3.5-turbo" 56 | }, 57 | "Elasticsearch": { 58 | "CertificateFingerPrint": "...SECRETS...", 59 | "Endpoint": "https://localhost:9200", 60 | "UserName": "...SECRETS...", 61 | "Password": "...SECRETS..." 62 | } 63 | } 64 | ``` 65 | 66 | >*The class used to store configuration is [ElasticsearchConfig](/src/ElasticsearchMemoryStorage/ElasticsearchConfig.cs).* 67 | 68 | This file is supposed to show the available options but it is not meant to store sensitive information such as ```ApiKey```, ```Password``` or ```CertificateFingerPrint```. Modify this file as necessary (e.g. by changing the Endpoint), but add the values for the certificate fingerprint and the password in user secrets. 69 | 70 | ### How to add user secrets 71 | 72 | To add secrets either: 73 | - Open the secrets file in your IDE by right clicking on the project name and selecting Manage User Secrets. 74 | - To read more about user secrets click [here](https://learn.microsoft.com/en-us/aspnet/core/security/app-secrets?view=aspnetcore-8.0&tabs=windows) 75 | 76 | - Add the secrets from the command line by running the following commands: 77 | ``` 78 | > dotnet user-secrets set "OpenAI:ApiKey" "...your Open AI API key..." 79 | > dotnet user-secrets set "Elasticsearch:CertificateFingerPrint" "...your value..." 80 | > dotnet user-secrets set "Elasticsearch:Password" "...your value..." 81 | ``` 82 | 83 | This ultimately results in the following secrets.json additions: 84 | ``` 85 | { 86 | [..] 
87 | "OpenAI:ApiKey": "...your Open AI API key...", 88 | "Elasticsearch:CertificateFingerPrint": "...your value...", 89 | "Elasticsearch:Password": "...your value...", 90 | } 91 | ``` 92 | 93 | 94 | ## The .NET Solution 95 | 96 | This is a screenshot of the solution. 97 | We highlighted some of the most important files for you to explore and look at. 98 | 99 |

100 | 101 |

102 | 103 | --- 104 | 105 | Here are some screenshots of the tests included in the project. 106 | This project tries to follow [TDD](https://www.coscreen.co/blog/tdd-in-c-guide/) and uses a test-first approach. The tests are meant to show how to use the library and to demonstrate the available features. 107 | 108 |

109 | 110 |

111 | 112 | Click [here](tests/UnitTests/DataStorageTests.cs) to see the source code of the test. 113 | 114 | *Always make sure to look at the output window to see details about the execution.* :eyes: 115 | 116 | 117 |

118 | 119 |

120 | 121 | Click [here](tests/UnitTests/IndexManagementTests.cs) to see the source code of the test. 122 | 123 | ## How to add the Elasticsearch adapter to your Kernel Memory project 124 | 125 | In order to add the Elasticsearch adapter to your project you first need to add a reference to the [Freemindlabs.KernelMemory.Elasticsearch](https://www.nuget.org/packages/Freemindlabs.KernelMemory.Elasticsearch) NuGet package. 126 | 127 | ``` 128 | > dotnet add package Freemindlabs.KernelMemory.Elasticsearch 129 | ``` 130 | 131 | Then you can choose to use one of the ```WithElasticsearch``` extension methods of the interface IKernelMemoryBuilder. 132 | 133 | ```csharp 134 | // From Program.cs of the Service project of the Kernel Memory repository. Line 86. 135 | 136 | [..] 137 | // Loads the Elasticsearch configuration 138 | var esConfig = config.GetServiceConfig(appBuilder.Configuration, "ElasticsearchVectorDb"); 139 | 140 | // Inject memory client and its dependencies 141 | // Note: pass the current service collection to the builder, in order to start the pipeline handlers 142 | IKernelMemory memory = new KernelMemoryBuilder(appBuilder.Services) 143 | .FromAppSettings() 144 | // .With...() // in case you need to set something not already defined by `.FromAppSettings()` 145 | .WithElasticsearch(esConfig) // <--- this 146 | .Build(); 147 | 148 | appBuilder.Services.AddSingleton(memory); 149 | 150 | // Build .NET web app as usual 151 | var app = appBuilder.Build(); 152 | [..] 153 | ``` 154 | 155 | 156 | ## Resources 157 | 158 | 1. :fire: [ How to build a Kernel Memory connector and use Elasticsearch as vector database - Part 1](/content/IMemoryDbArticle.md) 159 | 1. To be relocated and published officially on Microsoft's [devblogs for Semantic kernel](https://devblogs.microsoft.com/semantic-kernel/). 160 | 161 | 1. [A Quick Introduction to Vector Search](https://opster.com/guides/opensearch/opensearch-machine-learning/introduction-to-vector-search/) 162 | 1. 
[Elasticsearch Hybrid Search](https://opster.com/guides/elasticsearch/machine-learning/elasticsearch-hybrid-search/) 163 | 164 | 1. Elastic's official docs on the client. 165 | 1. NEST 7.17: https://www.elastic.co/guide/en/elasticsearch/client/net-api/7.17/nest-getting-started.html 166 | 1. New client 8.9: https://www.elastic.co/guide/en/elasticsearch/client/net-api/8.9/introduction.html 167 | 1. This client is not yet feature complete. 168 | 1. Look here for details: https://www.elastic.co/guide/en/elasticsearch/client/net-api/current/release-notes-8.0.0.html 169 | 1. In addition, the docs are not up to date. For some stuff we need to look at NEST's docs. 170 | 171 | 1. [Elasticsearch.net GitHub repository](https://github.com/elastic/elasticsearch-net) 172 | 173 | 1. Semantic Kernel/Memory-Kernel 174 | 1. [Introduction to Semantic Memory (feat. Devis Lucato) | Semantic Kernel](https://www.youtube.com/watch?v=5JYW_uAxwYM) 175 | 1. [11.29.2023 - Semantic Kernel Office Hours (US/Europe Region)](https://www.youtube.com/watch?v=JSca9mVUUJo) 176 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, 6 | which includes all source code repositories managed through our GitHub 7 | organizations, which include [Microsoft](https://github.com/microsoft) 8 | [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), 9 | [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), 10 | and [our GitHub organizations](https://opensource.microsoft.com/). 11 | 12 | If you believe you have found a security vulnerability in any Microsoft-owned 13 | repository that meets 14 | [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), 15 | please report it to us as described below. 
16 | 17 | ## Reporting Security Issues 18 | 19 | **Please do not report security vulnerabilities through public GitHub issues.** 20 | 21 | Instead, please report them to the Microsoft Security Response Center (MSRC) at 22 | [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 23 | 24 | If you prefer to submit without logging in, send email to 25 | [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your 26 | message with our PGP key; please download it from the 27 | [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 28 | 29 | You should receive a response within 24 hours. If for some reason you do not, 30 | please follow up via email to ensure we received your original message. 31 | Additional information can be found at 32 | [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 33 | 34 | Please include the requested information listed below (as much as you can 35 | provide) to help us better understand the nature and scope of the possible issue: 36 | 37 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 38 | * Full paths of source file(s) related to the manifestation of the issue 39 | * The location of the affected source code (tag/branch/commit or direct URL) 40 | * Any special configuration required to reproduce the issue 41 | * Step-by-step instructions to reproduce the issue 42 | * Proof-of-concept or exploit code (if possible) 43 | * Impact of the issue, including how an attacker might exploit the issue 44 | 45 | This information will help us triage your report more quickly. 46 | 47 | If you are reporting for a bug bounty, more complete reports can contribute to 48 | a higher bounty award. Please visit our 49 | [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page 50 | for more details about our active programs. 
51 | 52 | ## Preferred Languages 53 | 54 | We prefer all communications to be in English. 55 | 56 | ## Policy 57 | 58 | Microsoft follows the principle of 59 | [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 60 | 61 | 62 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # TODOs 2 | 3 | 1. Complete article about IMemoryConnector 4 | - Now that the repo is clean, add code examples to article (MOSTLY DONE) 5 | 6 | 1. Make properties of the mapping not nullable as per the postgres [code](https://github.com/microsoft/kernel-memory-postgres/blob/58df8fa4cee89add3ba6e49e00535aa1f7b43b02/PostgresMemoryStorage/Db/PostgresDbClient.cs#L142) 7 | 8 | 1. Make custom column available. 9 | 1. See [PostgresConfig](https://github.com/microsoft/kernel-memory-postgres/blob/main/PostgresMemoryStorage/PostgresConfig.cs) 10 | 11 | # Done 12 | 13 | 1. Add a new content (text) column to the ES mapping to index the content that is inside the Payload 14 | - Q: Look into Payload as it stores JSON like ```{"file": "blabla.txt", "text": "...the chunk's text...", "vector_provider": "xxxx", "vector_generator": "TODO", "last_update": "2023-12-05T16:23:19" }``` 15 | - [See image here](https://github.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/blob/main/content/images/DataPage2.jpg) 16 | 17 | - See if I need to integrate more of Davis' thoughts into the article -------------------------------------------------------------------------------- /code-analysis.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | all 5 | runtime; build; native; contentfiles; analyzers; buildtransitive 6 | 7 | 8 | all 9 | runtime; build; native; contentfiles; analyzers; buildtransitive 10 | 11 | 12 | all 13 | runtime; build; native; contentfiles; analyzers; buildtransitive 14 | 15 | 16 | all 17 | 
runtime; build; native; contentfiles; analyzers; buildtransitive 18 | 19 | 20 | all 21 | runtime; build; native; contentfiles; analyzers; buildtransitive 22 | 23 | 24 | all 25 | runtime; build; native; contentfiles; analyzers; buildtransitive 26 | 27 | 28 | -------------------------------------------------------------------------------- /content/images/Connectors.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Connectors.jpg -------------------------------------------------------------------------------- /content/images/CreateIndices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/CreateIndices.png -------------------------------------------------------------------------------- /content/images/DataPage1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/DataPage1.jpg -------------------------------------------------------------------------------- /content/images/DataPage2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/DataPage2.jpg -------------------------------------------------------------------------------- /content/images/DataPageAllRows.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/DataPageAllRows.jpg -------------------------------------------------------------------------------- /content/images/ESLogo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/ESLogo.jpg -------------------------------------------------------------------------------- /content/images/FML-Logo-Round.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/FML-Logo-Round.gif -------------------------------------------------------------------------------- /content/images/FML-Logo-Round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/FML-Logo-Round.png -------------------------------------------------------------------------------- /content/images/FML-Logo-Square.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/FML-Logo-Square.gif -------------------------------------------------------------------------------- /content/images/FML-Logo-Square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/FML-Logo-Square.png 
-------------------------------------------------------------------------------- /content/images/FMLLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/FMLLogo.png -------------------------------------------------------------------------------- /content/images/Free Mind Labs logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Free Mind Labs logo.png -------------------------------------------------------------------------------- /content/images/Free Mind Labs.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Free Mind Labs.gif -------------------------------------------------------------------------------- /content/images/KnnQuery.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/KnnQuery.jpg -------------------------------------------------------------------------------- /content/images/Mappings.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Mappings.jpg -------------------------------------------------------------------------------- /content/images/Pipelines.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Pipelines.jpg -------------------------------------------------------------------------------- /content/images/RAG.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/RAG.jpg -------------------------------------------------------------------------------- /content/images/Solution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/Solution.png -------------------------------------------------------------------------------- /content/images/TestRunning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/content/images/TestRunning.png -------------------------------------------------------------------------------- /docker/.env.example: -------------------------------------------------------------------------------- 1 | # Project namespace (defaults to the current folder name if not set) 2 | COMPOSE_PROJECT_NAME=es-cluster 3 | 4 | # Password for the 'elastic' user (at least 6 characters) 5 | ELASTIC_PASSWORD=changeme 6 | 7 | # Password for the 'kibana_system' user (at least 6 characters) 8 | KIBANA_PASSWORD=changeme 9 | 10 | # Version of Elastic products 11 | #https://www.elastic.co/downloads/past-releases#elasticsearch 12 | STACK_VERSION=8.8.2 13 | 14 | # Set the cluster name 15 | CLUSTER_NAME=docker-cluster 16 | 17 | # Set to 'basic' or 'trial' to automatically start the 30-day trial 18 | LICENSE=basic 19 | 
#LICENSE=trial 20 | 21 | # Port to expose Elasticsearch HTTP API to the host 22 | ES_PORT=9200 23 | 24 | # Port to expose Kibana to the host 25 | KIBANA_PORT=5601 26 | 27 | # Port to expose Fleet to the host 28 | FLEET_PORT=8220 29 | 30 | # Port to expose APM to the host 31 | APMSERVER_PORT=8200 32 | 33 | # APM Secret Token for POC environments only 34 | ELASTIC_APM_SECRET_TOKEN=supersecrettoken 35 | 36 | # Increase or decrease based on the available host memory (in bytes) 37 | ES_MEM_LIMIT=3073741824 38 | KB_MEM_LIMIT=1073741824 39 | LS_MEM_LIMIT=1073741824 40 | 41 | # SAMPLE Predefined Key only to be used in POC environments 42 | ENCRYPTION_KEY=c72d38b3a14956121ff2170e4030b4715513701fff43e5626eec58b04a30fae3 -------------------------------------------------------------------------------- /docker/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /docker/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # How to install the Elastic Stack using Docker Compose 2 | 3 | ## Prerequisites 4 | 5 | You need to have [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/) running on a Linux box. 6 | 7 | If you are on a Windows 10/11 machine, such Linux box can be a [WSL2](https://learn.microsoft.com/en-us/windows/wsl/about) instance. 8 | 9 | > *See [this article :green_book:](https://www.windowscentral.com/how-install-wsl2-windows-10) to see how to install WSL2 on Windows 10/11, if you are not familiar with the procedure.* 10 | 11 | The remainder of this document shows how to install the Elastic Stack on Windows 11 using WSL2 and Docker Desktop running on it. 12 | 13 | ## The installation 14 | 15 | There are several ways to install and run the Elastic Stack on a development machine. 
We will install the Elastic Stack using Docker Compose as inspired by the articles of [Eddie Mitchell](https://www.elastic.co/blog/author/eddie-mitchell). 16 | 17 | The environment used for this installation is a Windows 11 machine with [WSL2](https://www.windowscentral.com/how-install-wsl2-windows-10) and [Docker Desktop](https://docs.docker.com/desktop/install/windows-install/) installed. Please read the [Prerequisites](#prerequisites) section above if you have not installed them yet. 18 | 19 | The necessary files have been copied into the ```/docker``` folder so that we can run them directly without the need for Mitchell's repository, and so that we can alter them as necessary in the future. 20 | 21 | The files we might need to access and change have also been grouped under the solution folder ```docker```: 22 | 23 |
24 | 25 |
26 | 27 | - **.env**: this file contains the environment variables that will be used by Docker Compose. 28 | 1. :warning: **The .env file needs to be created manually**, as it is not part of the repository. **Without this file, Docker Compose will not work.** 29 | 1. To create a valid .env file, copy the contents of the ```.env.example``` file and paste them into a new file named ```.env```. Then, update the values of the variables as needed. 30 | 31 | - **.env.example**: [this file](./.env.example) contains a complete example of all the options available. 32 | - It is not used by Docker Compose, and it should only be used as a reference when creating the .env file. This is what the file looks like: 33 | 34 |
35 | 36 |
37 | 38 | 39 | > :warning: The default username/password for Kibana and Elasticsearch is ```elastic```/```changeme```. You can change these values in your .env file. 40 | 41 | - **docker-compose.yml**: [this file](./docker-compose.yml) contains the configuration for Docker Compose. The compose file will allow us to run the Elastic Stack on a single machine and get access to Elasticsearch, Kibana, Logstash, Filebeat, and Metricbeat. 42 | 43 | The remaining files let you configure options of the individual services. It's unlikely that you will have to change any of them at the beginning. 44 | 45 | - **filebeat.yml**: This file contains the configuration for Filebeat. It is used by the Filebeat container to collect and ship logs to Elasticsearch. *It is unlikely you will need to change this file.* 46 | 47 | - **kibana.yml**: This file contains the configuration for Kibana. It is used by the Kibana container to connect to Elasticsearch. 48 | 49 | - **logstash.conf**: This file contains the configuration for Logstash. It is used by the Logstash container to connect to Elasticsearch. *It is unlikely you will need to change this file.* 50 | 51 | - **metricbeat.yml**: This file contains the configuration for Metricbeat. It is used by the Metricbeat container to collect and ship metrics to Elasticsearch. 52 | *It is unlikely you will need to change this file.* 53 | 54 | - **README.md**: this file. 55 | 56 | 57 | ## High level overview 58 | 59 | From a high-level perspective, the Elastic Stack we will launch is composed of the following components: 60 | 61 |
62 | 63 |
64 | 65 | - **Elasticsearch** (es01): This is the heart of the Elastic Stack, acting as a powerful and scalable search engine. It stores, indexes, and retrieves data, allowing for fast and efficient searching and analysis. 66 | 67 | - **Kibana** (kibana): It serves as the visualization layer in the Elastic Stack. It offers a user-friendly interface to visualize data stored in Elasticsearch. With Kibana, you can create and share dashboards, charts, and reports, making data analysis accessible and insightful, even for those new to data analytics. 68 | 69 | - **Logstash** (logstash01): A data processing pipeline that ingests, transforms, and sends data to Elasticsearch. Logstash allows you to collect data from various sources, process it with a wide range of filters, and enhance it before it gets indexed in Elasticsearch. 70 | 71 | - **Filebeat** (filebeat01): Acting as a lightweight log shipper, Filebeat forwards log data from multiple sources directly to Elasticsearch or Logstash. It simplifies data collection, is resource-efficient, and is ideal for collecting and aggregating log data in real time. 72 | 73 | - **Metricbeat** (metricbeat01): Similar to Filebeat but focused on metrics, Metricbeat collects various system and service metrics. It's essential for real-time monitoring of servers and services, providing valuable insights into their performance and health. 74 | 75 | ## The running environment 76 | 77 | Once you launch the Docker Compose file, you will have access to Kibana, from where you will do most of your work: 78 | 79 |
80 | 81 | 82 |
83 | 84 | The following sections will guide you through the installation process. 85 | 86 | ## The configuration files 87 | 88 | Make sure you have created the **.env** file as explained above. 89 | 90 | ## Step 1/3: Ensure the vm.max_map_count setting is set to at least 262144 91 | 92 | As explained at the beginning of this document, in these instructions we are using Docker Desktop on top of WSL2. Elasticsearch will run in a container on the Linux host, not on Windows. 93 | 94 | When setting up Elasticsearch on Linux, it's essential to configure the `vm.max_map_count` kernel setting on the Linux host to at least `262144`. This setting is critical for Elasticsearch to start up and function. 95 | 96 | :warning: Once again: this change has to be made on the Linux machine running Docker, not inside the container and not on Windows. 97 | 98 | There are two ways to set `vm.max_map_count`: 99 | 100 | 1. Temporary: 101 | - This method is quick and useful for testing purposes. The setting can be changed temporarily by executing a command on your Docker host. It's an immediate change but won't persist after a system reboot. Here's how to do it: 102 | 103 | ```bash 104 | # Set vm.max_map_count temporarily 105 | $ sysctl -w vm.max_map_count=262144 106 | ``` 107 | 108 | *This approach is ideal when you need to quickly set up Elasticsearch for short-term use or testing, without the need for the setting to persist after a reboot.* 109 | 110 | 1. Permanent (recommended): 111 | - For long-term use, especially in containerized environments like Docker, you'll want this setting to be permanent. This requires editing a system configuration file to ensure the setting persists across reboots and container restarts.
Follow these steps: 112 | 113 | ```bash 114 | # Edit the sysctl configuration file for persistent changes 115 | $ echo 'vm.max_map_count=262144' >> /etc/sysctl.conf 116 | 117 | # Apply the changes without rebooting 118 | $ sysctl -p 119 | ``` 120 | 121 | Additional information can be found [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/vm-max-map-count.html) and in Eddie Mitchell's [original article](https://www.elastic.co/blog/getting-started-with-the-elastic-stack-and-docker-compose-part-2). 122 | 123 | ## Step 2/3: Launch the docker compose file 124 | 125 | Launch a terminal and navigate to the `docker` directory of this repo. 126 | Then run the following command: 127 | 128 | ```bash 129 | $ docker-compose up 130 | ``` 131 | 132 | Be prepared to wait a minute or two for the containers to start up. 133 | In the end your terminal should display something like this: 134 | 135 |
136 | 137 |
138 | 139 | Once the containers are up and running and finish their initial setup you will be able to access the Kibana UI at https://localhost:5601. 140 |
141 | 142 |
143 | 144 | The Elasticsearch API at https://localhost:9200. 145 |
146 | 147 |
148 | 149 | And, from inside Docker desktop, our Compose should look like this: 150 | 151 |
152 | 153 |
154 | 155 | ## Step 3/3: Adjust the Settings of Elastic Agent 156 | 157 | Now that Elasticsearch and Kibana are running we can apply our last configuration step: adjust the settings of the Elastic Agent, which is currently not working as expected. 158 | 159 | To see the problem, click on 'Management' -> 'Fleet': 160 |
161 | 162 |
163 | 164 | 165 | 166 | In the Fleet management screen you should now see the following issues: CPU and Memory are not reading correctly. This is because, by default, our Elastic Agent is attempting to log data to a local Elasticsearch instance, which is not correct for our Docker environment. 167 | 168 |
169 | 170 |
171 | 172 | We will need to perform a couple of updates in the Fleet -> Settings UI in order to resolve this. Click on the 'Settings' tab and then the edit action (*green circle*): 173 |
174 | 175 |
176 | 177 | This should display the following. Notice the red circles. 178 |
179 | 180 |
181 | 182 | We now need to change three values: 183 | 1. **Hosts**: 184 | - Change the value http://elasticsearch:9200 to https://es01:9200 185 | 186 | 1. **The CA fingerprint**: 187 | - We'll need to get the CA fingerprint from the cluster, as explained in the next section. 188 | 1. **Advanced YAML configuration**: 189 | - We'll need to get the CA certificate from the cluster, as explained in the next section. 190 | 191 | ### How to get the CA certificate from the cluster? 192 | 193 | Run the following command to pull the CA certificate from the cluster: 194 | 195 | ```bash 196 | docker cp es-cluster-es01-1:/usr/share/elasticsearch/config/certs/ca/ca.crt /tmp/. 197 | ``` 198 | 199 | >*Note: This command will be different based on either the directory you’re running the docker-compose.yml file from or the COMPOSE_PROJECT_NAME variable that is specified in the .env file.* 200 | 201 | Next, we will need to get the fingerprint of the certificate. For this, we can use an OpenSSL command: 202 | 203 | ```bash 204 | openssl x509 -fingerprint -sha256 -noout -in /tmp/ca.crt | awk -F"=" {' print $2 '} | sed s/://g 205 | ``` 206 | 207 | This will produce a value similar to: 208 | 209 | ``` 210 | C8EEE11A0713CF5E3E49979A548F1D133DE0ED4A9263DA43AE039A883F94A726 211 | ``` 212 | 213 | Finally, we need to get the whole cert into a yml format. We can do this with a `cat` command or just by opening the cert in a text editor: 214 | 215 | ```bash 216 | cat /tmp/ca.crt 217 | ``` 218 |
219 | 220 |
221 | 222 | ### The correct settings 223 | 224 | The final settings should look like this (*ignore the fingerprint*): 225 | 226 |
227 | 228 |
229 | 230 | Don't forget to click “Save and Apply Settings” -> “Save and Deploy.” 231 | 232 |
233 | 234 |
235 | 236 | Your agent should now be running and reporting data to Elasticsearch correctly. 237 | 238 |
239 | 240 |
241 | 242 | And dashboards should work properly: 243 | 244 |
245 | 246 |
247 | 248 | ## Final considerations 249 | 250 | - These instructions have been tested on Windows, using WSL2 and Docker Desktop. 251 | 252 | ## Resources 253 | 254 | 1. [Getting started with the Elastic Stack and Docker Compose: Part 1](https://www.elastic.co/blog/getting-started-with-the-elastic-stack-and-docker-compose) 255 | 1. The GitHub repo for this article can be found [here](https://github.com/elkninja/elastic-stack-docker-part-one) 256 | 257 | 2. [Getting started with the Elastic Stack and Docker Compose: Part 2](https://www.elastic.co/blog/getting-started-with-the-elastic-stack-and-docker-compose-part-2) 258 | 1. The GitHub repo for this article can be found [here](https://github.com/elkninja/elastic-stack-docker-part-two) 259 | 260 | 1. [Install Elasticsearch with Docker](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html) 261 | 262 | 1. [Install Kibana with Docker](https://www.elastic.co/guide/en/kibana/current/docker.html) 263 | -------------------------------------------------------------------------------- /docker/app/dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | FROM python:3.9-slim-buster 4 | 5 | WORKDIR /app 6 | 7 | COPY requirements.txt requirements.txt 8 | 9 | RUN pip3 install -r requirements.txt 10 | 11 | COPY main.py main.py 12 | 13 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--log-level", "info", "--workers", "1"] -------------------------------------------------------------------------------- /docker/app/main.py: -------------------------------------------------------------------------------- 1 | from elasticapm.contrib.starlette import ElasticAPM, make_apm_client 2 | from fastapi import FastAPI 3 | from nicegui import ui 4 | from typing import Callable 5 | import asyncio 6 | import functools 7 | import httpx as r 8 | #import psutil 9 | 10 | try: 11 | apm = make_apm_client({ 12 | 'SERVICE_NAME': 
'my_python_service', 13 | 'SECRET_TOKEN': 'supersecrettoken', 14 | # SERVER_URL must be set to "fleet-server" if running as a docker container. 15 | # if running as a local python script, then set the url to "LOCALHOST" 16 | 'SERVER_URL': 'http://fleet-server:8200', 17 | 'ENVIRONMENT': 'development' 18 | }) 19 | except Exception as e: 20 | print('failed to create client') 21 | 22 | app = FastAPI() 23 | 24 | try: 25 | app.add_middleware(ElasticAPM, client=apm) 26 | except Exception as e: 27 | print('failed to add APM Middleware') 28 | 29 | 30 | @app.get("/custom_message/{message}") 31 | async def custom_message(message: str): 32 | apm.capture_message(f"Custom Message: {message}") 33 | return {"message": f"Custom Message: {message}"} 34 | 35 | 36 | @app.get("/error") 37 | async def throw_error(): 38 | try: 39 | 1 / 0 40 | except Exception as e: 41 | apm.capture_exception() 42 | return {"message": "Failed Successfully :)"} 43 | 44 | 45 | def init(fastapi_app: FastAPI) -> None: 46 | @ui.page('/', title="APM Demo App") 47 | async def show(): 48 | with ui.header(elevated=True).style('background-color: #3874c8').classes('items-center justify-between'): 49 | ui.markdown('### APM DEMO') 50 | ui.button(on_click=lambda: right_drawer.toggle(), icon='menu').props('flat color=white') 51 | with ui.right_drawer(fixed=False).style('background-color: #ebf1fa').props('bordered') as right_drawer: 52 | ui.chat_message('Hello Elastic Stack User!', 53 | name='APM Robot', 54 | stamp='now', 55 | avatar='https://robohash.org/apm_robot') 56 | ui.chat_message('This app is powered by NICEGUI and FastAPI with Elastic APM Instrumentation :)', 57 | name='APM Robot', 58 | stamp='now', 59 | avatar='https://robohash.org/apm_robot') 60 | ui.chat_message('Please click a button to trigger an APM event.', 61 | name='APM Robot', 62 | stamp='now', 63 | avatar='https://robohash.org/apm_robot') 64 | with ui.footer().style('background-color: #3874c8'): 65 | ui.label('APM DEMO PAGE') 66 | 67 | with ui.card(): 
68 | ui.label('Generate Error - Python') 69 | ui.button('Generate', on_click=python_error) 70 | 71 | with ui.card(): 72 | ui.label('Generate Error - JS') 73 | ui.button('Generate', on_click=js_error) 74 | 75 | with ui.card(): 76 | ui.label('Generate Custom Message') 77 | custom_message_text = ui.input(placeholder='Message') 78 | ui.button('Generate').on('click', handler=lambda: gen_custom_message(custom_message_text.value)) 79 | 80 | ui.run_with( 81 | fastapi_app, 82 | storage_secret='supersecret', # NOTE setting a secret is optional but allows for persistent storage per user 83 | ) 84 | 85 | 86 | async def io_bound(callback: Callable, *args: any, **kwargs: any): 87 | '''Makes a blocking function awaitable; pass function as first parameter and its arguments as the rest''' 88 | return await asyncio.get_event_loop().run_in_executor(None, functools.partial(callback, *args, **kwargs)) 89 | 90 | 91 | async def python_error(): 92 | try: 93 | res = await io_bound(r.get, 'http://localhost:8000/error') 94 | ui.notify(res.text) 95 | except Exception as e: 96 | apm.capture_exception() 97 | ui.notify(f'{e}') 98 | 99 | 100 | async def js_error(): 101 | try: 102 | res = await ui.run_javascript('fetch("http://localhost:8000/error")') 103 | ui.notify(f'Message: Failed Successfully :)') 104 | except Exception as e: 105 | apm.capture_exception() 106 | ui.notify(f'{e}') 107 | 108 | 109 | async def gen_custom_message(text_message): 110 | try: 111 | res = await io_bound(r.get, 'http://localhost:8000/custom_message/' + str(text_message)) 112 | ui.notify(res.text) 113 | except Exception as e: 114 | apm.capture_exception() 115 | ui.notify(f'{e}') 116 | 117 | init(app) 118 | 119 | try: 120 | apm.capture_message('App Loaded, Hello World!') 121 | except Exception as e: 122 | print('error: ' + e) 123 | 124 | if __name__ == '__main__': 125 | print('Please start the app with the "uvicorn" command as shown in the start.sh script') 126 | 
-------------------------------------------------------------------------------- /docker/app/requirements.txt: -------------------------------------------------------------------------------- 1 | elastic-apm==6.17.0 2 | fastapi==0.100.0 3 | httpx==0.24.1 4 | nicegui==1.3.2 5 | starlette==0.27.0 -------------------------------------------------------------------------------- /docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | volumes: 3 | certs: 4 | driver: local 5 | esdata01: 6 | driver: local 7 | kibanadata: 8 | driver: local 9 | metricbeatdata01: 10 | driver: local 11 | filebeatdata01: 12 | driver: local 13 | logstashdata01: 14 | driver: local 15 | fleetserverdata: 16 | driver: local 17 | 18 | networks: 19 | default: 20 | name: elastic 21 | external: false 22 | 23 | services: 24 | setup: 25 | image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION} 26 | volumes: 27 | - certs:/usr/share/elasticsearch/config/certs 28 | user: "0" 29 | command: > 30 | bash -c ' 31 | if [ x${ELASTIC_PASSWORD} == x ]; then 32 | echo "Set the ELASTIC_PASSWORD environment variable in the .env file"; 33 | exit 1; 34 | elif [ x${KIBANA_PASSWORD} == x ]; then 35 | echo "Set the KIBANA_PASSWORD environment variable in the .env file"; 36 | exit 1; 37 | fi; 38 | if [ ! -f config/certs/ca.zip ]; then 39 | echo "Creating CA"; 40 | bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip; 41 | unzip config/certs/ca.zip -d config/certs; 42 | fi; 43 | if [ ! 
-f config/certs/certs.zip ]; then 44 | echo "Creating certs"; 45 | echo -ne \ 46 | "instances:\n"\ 47 | " - name: es01\n"\ 48 | " dns:\n"\ 49 | " - es01\n"\ 50 | " - localhost\n"\ 51 | " ip:\n"\ 52 | " - 127.0.0.1\n"\ 53 | " - name: kibana\n"\ 54 | " dns:\n"\ 55 | " - kibana\n"\ 56 | " - localhost\n"\ 57 | " ip:\n"\ 58 | " - 127.0.0.1\n"\ 59 | " - name: fleet-server\n"\ 60 | " dns:\n"\ 61 | " - fleet-server\n"\ 62 | " - localhost\n"\ 63 | " ip:\n"\ 64 | " - 127.0.0.1\n"\ 65 | > config/certs/instances.yml; 66 | bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key; 67 | unzip config/certs/certs.zip -d config/certs; 68 | fi; 69 | echo "Setting file permissions" 70 | chown -R root:root config/certs; 71 | find . -type d -exec chmod 750 \{\} \;; 72 | find . -type f -exec chmod 640 \{\} \;; 73 | echo "Waiting for Elasticsearch availability"; 74 | until curl -s --cacert config/certs/ca/ca.crt https://es01:9200 | grep -q "missing authentication credentials"; do sleep 30; done; 75 | echo "Setting kibana_system password"; 76 | until curl -s -X POST --cacert config/certs/ca/ca.crt -u "elastic:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" https://es01:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_PASSWORD}\"}" | grep -q "^{}"; do sleep 10; done; 77 | echo "All done!"; 78 | ' 79 | healthcheck: 80 | test: ["CMD-SHELL", "[ -f config/certs/es01/es01.crt ]"] 81 | interval: 1s 82 | timeout: 5s 83 | retries: 120 84 | 85 | es01: 86 | depends_on: 87 | setup: 88 | condition: service_healthy 89 | image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION} 90 | labels: 91 | co.elastic.logs/module: elasticsearch 92 | volumes: 93 | - certs:/usr/share/elasticsearch/config/certs 94 | - esdata01:/usr/share/elasticsearch/data 95 | ports: 96 | - ${ES_PORT}:9200 97 | environment: 98 | - node.name=es01 99 | - 
cluster.name=${CLUSTER_NAME} 100 | - discovery.type=single-node 101 | - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} 102 | - bootstrap.memory_lock=true 103 | - xpack.security.enabled=true 104 | - xpack.security.http.ssl.enabled=true 105 | - xpack.security.http.ssl.key=certs/es01/es01.key 106 | - xpack.security.http.ssl.certificate=certs/es01/es01.crt 107 | - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt 108 | - xpack.security.transport.ssl.enabled=true 109 | - xpack.security.transport.ssl.key=certs/es01/es01.key 110 | - xpack.security.transport.ssl.certificate=certs/es01/es01.crt 111 | - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt 112 | - xpack.security.transport.ssl.verification_mode=certificate 113 | - xpack.license.self_generated.type=${LICENSE} 114 | mem_limit: ${ES_MEM_LIMIT} 115 | ulimits: 116 | memlock: 117 | soft: -1 118 | hard: -1 119 | healthcheck: 120 | test: 121 | [ 122 | "CMD-SHELL", 123 | "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'", 124 | ] 125 | interval: 10s 126 | timeout: 10s 127 | retries: 120 128 | 129 | kibana: 130 | depends_on: 131 | es01: 132 | condition: service_healthy 133 | image: docker.elastic.co/kibana/kibana:${STACK_VERSION} 134 | labels: 135 | co.elastic.logs/module: kibana 136 | volumes: 137 | - certs:/usr/share/kibana/config/certs 138 | - kibanadata:/usr/share/kibana/data 139 | - ./kibana.yml:/usr/share/kibana/config/kibana.yml:ro 140 | ports: 141 | - ${KIBANA_PORT}:5601 142 | environment: 143 | - SERVERNAME=kibana 144 | - ELASTICSEARCH_HOSTS=https://es01:9200 145 | - ELASTICSEARCH_USERNAME=kibana_system 146 | - ELASTICSEARCH_PASSWORD=${KIBANA_PASSWORD} 147 | - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt 148 | - XPACK_SECURITY_ENCRYPTIONKEY=${ENCRYPTION_KEY} 149 | - XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY=${ENCRYPTION_KEY} 150 | - XPACK_REPORTING_ENCRYPTIONKEY=${ENCRYPTION_KEY} 151 | - 
XPACK_REPORTING_KIBANASERVER_HOSTNAME=localhost 152 | - SERVER_SSL_ENABLED=true 153 | - SERVER_SSL_CERTIFICATE=config/certs/kibana/kibana.crt 154 | - SERVER_SSL_KEY=config/certs/kibana/kibana.key 155 | - SERVER_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt 156 | - ELASTIC_APM_SECRET_TOKEN=${ELASTIC_APM_SECRET_TOKEN} 157 | mem_limit: ${KB_MEM_LIMIT} 158 | healthcheck: 159 | test: 160 | [ 161 | "CMD-SHELL", 162 | "curl -I -s --cacert config/certs/ca/ca.crt https://localhost:5601 | grep -q 'HTTP/1.1 302 Found'", 163 | ] 164 | interval: 10s 165 | timeout: 10s 166 | retries: 120 167 | 168 | metricbeat01: 169 | depends_on: 170 | es01: 171 | condition: service_healthy 172 | kibana: 173 | condition: service_healthy 174 | image: docker.elastic.co/beats/metricbeat:${STACK_VERSION} 175 | user: root 176 | volumes: 177 | - certs:/usr/share/metricbeat/certs 178 | - metricbeatdata01:/usr/share/metricbeat/data 179 | - "./metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro" 180 | - "/var/run/docker.sock:/var/run/docker.sock:ro" 181 | - "/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro" 182 | - "/proc:/hostfs/proc:ro" 183 | - "/:/hostfs:ro" 184 | environment: 185 | - ELASTIC_USER=elastic 186 | - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} 187 | - ELASTIC_HOSTS=https://es01:9200 188 | - KIBANA_HOSTS=https://kibana:5601 189 | - LOGSTASH_HOSTS=http://logstash01:9600 190 | - CA_CERT=certs/ca/ca.crt 191 | - ES_CERT=certs/es01/es01.crt 192 | - ES_KEY=certs/es01/es01.key 193 | - KB_CERT=certs/kibana/kibana.crt 194 | - KB_KEY=certs/kibana/kibana.key 195 | command: 196 | -strict.perms=false 197 | 198 | filebeat01: 199 | depends_on: 200 | es01: 201 | condition: service_healthy 202 | image: docker.elastic.co/beats/filebeat:${STACK_VERSION} 203 | user: root 204 | volumes: 205 | - certs:/usr/share/filebeat/certs 206 | - filebeatdata01:/usr/share/filebeat/data 207 | - "./filebeat_ingest_data/:/usr/share/filebeat/ingest_data/" 208 | - "./filebeat.yml:/usr/share/filebeat/filebeat.yml:ro" 209 | - 
"/var/lib/docker/containers:/var/lib/docker/containers:ro" 210 | - "/var/run/docker.sock:/var/run/docker.sock:ro" 211 | environment: 212 | - ELASTIC_USER=elastic 213 | - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} 214 | - ELASTIC_HOSTS=https://es01:9200 215 | - KIBANA_HOSTS=https://kibana:5601 216 | - LOGSTASH_HOSTS=http://logstash01:9600 217 | - CA_CERT=certs/ca/ca.crt 218 | command: 219 | -strict.perms=false 220 | 221 | logstash01: 222 | depends_on: 223 | es01: 224 | condition: service_healthy 225 | kibana: 226 | condition: service_healthy 227 | image: docker.elastic.co/logstash/logstash:${STACK_VERSION} 228 | labels: 229 | co.elastic.logs/module: logstash 230 | user: root 231 | volumes: 232 | - certs:/usr/share/logstash/certs 233 | - logstashdata01:/usr/share/logstash/data 234 | - "./logstash_ingest_data/:/usr/share/logstash/ingest_data/" 235 | - "./logstash.conf:/usr/share/logstash/pipeline/logstash.conf:ro" 236 | environment: 237 | - xpack.monitoring.enabled=false 238 | - ELASTIC_USER=elastic 239 | - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} 240 | - ELASTIC_HOSTS=https://es01:9200 241 | 242 | fleet-server: 243 | depends_on: 244 | kibana: 245 | condition: service_healthy 246 | es01: 247 | condition: service_healthy 248 | image: docker.elastic.co/beats/elastic-agent:${STACK_VERSION} 249 | volumes: 250 | - certs:/certs 251 | - fleetserverdata:/usr/share/elastic-agent 252 | - "/var/lib/docker/containers:/var/lib/docker/containers:ro" 253 | - "/var/run/docker.sock:/var/run/docker.sock:ro" 254 | - "/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro" 255 | - "/proc:/hostfs/proc:ro" 256 | - "/:/hostfs:ro" 257 | ports: 258 | - ${FLEET_PORT}:8220 259 | - ${APMSERVER_PORT}:8200 260 | user: root 261 | environment: 262 | - SSL_CERTIFICATE_AUTHORITIES=/certs/ca/ca.crt 263 | - CERTIFICATE_AUTHORITIES=/certs/ca/ca.crt 264 | - FLEET_CA=/certs/ca/ca.crt 265 | - FLEET_ENROLL=1 266 | - FLEET_INSECURE=true 267 | - FLEET_SERVER_ELASTICSEARCH_CA=/certs/ca/ca.crt 268 | - 
FLEET_SERVER_ELASTICSEARCH_HOST=https://es01:9200 269 | - FLEET_SERVER_ELASTICSEARCH_INSECURE=true 270 | - FLEET_SERVER_ENABLE=1 271 | - FLEET_SERVER_CERT=/certs/fleet-server/fleet-server.crt 272 | - FLEET_SERVER_CERT_KEY=/certs/fleet-server/fleet-server.key 273 | - FLEET_SERVER_INSECURE_HTTP=true 274 | - FLEET_SERVER_POLICY_ID=fleet-server-policy 275 | - FLEET_URL=https://fleet-server:8220 276 | - KIBANA_FLEET_CA=/certs/ca/ca.crt 277 | - KIBANA_FLEET_SETUP=1 278 | - KIBANA_FLEET_USERNAME=elastic 279 | - KIBANA_FLEET_PASSWORD=${ELASTIC_PASSWORD} 280 | - KIBANA_HOST=https://kibana:5601 281 | 282 | webapp: 283 | build: 284 | context: app 285 | volumes: 286 | - "/var/lib/docker/containers:/var/lib/docker/containers:ro" 287 | - "/var/run/docker.sock:/var/run/docker.sock:ro" 288 | - "/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro" 289 | - "/proc:/hostfs/proc:ro" 290 | - "/:/hostfs:ro" 291 | ports: 292 | - 8000:8000 -------------------------------------------------------------------------------- /docker/filebeat.yml: -------------------------------------------------------------------------------- 1 | filebeat.inputs: 2 | - type: filestream 3 | id: default-filestream 4 | paths: 5 | - ingest_data/*.log 6 | 7 | filebeat.autodiscover: 8 | providers: 9 | - type: docker 10 | hints.enabled: true 11 | 12 | processors: 13 | - add_docker_metadata: ~ 14 | 15 | setup.kibana: 16 | host: ${KIBANA_HOSTS} 17 | username: ${ELASTIC_USER} 18 | password: ${ELASTIC_PASSWORD} 19 | 20 | output.elasticsearch: 21 | hosts: ${ELASTIC_HOSTS} 22 | username: ${ELASTIC_USER} 23 | password: ${ELASTIC_PASSWORD} 24 | ssl: 25 | enabled: true 26 | certificate_authorities: ${CA_CERT} 27 | -------------------------------------------------------------------------------- /docker/images/CACRT.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/CACRT.png -------------------------------------------------------------------------------- /docker/images/ComposeRunning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/ComposeRunning.png -------------------------------------------------------------------------------- /docker/images/DevConsole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/DevConsole.png -------------------------------------------------------------------------------- /docker/images/DockerDesktop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/DockerDesktop.png -------------------------------------------------------------------------------- /docker/images/DockerSolutionFolder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/DockerSolutionFolder.png -------------------------------------------------------------------------------- /docker/images/ELKStack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/ELKStack.png 
-------------------------------------------------------------------------------- /docker/images/ESServer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/ESServer.png -------------------------------------------------------------------------------- /docker/images/ElasticAgentMetrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/ElasticAgentMetrics.png -------------------------------------------------------------------------------- /docker/images/EnvSample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/EnvSample.png -------------------------------------------------------------------------------- /docker/images/FinalAgentConfiguration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/FinalAgentConfiguration.png -------------------------------------------------------------------------------- /docker/images/Fleet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/Fleet.png -------------------------------------------------------------------------------- /docker/images/GoodAgent.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/GoodAgent.png -------------------------------------------------------------------------------- /docker/images/InvalidAgent1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/InvalidAgent1.png -------------------------------------------------------------------------------- /docker/images/InvalidAgent2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/InvalidAgent2.png -------------------------------------------------------------------------------- /docker/images/InvalidAgent3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/InvalidAgent3.png -------------------------------------------------------------------------------- /docker/images/SaveAndDeploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/SaveAndDeploy.png -------------------------------------------------------------------------------- /docker/images/WelcomePageKibana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/docker/images/WelcomePageKibana.png 
-------------------------------------------------------------------------------- /docker/kibana.yml: -------------------------------------------------------------------------------- 1 | elastic: 2 | apm: 3 | active: true 4 | serverUrl: "http://fleet-server:8200" 5 | secretToken: ${ELASTIC_APM_SECRET_TOKEN} 6 | server.host: "0.0.0.0" 7 | telemetry.enabled: "true" 8 | xpack.fleet.packages: 9 | - name: fleet_server 10 | version: latest 11 | - name: system 12 | version: latest 13 | - name: elastic_agent 14 | version: latest 15 | - name: apm 16 | version: latest 17 | xpack.fleet.agentPolicies: 18 | - name: Fleet-Server-Policy 19 | id: fleet-server-policy 20 | namespace: default 21 | monitoring_enabled: 22 | - logs 23 | - metrics 24 | package_policies: 25 | - name: fleet_server-1 26 | package: 27 | name: fleet_server 28 | - name: system-1 29 | package: 30 | name: system 31 | - name: elastic_agent-1 32 | package: 33 | name: elastic_agent 34 | - name: apm-1 35 | package: 36 | name: apm 37 | inputs: 38 | - type: apm 39 | enabled: true 40 | vars: 41 | - name: host 42 | value: 0.0.0.0:8200 43 | - name: secret_token 44 | value: ${ELASTIC_APM_SECRET_TOKEN} -------------------------------------------------------------------------------- /docker/logstash.conf: -------------------------------------------------------------------------------- 1 | input { 2 | file { 3 | #https://www.elastic.co/guide/en/logstash/current/plugins-inputs-file.html 4 | #default is TAIL which assumes more data will come into the file. 5 | #change to mode => "read" if the file is a complete file. by default, the file will be removed once reading is complete -- backup your files if you need them. 
6 | mode => "tail" 7 | path => "/usr/share/logstash/ingest_data/*" 8 | } 9 | } 10 | 11 | filter { 12 | } 13 | 14 | output { 15 | elasticsearch { 16 | index => "logstash-%{+YYYY.MM.dd}" 17 | hosts=> "${ELASTIC_HOSTS}" 18 | user=> "${ELASTIC_USER}" 19 | password=> "${ELASTIC_PASSWORD}" 20 | cacert=> "certs/ca/ca.crt" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /docker/metricbeat.yml: -------------------------------------------------------------------------------- 1 | metricbeat.config.modules: 2 | path: ${path.config}/modules.d/*.yml 3 | reload.enabled: false 4 | 5 | metricbeat.modules: 6 | - module: elasticsearch 7 | xpack.enabled: true 8 | period: 10s 9 | hosts: ${ELASTIC_HOSTS} 10 | username: ${ELASTIC_USER} 11 | password: ${ELASTIC_PASSWORD} 12 | ssl: 13 | enabled: true 14 | certificate_authorities: ${CA_CERT} 15 | 16 | - module: logstash 17 | xpack.enabled: true 18 | period: 10s 19 | hosts: ${LOGSTASH_HOSTS} 20 | 21 | - module: kibana 22 | metricsets: 23 | - stats 24 | period: 10s 25 | hosts: ${KIBANA_HOSTS} 26 | username: ${ELASTIC_USER} 27 | password: ${ELASTIC_PASSWORD} 28 | xpack.enabled: true 29 | ssl: 30 | enabled: true 31 | certificate_authorities: ${CA_CERT} 32 | 33 | - module: docker 34 | metricsets: 35 | - "container" 36 | - "cpu" 37 | - "diskio" 38 | - "healthcheck" 39 | - "info" 40 | #- "image" 41 | - "memory" 42 | - "network" 43 | hosts: ["unix:///var/run/docker.sock"] 44 | period: 10s 45 | enabled: true 46 | 47 | processors: 48 | - add_host_metadata: ~ 49 | - add_docker_metadata: ~ 50 | 51 | output.elasticsearch: 52 | hosts: ${ELASTIC_HOSTS} 53 | username: ${ELASTIC_USER} 54 | password: ${ELASTIC_PASSWORD} 55 | ssl: 56 | enabled: true 57 | certificate_authorities: ${CA_CERT} 58 | 59 | -------------------------------------------------------------------------------- /icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/icon.png -------------------------------------------------------------------------------- /nuget-package.props: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 0.9.5 5 | 6 | 7 | Free Mind Labs and contributors 8 | Free Mind Labs, Inc. 9 | Free Mind Labs 10 | Kernel Memory adapter for Elasticsearch 11 | Elasticsearch connector for Microsoft Kernel Memory, to store and search memory using Elasticsearch vector indexing and Elasticsearch features. 12 | Copilot, Memory, RAG, Kernel Memory, Elasticsearch, AI, Artificial Intelligence, Embeddings, Vector DB, Vector Search, ETL 13 | $(AssemblyName) 14 | 15 | 16 | MIT 17 | © Free Mind Labs, Inc. All rights reserved. 18 | https://github.com/freemindlabsinc/FreeMindLabs.SemanticKernel 19 | https://github.com/freemindlabsinc/FreeMindLabs.SemanticKernel 20 | true 21 | 22 | 23 | icon.png 24 | icon.png 25 | NUGET.md 26 | 27 | 28 | true 29 | snupkg 30 | 31 | 32 | bin\$(Configuration)\$(TargetFramework)\$(AssemblyName).xml 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | true 49 | 50 | 51 | 52 | false 53 | 54 | 55 | -------------------------------------------------------------------------------- /nuget.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /packages/README.md: -------------------------------------------------------------------------------- 1 | # Package folder 2 | 3 | Do not remove this file -------------------------------------------------------------------------------- /src/ElasticsearchMemoryStorage/ConfigurationException.cs: 
/// <summary>
/// Exception thrown when the Elasticsearch configuration is invalid in appSettings, secrets, etc.
/// </summary>
public class ConfigurationException : ElasticsearchException
{
    /// <inheritdoc />
    public ConfigurationException() { }

    /// <inheritdoc />
    public ConfigurationException(string message) : base(message) { }

    /// <inheritdoc />
    public ConfigurationException(string message, Exception? innerException) : base(message, innerException) { }
}

/// <summary>
/// Base exception for all the exceptions thrown by the Elasticsearch connector for KernelMemory.
/// </summary>
public class ElasticsearchException : KernelMemoryException
{
    /// <inheritdoc />
    public ElasticsearchException() { }

    /// <inheritdoc />
    public ElasticsearchException(string message) : base(message) { }

    /// <inheritdoc />
    public ElasticsearchException(string message, Exception? innerException) : base(message, innerException) { }
}

/// <summary>
/// Exception thrown when an index name does not pass Elasticsearch validation.
/// </summary>
public class InvalidIndexNameException : ElasticsearchException
{
    /// <summary>
    /// Creates the exception from an index name and the validation errors it produced.
    /// The exception message lists the index name followed by the comma-separated errors.
    /// </summary>
    /// <param name="indexName">The index name that failed validation.</param>
    /// <param name="errors">The validation errors reported for <paramref name="indexName"/>.</param>
    /// <param name="innerException">The exception that caused this one, if any.</param>
    public InvalidIndexNameException(string indexName, IEnumerable<string> errors, Exception? innerException = default)
        : base($"The given index name '{indexName}' is invalid. {string.Join(", ", errors)}", innerException)
    {
        this.IndexName = indexName;
        this.Errors = errors;
    }

    /// <summary>
    /// Creates the exception from an (index name, errors) tuple.
    /// </summary>
    /// <param name="conversionResult">The index name and the validation errors reported for it.</param>
    /// <param name="innerException">The exception that caused this one, if any.</param>
    /// <remarks>
    /// Delegates to the main constructor so that the descriptive message and the inner
    /// exception are populated; assigning only the properties would leave the exception
    /// with a generic message and no inner exception.
    /// </remarks>
    public InvalidIndexNameException(
        (string IndexName, IEnumerable<string> Errors) conversionResult,
        Exception? innerException = default)
        : this(conversionResult.IndexName, conversionResult.Errors, innerException)
    {
    }

    /// <summary>
    /// The index name that failed validation.
    /// </summary>
    public string IndexName { get; }

    /// <summary>
    /// The list of errors that caused the validation to fail.
    /// </summary>
    public IEnumerable<string> Errors { get; }
}
/// <summary>
/// The builder for ElasticsearchConfig.
/// </summary>
public class ElasticsearchConfigBuilder
{
    /// <summary>
    /// The default Elasticsearch endpoint.
    /// </summary>
    public const string DefaultEndpoint = "https://localhost:9200";

    /// <summary>
    /// The default Elasticsearch username.
    /// </summary>
    public const string DefaultUserName = "elastic";

    /// <summary>
    /// The name of the section that will contain the configuration for Elasticsearch
    /// (e.g. appSettings.json, user secrets, etc.).
    /// </summary>
    public const string DefaultSettingsSection = "Elasticsearch";

    /// <summary>
    /// The default prefix to be prepended to the index names in Elasticsearch.
    /// </summary>
    public const string DefaultIndexPrefix = "km.";

    // Not readonly: WithConfiguration replaces the instance with one bound from configuration.
    private ElasticsearchConfig _config;

    /// <summary>
    /// Creates a builder pre-populated with the default endpoint, user name and index prefix.
    /// The password and certificate fingerprint start empty and must be supplied before
    /// a validated <see cref="Build"/>.
    /// </summary>
    public ElasticsearchConfigBuilder()
    {
        this._config = new ElasticsearchConfig();
        this.WithEndpoint(DefaultEndpoint)
            .WithIndexPrefix(DefaultIndexPrefix)
            .WithCertificateFingerPrint(string.Empty)
            .WithUserNameAndPassword(DefaultUserName, string.Empty);
    }

    /// <summary>
    /// Sets the Elasticsearch endpoint to connect to.
    /// </summary>
    /// <param name="endpoint">The endpoint URL, e.g. <c>https://localhost:9200</c>.</param>
    /// <returns>This builder, to allow chaining.</returns>
    public ElasticsearchConfigBuilder WithEndpoint(string endpoint)
    {
        // The value is checked in Validate(), so that configuration-bound endpoints are covered too.
        this._config.Endpoint = endpoint;
        return this;
    }

    /// <summary>
    /// Sets the username and password used to connect to Elasticsearch.
    /// </summary>
    /// <param name="userName">The user name.</param>
    /// <param name="password">The password.</param>
    /// <returns>This builder, to allow chaining.</returns>
    public ElasticsearchConfigBuilder WithUserNameAndPassword(string userName, string password)
    {
        this._config.UserName = userName;
        this._config.Password = password;
        return this;
    }

    /// <summary>
    /// Sets the certificate fingerprint used to communicate with Elasticsearch.
    /// </summary>
    /// <param name="certificateFingerPrint">The fingerprint of the Elasticsearch certificate.</param>
    /// <returns>This builder, to allow chaining.</returns>
    public ElasticsearchConfigBuilder WithCertificateFingerPrint(string certificateFingerPrint)
    {
        this._config.CertificateFingerPrint = certificateFingerPrint;
        return this;
    }

    /// <summary>
    /// Sets the prefix to be prepended to the index names in Elasticsearch.
    /// </summary>
    /// <param name="indexPrefix">The index name prefix.</param>
    /// <returns>This builder, to allow chaining.</returns>
    public ElasticsearchConfigBuilder WithIndexPrefix(string indexPrefix)
    {
        this._config.IndexPrefix = indexPrefix;
        return this;
    }

    /// <summary>
    /// Validates the Elasticsearch configuration.
    /// </summary>
    /// <returns>This builder, to allow chaining.</returns>
    /// <exception cref="ConfigurationException">
    /// The endpoint, user name, password or certificate fingerprint is missing,
    /// or the endpoint is not an absolute URI.
    /// </exception>
    public ElasticsearchConfigBuilder Validate()
    {
        const string Prefix = "Invalid Elasticsearch configuration: missing ";

        if (string.IsNullOrWhiteSpace(this._config.Endpoint))
        {
            throw new ConfigurationException(Prefix + $"{nameof(ElasticsearchConfig.Endpoint)}.");
        }

        // The endpoint is later handed to `new Uri(...)`; reject malformed values here so the
        // failure is reported as a configuration problem rather than a UriFormatException.
        if (!Uri.TryCreate(this._config.Endpoint, UriKind.Absolute, out _))
        {
            throw new ConfigurationException(
                $"Invalid Elasticsearch configuration: {nameof(ElasticsearchConfig.Endpoint)} '{this._config.Endpoint}' is not a valid absolute URI.");
        }

        if (string.IsNullOrWhiteSpace(this._config.UserName))
        {
            throw new ConfigurationException(Prefix + $"{nameof(ElasticsearchConfig.UserName)}.");
        }

        if (string.IsNullOrWhiteSpace(this._config.Password))
        {
            throw new ConfigurationException(Prefix + $"{nameof(ElasticsearchConfig.Password)}.");
        }

        if (string.IsNullOrWhiteSpace(this._config.CertificateFingerPrint))
        {
            throw new ConfigurationException(Prefix + $"{nameof(ElasticsearchConfig.CertificateFingerPrint)}.");
        }

        return this;
    }

    /// <summary>
    /// Reads the Elasticsearch configuration from the Services section of KernelMemory's configuration
    /// (<c>KernelMemory:Services:Elasticsearch</c>).
    /// </summary>
    /// <param name="configuration">The application configuration.</param>
    /// <returns>This builder, to allow chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="configuration"/> is null.</exception>
    /// <exception cref="ConfigurationException">The configuration section does not exist.</exception>
    /// <remarks>
    /// The current configuration is replaced by a new instance bound from the section, so values set
    /// through earlier <c>With*</c> calls are discarded; call those after this method to override.
    /// </remarks>
    public ElasticsearchConfigBuilder WithConfiguration(IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(configuration);

        const string SectionPath = "KernelMemory:Services:" + DefaultSettingsSection;

        var kmSvcEsSection = configuration.GetSection(SectionPath);
        if (!kmSvcEsSection.Exists())
        {
            throw new ConfigurationException($"Missing configuration section {SectionPath}.");
        }

        // One bind is enough: binding the section and binding by path target the same data.
        this._config = new ElasticsearchConfig();
        kmSvcEsSection.Bind(this._config);

        return this;
    }

    /// <summary>
    /// Sets the number of shards and replicas to use for the Elasticsearch index.
    /// </summary>
    /// <param name="shards">The number of shards.</param>
    /// <param name="replicas">The number of replicas.</param>
    /// <returns>This builder, to allow chaining.</returns>
    public ElasticsearchConfigBuilder WithShardsAndReplicas(int shards, int replicas)
    {
        this._config.ShardCount = shards;
        this._config.ReplicaCount = replicas;
        return this;
    }

    /// <summary>
    /// Builds the ElasticsearchConfig.
    /// </summary>
    /// <param name="skipValidation">Indicates if validation should be skipped.</param>
    /// <returns>The configured <see cref="ElasticsearchConfig"/> instance.</returns>
    /// <exception cref="ConfigurationException">
    /// Validation was not skipped and the configuration is incomplete or invalid.
    /// </exception>
    public ElasticsearchConfig Build(bool skipValidation = false)
    {
        if (!skipValidation)
        {
            this.Validate();
        }

        return this._config;
    }
}
16 | /// 17 | public static ElasticsearchClientSettings ToElasticsearchClientSettings(this ElasticsearchConfig config) 18 | { 19 | ArgumentNullException.ThrowIfNull(config, nameof(config)); 20 | 21 | // TODO: figure out the Dispose issue. It does not feel right. 22 | // See https://www.elastic.co/guide/en/elasticsearch/client/net-api/current/_options_on_elasticsearchclientsettings.html 23 | #pragma warning disable CA2000 // Dispose objects before losing scope 24 | return new ElasticsearchClientSettings(new Uri(config.Endpoint)) 25 | 26 | // TODO: this needs to be more flexible. 27 | .Authentication(new BasicAuthentication(config.UserName, config.Password)) 28 | .DisableDirectStreaming(true) 29 | // TODO: Not sure why I need this. Verify configuration maybe? 30 | .ServerCertificateValidationCallback((sender, certificate, chain, errors) => true) 31 | .CertificateFingerprint(config.CertificateFingerPrint) 32 | .ThrowExceptions(true) // Much easier to work with 33 | #if DEBUG 34 | .DisableDirectStreaming(true) 35 | #endif 36 | ; 37 | #pragma warning restore CA2000 // Dispose objects before losing scope 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/ElasticsearchMemoryStorage/ElasticsearchMemory.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved. 2 | 3 | using System.Runtime.CompilerServices; 4 | using Elastic.Clients.Elasticsearch; 5 | using Elastic.Clients.Elasticsearch.Mapping; 6 | using Elastic.Clients.Elasticsearch.QueryDsl; 7 | using Microsoft.Extensions.Logging; 8 | using Microsoft.KernelMemory; 9 | using Microsoft.KernelMemory.AI; 10 | using Microsoft.KernelMemory.Diagnostics; 11 | using Microsoft.KernelMemory.MemoryStorage; 12 | 13 | namespace FreeMindLabs.KernelMemory.Elasticsearch; 14 | 15 | /// 16 | /// Elasticsearch connector for Kernel Memory. 
/// <summary>
/// Elasticsearch connector for Kernel Memory.
/// </summary>
public class ElasticsearchMemory : IMemoryDb
{
    /// <summary>Number of results requested when the caller passes a negative limit.</summary>
    private const int DefaultLimit = 10;

    private readonly ITextEmbeddingGenerator _embeddingGenerator;
    private readonly IIndexNameHelper _indexNameHelper;
    private readonly ElasticsearchConfig _config;
    private readonly ILogger<ElasticsearchMemory> _log;
    private readonly ElasticsearchClient _client;

    /// <summary>
    /// Create a new instance of Elasticsearch KM connector
    /// </summary>
    /// <param name="config">Elasticsearch configuration</param>
    /// <param name="client">Elasticsearch client</param>
    /// <param name="embeddingGenerator">Embedding generator</param>
    /// <param name="indexNameHelper">Index name helper</param>
    /// <param name="log">Application logger</param>
    /// <exception cref="ArgumentNullException">Any required dependency is null.</exception>
    public ElasticsearchMemory(
        ElasticsearchConfig config,
        ElasticsearchClient client,
        ITextEmbeddingGenerator embeddingGenerator,
        IIndexNameHelper indexNameHelper,
        ILogger<ElasticsearchMemory>? log = null)
    {
        this._embeddingGenerator = embeddingGenerator ?? throw new ArgumentNullException(nameof(embeddingGenerator));
        this._indexNameHelper = indexNameHelper ?? throw new ArgumentNullException(nameof(indexNameHelper));
        this._config = config ?? throw new ArgumentNullException(nameof(config));
        // The client was the only dependency not validated; fail here instead of on first use.
        this._client = client ?? throw new ArgumentNullException(nameof(client));
        this._log = log ?? DefaultLogger<ElasticsearchMemory>.Instance;
    }

    /// <inheritdoc />
    public async Task CreateIndexAsync(
        string index,
        int vectorSize,
        CancellationToken cancellationToken = default)
    {
        index = this._indexNameHelper.Convert(index);

        var existsResponse = await this._client.Indices.ExistsAsync(index, cancellationToken).ConfigureAwait(false);
        if (existsResponse.Exists)
        {
            this._log.LogTrace("{MethodName}: Index {Index} already exists.", nameof(CreateIndexAsync), index);
            return;
        }

        var createIdxResponse = await this._client.Indices.CreateAsync(index,
            cfg =>
            {
                cfg.Settings(setts =>
                {
                    setts.NumberOfShards(this._config.ShardCount);
                    setts.NumberOfReplicas(this._config.ReplicaCount);
                });
            },
            cancellationToken).ConfigureAwait(false);

        if (!createIdxResponse.IsSuccess())
        {
            // Do not attempt the mapping (or report success) when the index was not created.
            this._log.LogError("{MethodName}: Index {Index} creation failed.", nameof(CreateIndexAsync), index);
            return;
        }

        var tagsProperty = new NestedProperty()
        {
            Properties = new Properties()
            {
                { ElasticsearchTag.NameField, new KeywordProperty() },
                { ElasticsearchTag.ValueField, new KeywordProperty() }
            }
        };

        var mapResponse = await this._client.Indices.PutMappingAsync(index, mapping => mapping
            .Properties<ElasticsearchMemoryRecord>(propDesc =>
            {
                propDesc.Keyword(r => r.Id);
                propDesc.Nested(ElasticsearchMemoryRecord.TagsField, tagsProperty);
                propDesc.Text(r => r.Payload, pd => pd.Index(false));
                propDesc.Text(r => r.Content);
                // Use the size requested by the caller; it used to be hardcoded to 1536,
                // which breaks any embedding model with a different dimensionality.
                propDesc.DenseVector(r => r.Vector, d => d.Index(true).Dims(vectorSize).Similarity("cosine"));

                this._config.ConfigureProperties?.Invoke(propDesc);
            }),
            cancellationToken).ConfigureAwait(false);

        if (!mapResponse.IsSuccess())
        {
            this._log.LogError("{MethodName}: Index {Index} mapping failed.", nameof(CreateIndexAsync), index);
            return;
        }

        this._log.LogTrace("{MethodName}: Index {Index} created.", nameof(CreateIndexAsync), index);
    }

    /// <inheritdoc />
    public async Task<IEnumerable<string>> GetIndexesAsync(
        CancellationToken cancellationToken = default)
    {
        string prefix = this._config.IndexPrefix ?? string.Empty;

        var resp = await this._client.Indices.GetAsync(prefix + "*", cancellationToken).ConfigureAwait(false);

        var names = resp.Indices
            .Select(x => StripPrefix(x.Key.ToString(), prefix))
            .ToHashSet(StringComparer.OrdinalIgnoreCase);

        this._log.LogTrace("{MethodName}: Returned {IndexCount} indices: {Indices}.", nameof(GetIndexesAsync), names.Count, string.Join(", ", names));

        return names;
    }

    /// <inheritdoc />
    public async Task DeleteIndexAsync(
        string index,
        CancellationToken cancellationToken = default)
    {
        index = this._indexNameHelper.Convert(index);

        var delResponse = await this._client.Indices.DeleteAsync(
            index,
            cancellationToken).ConfigureAwait(false);

        if (delResponse.IsSuccess())
        {
            this._log.LogTrace("{MethodName}: Index {Index} deleted.", nameof(DeleteIndexAsync), index);
        }
        else
        {
            this._log.LogWarning("{MethodName}: Index {Index} delete failed.", nameof(DeleteIndexAsync), index);
        }
    }

    /// <inheritdoc />
    public async Task DeleteAsync(
        string index,
        MemoryRecord record,
        CancellationToken cancellationToken = default)
    {
        index = this._indexNameHelper.Convert(index);

        record = record ?? throw new ArgumentNullException(nameof(record));

        var delResponse = await this._client.DeleteAsync<ElasticsearchMemoryRecord>(
            index,
            record.Id,
            (delReq) =>
            {
                delReq.Refresh(Refresh.WaitFor);
            },
            cancellationToken)
            .ConfigureAwait(false);

        if (delResponse.IsSuccess())
        {
            this._log.LogTrace("{MethodName}: Record {RecordId} deleted.", nameof(DeleteAsync), record.Id);
        }
        else
        {
            this._log.LogWarning("{MethodName}: Record {RecordId} delete failed.", nameof(DeleteAsync), record.Id);
        }
    }

    /// <inheritdoc />
    public async Task<string> UpsertAsync(
        string index,
        MemoryRecord record,
        CancellationToken cancellationToken = default)
    {
        index = this._indexNameHelper.Convert(index);

        var memRec = ElasticsearchMemoryRecord.FromMemoryRecord(record);

        var response = await this._client.UpdateAsync<ElasticsearchMemoryRecord, ElasticsearchMemoryRecord>(
            index,
            memRec.Id,
            (updateReq) =>
            {
                updateReq.Refresh(Refresh.WaitFor);
                updateReq.Doc(memRec);
                updateReq.DocAsUpsert(true);
            },
            cancellationToken)
            .ConfigureAwait(false);

        if (response.IsSuccess())
        {
            this._log.LogTrace("{MethodName}: Record {RecordId} upserted.", nameof(UpsertAsync), memRec.Id);
        }
        else
        {
            this._log.LogError("{MethodName}: Record {RecordId} upsert failed.", nameof(UpsertAsync), memRec.Id);
        }

        return response.Id;
    }

    /// <inheritdoc />
    public async IAsyncEnumerable<(MemoryRecord, double)> GetSimilarListAsync(
        string index,
        string text,
        ICollection<MemoryFilter>? filters = null,
        double minRelevance = 0,
        int limit = 1,
        bool withEmbeddings = false,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        if (limit < 0)
        {
            limit = DefaultLimit;
        }

        index = this._indexNameHelper.Convert(index);

        this._log.LogTrace("{MethodName}: Searching for '{Text}' on index '{IndexName}' with filters {Filters}. {MinRelevance} {Limit} {WithEmbeddings}",
            nameof(GetSimilarListAsync), text, index, filters.ToDebugString(), minRelevance, limit, withEmbeddings);

        Embedding qembed = await this._embeddingGenerator.GenerateEmbeddingAsync(text, cancellationToken).ConfigureAwait(false);
        var queryVector = qembed.Data.ToArray();

        var resp = await this._client.SearchAsync<ElasticsearchMemoryRecord>(s =>
            s.Index(index)
             .Knn(qd =>
             {
                 qd.k(limit)
                   .Filter(q => this.ConvertTagFilters(q, filters))
                   .NumCandidates(limit + 100)
                   .Field(x => x.Vector)
                   .QueryVector(queryVector);
             }),
            cancellationToken)
            .ConfigureAwait(false);

        if ((resp.HitsMetadata is null) || (resp.HitsMetadata.Hits is null))
        {
            this._log.LogWarning("The search returned a null result. Should retry?");
            yield break;
        }

        foreach (var hit in resp.HitsMetadata.Hits)
        {
            if (hit?.Source == null)
            {
                continue;
            }

            double score = hit.Score ?? 0;

            // Honor minRelevance, which used to be ignored. The raw Elasticsearch score is
            // compared as-is. NOTE(review): for cosine kNN the score is presumably
            // (1 + cosine) / 2, which would make this filter lenient rather than strict; confirm.
            if (score < minRelevance)
            {
                continue;
            }

            this._log.LogTrace("{MethodName} Hit: {HitScore}, {HitId}", nameof(GetSimilarListAsync), hit.Score, hit.Id);

            // Honor withEmbeddings, which used to be ignored (vectors were always returned).
            yield return (hit.Source.ToMemoryRecord(withEmbeddings), score);
        }
    }

    /// <inheritdoc />
    public async IAsyncEnumerable<MemoryRecord> GetListAsync(
        string index,
        ICollection<MemoryFilter>? filters = null,
        int limit = 1,
        bool withEmbeddings = false,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        this._log.LogTrace("{MethodName}: querying index '{IndexName}' with filters {Filters}. {Limit} {WithEmbeddings}",
            nameof(GetListAsync), index, filters.ToDebugString(), limit, withEmbeddings);

        if (limit < 0)
        {
            limit = DefaultLimit;
        }

        index = this._indexNameHelper.Convert(index);

        var resp = await this._client.SearchAsync<ElasticsearchMemoryRecord>(s =>
            s.Index(index)
             .Size(limit)
             .Query(qd =>
             {
                 this.ConvertTagFilters(qd, filters);
             }),
            cancellationToken)
            .ConfigureAwait(false);

        if ((resp.HitsMetadata is null) || (resp.HitsMetadata.Hits is null))
        {
            yield break;
        }

        foreach (var hit in resp.HitsMetadata.Hits)
        {
            if (hit?.Source == null)
            {
                continue;
            }

            this._log.LogTrace("{MethodName} Hit: {HitScore}, {HitId}", nameof(GetListAsync), hit.Score, hit.Id);
            yield return hit.Source.ToMemoryRecord(withEmbeddings);
        }
    }

    /// <summary>
    /// Removes the configured index prefix from the start of an index name.
    /// </summary>
    /// <remarks>
    /// Only a leading occurrence is removed. An empty prefix is a no-op; the previous
    /// <c>string.Replace</c> call threw on an empty prefix and also removed occurrences
    /// of the prefix from the middle of the name.
    /// </remarks>
    private static string StripPrefix(string indexName, string prefix)
    {
        if (prefix.Length > 0 && indexName.StartsWith(prefix, StringComparison.Ordinal))
        {
            return indexName.Substring(prefix.Length);
        }

        return indexName;
    }

    /// <summary>
    /// Translates Kernel Memory tag filters into an Elasticsearch query on the nested tags field.
    /// </summary>
    /// <remarks>
    /// Tags inside one filter are AND-ed; the filters themselves are OR-ed. The previous
    /// implementation overwrote the descriptor on every iteration, so with more than one
    /// filter only the last one was actually applied.
    /// </remarks>
    /// <param name="qd">Query descriptor to configure.</param>
    /// <param name="filters">Filters to apply; null, empty, or all-empty filters match everything.</param>
    /// <returns>The same descriptor, for chaining.</returns>
    private QueryDescriptor<ElasticsearchMemoryRecord> ConvertTagFilters(
        QueryDescriptor<ElasticsearchMemoryRecord> qd,
        ICollection<MemoryFilter>? filters = null)
    {
        // Filters without any key carry no condition and are dropped.
        var activeFilters = filters?.Where(f => f.Keys.Count > 0).ToList();

        if ((activeFilters == null) || (activeFilters.Count == 0))
        {
            qd.MatchAll();
            return qd;
        }

        var perFilterQueries = new List<Query>();

        foreach (MemoryFilter filter in activeFilters)
        {
            Query? filterQuery = null;

            foreach (var tagName in filter.Keys)
            {
                List<string?> tagValues = filter[tagName];
                List<FieldValue> terms = tagValues.Select(x => (FieldValue)(x ?? FieldValue.Null))
                                                  .ToList();

                // Name and value must match on the SAME nested tag object.
                Query tagQuery = new TermQuery(ElasticsearchMemoryRecord.Tags_Name) { Value = tagName };
                tagQuery &= new TermsQuery()
                {
                    Field = ElasticsearchMemoryRecord.Tags_Value,
                    Terms = new TermsQueryField(terms)
                };

                Query nestedQuery = new NestedQuery
                {
                    Path = ElasticsearchMemoryRecord.TagsField,
                    Query = tagQuery
                };

                filterQuery = filterQuery is null ? nestedQuery : filterQuery & nestedQuery;
            }

            if (filterQuery is not null)
            {
                perFilterQueries.Add(filterQuery);
            }
        }

        // A bool query made only of "should" clauses requires at least one of them to match.
        qd.Bool(bq => bq.Should(perFilterQueries.ToArray()));

        return qd;
    }
}
/// <summary>
/// Elasticsearch record: the document shape stored in (and read from) an Elasticsearch index.
/// </summary>
public sealed class ElasticsearchMemoryRecord
{
    internal const string IdField = "id";
    internal const string EmbeddingField = "embedding";

    /// <summary>Name of the nested field that stores the record tags.</summary>
    public const string TagsField = "tags";

    /// <summary>Full path of the tag name inside the nested tags field.</summary>
    /// <remarks>Built from the tag's JSON field name instead of a culture-sensitive ToLower() call.</remarks>
    internal static readonly string Tags_Name = TagsField + "." + ElasticsearchTag.NameField;

    /// <summary>Full path of the tag value inside the nested tags field.</summary>
    internal static readonly string Tags_Value = TagsField + "." + ElasticsearchTag.ValueField;

    private const string PayloadField = "payload";
    private const string ContentField = "content";

    /// <summary>Payload key whose value is moved to <see cref="Content"/>.</summary>
    private const string TextPayloadKey = "text";

    private static readonly JsonSerializerOptions s_jsonOptions = new()
    {
        AllowTrailingCommas = true,
        MaxDepth = 10,
        PropertyNameCaseInsensitive = true,
        ReadCommentHandling = JsonCommentHandling.Disallow,
        WriteIndented = false
    };

    /// <summary>
    /// Record identifier.
    /// </summary>
    [JsonPropertyName(IdField)]
    public string Id { get; set; } = string.Empty;

    /// <summary>
    /// Tags, flattened to one entry per (name, value) pair.
    /// </summary>
    [JsonPropertyName(TagsField)]
    public List<ElasticsearchTag> Tags { get; set; } = new();

    /// <summary>
    /// The record payload serialized as JSON, without the text entry.
    /// </summary>
    [JsonPropertyName(PayloadField)]
    public string Payload { get; set; } = string.Empty;

    /// <summary>
    /// The text taken from the payload's "text" entry.
    /// </summary>
    [JsonPropertyName(ContentField)]
    public string Content { get; set; } = string.Empty;

    /// <summary>
    /// The embedding vector.
    /// </summary>
    [JsonPropertyName(EmbeddingField)]
    [JsonConverter(typeof(Embedding.JsonConverter))]
    public Embedding Vector { get; set; } = new();

    /// <summary>
    /// Converts this document back into a Kernel Memory <see cref="MemoryRecord"/>.
    /// </summary>
    /// <param name="withEmbedding">When false, the vector is not copied to the result.</param>
    /// <returns>A record whose payload contains the deserialized payload plus the "text" entry.</returns>
    public MemoryRecord ToMemoryRecord(bool withEmbedding = true)
    {
        // An empty payload (the property default) is not valid JSON; treat it as "no payload"
        // instead of letting the deserializer throw.
        Dictionary<string, object>? payload = string.IsNullOrWhiteSpace(this.Payload)
            ? null
            : JsonSerializer.Deserialize<Dictionary<string, object>>(this.Payload, s_jsonOptions);

        MemoryRecord result = new()
        {
            Id = this.Id,
            Payload = payload ?? new Dictionary<string, object>()
        };

        result.Payload[TextPayloadKey] = this.Content;

        if (withEmbedding)
        {
            result.Vector = this.Vector;
        }

        foreach (var tag in this.Tags)
        {
            result.Tags.Add(tag.Name, tag.Value);
        }

        return result;
    }

    /// <summary>
    /// Creates an Elasticsearch document from a Kernel Memory <see cref="MemoryRecord"/>.
    /// </summary>
    /// <param name="record">The record to convert. It is not modified.</param>
    /// <returns>The document to index.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="record"/> is null.</exception>
    public static ElasticsearchMemoryRecord FromMemoryRecord(MemoryRecord record)
    {
        ArgumentNullException.ThrowIfNull(record);

        // A missing "text" entry yields empty content instead of a KeyNotFoundException.
        string content = record.Payload.TryGetValue(TextPayloadKey, out object? text)
            ? text?.ToString() ?? string.Empty
            : string.Empty;

        // The text moves to the content field, so it is not indexed twice. Work on a copy:
        // the previous code removed the entry from the caller's own payload.
        var payload = new Dictionary<string, object>(record.Payload, record.Payload.Comparer);
        payload.Remove(TextPayloadKey);

        ElasticsearchMemoryRecord result = new()
        {
            Id = record.Id,
            Vector = record.Vector,
            Payload = JsonSerializer.Serialize(payload, s_jsonOptions),
            Content = content
        };

        foreach (var tag in record.Tags)
        {
            if ((tag.Value == null) || (tag.Value.Count == 0))
            {
                // Key only, with no values
                result.Tags.Add(new ElasticsearchTag(name: tag.Key));
                continue;
            }

            foreach (var value in tag.Value)
            {
                // Key with one or more values
                result.Tags.Add(new ElasticsearchTag(name: tag.Key, value: value));
            }
        }

        return result;
    }
}
/// <summary>
/// A single (name, value) tag as stored inside an Elasticsearch record.
/// </summary>
public class ElasticsearchTag
{
    /// <summary>JSON field name used for <see cref="Name"/>.</summary>
    public const string NameField = "name";

    /// <summary>JSON field name used for <see cref="Value"/>.</summary>
    public const string ValueField = "value";

    /// <summary>
    /// Creates a tag with the given name and an optional value.
    /// </summary>
    /// <param name="name">Tag name; must not be null.</param>
    /// <param name="value">Tag value, or null for a name-only tag.</param>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
    public ElasticsearchTag(string name, string? value = default)
    {
        if (name is null)
        {
            throw new ArgumentNullException(nameof(name));
        }

        this.Name = name;
        this.Value = value;
    }

    /// <summary>
    /// The name of this tag.
    /// </summary>
    [JsonPropertyName(NameField)]
    public string Name { get; set; } = string.Empty;

    /// <summary>
    /// The value of this tag.
    /// </summary>
    [JsonPropertyName(ValueField)]
    public string? Value { get; set; }

    /// <summary>
    /// Formats the tag as <c>name=value</c>; a null value renders as an empty string.
    /// </summary>
    public override string ToString() => string.Concat(this.Name, "=", this.Value);
}
/// <summary>
/// Kernel Memory Builder extension method to add the Elasticsearch memory connector.
/// </summary>
/// <param name="builder">The IKernelMemoryBuilder instance</param>
/// <param name="configuration">The Elasticsearch configuration</param>
/// <returns>The same builder, for chaining.</returns>
/// <exception cref="ArgumentNullException"><paramref name="builder"/> or <paramref name="configuration"/> is null.</exception>
public static IKernelMemoryBuilder WithElasticsearch(this IKernelMemoryBuilder builder,
    ElasticsearchConfig configuration)
{
    // Validate up front so callers get an ArgumentNullException naming the right
    // parameter instead of a NullReferenceException on builder.Services.
    ArgumentNullException.ThrowIfNull(builder, nameof(builder));
    ArgumentNullException.ThrowIfNull(configuration, nameof(configuration));

    builder.Services.AddElasticsearchAsVectorDb(configuration);

    return builder;
}

/// <summary>
/// Extension method to add the Elasticsearch memory connector, configured through a builder callback.
/// </summary>
/// <param name="builder">The IKernelMemoryBuilder instance</param>
/// <param name="configure">Callback that fills in the Elasticsearch configuration builder.</param>
/// <returns>The same builder, for chaining.</returns>
/// <exception cref="ArgumentNullException"><paramref name="builder"/> or <paramref name="configure"/> is null.</exception>
public static IKernelMemoryBuilder WithElasticsearch(this IKernelMemoryBuilder builder,
    Action<ElasticsearchConfigBuilder> configure)
{
    ArgumentNullException.ThrowIfNull(builder, nameof(builder));
    ArgumentNullException.ThrowIfNull(configure, nameof(configure));

    var cfg = new ElasticsearchConfigBuilder();
    configure(cfg);

    builder.Services.AddElasticsearchAsVectorDb(cfg.Build());
    return builder;
}
/// <summary>
/// Extensions for KernelMemoryBuilder and generic DI
/// </summary>
public static partial class ServiceCollectionExtensions
{
    /// <summary>
    /// Inject Elasticsearch as the default implementation of IMemoryDb
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <param name="esConfig">The Elasticsearch configuration, registered as a singleton.</param>
    /// <returns>The same service collection, for chaining.</returns>
    public static IServiceCollection AddElasticsearchAsVectorDb(this IServiceCollection services,
        ElasticsearchConfig esConfig)
    {
        ArgumentNullException.ThrowIfNull(esConfig, nameof(esConfig));

        // The ElasticsearchClient type is thread-safe and can be shared and
        // reused across multiple threads in consuming applications.
        // See https://www.elastic.co/guide/en/elasticsearch/client/net-api/current/recommendations.html
        services.AddSingleton(provider =>
        {
            // The configuration is resolved from the container, not captured from the parameter.
            ElasticsearchConfig registeredConfig = provider.GetRequiredService<ElasticsearchConfig>();
            ElasticsearchClientSettings settings = registeredConfig.ToElasticsearchClientSettings();
            return new ElasticsearchClient(settings);
        });

        services.AddSingleton<IIndexNameHelper, IndexNameHelper>();
        services.AddSingleton(esConfig);
        services.AddSingleton<IMemoryDb, ElasticsearchMemory>();

        return services;
    }

    /// <summary>
    /// Inject Elasticsearch as the default implementation of IMemoryDb
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <param name="configure">Callback that fills in the Elasticsearch configuration builder.</param>
    /// <returns>The same service collection, for chaining.</returns>
    public static IServiceCollection AddElasticsearchAsVectorDb(this IServiceCollection services,
        Action<ElasticsearchConfigBuilder> configure)
    {
        ArgumentNullException.ThrowIfNull(configure, nameof(configure));

        var configBuilder = new ElasticsearchConfigBuilder();
        configure(configBuilder);
        ElasticsearchConfig builtConfig = configBuilder.Build();

        return services.AddElasticsearchAsVectorDb(builtConfig);
    }
}
/// <inheritdoc />
public class IndexNameHelper : IIndexNameHelper
{
    /// <summary>
    /// Creates a helper that prepends the configured index prefix to every index name.
    /// </summary>
    /// <param name="config">Elasticsearch configuration providing the index prefix.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    public IndexNameHelper(ElasticsearchConfig config)
    {
        ArgumentNullException.ThrowIfNull(config);

        this.IndexPrefix = config.IndexPrefix;
    }

    /// <summary>
    /// The prefix to use for all index names.
    /// </summary>
    public string IndexPrefix { get; }

    /// <inheritdoc />
    public bool TryConvert(string indexName, out (string ActualIndexName, IEnumerable<string> Errors) result)
    {
        // Convert to lowercase and replace underscores with hyphens to
        // have a consistent behavior with other storage types supported by Kernel Memory. (see #18)
        // Lowercasing is culture-invariant so the same input maps to the same index name
        // on every machine (a culture-sensitive ToLower() differs e.g. under tr-TR).
        indexName = (this.IndexPrefix + indexName)
            .Replace("_", "-", StringComparison.Ordinal)
            .Trim()
            .ToLowerInvariant();

        // Nothing left after prefixing and trimming: fall back to the default index name.
        if (string.IsNullOrWhiteSpace(indexName))
        {
            result = ("default", Array.Empty<string>());
            return true;
        }

        var errors = new List<string>();

        // Check for invalid start characters. Underscores were already replaced with
        // hyphens above, so only the hyphen can still occur here.
        if (indexName.StartsWith('-'))
        {
            errors.Add("An index name cannot start with a hyphen (-) or underscore (_).");
        }

        // Check for invalid characters
        if (indexName.Any(x => !char.IsLetterOrDigit(x) && x != '-'))
        {
            errors.Add("An index name can only contain letters, digits, and hyphens (-).");
        }

        // Check for length (max 255 bytes)
        if (System.Text.Encoding.UTF8.GetByteCount(indexName) > 255)
        {
            errors.Add("An index name cannot be longer than 255 bytes.");
        }

        // Avoid names that are dot-only or dot and numbers
        if (indexName.All(c => c == '.' || char.IsDigit(c)))
        {
            errors.Add("Index name cannot be only dots or dots and numbers.");
        }

        if (errors.Count > 0)
        {
            result = (string.Empty, errors);
            return false;
        }

        result = (indexName, Array.Empty<string>());
        return true;
    }

    /// <inheritdoc />
    public string Convert(string indexName)
    {
        if (!this.TryConvert(indexName, out var result))
        {
            throw new InvalidIndexNameException(result);
        }

        return result.ActualIndexName;
    }
}
It belongs to group 14 of the periodic table.[14] Carbon makes up about 0.025 percent of Earth's crust.[15] Three isotopes occur naturally, 12C and 13C being stable, while 14C is a radionuclide, decaying with a half-life of about 5,730 years.[16] Carbon is one of the few elements known since antiquity.[17] 2 | 3 | Carbon is the 15th most abundant element in the Earth's crust, and the fourth most abundant element in the universe by mass after hydrogen, helium, and oxygen. Carbon's abundance, its unique diversity of organic compounds, and its unusual ability to form polymers at the temperatures commonly encountered on Earth, enables this element to serve as a common element of all known life. It is the second most abundant element in the human body by mass (about 18.5%) after oxygen.[18] 4 | 5 | The atoms of carbon can bond together in diverse ways, resulting in various allotropes of carbon. Well-known allotropes include graphite, diamond, amorphous carbon, and fullerenes. The physical properties of carbon vary widely with the allotropic form. For example, graphite is opaque and black, while diamond is highly transparent. Graphite is soft enough to form a streak on paper (hence its name, from the Greek verb "γράφειν" which means "to write"), while diamond is the hardest naturally occurring material known. Graphite is a good electrical conductor while diamond has a low electrical conductivity. Under normal conditions, diamond, carbon nanotubes, and graphene have the highest thermal conductivities of all known materials. All carbon allotropes are solids under normal conditions, with graphite being the most thermodynamically stable form at standard temperature and pressure. They are chemically resistant and require high temperature to react even with oxygen. 6 | 7 | The most common oxidation state of carbon in inorganic compounds is +4, while +2 is found in carbon monoxide and transition metal carbonyl complexes. 
The largest sources of inorganic carbon are limestones, dolomites and carbon dioxide, but significant quantities occur in organic deposits of coal, peat, oil, and methane clathrates. Carbon forms a vast number of compounds, with about two hundred million having been described and indexed;[19] and yet that number is but a fraction of the number of theoretically possible compounds under standard conditions. 8 | 9 | The allotropes of carbon include graphite, one of the softest known substances, and diamond, the hardest naturally occurring substance. It bonds readily with other small atoms, including other carbon atoms, and is capable of forming multiple stable covalent bonds with suitable multivalent atoms. Carbon is a component element in the large majority of all chemical compounds, with about two hundred million examples having been described in the published chemical literature.[19] Carbon also has the highest sublimation point of all elements. At atmospheric pressure it has no melting point, as its triple point is at 10.8 ± 0.2 megapascals (106.6 ± 2.0 atm; 1,566 ± 29 psi) and 4,600 ± 300 K (4,330 ± 300 °C; 7,820 ± 540 °F),[3][4] so it sublimes at about 3,900 K (3,630 °C; 6,560 °F).[21][22] Graphite is much more reactive than diamond at standard conditions, despite being more thermodynamically stable, as its delocalised pi system is much more vulnerable to attack. For example, graphite can be oxidised by hot concentrated nitric acid at standard conditions to mellitic acid, C6(CO2H)6, which preserves the hexagonal units of graphite while breaking up the larger structure.[23] 10 | 11 | Carbon sublimes in a carbon arc, which has a temperature of about 5800 K (5,530 °C or 9,980 °F). Thus, irrespective of its allotropic form, carbon remains solid at higher temperatures than the highest-melting-point metals such as tungsten or rhenium. 
Although thermodynamically prone to oxidation, carbon resists oxidation more effectively than elements such as iron and copper, which are weaker reducing agents at room temperature. 12 | 13 | Carbon is the sixth element, with a ground-state electron configuration of 1s22s22p2, of which the four outer electrons are valence electrons. Its first four ionisation energies, 1086.5, 2352.6, 4620.5 and 6222.7 kJ/mol, are much higher than those of the heavier group-14 elements. The electronegativity of carbon is 2.5, significantly higher than the heavier group-14 elements (1.8–1.9), but close to most of the nearby nonmetals, as well as some of the second- and third-row transition metals. Carbon's covalent radii are normally taken as 77.2 pm (C−C), 66.7 pm (C=C) and 60.3 pm (C≡C), although these may vary depending on coordination number and what the carbon is bonded to. In general, covalent radius decreases with lower coordination number and higher bond order.[24] 14 | 15 | Carbon-based compounds form the basis of all known life on Earth, and the carbon-nitrogen-oxygen cycle provides a small portion of the energy produced by the Sun, and most of the energy in larger stars (e.g. Sirius). Although it forms an extraordinary variety of compounds, most forms of carbon are comparatively unreactive under normal conditions. At standard temperature and pressure, it resists all but the strongest oxidizers. It does not react with sulfuric acid, hydrochloric acid, chlorine or any alkalis. At elevated temperatures, carbon reacts with oxygen to form carbon oxides and will rob oxygen from metal oxides to leave the elemental metal. This exothermic reaction is used in the iron and steel industry to smelt iron and to control the carbon content of steel: 16 | 17 | Fe 18 | 3O 19 | 4 + 4 C(s) + 2 O 20 | 2 → 3 Fe(s) + 4 CO 21 | 2(g). 
22 | 23 | Carbon reacts with sulfur to form carbon disulfide, and it reacts with steam in the coal-gas reaction used in coal gasification: 24 | 25 | C(s) + H2O(g) → CO(g) + H2(g). 26 | 27 | Carbon combines with some metals at high temperatures to form metallic carbides, such as the iron carbide cementite in steel and tungsten carbide, widely used as an abrasive and for making hard tips for cutting tools. 28 | 29 | -------------------------------------------------------------------------------- /tests/UnitTests/Data/file2-Wikipedia-Moon.txt: -------------------------------------------------------------------------------- 1 | The Moon is Earth's only natural satellite. Its diameter is about one-quarter of Earth's (comparable to the width of Australia),[17] making it the fifth largest satellite in the Solar System and the largest and most massive relative to its parent planet. It is larger than all known dwarf planets in the Solar System.[18] The Moon is a planetary-mass object with a differentiated rocky body, making it a satellite planet under the geophysical definitions of the term. It lacks any significant atmosphere, hydrosphere, or magnetic field. Its surface gravity is about one-sixth of Earth's at 0.1654 g—Jupiter's moon Io is the only satellite in the Solar System known to have a higher surface gravity and density. 2 | 3 | The Moon orbits Earth at an average distance of 384,400 km (238,900 mi), or about 30 times Earth's diameter. Its gravitational influence is the main driver of Earth's tides and very slowly lengthens Earth's day. The Moon's orbit around Earth has a sidereal period of 27.3 days. 
During each synodic period of 29.5 days, the amount of the Moon's Earth-facing surface that is illuminated by the Sun varies from none up to nearly 100%, resulting in lunar phases that form the basis for the months of a lunar calendar.[19] The Moon is tidally locked to Earth, which means that the length of a full rotation of the Moon on its own axis causes its same side (the near side) to always face Earth, and the somewhat longer lunar day is the same as the synodic period. Due to cyclical shifts in perspective (libration), 59% of the lunar surface is visible from Earth. -------------------------------------------------------------------------------- /tests/UnitTests/Data/file3-lorem-ipsum.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/tests/UnitTests/Data/file3-lorem-ipsum.docx -------------------------------------------------------------------------------- /tests/UnitTests/Data/file4-SK-Readme.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/tests/UnitTests/Data/file4-SK-Readme.pdf -------------------------------------------------------------------------------- /tests/UnitTests/Data/file5-NASA-news.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/tests/UnitTests/Data/file5-NASA-news.pdf -------------------------------------------------------------------------------- /tests/UnitTests/Data/file6-ANWC-image.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/freemindlabsinc/FreeMindLabs.KernelMemory.Elasticsearch/2e866b13eec18ecc2635e7498d7824f4ad389627/tests/UnitTests/Data/file6-ANWC-image.jpg -------------------------------------------------------------------------------- /tests/UnitTests/DataStorageTests.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved. 2 | using System.Globalization; 3 | using Elastic.Clients.Elasticsearch; 4 | using FreeMindLabs.KernelMemory.Elasticsearch; 5 | using Microsoft.KernelMemory; 6 | using Microsoft.KernelMemory.AI; 7 | using Microsoft.KernelMemory.DataFormats.Text; 8 | using Microsoft.KernelMemory.MemoryStorage; 9 | using Xunit; 10 | using Xunit.Abstractions; 11 | 12 | namespace UnitTests; 13 | 14 | public class DataStorageTests : ElasticsearchTestBase 15 | { 16 | public DataStorageTests(ITestOutputHelper output, IMemoryDb memoryDb, ITextEmbeddingGenerator textEmbeddingGenerator, ElasticsearchClient client, 17 | IIndexNameHelper indexNameHelper) 18 | : base(output, client, indexNameHelper) 19 | { 20 | this.MemoryDb = memoryDb ?? throw new ArgumentNullException(nameof(memoryDb)); 21 | this.TextEmbeddingGenerator = textEmbeddingGenerator ?? 
throw new ArgumentNullException(nameof(textEmbeddingGenerator)); 22 | } 23 | 24 | public IMemoryDb MemoryDb { get; } 25 | public ITextEmbeddingGenerator TextEmbeddingGenerator { get; } 26 | 27 | [Fact] 28 | public async Task CanUpsertOneTextDocumentAndDeleteAsync() 29 | { 30 | // We upsert the file 31 | var docIds = await DataStorageTests.UpsertTextFilesAsync( 32 | memoryDb: this.MemoryDb, 33 | textEmbeddingGenerator: this.TextEmbeddingGenerator, 34 | output: this.Output, 35 | indexName: nameof(CanUpsertOneTextDocumentAndDeleteAsync), 36 | fileNames: new[] 37 | { 38 | "Data/file1-Wikipedia-Carbon.txt" 39 | }).ConfigureAwait(false); 40 | 41 | // Deletes the document 42 | var deletes = docIds.Select(id => new MemoryRecord() 43 | { 44 | Id = id 45 | }); 46 | 47 | foreach (var deleteRec in deletes) 48 | { 49 | await this.MemoryDb.DeleteAsync(nameof(CanUpsertOneTextDocumentAndDeleteAsync), deleteRec) 50 | .ConfigureAwait(false); 51 | } 52 | 53 | // Verfies that the documents are gone 54 | var indexName = this.IndexNameHelper.Convert(nameof(CanUpsertOneTextDocumentAndDeleteAsync)); 55 | var res = await this.Client.CountAsync(r => r.Index(indexName)) 56 | .ConfigureAwait(false); 57 | Assert.Equal(0, res.Count); 58 | } 59 | 60 | [Fact] 61 | public async Task CanUpsertTwoTextFilesAndGetSimilarListAsync() 62 | { 63 | await DataStorageTests.UpsertTextFilesAsync( 64 | memoryDb: this.MemoryDb, 65 | textEmbeddingGenerator: this.TextEmbeddingGenerator, 66 | output: this.Output, 67 | indexName: nameof(CanUpsertTwoTextFilesAndGetSimilarListAsync), 68 | fileNames: new[] 69 | { 70 | "Data/file1-Wikipedia-Carbon.txt", 71 | "Data/file2-Wikipedia-Moon.txt" 72 | }).ConfigureAwait(false); 73 | 74 | // Gets documents that are similar to the word "carbon" . 
75 | var foundSomething = false; 76 | // The flag is flipped by the first hit so that the assertion below reports the real outcome. 77 | var textToMatch = "carbon"; 78 | await foreach (var result in this.MemoryDb.GetSimilarListAsync( 79 | index: nameof(CanUpsertTwoTextFilesAndGetSimilarListAsync), 80 | text: textToMatch, 81 | limit: 1)) 82 | { 83 | this.Output.WriteLine($"Found a document matching '{textToMatch}': {result.Item1.Payload["file"]}."); 84 | foundSomething = true; 85 | break; 86 | } 87 | Assert.True(foundSomething, "It should have found something..."); 88 | } 89 | 90 | public static string GuidWithoutDashes() => Guid.NewGuid().ToString().Replace("-", "", StringComparison.OrdinalIgnoreCase).ToLower(CultureInfo.InvariantCulture); 91 | 92 | public static async Task> UpsertTextFilesAsync( 93 | IMemoryDb memoryDb, 94 | ITextEmbeddingGenerator textEmbeddingGenerator, 95 | ITestOutputHelper output, 96 | string indexName, 97 | IEnumerable fileNames) 98 | { 99 | ArgumentNullException.ThrowIfNull(memoryDb); 100 | ArgumentNullException.ThrowIfNull(textEmbeddingGenerator); 101 | ArgumentNullException.ThrowIfNull(output); 102 | ArgumentNullException.ThrowIfNull(indexName); 103 | ArgumentNullException.ThrowIfNull(fileNames); 104 | 105 | // IMemoryDb does not create the index automatically. 
106 | await memoryDb.CreateIndexAsync(indexName, 1536) 107 | .ConfigureAwait(false); 108 | 109 | var results = new List(); 110 | foreach (var fileName in fileNames) 111 | { 112 | // Reads the text from the file 113 | string fullText = await File.ReadAllTextAsync(fileName) 114 | .ConfigureAwait(false); 115 | 116 | // Splits the text into lines of up to 1000 tokens each 117 | var lines = TextChunker.SplitPlainTextLines(fullText, 118 | maxTokensPerLine: 1000, 119 | tokenCounter: null); 120 | 121 | // Splits the line into paragraphs 122 | var paragraphs = TextChunker.SplitPlainTextParagraphs(lines, 123 | maxTokensPerParagraph: 1000, 124 | overlapTokens: 100); 125 | 126 | output.WriteLine($"File '{fileName}' contains {paragraphs.Count} paragraphs."); 127 | 128 | // Indexes each paragraph as a separate document 129 | var paraIdx = 0; 130 | var documentId = GuidWithoutDashes() + GuidWithoutDashes(); 131 | var fileId = GuidWithoutDashes(); 132 | 133 | foreach (var paragraph in paragraphs) 134 | { 135 | var embedding = await textEmbeddingGenerator.GenerateEmbeddingAsync(paragraph) 136 | .ConfigureAwait(false); 137 | 138 | output.WriteLine($"Indexed paragraph {++paraIdx}/{paragraphs.Count}. 
{paragraph.Length} characters."); 139 | 140 | var filePartId = GuidWithoutDashes(); 141 | 142 | var esId = $"d={documentId}//p={filePartId}"; 143 | 144 | var mrec = new MemoryRecord() 145 | { 146 | Id = esId, 147 | Payload = new Dictionary() 148 | { 149 | { "file", fileName }, 150 | { "text", paragraph }, 151 | { "vector_provider", textEmbeddingGenerator.GetType().Name }, 152 | { "vector_generator", "TODO" }, 153 | { "last_update", DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss") }, 154 | { "text_embedding_generator", textEmbeddingGenerator.GetType().Name } 155 | }, 156 | Tags = new TagCollection() 157 | { 158 | { "__document_id", documentId }, 159 | { "__file_type", "text/plain" }, 160 | { "__file_id", fileId }, 161 | { "__file_part", filePartId } 162 | 163 | }, 164 | Vector = embedding 165 | }; 166 | 167 | var res = await memoryDb.UpsertAsync(indexName, mrec) 168 | .ConfigureAwait(false); 169 | 170 | results.Add(res); 171 | } 172 | 173 | output.WriteLine(""); 174 | } 175 | 176 | return results; 177 | } 178 | } 179 | 180 | -------------------------------------------------------------------------------- /tests/UnitTests/ElasticsearchTestBase.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved. 2 | using Elastic.Clients.Elasticsearch; 3 | using FreeMindLabs.KernelMemory.Elasticsearch; 4 | using Xunit; 5 | using Xunit.Abstractions; 6 | 7 | namespace UnitTests; 8 | 9 | /// 10 | /// A simple base class for Elasticsearch tests. 11 | /// It ensures that all indices created by the test methods of the derived class are 12 | /// deleted before and after the tests. This ensures that Elasticsearch is left in a clean state 13 | /// or that subsequent tests don't fail because of left-over indices. 
14 | /// 15 | public abstract class ElasticsearchTestBase : IAsyncLifetime 16 | { 17 | protected ElasticsearchTestBase(ITestOutputHelper output, ElasticsearchClient client, IIndexNameHelper indexNameHelper) 18 | { 19 | this.Output = output ?? throw new ArgumentNullException(nameof(output)); 20 | this.Client = client ?? throw new ArgumentNullException(nameof(client)); 21 | this.IndexNameHelper = indexNameHelper ?? throw new ArgumentNullException(nameof(indexNameHelper)); 22 | } 23 | 24 | public ITestOutputHelper Output { get; } 25 | public ElasticsearchClient Client { get; } 26 | public IIndexNameHelper IndexNameHelper { get; } 27 | 28 | public async Task InitializeAsync() 29 | { 30 | // Within a single test class, the tests are executed sequentially by default so 31 | // there is no chance for a method to finish and delete indices of other methods before the next 32 | // method starts executing. 33 | //var delIndexResponse = await this.Client.Indices.DeleteAsync(indices: this.con) 34 | // .ConfigureAwait(false); 35 | 36 | var indicesFound = await this.Client.DeleteIndicesOfTestAsync(this.GetType(), this.IndexNameHelper).ConfigureAwait(false); 37 | 38 | if (indicesFound.Any()) 39 | { 40 | this.Output.WriteLine($"Deleted left-over test indices: {string.Join(", ", indicesFound)}"); 41 | this.Output.WriteLine(""); 42 | } 43 | } 44 | 45 | public async Task DisposeAsync() 46 | { 47 | var indicesFound = await this.Client.DeleteIndicesOfTestAsync(this.GetType(), this.IndexNameHelper).ConfigureAwait(false); 48 | 49 | if (indicesFound.Any()) 50 | { 51 | this.Output.WriteLine($"Deleted test indices: {string.Join(", ", indicesFound)}"); 52 | this.Output.WriteLine(""); 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /tests/UnitTests/IndexManagementTests.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved. 
2 | using Elastic.Clients.Elasticsearch; 3 | using FreeMindLabs.KernelMemory.Elasticsearch; 4 | using Microsoft.KernelMemory.MemoryStorage; 5 | using Xunit; 6 | using Xunit.Abstractions; 7 | 8 | namespace UnitTests; 9 | 10 | public class IndexManagementTests : ElasticsearchTestBase 11 | { 12 | public IndexManagementTests(ITestOutputHelper output, IMemoryDb memoryDb, ElasticsearchClient client, IIndexNameHelper indexNameHelper) 13 | : base(output, client, indexNameHelper) 14 | { 15 | this.MemoryDb = memoryDb ?? throw new ArgumentNullException(nameof(memoryDb)); 16 | } 17 | 18 | public IMemoryDb MemoryDb { get; } 19 | 20 | [Fact] 21 | public async Task CanCreateAndDeleteIndexAsync() 22 | { 23 | var indexName = nameof(CanCreateAndDeleteIndexAsync); 24 | var vectorSize = 1536; 25 | 26 | // Creates the index using IMemoryDb 27 | await this.MemoryDb.CreateIndexAsync(indexName, vectorSize) 28 | .ConfigureAwait(false); 29 | 30 | // Verifies the index is created using the ES client 31 | var actualIndexName = this.IndexNameHelper.Convert(nameof(CanCreateAndDeleteIndexAsync)); 32 | var resp = await this.Client.Indices.ExistsAsync(actualIndexName) 33 | .ConfigureAwait(false); 34 | Assert.True(resp.Exists); 35 | this.Output.WriteLine($"The index '{actualIndexName}' was created successfully."); 36 | 37 | // Deletes the index 38 | await this.MemoryDb.DeleteIndexAsync(indexName) 39 | .ConfigureAwait(false); 40 | 41 | // Verifies the index is deleted using the ES client 42 | resp = await this.Client.Indices.ExistsAsync(actualIndexName) 43 | .ConfigureAwait(false); 44 | Assert.False(resp.Exists); 45 | this.Output.WriteLine($"The index '{actualIndexName}' was deleted successfully."); 46 | } 47 | 48 | [Fact] 49 | public async Task CanGetIndicesAsync() 50 | { 51 | var indexNames = new[] 52 | { 53 | this.IndexNameHelper.Convert(nameof(CanGetIndicesAsync) + "-First"), 54 | this.IndexNameHelper.Convert(nameof(CanGetIndicesAsync) + "-Second") 55 | }; 56 | 57 | // Creates the indices using 
IMemoryDb 58 | foreach (var indexName in indexNames) 59 | { 60 | await this.MemoryDb.CreateIndexAsync(indexName, 1536) 61 | .ConfigureAwait(false); 62 | } 63 | 64 | // Verifies that every index created above is among the returned indices 65 | var indices = await this.MemoryDb.GetIndexesAsync() 66 | .ConfigureAwait(false); 67 | 68 | Assert.True(indexNames.All(nme => indices.Contains(nme))); 69 | 70 | // Cleans up 71 | foreach (var indexName in indexNames) 72 | { 73 | await this.MemoryDb.DeleteIndexAsync(indexName) 74 | .ConfigureAwait(false); 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /tests/UnitTests/IndexnameTests.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved. 2 | using FreeMindLabs.KernelMemory.Elasticsearch; 3 | using Xunit; 4 | using Xunit.Abstractions; 5 | 6 | namespace UnitTests; 7 | 8 | public class IndexnameTests 9 | { 10 | private readonly ITestOutputHelper _output; 11 | private readonly IIndexNameHelper _indexNameHelper; 12 | 13 | public IndexnameTests(ITestOutputHelper output, IIndexNameHelper indexNameHelper) 14 | { 15 | this._output = output ?? throw new ArgumentNullException(nameof(output)); 16 | this._indexNameHelper = indexNameHelper ?? throw new ArgumentNullException(nameof(indexNameHelper)); 17 | } 18 | 19 | [Theory] 20 | [InlineData("")] // default index 21 | [InlineData("nondefault")] 22 | [InlineData("WithUppercase")] 23 | [InlineData("With-Dashes")] 24 | [InlineData("123numberfirst")] 25 | public void GoodIndexNamesAreAccepted(string indexName) 26 | { 27 | Assert.True(this._indexNameHelper.TryConvert(indexName, out var convResult)); 28 | Assert.Empty(convResult.Errors); 29 | 30 | this._output.WriteLine($"The index name '{indexName}' will be translated to '{convResult.ActualIndexName}'."); 31 | } 32 | 33 | [Theory] 34 | // An index name cannot start with a hyphen (-) or underscore (_). 
35 | //[InlineData("-test", 1)] 36 | //[InlineData("test_", 1)] 37 | // An index name can only contain letters, digits, and hyphens (-). 38 | [InlineData("test space", 1)] 39 | [InlineData("test/slash", 1)] 40 | [InlineData("test\\backslash", 1)] 41 | [InlineData("test.dot", 1)] 42 | [InlineData("test:colon", 1)] 43 | [InlineData("test*asterisk", 1)] 44 | [InlineData("test<less", 1)] 45 | [InlineData("test>greater", 1)] 46 | [InlineData("test|pipe", 1)] 47 | [InlineData("test?question", 1)] 48 | [InlineData("test\"quote", 1)] 49 | [InlineData("test'quote", 1)] 50 | [InlineData("test`backtick", 1)] 51 | [InlineData("test~tilde", 1)] 52 | [InlineData("test!exclamation", 1)] 53 | // Avoid names that are dot-only or dot and numbers 54 | // Multi error 55 | [InlineData(".", 1)] 56 | [InlineData("..", 1)] 57 | [InlineData("1.2.3", 1)] 58 | //[InlineData("_test", 1)] 59 | 60 | public void BadIndexNamesAreRejected(string indexName, int errorCount) 61 | { 62 | // Converting an invalid index name must throw and expose the validation errors 63 | var exception = Assert.Throws(() => 64 | { 65 | this._indexNameHelper.Convert(indexName); 66 | }); 67 | 68 | this._output.WriteLine( 69 | $"The index name '{indexName}' had the following errors:\n{string.Join("\n", exception.Errors)}" + 70 | $"" + 71 | $"The expected number of errors was {errorCount}."); 72 | 73 | Assert.True(errorCount == exception.Errors.Count(), $"The number of errors expected is different than the number of errors found."); 74 | } 75 | 76 | [Fact] 77 | public void IndexNameCannotBeLongerThan255Bytes() 78 | { 79 | var indexName = new string('a', 256); 80 | var exception = Assert.Throws(() => 81 | { 82 | this._indexNameHelper.Convert(indexName); 83 | }); 84 | 85 | Assert.Single(exception.Errors); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /tests/UnitTests/KernelMemoryTests.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved. 
2 | using Elastic.Clients.Elasticsearch; 3 | using FreeMindLabs.KernelMemory.Elasticsearch; 4 | using Microsoft.KernelMemory; 5 | using Xunit; 6 | using Xunit.Abstractions; 7 | 8 | namespace UnitTests; 9 | public class KernelMemoryTests : ElasticsearchTestBase 10 | { 11 | private const string NoAnswer = "INFO NOT FOUND"; 12 | 13 | public KernelMemoryTests(ITestOutputHelper output, IKernelMemory kernelMemory, ElasticsearchClient client, IIndexNameHelper indexNameHelper) 14 | : base(output, client, indexNameHelper) 15 | { 16 | this.KernelMemory = kernelMemory ?? throw new ArgumentNullException(nameof(kernelMemory)); 17 | } 18 | 19 | public IKernelMemory KernelMemory { get; } 20 | 21 | private const string NotFound = "INFO NOT FOUND"; 22 | 23 | [Fact] 24 | [System.Diagnostics.CodeAnalysis.SuppressMessage("Reliability", "CA2007:Consider calling ConfigureAwait on the awaited task", Justification = "")] 25 | public async Task ItSupportsMultipleFiltersAsync() 26 | { 27 | // This is an adaptation of the same test in Elasticsearch.FunctionalTests 28 | 29 | string indexName = nameof(ItSupportsMultipleFiltersAsync); 30 | this.Output.WriteLine($"Index name: {indexName}"); 31 | 32 | const string Id = "ItSupportsMultipleFilters-file1-NASA-news.pdf"; 33 | const string Found = "spacecraft"; 34 | 35 | this.Output.WriteLine("Uploading document"); 36 | await this.KernelMemory.ImportDocumentAsync( 37 | new Document(Id) 38 | .AddFile("Data/file5-NASA-news.pdf") 39 | .AddTag("type", "news") 40 | .AddTag("user", "admin") 41 | .AddTag("user", "owner"), 42 | index: indexName, 43 | steps: Constants.PipelineWithoutSummary); 44 | 45 | while (!await this.KernelMemory.IsDocumentReadyAsync(documentId: Id, index: indexName)) 46 | { 47 | this.Output.WriteLine("Waiting for memory ingestion to complete..."); 48 | await Task.Delay(TimeSpan.FromSeconds(2)); 49 | } 50 | 51 | // Multiple filters: unknown users cannot see the memory 52 | var answer = await this.KernelMemory.AskAsync("What is Orion?", 
filters: new List 53 | { 54 | MemoryFilters.ByTag("user", "someone1"), 55 | MemoryFilters.ByTag("user", "someone2"), 56 | }, index: indexName); 57 | this.Output.WriteLine(answer.Result); 58 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase); 59 | 60 | // Multiple filters: unknown users cannot see the memory even if the type is correct (testing AND logic) 61 | answer = await this.KernelMemory.AskAsync("What is Orion?", filters: new List 62 | { 63 | MemoryFilters.ByTag("user", "someone1").ByTag("type", "news"), 64 | MemoryFilters.ByTag("user", "someone2").ByTag("type", "news"), 65 | }, index: indexName); 66 | this.Output.WriteLine(answer.Result); 67 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase); 68 | 69 | // Multiple filters: AND + OR logic works 70 | answer = await this.KernelMemory.AskAsync("What is Orion?", filters: new List 71 | { 72 | MemoryFilters.ByTag("user", "someone1").ByTag("type", "news"), 73 | MemoryFilters.ByTag("user", "admin").ByTag("type", "fact"), 74 | }, index: indexName); 75 | this.Output.WriteLine(answer.Result); 76 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase); 77 | 78 | // Multiple filters: OR logic works 79 | answer = await this.KernelMemory.AskAsync("What is Orion?", filters: new List 80 | { 81 | MemoryFilters.ByTag("user", "someone1"), 82 | MemoryFilters.ByTag("user", "admin"), 83 | }, index: indexName); 84 | this.Output.WriteLine(answer.Result); 85 | Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase); 86 | 87 | // Multiple filters: OR logic works 88 | answer = await this.KernelMemory.AskAsync("What is Orion?", filters: new List 89 | { 90 | MemoryFilters.ByTag("user", "someone1").ByTag("type", "news"), 91 | MemoryFilters.ByTag("user", "admin").ByTag("type", "news"), 92 | }, index: indexName); 93 | this.Output.WriteLine(answer.Result); 94 | Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase); 95 | 96 | 
await this.KernelMemory.DeleteDocumentAsync(Id, index: indexName); 97 | 98 | this.Output.WriteLine("Deleting index"); 99 | await this.KernelMemory.DeleteIndexAsync(indexName); 100 | } 101 | 102 | [Fact] 103 | public async Task ItSupportsTagsAsync() 104 | { 105 | // This is an adaptation of the same test in Elasticsearch.FunctionalTests 106 | 107 | // Arrange 108 | const string Id = "ItSupportTags-file1-NASA-news.pdf"; 109 | await this.KernelMemory.ImportDocumentAsync( 110 | "Data/file5-NASA-news.pdf", 111 | documentId: Id, 112 | tags: new TagCollection 113 | { 114 | { "type", "news" }, 115 | { "type", "test" }, 116 | { "ext", "pdf" } 117 | }, 118 | steps: Constants.PipelineWithoutSummary).ConfigureAwait(false); 119 | 120 | while (!await this.KernelMemory.IsDocumentReadyAsync(documentId: Id).ConfigureAwait(false)) 121 | { 122 | this.Output.WriteLine("Waiting for memory ingestion to complete..."); 123 | await Task.Delay(TimeSpan.FromSeconds(2)).ConfigureAwait(false); 124 | } 125 | 126 | // Act 127 | var defaultRetries = 0;// withRetries ? 
4 : 0; 128 | // Each retry loop re-issues exactly the filter of its first attempt and matches case-insensitively, like the final asserts. 129 | var retries = defaultRetries; 130 | var answer1 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "news")).ConfigureAwait(false); 131 | this.Output.WriteLine("answer1: " + answer1.Result); 132 | while (retries-- > 0 && !answer1.Result.Contains("spacecraft", StringComparison.OrdinalIgnoreCase)) 133 | { 134 | await Task.Delay(TimeSpan.FromSeconds(1)).ConfigureAwait(false); 135 | answer1 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "news")).ConfigureAwait(false); 136 | this.Output.WriteLine("answer1: " + answer1.Result); 137 | } 138 | 139 | retries = defaultRetries; 140 | var answer2 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "test")).ConfigureAwait(false); 141 | this.Output.WriteLine("answer2: " + answer2.Result); 142 | while (retries-- > 0 && !answer2.Result.Contains("spacecraft", StringComparison.OrdinalIgnoreCase)) 143 | { 144 | await Task.Delay(TimeSpan.FromSeconds(1)).ConfigureAwait(false); 145 | answer2 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "test")).ConfigureAwait(false); 146 | this.Output.WriteLine("answer2: " + answer2.Result); 147 | } 148 | 149 | retries = defaultRetries; 150 | var answer3 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("ext", "pdf")).ConfigureAwait(false); 151 | this.Output.WriteLine("answer3: " + answer3.Result); 152 | while (retries-- > 0 && !answer3.Result.Contains("spacecraft", StringComparison.OrdinalIgnoreCase)) 153 | { 154 | await Task.Delay(TimeSpan.FromSeconds(1)).ConfigureAwait(false); 155 | answer3 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("ext", "pdf")).ConfigureAwait(false); 156 | this.Output.WriteLine("answer3: " + answer3.Result); 157 | } 158 | 159 | var answer4 = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("foo", "bar")).ConfigureAwait(false); 160 | this.Output.WriteLine(answer4.Result); 161 | 162 | // Assert 163 | 
Assert.Contains("spacecraft", answer1.Result, StringComparison.OrdinalIgnoreCase); 164 | Assert.Contains("spacecraft", answer2.Result, StringComparison.OrdinalIgnoreCase); 165 | Assert.Contains("spacecraft", answer3.Result, StringComparison.OrdinalIgnoreCase); 166 | Assert.Contains("NOT FOUND", answer4.Result, StringComparison.OrdinalIgnoreCase); 167 | } 168 | 169 | [Fact] 170 | public async Task ItSupportsASingleFilterAsync() 171 | { 172 | // This is an adaptation of the same test in Elasticsearch.FunctionalTests 173 | 174 | string indexName = nameof(ItSupportsASingleFilterAsync); 175 | const string Id = "ItSupportsASingleFilter-file1-NASA-news.pdf"; 176 | const string Found = "spacecraft"; 177 | 178 | this.Output.WriteLine("Uploading document"); 179 | await this.KernelMemory.ImportDocumentAsync( 180 | new Document(Id) 181 | .AddFile("Data/file5-NASA-news.pdf") 182 | .AddTag("type", "news") 183 | .AddTag("user", "admin") 184 | .AddTag("user", "owner"), 185 | index: indexName, 186 | steps: Constants.PipelineWithoutSummary).ConfigureAwait(false); 187 | 188 | while (!await this.KernelMemory.IsDocumentReadyAsync(documentId: Id, index: indexName).ConfigureAwait(false)) 189 | { 190 | this.Output.WriteLine("Waiting for memory ingestion to complete..."); 191 | await Task.Delay(TimeSpan.FromSeconds(2)).ConfigureAwait(false); 192 | } 193 | 194 | //await Task.Delay(TimeSpan.FromSeconds(4)).ConfigureAwait(false); 195 | 196 | MemoryAnswer answer; 197 | // Simple filter: unknown user cannot see the memory 198 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("user", "someone"), index: indexName).ConfigureAwait(false); 199 | this.Output.WriteLine(answer.Result); 200 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase); 201 | 202 | // Simple filter: test AND logic: valid type + invalid user 203 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "news").ByTag("user", 
"someone"), index: indexName).ConfigureAwait(false); 204 | this.Output.WriteLine(answer.Result); 205 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase); 206 | 207 | // Simple filter: test AND logic: invalid type + valid user 208 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "fact").ByTag("user", "owner"), index: indexName).ConfigureAwait(false); 209 | this.Output.WriteLine(answer.Result); 210 | //Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase); 211 | Assert.Contains(NotFound, answer.Result, StringComparison.OrdinalIgnoreCase); 212 | 213 | // Simple filter: known user can see the memory 214 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("user", "admin"), index: indexName).ConfigureAwait(false); 215 | this.Output.WriteLine(answer.Result); 216 | Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase); 217 | 218 | // Simple filter: known user can see the memory 219 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("user", "owner"), index: indexName).ConfigureAwait(false); 220 | this.Output.WriteLine(answer.Result); 221 | Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase); 222 | 223 | // Simple filter: test AND logic with correct values 224 | answer = await this.KernelMemory.AskAsync("What is Orion?", filter: MemoryFilters.ByTag("type", "news").ByTag("user", "owner"), index: indexName).ConfigureAwait(false); 225 | this.Output.WriteLine(answer.Result); 226 | Assert.Contains(Found, answer.Result, StringComparison.OrdinalIgnoreCase); 227 | 228 | this.Output.WriteLine("Deleting memories extracted from the document"); 229 | await this.KernelMemory.DeleteDocumentAsync(Id, index: indexName).ConfigureAwait(false); 230 | 231 | this.Output.WriteLine("Deleting index"); 232 | await this.KernelMemory.DeleteIndexAsync(indexName).ConfigureAwait(false); 233 | 
} 234 | 235 | [Fact] 236 | public async Task CanImportOneDocumentAndAskAsync() 237 | { 238 | var indexName = nameof(CanImportOneDocumentAndAskAsync); 239 | 240 | // Imports a document into the index 241 | var id = await this.KernelMemory.ImportDocumentAsync( 242 | filePath: "Data/file1-Wikipedia-Carbon.txt", 243 | documentId: "doc001", 244 | tags: new TagCollection 245 | { 246 | { "indexedOn", DateTime.UtcNow.ToString("yyyy-MM-dd'T'HH:mm:ss.fffzzz") } 247 | }, 248 | index: indexName) 249 | .ConfigureAwait(false); 250 | 251 | this.Output.WriteLine($"Indexed document with id '{id}'."); 252 | 253 | // Waits for the documents to be saved 254 | var actualIndexName = this.IndexNameHelper.Convert(indexName); 255 | //await this.Client.WaitForDocumentsAsync(actualIndexName, expectedDocuments: 2) 256 | // .ConfigureAwait(false); 257 | 258 | // Asks a question on the data we just inserted 259 | MemoryAnswer? answer = await this.TryToGetTopAnswerAsync(indexName, "What can carbon bond to?") 260 | .ConfigureAwait(false); 261 | this.PrintAnswerOfDocument(answer, "doc001"); 262 | } 263 | 264 | [Fact] 265 | public async Task CanImportTwoDocumentsAndAskAsync() 266 | { 267 | var indexName = nameof(CanImportTwoDocumentsAndAskAsync); 268 | 269 | // Proceeds 270 | var docId = await this.KernelMemory.ImportDocumentAsync( 271 | "Data/file1-Wikipedia-Carbon.txt", 272 | index: indexName, 273 | documentId: "doc001").ConfigureAwait(false); 274 | 275 | this.Output.WriteLine($"Indexed {docId}"); 276 | 277 | docId = await this.KernelMemory.ImportDocumentAsync( 278 | new Document("doc002") 279 | .AddFiles(new[] { 280 | "Data/file2-Wikipedia-Moon.txt", 281 | "Data/file3-lorem-ipsum.docx", 282 | "Data/file4-SK-Readme.pdf" }) 283 | .AddTag("user", "Blake"), 284 | index: indexName) 285 | .ConfigureAwait(false); 286 | 287 | this.Output.WriteLine($"Indexed {docId}"); 288 | 289 | docId = await this.KernelMemory.ImportDocumentAsync(new Document("doc003") 290 | .AddFile("Data/file5-NASA-news.pdf") 291 | 
.AddTag("user", "Taylor") 292 | .AddTag("collection", "meetings") 293 | .AddTag("collection", "NASA") 294 | .AddTag("collection", "space") 295 | .AddTag("type", "news"), 296 | index: indexName) 297 | .ConfigureAwait(false); 298 | 299 | this.Output.WriteLine($"Indexed {docId}"); 300 | 301 | // Waits for the documents to be saved 302 | var actualIndexName = this.IndexNameHelper.Convert(indexName); 303 | //await this.Client.WaitForDocumentsAsync(actualIndexName, expectedDocuments: 10) 304 | // .ConfigureAwait(false); 305 | 306 | // This should return a citation to doc001 307 | var answer = await this.KernelMemory.AskAsync("What's E = m*c^2?", indexName) 308 | .ConfigureAwait(false); 309 | 310 | this.PrintAnswerOfDocument(answer, "doc001"); 311 | 312 | // This should return a citation to doc002 313 | answer = await this.KernelMemory.AskAsync("What's Semantic Kernel?", indexName) 314 | .ConfigureAwait(false); 315 | 316 | this.PrintAnswerOfDocument(answer, "doc002"); 317 | } 318 | 319 | private void PrintAnswerOfDocument(MemoryAnswer? answer, string expectedDocumentId) 320 | { 321 | ArgumentNullException.ThrowIfNull(answer); 322 | 323 | this.Output.WriteLine($"Question: {answer.Question}"); 324 | this.Output.WriteLine($"Answer: {answer.Result}"); 325 | 326 | var foundDocumentReference = false; 327 | foreach (var citation in answer.RelevantSources) 328 | { 329 | this.Output.WriteLine($" - {citation.SourceName} - {citation.Link} [{citation.Partitions.First().LastUpdate:D}]"); 330 | 331 | if (citation.DocumentId == expectedDocumentId) 332 | { 333 | foundDocumentReference = true; 334 | } 335 | } 336 | 337 | if (!foundDocumentReference) 338 | { 339 | throw new InvalidOperationException($"It should have found a citation to document '{expectedDocumentId}'."); 340 | } 341 | } 342 | 343 | private async Task TryToGetTopAnswerAsync(string indexName, string question) 344 | { 345 | MemoryAnswer? 
answer = null; 346 | 347 | // The import may not be searchable yet, so we ask up to three times with a delay between attempts. 348 | // TODO: add Polly. 349 | for (int i = 0; i < 3; i++) 350 | { 351 | answer = await this.KernelMemory.AskAsync( 352 | question: question, 353 | index: indexName, 354 | filter: null, 355 | filters: null, 356 | minRelevance: 0) 357 | .ConfigureAwait(false); 358 | // Any result other than NoAnswer ends the retry loop. 359 | if (answer.Result != NoAnswer) 360 | { 361 | break; 362 | } 363 | // Pause 500 ms before the next attempt (this also runs after the last attempt). 364 | await Task.Delay(500) 365 | .ConfigureAwait(false); 366 | } 367 | // Returns the last answer received, which may still be NoAnswer. 368 | return answer; 369 | } 370 | } 371 | -------------------------------------------------------------------------------- /tests/UnitTests/SearchTests.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved. 2 | using Elastic.Clients.Elasticsearch; 3 | using FreeMindLabs.KernelMemory.Elasticsearch; 4 | using Microsoft.KernelMemory; 5 | using Microsoft.KernelMemory.AI; 6 | using Microsoft.KernelMemory.MemoryStorage; 7 | using Xunit; 8 | using Xunit.Abstractions; 9 | 10 | namespace UnitTests; 11 | // Tests that list memory records through IMemoryDb.GetListAsync. 12 | public class SearchTests : ElasticsearchTestBase 13 | { 14 | public SearchTests(ITestOutputHelper output, IMemoryDb memoryDb, ITextEmbeddingGenerator textEmbeddingGenerator, ElasticsearchClient client, IIndexNameHelper indexNameHelper) 15 | : base(output, client, indexNameHelper) 16 | { 17 | this.MemoryDb = memoryDb ?? throw new ArgumentNullException(nameof(memoryDb)); 18 | this.TextEmbeddingGenerator = textEmbeddingGenerator ?? 
throw new ArgumentNullException(nameof(textEmbeddingGenerator)); 19 | } 20 | // Dependencies received through the constructor. 21 | public IMemoryDb MemoryDb { get; } 22 | public ITextEmbeddingGenerator TextEmbeddingGenerator { get; } 23 | // Upserts two text files, then lists their records by filtering on file type and document id. 24 | [Fact] 25 | public async Task CanGetListWithTagsAsync() 26 | { 27 | const int ExpectedTotalParagraphs = 4; 28 | 29 | // We upsert the two text files 30 | var docIds = await DataStorageTests.UpsertTextFilesAsync( 31 | memoryDb: this.MemoryDb, 32 | textEmbeddingGenerator: this.TextEmbeddingGenerator, 33 | output: this.Output, 34 | indexName: nameof(CanGetListWithTagsAsync), 35 | fileNames: new[] 36 | { 37 | "Data/file1-Wikipedia-Carbon.txt", 38 | "Data/file2-Wikipedia-Moon.txt" 39 | }) 40 | .ConfigureAwait(false); 41 | 42 | // docIds is a list of values like "d=3ed7b0787d484496ab25d50b2a887f8cf63193954fc844689116766434c11887//p=b84ee5e4841c4ab2877e30293752f7cc" 43 | Assert.Equal(expected: ExpectedTotalParagraphs, actual: docIds.Count()); 44 | docIds = docIds.Select(x => x.Split("//")[0].Split("=")[1]).Distinct().ToList(); 45 | // docIds now holds only the distinct values found between "d=" and "//". 46 | this.Output.WriteLine($"Indexed returned the following ids:\n{string.Join("\n", docIds)}"); 47 | // NOTE(review): expectedDocs is never read in the rest of this test. 48 | var expectedDocs = docIds.Count(); 49 | 50 | // Lists the records whose file type is text/plain and whose document id is one of the ids above. 
51 | var filter = new MemoryFilter(); 52 | filter.Add("__file_type", "text/plain"); 53 | filter.Add("__document_id", docIds.Select(x => (string?)x).ToList()); 54 | // idx counts the records returned by the listing. 55 | var idx = 0; 56 | this.Output.WriteLine($"Filter: {filter.ToDebugString()}.\n"); 57 | // Enumerates up to 100 records matching the filter, without embeddings. 58 | await foreach (var result in this.MemoryDb.GetListAsync( 59 | index: nameof(CanGetListWithTagsAsync), 60 | filters: new[] { filter }, 61 | limit: 100, 62 | withEmbeddings: false)) 63 | { 64 | var fileName = result.Payload["file"]; 65 | this.Output.WriteLine($"Match #{idx++}: {fileName}"); 66 | }; 67 | // Every upserted paragraph is expected to match the filter. 68 | Assert.Equal(expected: ExpectedTotalParagraphs, actual: idx); 69 | } 70 | // NOTE(review): this test lists the index named after CanGetListWithTagsAsync, not its own name - confirm this is intentional. 71 | [Fact] 72 | public async Task CanGetListWithEmptyFiltersAsync() 73 | { 74 | await foreach (var result in this.MemoryDb.GetListAsync( 75 | index: nameof(CanGetListWithTagsAsync), 76 | filters: new[] { new MemoryFilter() }, // <-- KM has a test to make sure this works. 77 | limit: 100, 78 | withEmbeddings: false)) 79 | { }; 80 | 81 | // Nothing is asserted: if the enumeration completes without throwing, the test passes. 82 | } 83 | } 84 | 85 | -------------------------------------------------------------------------------- /tests/UnitTests/Startup.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved. 2 | 3 | using System.Reflection; 4 | using Microsoft.Extensions.Configuration; 5 | using Microsoft.Extensions.DependencyInjection; 6 | using Microsoft.KernelMemory; 7 | using Microsoft.KernelMemory.ContentStorage.DevTools; 8 | using Microsoft.KernelMemory.FileSystem.DevTools; 9 | 10 | namespace UnitTests; 11 | 12 | /// <summary> 13 | /// Sets up dependency injection for unit tests. 14 | /// </summary> 15 | public class Startup 16 | { 17 | private readonly IConfiguration _configuration; 18 | 19 | public Startup() 20 | { 21 | // We read from the local appSettings.json and the same user secrets 22 | // as the Microsoft Semantic Kernel team. 
23 | this._configuration = new ConfigurationBuilder() 24 | .AddJsonFile("appSettings.json", optional: true, reloadOnChange: true) // Casing must match the file on disk: on case-sensitive file systems a mismatch is silently ignored because the file is optional. 25 | .AddUserSecrets(Assembly.GetExecutingAssembly()) // Same secrets as SK and KM :smile: 26 | .Build(); 27 | } 28 | // Registers a Kernel Memory instance (volatile file storage, Elasticsearch, OpenAI defaults) as a singleton; throws ArgumentException when the OpenAI API key is not configured. 29 | public void ConfigureServices(IServiceCollection services) 30 | { 31 | // We use the same OpenAI API key as in Kernel Memory. 32 | const string OpenAIKeyPath = "KernelMemory:Services:OpenAI:APIKey"; 33 | 34 | // TODO: Uses only OpenAI API stuff for now. Make more flexible. 35 | var openApiKey = this._configuration[OpenAIKeyPath] ?? throw new ArgumentException($"OpenAI API key is required. [path: {OpenAIKeyPath}]"); 36 | 37 | // Kernel Memory with Elasticsearch 38 | IKernelMemoryBuilder kmBldr = new KernelMemoryBuilder(services) 39 | .WithSimpleFileStorage(new SimpleFileStorageConfig() 40 | { 41 | Directory = "ContentStorage", 42 | StorageType = FileSystemTypes.Volatile 43 | }) 44 | .WithElasticsearch(esBldr => 45 | { 46 | esBldr.WithConfiguration(this._configuration); 47 | 48 | // Alternatively we can use the other builder methods: 49 | //esBldr.WithEndpoint(ElasticsearchConfigBuilder.DefaultEndpoint) 50 | // .WithShardsAndReplicas(1, 0) 51 | // .WithIndexPrefix(ElasticsearchConfigBuilder.DefaultIndexPrefix) 52 | // .WithCertificateFingerPrint("...") 53 | // .WithUserNameAndPassword(ElasticsearchConfigBuilder.DefaultUserName, "...") 54 | // .WithIndexPrefix("km-"); 55 | 56 | }) 57 | .WithOpenAIDefaults(apiKey: openApiKey); 58 | 59 | var kernelMemory = kmBldr.Build(); 60 | // The built instance is shared by all tests. 61 | services.AddSingleton(kernelMemory); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /tests/UnitTests/TestsHelper.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Free Mind Labs, Inc. All rights reserved. 
2 | 3 | using System.Reflection; 4 | using Elastic.Clients.Elasticsearch; 5 | using FreeMindLabs.KernelMemory.Elasticsearch; 6 | 7 | namespace UnitTests; 8 | 9 | /// <summary> 10 | /// Extension methods for tests on Elasticsearch. 11 | /// </summary> 12 | internal static class TestsHelper 13 | { 14 | /// <summary> 15 | /// Deletes the indices created by the test methods of the given class and returns the names of those actually deleted. 16 | /// Indices must have the same name as a test method (after conversion by the index name helper) to be automatically deleted. 17 | /// </summary> 18 | public static async Task> DeleteIndicesOfTestAsync(this ElasticsearchClient client, Type unitTestType, IIndexNameHelper indexNameHelper) 19 | { 20 | ArgumentNullException.ThrowIfNull(client); 21 | ArgumentNullException.ThrowIfNull(unitTestType); 22 | ArgumentNullException.ThrowIfNull(indexNameHelper); 23 | // NOTE(review): generic type arguments (return type, GetCustomAttribute, List) appear to be missing from this listing - confirm against the original file. 24 | // Collects the public instance methods of the test class that carry one of the two test attributes; each index with a matching name is deleted below. 25 | var methods = unitTestType.GetMethods(BindingFlags.Public | BindingFlags.Instance) 26 | .Where(m => 27 | (m.GetCustomAttribute() != null) 28 | || 29 | (m.GetCustomAttribute() != null) 30 | ) 31 | .ToArray(); 32 | if (methods.Length == 0) 33 | { 34 | throw new ArgumentException($"No public test methods found in class '{unitTestType.Name}'."); 35 | } 36 | // Deletion is best-effort: unsuccessful responses are skipped and only deleted index names are returned. 37 | var result = new List(); 38 | foreach (var method in methods) 39 | { 40 | var indexName = indexNameHelper.Convert(method.Name); 41 | var delResp = await client.Indices.DeleteAsync(indices: indexName) 42 | .ConfigureAwait(false); 43 | 44 | if (delResp.IsSuccess()) 45 | { 46 | result.Add(indexName); 47 | } 48 | } 49 | 50 | return result; 51 | } 52 | 53 | ///// 54 | ///// Queries the given index for documents until the expected number of documents is found 55 | ///// or the max number of retries is reached. 56 | ///// It throws an exception if the expected number of documents is not found. 
57 | ///// 58 | //public static async Task WaitForDocumentsAsync(this ElasticsearchClient client, string realIndexName, int expectedDocuments, int maxRetries = 3, int msDelay = 500) 59 | //{ 60 | // ArgumentNullException.ThrowIfNull(client); 61 | // ArgumentNullException.ThrowIfNull(realIndexName); 62 | 63 | // return; 64 | 65 | // var foundCount = 0; 66 | // for (int i = 0; i < maxRetries; i++) 67 | // { 68 | // // We search for all documents 69 | // var results = await client 70 | // .SearchAsync(sr => 71 | // { 72 | // sr.Index(realIndexName) 73 | // .Query(q => q.MatchAll()); 74 | // }) 75 | // .ConfigureAwait(false); 76 | 77 | // foundCount = results?.HitsMetadata?.Hits?.Count ?? 0; 78 | 79 | // // If we found all documents, we can return 80 | // if ((expectedDocuments == 0) && (foundCount == 0)) 81 | // { 82 | // return; 83 | // } 84 | // else if (foundCount >= expectedDocuments) 85 | // { 86 | // return; 87 | // } 88 | 89 | // await Task.Delay(msDelay).ConfigureAwait(false); 90 | // } 91 | 92 | // throw new InvalidOperationException($"It should have inserted {expectedDocuments} documents but only {foundCount}..."); 93 | //} 94 | } 95 | -------------------------------------------------------------------------------- /tests/UnitTests/UnitTests.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | enable 6 | true 7 | CS1591,CA1861,CA1305,CA1307 8 | 5ee045b0-aea3-4f08-8d31-32d1a6f8fed0 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | all 18 | runtime; build; native; contentfiles; analyzers; buildtransitive 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | all 28 | runtime; build; native; contentfiles; analyzers; buildtransitive 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | Always 41 | 42 | 43 | PreserveNewest 44 | 45 | 46 | PreserveNewest 47 | 48 | 49 | PreserveNewest 50 | 51 | 52 | PreserveNewest 53 | 54 | 55 | PreserveNewest 56 | 57 | 58 | PreserveNewest 59 | 60 | 61 | 62 | 63 | 
-------------------------------------------------------------------------------- /tests/UnitTests/appSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "KernelMemory": { 3 | "Services": { 4 | "OpenAI": { 5 | "TextModel": "gpt-3.5-turbo", 6 | "EmbeddingModel": "text-embedding-ada-002", 7 | "APIKey": "...SECRETS...", 8 | "OrgId": "", 9 | "MaxRetries": 10 10 | }, 11 | "Elasticsearch": { 12 | "Endpoint": "https://localhost:9200", 13 | "CertificateFingerPrint": "...SECRETS...", 14 | "Username": "elastic", 15 | "Password": "...SECRETS...", 16 | "IndexPrefix": "kmtests-" 17 | } 18 | } 19 | }, 20 | "Logging": { 21 | "LogLevel": { 22 | "Default": "Trace" 23 | } 24 | }, 25 | "AllowedHosts": "*" 26 | } --------------------------------------------------------------------------------