├── .github └── workflows │ ├── build-release.yml │ ├── dotnet-tests.yml │ ├── ghpages-mkdocs.yml │ └── mkdocs-build.yml ├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── assets ├── build5nines-sharpvector-console-screenshot.jpg ├── github-opengraph-build5nines-sharpvector-dark.jpg └── github-opengraph-build5nines-sharpvector-light.jpg ├── docs ├── docs │ ├── concepts │ │ └── index.md │ ├── embeddings │ │ ├── index.md │ │ ├── ollama │ │ │ └── index.md │ │ └── openai │ │ │ └── index.md │ ├── get-started │ │ ├── data-management │ │ │ └── index.md │ │ ├── index.md │ │ ├── metadata │ │ │ └── index.md │ │ └── search │ │ │ └── index.md │ ├── images │ │ ├── favicon.png │ │ ├── logo.png │ │ └── samples │ │ │ └── build5nines-sharpvector-console-screenshot.jpg │ ├── index.md │ ├── license │ │ └── index.md │ ├── persistence │ │ └── index.md │ ├── resources │ │ └── index.md │ ├── samples │ │ └── index.md │ └── text-chunking │ │ └── index.md ├── mkdocs.yml ├── overrides │ └── main.html ├── requirements.txt └── update-theme.sh ├── samples └── genai-rag-onnx │ ├── Program.cs │ └── genai-rag-onnx.csproj └── src ├── .vs ├── ProjectEvaluation │ ├── sharpvector.metadata.v6.1 │ └── sharpvector.projects.v6.1 └── SharpVector │ ├── FileContentIndex │ ├── 1f76313d-a8de-47ce-81a3-ac1fc7438030.vsidx │ ├── a904092f-4585-40f5-9f93-3556202611e1.vsidx │ ├── e534ca41-141b-4115-9099-c4b3a40cc99e.vsidx │ ├── ece43a1c-28dc-443c-bc82-afc0d35267af.vsidx │ └── read.lock │ └── v17 │ ├── .futdcache.v2 │ ├── .suo │ └── TestStore │ └── 0 │ ├── 000.testlog │ └── testlog.manifest ├── .vscode ├── launch.json └── tasks.json ├── Build5Nines.SharpVector.Ollama ├── BasicOllamaMemoryVectorDatabase.cs ├── Build5Nines.SharpVector.Ollama.csproj ├── Embeddings │ └── OllamaEmbeddingsGenerator.cs ├── OllamaMemoryVectorDatabase.cs └── docs │ ├── LICENSE │ └── README.md ├── Build5Nines.SharpVector.OpenAI ├── BasicOpenAIMemoryVectorDatabase.cs ├── Build5Nines.SharpVector.OpenAI.csproj 
├── Embeddings │ └── OpenAIEmbeddingsGenerator.cs ├── OpenAIMemoryVectorDatabase.cs ├── OpenAIMemoryVectorDatabaseBase.cs └── docs │ ├── LICENSE │ └── README.md ├── Build5Nines.SharpVector.Playground ├── Build5Nines.SharpVector.Playground.csproj ├── Components │ ├── App.razor │ ├── Layout │ │ ├── MainLayout.razor │ │ ├── MainLayout.razor.css │ │ ├── NavMenu.razor │ │ └── NavMenu.razor.css │ ├── Pages │ │ ├── Error.razor │ │ └── Home.razor │ ├── Routes.razor │ └── _Imports.razor ├── Program.cs ├── Properties │ └── launchSettings.json ├── appsettings.Development.json ├── appsettings.json └── wwwroot │ ├── app.css │ ├── bootstrap │ ├── bootstrap.min.css │ └── bootstrap.min.css.map │ └── favicon.png ├── Build5Nines.SharpVector ├── BasicMemoryVectorDatabase.cs ├── Build5Nines.SharpVector.csproj ├── Data │ ├── TextChunkingMethod.cs │ ├── TextChunkingOptions.cs │ └── TextDataLoader.cs ├── DatabaseFile.cs ├── DatabaseFileException.cs ├── DatabaseInfo.cs ├── Embeddings │ └── IEmbeddingsGenerator.cs ├── IVectorDatabase.cs ├── IVectorDatabaseExtensions.cs ├── Id │ ├── GuidIdGenerator.cs │ ├── IIdGenerator.cs │ ├── IntIdGenerator.cs │ └── NumericIdGenerator.cs ├── MemoryVectorDatabase.cs ├── MemoryVectorDatabaseBase.cs ├── Preprocessing │ ├── BasicTextPreprocessor.cs │ └── ITextPreprocessor.cs ├── VectorCompare │ ├── CosineSimilarityVectorComparerAsync.cs │ ├── EuclideanDistanceVectorComparerAsync.cs │ └── IVectorComparer.cs ├── VectorComparison.cs ├── VectorStore │ ├── IVectorStore.cs │ ├── IVectorStoreWithVocabulary.cs │ ├── MemoryDictionaryVectorStore.cs │ └── MemoryDictionaryVectorStoreWithVocabulary.cs ├── VectorTextDatabaseItem.cs ├── VectorTextItem.cs ├── VectorTextResult.cs ├── VectorTextResultItem.cs ├── Vectorization │ ├── BagOfWordsVectorizer.cs │ └── IVectorizer.cs ├── Vocabulary │ ├── DictionaryVocabularyStore.cs │ └── IVocabularyStore.cs └── docs │ ├── LICENSE │ └── README.md ├── ConsoleTest ├── ConsoleTest.csproj ├── Program.cs └── movies.json ├── 
OllamaConsoleTest ├── OllamaConsoleTest.csproj └── Program.cs ├── OpenAIConsoleTest ├── OpenAIConsoleTest.csproj ├── Program.cs └── movies.json ├── SharpVector.sln ├── SharpVectorOpenAITest ├── BasicOpenAIMemoryVectorDatabaseTest.cs ├── SharpVectorOpenAITest.csproj └── UnitTest1.cs ├── SharpVectorPerformance ├── MemoryVectorDatabasePerformance.cs ├── Program.cs └── SharpVectorPerformance.csproj ├── SharpVectorTest ├── Data │ ├── TextDataLoaderAsyncTests.cs │ └── TextDataLoaderTests.cs ├── Preprocessing │ └── BasicTextPreprocessorTests.cs ├── Regression │ ├── RegressionTests.cs │ └── regression-vector-database-v2.0.2.b59vdb ├── SharpVectorTest.csproj ├── VectorDatabaseTests.cs ├── VectorStore │ └── MemoryDictionaryVectorStoreTest.cs └── movies.json ├── build-release.sh └── run.sh /.github/workflows/build-release.yml: -------------------------------------------------------------------------------- 1 | name: Build and Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - dev 8 | paths-ignore: 9 | - 'docs/**' 10 | - mkdocs.yml 11 | pull_request: 12 | branches: 13 | - main 14 | - dev 15 | paths-ignore: 16 | - 'docs/**' 17 | - mkdocs.yml 18 | workflow_dispatch: 19 | 20 | jobs: 21 | build: 22 | runs-on: ubuntu-latest 23 | defaults: 24 | run: 25 | working-directory: src 26 | 27 | steps: 28 | - name: Checkout code 29 | uses: actions/checkout@v2 30 | 31 | - name: Setup .NET Core 32 | uses: actions/setup-dotnet@v2 33 | with: 34 | dotnet-version: '8.0.x' # Adjust the version as needed 35 | 36 | - name: Restore dependencies 37 | run: dotnet restore 38 | 39 | - name: Build 40 | run: dotnet build --configuration Release --no-restore 41 | 42 | - name: Performance Test 43 | run: dotnet run --project SharpVectorPerformance --configuration Release 44 | 45 | # - name: Publish 46 | # run: dotnet publish --configuration Release --output ./publish --no-build 47 | 48 | # - name: Upload artifact 49 | # uses: actions/upload-artifact@v4 50 | # with: 51 | # name: release-build 52 | # 
path: ./publish 53 | 54 | - name: Performance Results 55 | run: | 56 | echo "## Performance Results" > $GITHUB_STEP_SUMMARY 57 | cat ./BenchmarkDotNet.Artifacts/results/SharpVectorPerformance.MemoryVectorDatabasePerformance-report-github.md >> $GITHUB_STEP_SUMMARY 58 | 59 | - name: Upload Performance artifact 60 | uses: actions/upload-artifact@v4 61 | with: 62 | name: performance-results 63 | path: './src/BenchmarkDotNet.Artifacts/*' 64 | 65 | - name: Upload Nuget artifact 66 | uses: actions/upload-artifact@v4 67 | with: 68 | name: nuget-package 69 | path: '**/*.nupkg' -------------------------------------------------------------------------------- /.github/workflows/dotnet-tests.yml: -------------------------------------------------------------------------------- 1 | name: .NET Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - dev 8 | paths-ignore: 9 | - 'docs/**' 10 | - 'mkdocs.yml' 11 | pull_request: 12 | branches: 13 | - main 14 | - dev 15 | paths-ignore: 16 | - 'docs/**' 17 | - 'mkdocs.yml' 18 | workflow_dispatch: 19 | 20 | jobs: 21 | build: 22 | runs-on: ubuntu-latest 23 | defaults: 24 | run: 25 | working-directory: src 26 | 27 | steps: 28 | - name: Checkout code 29 | uses: actions/checkout@v2 30 | 31 | - name: Setup .NET Core 32 | uses: actions/setup-dotnet@v2 33 | with: 34 | dotnet-version: '8.0.x' # Adjust the version as needed 35 | 36 | - name: Restore dependencies 37 | run: dotnet restore 38 | 39 | - name: Build 40 | run: dotnet build --no-restore 41 | 42 | - name: Run tests with code coverage 43 | run: dotnet test --no-build --verbosity normal --results-directory "./TestResults/Coverage/" --collect:"XPlat Code Coverage" 44 | 45 | - name: Upload test results artifact 46 | uses: actions/upload-artifact@v4 47 | with: 48 | name: test-results 49 | path: '**/TestResults/**' 50 | -------------------------------------------------------------------------------- /.github/workflows/ghpages-mkdocs.yml: 
-------------------------------------------------------------------------------- 1 | name: Build MKDocs Site 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | - dev 8 | paths: 9 | - .github/workflows/ghpages-mkdocs.yml 10 | - docs/** 11 | - mkdocs.yml 12 | workflow_dispatch: 13 | 14 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 15 | permissions: 16 | contents: read 17 | pages: write 18 | id-token: write 19 | 20 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 21 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 22 | concurrency: 23 | group: "pages" 24 | cancel-in-progress: false 25 | 26 | jobs: 27 | build: 28 | runs-on: ubuntu-latest 29 | 30 | steps: 31 | - name: Checkout code 32 | uses: actions/checkout@v3 33 | with: 34 | fetch-depth: 0 # Fetch all history for all branches and tags, not just the default branch. 35 | # This is needed to ensure that the commit SHA is available for the deployment. 
36 | # See 37 | sparse-checkout: | 38 | docs 39 | mkdocs.yml 40 | .github/workflows/ghpages-mkdocs.yml 41 | 42 | - name: Setup pages 43 | id: pages 44 | uses: actions/configure-pages@v5 45 | 46 | 47 | - name: Set up Python 48 | uses: actions/setup-python@v4 49 | with: 50 | python-version: '3.x' # Use the latest version of Python 3 51 | 52 | - name: Install dependencies 53 | run: | 54 | python -m pip install --upgrade pip 55 | pip install -r ./docs/requirements.txt 56 | 57 | - name: Build documentation 58 | # Outputs to the './_site' directory by default 59 | run: | 60 | mkdocs build --site-dir ./_site --config-file ./docs/mkdocs.yml 61 | 62 | - name: Upload artifact 63 | # Automatically upload an artifact from the './_site' directory by default 64 | uses: actions/upload-pages-artifact@v3 65 | with: 66 | path: ./docs/_site 67 | -------------------------------------------------------------------------------- /.github/workflows/mkdocs-build.yml: -------------------------------------------------------------------------------- 1 | name: Deploy MKDocs Site to Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - .github/workflows/ghpages-mkdocs.yml 9 | - docs/** 10 | - mkdocs.yml 11 | workflow_dispatch: 12 | 13 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 14 | permissions: 15 | contents: read 16 | pages: write 17 | id-token: write 18 | 19 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 20 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 21 | concurrency: 22 | group: "pages" 23 | cancel-in-progress: false 24 | 25 | jobs: 26 | build: 27 | runs-on: ubuntu-latest 28 | 29 | steps: 30 | - name: Checkout code 31 | uses: actions/checkout@v3 32 | with: 33 | fetch-depth: 0 # Fetch all history for all branches and tags, not just the default branch. 
34 | # This is needed to ensure that the commit SHA is available for the deployment. 35 | # See 36 | sparse-checkout: | 37 | docs 38 | mkdocs.yml 39 | .github/workflows/ghpages-mkdocs.yml 40 | 41 | - name: Setup pages 42 | id: pages 43 | uses: actions/configure-pages@v5 44 | 45 | 46 | - name: Set up Python 47 | uses: actions/setup-python@v4 48 | with: 49 | python-version: '3.x' # Use the latest version of Python 3 50 | 51 | - name: Install dependencies 52 | run: | 53 | python -m pip install --upgrade pip 54 | pip install -r ./docs/requirements.txt 55 | 56 | - name: Build documentation 57 | # Outputs to the './_site' directory by default 58 | run: | 59 | mkdocs build --site-dir ./_site --config-file ./docs/mkdocs.yml 60 | 61 | - name: Upload artifact 62 | # Automatically upload an artifact from the './_site' directory by default 63 | uses: actions/upload-pages-artifact@v3 64 | with: 65 | path: ./docs/_site 66 | 67 | deploy: 68 | runs-on: ubuntu-latest 69 | needs: build 70 | environment: 71 | name: github-pages 72 | url: ${{ steps.deployment.outputs.page_url }} 73 | steps: 74 | - name: Deploy to GitHub Pages 75 | id: deployment 76 | uses: actions/deploy-pages@v4 77 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | obj 3 | bin 4 | 5 | .DS_Store 6 | 7 | BenchmarkDotNet.Artifacts/ 8 | TestResults/ 9 | 10 | docs/site 11 | .cache 12 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
7 | 8 | ## v2.1.1 9 | 10 | Add: 11 | 12 | - Add optional `filter` parameter to `.Search` and `.SearchAsync` methods that is of type `Func` that is called for each text item in the database for more advanced filtering prior to performing vector similarity search and returning results. If undefined or `null` it's ignored. 13 | 14 | ## v2.1.0 15 | 16 | Add: 17 | 18 | - Added `VectorTextResultItem.Id` property so it's easy to get the database ID for search results if necessary. 19 | - `IVectorDatabase` now inherits from `IEnumerable` so you can easily look through the text documents that have been added to the database. 20 | 21 | Fixed: 22 | 23 | - Fixed text tokenization to correctly remove special characters 24 | - Update `BasicTextPreprocessor` to support Emoji characters too 25 | - Refactorings for more Clean Code 26 | 27 | Breaking Changes: 28 | 29 | - The `.Search` and `.SearchAsync` methods now return a `IVectorTextResultItem` instead of `VectorTextResultItem`. If you're using things like the documentation shows, then you won't see any changes or have any issues with this update. 30 | 31 | ## v2.0.4 (2025-04-16) 32 | 33 | Add: 34 | 35 | - Add Ollama support via `Build5Nines.SharpVector.Ollama` nuget package 36 | - Added `Build5Nines.SharpVector.Embeddings.IEmbeddingsGenerator` to support creating external embeddings providers 37 | - Added `Build5Nines.OpenAI.IOpenAIMemoryVectorDatabase` interface 38 | 39 | Fixed: 40 | 41 | - Internal refactoring of save/load database persistence file code to make more maintainable and reusable going forward. 42 | - Implement some performance tweaks in the code; adding `const string` and other best practices to help overall performance for handling larger amounts of data. 43 | 44 | ## 2.0.2 (2025-04-15) 45 | 46 | Add: 47 | 48 | - Add `SerializeToBinaryStream` and `DeserializeFromBinaryStream` methods to replace (and mark obsolete) `SerializeToJsonStream` and `DeserializeFromJsonStream` methods. 
They read/write binary zip file data, not json, so they were named incorrectly. 49 | 50 | Fixed: 51 | 52 | - Fixed `.LoadFromFile` method that was previously inaccessible. 53 | 54 | ## 2.0.1 (2025-03-17) 55 | 56 | Added: 57 | 58 | - Expose internal vector array of `VectorTextItem` from `VectorTextResultItem.Vectors` property, to make vector array accessible for consuming code in cases where access is required. This is mostly for more flexible usage of the library. 59 | - Added Overlapping Window text chunking (`TextChunkingMethod.OverlappingWindow`) to `TextDataLoader` for enhanced document segmentation with overlapping content, improving metadata extraction and search result relevance. 60 | 61 | Fixed: 62 | 63 | - When using `Data.TextDataLoader` with `TextChunkingMethod.FixedLength` it was splitting on a space character which wouldn't work correctly with Chinese text characters. This is now fixed to work correctly with Chinese characters too. 64 | 65 | ## v2.0.0 (2025-02-23) 66 | 67 | Added: 68 | 69 | - Add data persistence capability to save/load from a file or to/from a `Stream` (Both SharpVector and SharpVector.OpenAI) 70 | - Add Chinese language/character support 71 | 72 | Breaking Change: 73 | 74 | - Refactor `IVocabularyStore` to be used within `MemoryDictionaryVectorStoreWithVocabulary`. This simplifies implementation of `MemoryVectorDatabaseBase`, and helps to enable data persistence capability. 75 | 76 | Notes: 77 | 78 | - The breaking change only applies if the base classes are being used. If the `BasicMemoryVectorDatabase` is being used, this will likely not break applications that depend on this library. However, in some instances where explicitly depending on `VectorTextResult` and its properties (without using `var` in consuming code) there might be minor code changes needed when migrating from previous versions of the library. 
79 | 80 | ## v1.0.1 (2025-02-06) 81 | 82 | - Upgrade to .NET 8 or higher 83 | 84 | ### v1.0.0 (2024-05-24) 85 | 86 | Added: 87 | 88 | - Simplify object model by combining Async and non-Async classes, `BasicMemoryVectorDatabase` now supports both synchronous and asynchronous operations. 89 | - Refactored to remove unnecessary classes where the `Async` versions will work just fine. 90 | - Improve async/await and multi-threading use 91 | 92 | ### v0.9.8-beta (2024-05-20) 93 | 94 | Added: 95 | 96 | - Added `Async` version of classes to support multi-threading 97 | - Metadata is no longer required when calling `.AddText()` and `.AddTextAsync()` 98 | - Refactor `IVectorSimilarityCalculator` to `IVectorComparer` and `CosineVectorSimilarityCalculatorAsync` to `CosineSimilarityVectorComparerAsync` 99 | - Add new `EuclideanDistanceVectorComparerAsync` 100 | - Fix `MemoryVectorDatabase` to no longer require the unused `TId` generic type 101 | - Rename `VectorSimilarity` and `Similarity` properties to `VectorComparison` 102 | 103 | ### v0.9.5-beta (2024-05-18) 104 | 105 | Added: 106 | 107 | - Add `TextDataLoader` class to provide support for different methods of text chunking when loading documents into the vector database. 108 | 109 | ### v0.9.0-beta (2024-05-18) 110 | 111 | Added: 112 | 113 | - Introduced the `BasicMemoryVectorDatabase` class as the basic Vector Database implementation that uses a Bag of Words vectorization strategy, with Cosine similarity, a dictionary vocabulary store, and a basic text preprocessor. 114 | - Add more C# Generics use, so the library is more customizable when used, and custom vector databases can be implemented if desired. 115 | - Added `VectorTextResultItem.Similarity` so consuming code can inspect similarity of the Text in the vector search results. 
116 | - Update `.Search` method to support search result paging and threshold support for similarity comparison 117 | - Add some basic Unit Tests 118 | 119 | ### v0.8.0-beta (2024-05-17) 120 | 121 | Added: 122 | 123 | - Initial release - let's do this! 124 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | chris@build5nines.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Build5Nines LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Build5Nines SharpVector - The lightweight, in-memory, Semantic Search, Text Vector Database for any C# / .NET Applications 2 | 3 | `Build5Nines.SharpVector` is an in-memory vector database library designed for .NET applications. It allows you to store, search, and manage text data using vector representations. The library is customizable and extensible, enabling support for different vector comparison methods, preprocessing techniques, and vectorization strategies. 
4 | 5 | [![.NET Core Tests](https://github.com/Build5Nines/SharpVector/actions/workflows/dotnet-tests.yml/badge.svg)](https://github.com/Build5Nines/SharpVector/actions/workflows/dotnet-tests.yml) 6 | [![Build and Release](https://github.com/Build5Nines/SharpVector/actions/workflows/build-release.yml/badge.svg)](https://github.com/Build5Nines/SharpVector/actions/workflows/build-release.yml) 7 | ![Libraries.io dependency status for GitHub repo](https://img.shields.io/librariesio/github/build5nines/sharpvector) 8 | 9 | [![NuGet](https://img.shields.io/nuget/v/Build5Nines.SharpVector.svg)](https://www.nuget.org/packages/Build5Nines.SharpVector/) 10 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) 11 | ![Framework: .NET 8+](https://img.shields.io/badge/framework-.NET%208%2B-blue) 12 | ![Semantic Search: Enabled](https://img.shields.io/badge/semantic%20search-enabled-purple) 13 | ![Gen AI: Ready](https://img.shields.io/badge/Gen%20AI-ready-purple) 14 | 15 | Vector databases are used with Semantic Search and [Generative AI](https://build5nines.com/what-is-generative-ai/?utm_source=github&utm_medium=sharpvector) solutions augmenting the LLM (Large Language Model) with the ability to load additional context data with the AI prompt using the [RAG (Retrieval-Augmented Generation)](https://build5nines.com/what-is-retrieval-augmented-generation-rag/?utm_source=github&utm_medium=sharpvector) design pattern. 16 | 17 | While there are lots of large databases that can be used to build Vector Databases (like Azure CosmosDB, PostgreSQL w/ pgvector, Azure AI Search, Elasticsearch, and more), there are not many options for a lightweight vector database that can be embedded into any .NET application. Build5Nines SharpVector is the lightweight in-memory Text Vector Database for use in any .NET application that you're looking for! 
18 | 19 | ### [Documentation](https://sharpvector.build5nines.com) | [Get Started](https://sharpvector.build5nines.com/get-started/) | [Samples](https://sharpvector.build5nines.com/samples/) 20 | 21 | ## Nuget Package 22 | 23 | The `Build5Nines.SharpVector` library is available as a Nuget Package to easily include into your .NET projects: 24 | 25 | ```bash 26 | dotnet add package Build5Nines.SharpVector 27 | ``` 28 | 29 | You can view it on Nuget.org here: <https://www.nuget.org/packages/Build5Nines.SharpVector/> 30 | 31 | ## Maintained By 32 | 33 | The **Build5Nines SharpVector** project is maintained by [Chris Pietschmann](https://pietschsoft.com?utm_source=github&utm_medium=sharpvector), founder of [Build5Nines](https://build5nines.com?utm_source=github&utm_medium=sharpvector), Microsoft MVP, HashiCorp Ambassador, and Microsoft Certified Trainer (MCT). 34 | -------------------------------------------------------------------------------- /assets/build5nines-sharpvector-console-screenshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/assets/build5nines-sharpvector-console-screenshot.jpg -------------------------------------------------------------------------------- /assets/github-opengraph-build5nines-sharpvector-dark.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/assets/github-opengraph-build5nines-sharpvector-dark.jpg -------------------------------------------------------------------------------- /assets/github-opengraph-build5nines-sharpvector-light.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/assets/github-opengraph-build5nines-sharpvector-light.jpg 
-------------------------------------------------------------------------------- /docs/docs/embeddings/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Embeddings 3 | --- 4 | # :fontawesome-solid-square-binary: Embeddings 5 | 6 | `Build5Nines.SharpVector` includes the following support for using AI Models to generate the text embeddings for the vector database instead of generating them locally. The use of an AI Embeddings model can greatly increase the quality of the semantic search. 7 | 8 |
9 | 10 | - :simple-openai:{ .lg .middle } __OpenAI Embeddings__ 11 | 12 | --- 13 | 14 | Use OpenAI and/or Azure OpenAI Service embeddings models like `text-embedding-ada-002` or others. 15 | 16 | [:octicons-arrow-right-24: Getting started](openai/index.md) 17 | 18 | - :simple-ollama:{ .lg .middle } __Ollama Embeddings__ 19 | 20 | --- 21 | 22 | Use Ollama embeddings models like `nomic-embed-text` or others. 23 | 24 | [:octicons-arrow-right-24: Get Started](ollama/index.md) 25 | 26 |
-------------------------------------------------------------------------------- /docs/docs/embeddings/ollama/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Ollama Embeddings 3 | description: Integrate Ollama embedding models with SharpVector to supercharge your semantic search and AI features in .NET apps. 4 | --- 5 | 6 | # :simple-ollama: Ollama Embeddings 7 | 8 | Integrating [Ollama](https://ollama.com) embedding models with `Build5Nines.SharpVector` enhances the semantic search capabilities of your .NET applications. By leveraging models like `nomic-embed-text` or others, you can generate higher quality vector representations of text, leading to more accurate and contextually relevant search results. 9 | 10 | ## Why Use an Ollama Embedding Model? 11 | 12 | While **SharpVector** includes basic embedding generation, utilizing an Ollama embedding model offers significant advantages: 13 | 14 | - **Improved Search Accuracy**: Embedding models capture the semantic meaning of text more accurately, resulting in more relevant search outcomes. 15 | - **Pre-trained on Extensive Data**: These models are trained on vast datasets, enhancing their robustness and generalization capabilities. 16 | - **Optimized for Performance**: Designed for efficient retrieval and indexing, Ollama embedding models facilitate faster search operations. 17 | 18 | ## Getting Started 19 | 20 | To integrate an Ollama embedding model with SharpVector, install the `Build5Nines.SharpVector.Ollama` NuGet package: 21 | 22 | === ".NET CLI" 23 | ```bash 24 | dotnet add package Build5Nines.SharpVector.Ollama 25 | ``` 26 | === "Package Manager" 27 | ```powershell 28 | Nuget\Install-Package Build5Nines.SharpVector.Ollama 29 | ``` 30 | 31 | This package includes the core `Build5Nines.SharpVector` library and dependencies required to connect to Ollama's embedding API. 
32 | 33 | ## Initialize the Vector Database using Ollama 34 | 35 | With the Ollama embedding model running, initialize the **SharpVector** database: 36 | 37 | ```csharp 38 | using Build5Nines.SharpVector.Ollama; 39 | 40 | var modelName = "nomic-embed-text"; 41 | 42 | // For connecting to Locally running ('localhost') Ollama 43 | var vectorDatabase = new BasicOllamaMemoryVectorDatabase(modelName); 44 | 45 | // For connecting to a different Ollama endpoint URL 46 | var ollamaEndpoint = "http://localhost:11434/api/embeddings"; 47 | var vectorDatabase = new BasicOllamaMemoryVectorDatabase(ollamaEndpoint, modelName); 48 | ``` 49 | 50 | ## Adding Text Data 51 | 52 | To add text documents to the vector database: 53 | 54 | ```csharp 55 | // sync 56 | vectorDatabase.AddText(documentText, metadataText); 57 | 58 | // async 59 | await vectorDatabase.AddTextAsync(documentText, metadataText); 60 | ``` 61 | 62 | - `documentText`: The textual content to be vectorized. 63 | - `metadataText`: Associated metadata (e.g., document title, JSON string) stored alongside the vectorized text. 64 | 65 | !!! note 66 | Metadata is not vectorized but is retrieved with search results, providing context. 67 | 68 | ## Performing Similarity Search 69 | 70 | The `SearchAsync` method returns documents whose vector representations closely match the query vector, based on similarity metrics like cosine similarity. 
71 | 72 | ```csharp 73 | var query = "your search query"; 74 | var results = await vectorDatabase.SearchAsync(query); 75 | ``` 76 | 77 | The `.SearchAsync` method supports additional arguments to help with searching the vector database: 78 | 79 | ```csharp 80 | var results = await vectorDatabase.SearchAsync(queryText, 81 | threshold: 0.001f, // 0.2f - Cosine Similarity 82 | pageIndex: 0, // page index of search results (default: 0) 83 | pageCount: 10 // Number of results per page to return (default: no limit) 84 | ); 85 | ``` 86 | 87 | - `queryText`: The text query to search within the vector database. 88 | - `threshold`: The similarity threshold to use for searching the vector database using Cosine Similarity method. 89 | - `pageIndex`: The page index of search results to return. Default is `0`. 90 | - `pageCount`: The number of search results to return per page. Default is "no limit" (aka return all results) 91 | 92 | ## Summary 93 | 94 | Integrating an Ollama embedding model with **Build5Nines.SharpVector** empowers your .NET applications with advanced semantic search capabilities. By leveraging high-quality vector representations, you can achieve more accurate and context-aware search results, enhancing the overall user experience. 95 | -------------------------------------------------------------------------------- /docs/docs/embeddings/openai/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: OpenAI Embeddings 3 | description: Integrate OpenAI’s powerful embeddings with SharpVector to supercharge your semantic search and AI features in .NET apps. 4 | --- 5 | 6 | # :simple-openai: OpenAI Embeddings 7 | 8 | Integrating OpenAI embeddings with **Build5Nines.SharpVector** enhances the semantic search capabilities of your .NET applications. 
By leveraging models like `text-embedding-ada-002`, you can generate high-quality vector representations of text, leading to more accurate and contextually relevant search results. 9 | 10 | ## Why Use OpenAI Embeddings? 11 | 12 | While **SharpVector** includes basic embedding generation, utilizing OpenAI's advanced models offers significant advantages: 13 | 14 | - **Improved Search Accuracy**: OpenAI's embeddings capture the semantic meaning of text, resulting in more relevant search outcomes. 15 | - **Pre-trained on Extensive Data**: These models are trained on vast datasets, enhancing their robustness and generalization capabilities. 16 | - **Optimized for Performance**: Designed for efficient retrieval and indexing, OpenAI embeddings facilitate faster search operations. 17 | 18 | ## Getting Started 19 | 20 | ### Installation 21 | 22 | To integrate OpenAI embeddings with SharpVector, install the `Build5Nines.SharpVector.OpenAI` NuGet package: 23 | 24 | === ".NET CLI" 25 | ```bash 26 | dotnet add package Build5Nines.SharpVector.OpenAI 27 | ``` 28 | === "Package Manager" 29 | ```powershell 30 | Nuget\Install-Package Build5Nines.SharpVector.OpenAI 31 | ``` 32 | 33 | This package includes the core `Build5Nines.SharpVector` library and dependencies required to connect to OpenAI's embedding services. 
34 | 35 | ### Setting Up the Embedding Client 36 | 37 | === "OpenAI" 38 | If you're using OpenAI's API directly: 39 | 40 | ```csharp 41 | using OpenAI; 42 | 43 | var openAIKey = "your-api-key"; 44 | var modelName = "text-embedding-ada-002"; 45 | 46 | var openAIClient = new OpenAIClient(openAIKey); 47 | var embeddingClient = openAIClient.GetEmbeddingClient(modelName); 48 | ``` 49 | === "Azure OpenAI" 50 | For applications utilizing Azure OpenAI: 51 | 52 | ```csharp 53 | using Azure; 54 | using Azure.AI.OpenAI; 55 | 56 | var openAIUri = new Uri("https://your-resource-name.openai.azure.com/"); 57 | var openAIKey = "your-api-key"; 58 | var modelName = "text-embedding-ada-002"; 59 | 60 | var openAIClient = new AzureOpenAIClient(openAIUri, new AzureKeyCredential(openAIKey)); 61 | var embeddingClient = openAIClient.GetEmbeddingClient(modelName); 62 | ``` 63 | 64 | ### Initializing the Vector Database 65 | 66 | With the embedding client set up, initialize the in-memory vector database: 67 | 68 | ```csharp 69 | using Build5Nines.SharpVector.OpenAI; 70 | 71 | var vectorDatabase = new BasicOpenAIMemoryVectorDatabase(embeddingClient); 72 | ``` 73 | 74 | - `embeddingClient`: The OpenAI Embedding Client to use for generating the vector embeddings. 75 | 76 | ## Adding Text Data 77 | 78 | To add text documents to the vector database: 79 | 80 | ```csharp 81 | // sync 82 | vectorDatabase.AddText(documentText, metadataText); 83 | 84 | // async 85 | await vectorDatabase.AddTextAsync(documentText, metadataText); 86 | ``` 87 | 88 | - `documentText`: The textual content to be vectorized. 89 | - `metadataText`: Associated metadata (e.g., document title, JSON string) stored alongside the vectorized text. 90 | 91 | !!! note 92 | Metadata is not vectorized but is retrieved with search results, providing context. 
93 | 94 | ## Performing Similarity Search 95 | 96 | The `SearchAsync` method returns documents whose vector representations closely match the query vector, based on similarity metrics like cosine similarity. 97 | 98 | ```csharp 99 | var query = "your search query"; 100 | var results = await vectorDatabase.SearchAsync(query); 101 | ``` 102 | 103 | The `.SearchAsync` method supports additional arguments to help with searching the vector database: 104 | 105 | ```csharp 106 | var results = await vectorDatabase.SearchAsync(queryText, 107 | threshold: 0.001f, // 0.2f - Cosine Similarity 108 | pageIndex: 0, // page index of search results (default: 0) 109 | pageCount: 10 // Number of results per page to return (default: no limit) 110 | ); 111 | ``` 112 | 113 | - `queryText`: The text query to search within the vector database. 114 | - `threshold`: The similarity threshold to use for searching the vector database using Cosine Similarity method. 115 | - `pageIndex`: The page index of search results to return. Default is `0`. 116 | - `pageCount`: The number of search results to return per page. Default is "no limit" (aka return all results) 117 | 118 | ## Summary 119 | 120 | Integrating OpenAI embeddings with **Build5Nines.SharpVector** empowers your .NET applications with advanced semantic search capabilities. By leveraging high-quality vector representations, you can achieve more accurate and context-aware search results, enhancing the overall user experience. 121 | -------------------------------------------------------------------------------- /docs/docs/get-started/data-management/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Management 3 | 4 | --- 5 | # :material-database-edit-outline: Data Management 6 | 7 | Since `Build5Nines.SharpVector` is a database, it also has data management methods available. 
These methods enable you to add, remove, and update the text documents that are vectorized and indexed within the semantic database. 8 | 9 | ## Get Text Item ID 10 | 11 | Every text item within a `Build5Nines.SharpVector` database is assigned a unique identifier (ID). There are a few ways to get access to the ID of the text items. 12 | 13 | === ".AddText()" 14 | 15 | When adding an individual text item to the vector database, the ID value will be returned: 16 | 17 | ```csharp 18 | var id = vdb.AddText(txt, metadata); 19 | 20 | var id = await vdb.AddTextAsync(txt, metadata); 21 | ``` 22 | 23 | === ".Search()" 24 | 25 | When you perform a semantic search, the search results will contain the list of texts; each has an `Id` property. 26 | 27 | ```csharp 28 | var results = vdb.Search("query text"); 29 | 30 | foreach(var item in results.Texts) { 31 | var id = item.Id; 32 | var text = item.Text; 33 | var metadata = item.Metadata; 34 | // do something here 35 | } 36 | ``` 37 | 38 | === "Enumerator" 39 | 40 | The `IVectorDatabase` classes implement `IEnumerable` so you can easily loop through all the text items that have been added to the database. 41 | 42 | ```csharp 43 | foreach(var item in vdb) { 44 | var id = item.Id; 45 | var text = item.Text; 46 | var metadata = item.Metadata; 47 | var vector = item.Vector; 48 | 49 | // do something here 50 | } 51 | ``` 52 | 53 | ## Get 54 | 55 | If you know the `id` of a Text item in the database, you can retrieve it directly. 56 | 57 | ### Get By Id 58 | 59 | The `.GetText` method can be used to retrieve a text item from the vector database directly. 60 | 61 | ```csharp 62 | vdb.GetText(id); 63 | ``` 64 | 65 | ## Update 66 | 67 | Once text items have been added to the database "Update" methods can be used to modify them. 68 | 69 | ### Update Text 70 | 71 | The `.UpdateText` method can be used to update the `Text` value, and associated vectors will be updated. 
72 | 73 | ```csharp 74 | vdb.UpdateText(id, newTxt); 75 | ``` 76 | 77 | When the `Text` is updated, new vector embeddings are generated for the new text. 78 | 79 | ### Update Metadata 80 | 81 | The `.UpdateTextMetadata` method can be used to update the `Metadata` for a given text item by `Id`. 82 | 83 | ```csharp 84 | vdb.UpdateTextMetadata(id, newMetadata); 85 | ``` 86 | 87 | When `Metadata` is updated, the vector embeddings are not updated. 88 | 89 | ### Update Text and Metadata 90 | 91 | The `.UpdateTextAndMetadata` method can be used to update the `Text` and `Metadata` for a text item in the database for the given text item `Id`. 92 | 93 | ```csharp 94 | vdb.UpdateTextAndMetadata(id, newTxt, newMetadata); 95 | ``` 96 | 97 | ## Delete 98 | 99 | The vector database supports the ability to delete text items. 100 | 101 | ### Delete Text 102 | 103 | The `.DeleteText` method can be used to delete a text item from the database for the given `Id`. 104 | 105 | ```csharp 106 | vdb.DeleteText(id); 107 | ``` 108 | -------------------------------------------------------------------------------- /docs/docs/get-started/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Get Started 3 | description: Get up and running with SharpVector in minutes. Learn how to install, initialize, and begin storing and searching vectorized text data. 4 | --- 5 | # :octicons-rocket-24: Get Started 6 | 7 | It's really easy to get started with using `Build5Nines.SharpVector`. Simply follow the below steps. 
8 | 9 | ## Prerequisites 10 | 11 | Using `Build5Nines.SharpVector` requires the following: 12 | 13 | - .NET 8.0 or later 14 | 15 | ## Install Nuget Package 16 | 17 | The `Build5Nines.SharpVector` library is available as a [Nuget package](https://www.nuget.org/packages/Build5Nines.SharpVector): 18 | 19 | === ".NET CLI" 20 | ```bash 21 | dotnet add package Build5Nines.SharpVector 22 | ``` 23 | 24 | === "Package Manager" 25 | ```powershell 26 | Nuget\Install-Package Build5Nines.SharpVector 27 | ``` 28 | 29 | ## Basic example 30 | 31 | The following is a basic example of using `Build5Nines.SharpVector` to create and use an in-memory vector database within a C# application: 32 | 33 | ```csharp 34 | using Build5Nines.SharpVector; 35 | 36 | // Create a Vector Database with metadata of type string 37 | var vdb = new BasicMemoryVectorDatabase(); 38 | // The Metadata is declared using generics, so you can store whatever data you need there. 39 | 40 | // Load Vector Database with some sample text data 41 | // Text is the movie description, and Metadata is the movie title with release year in this example 42 | vdb.AddText("Iron Man (2008) is a Marvel Studios action, adventure, and sci-fi movie about Tony Stark (Robert Downey Jr.), a billionaire inventor and weapons developer who is kidnapped by terrorists and forced to build a weapon. Instead, Tony uses his ingenuity to build a high-tech suit of armor and escape, becoming the superhero Iron Man. 
He then returns to the United States to refine the suit and use it to fight crime and terrorism.", "Iron Man (2008)"); 43 | vdb.AddText("The Lion King is a 1994 Disney animated film about a young lion cub named Simba who is the heir to the throne of an African savanna.", "The Lion King (1994)"); 44 | vdb.AddText("Aladdin is a 2019 live-action Disney adaptation of the 1992 animated classic of the same name about a street urchin who finds a magic lamp and uses a genie's wishes to become a prince so he can marry Princess Jasmine.", "Aladdin (2019)"); 45 | vdb.AddText("The Little Mermaid is a 2023 live-action adaptation of Disney's 1989 animated film of the same name. The movie is about Ariel, the youngest of King Triton's daughters, who is fascinated by the human world and falls in love with Prince Eric.", "The Little Mermaid"); 46 | vdb.AddText("Frozen is a 2013 Disney movie about a fearless optimist named Anna who sets off on a journey to find her sister Elsa, whose icy powers have trapped their kingdom in eternal winter.", "Frozen (2013)"); 47 | 48 | // Perform a Vector Search 49 | var result = vdb.Search(newPrompt, pageCount: 5); // return the first 5 results 50 | 51 | if (!result.IsEmpty) 52 | { 53 | Console.WriteLine("Similar Text Found:"); 54 | foreach (var item in result.Texts) 55 | { 56 | Console.WriteLine(item.Metadata); 57 | Console.WriteLine(item.Text); 58 | } 59 | } else { 60 | Console.WriteLine("No results found."); 61 | } 62 | ``` 63 | 64 | 🚀 You are now using an in-memory vector database to implement semantic text searching within your app! 65 | 66 | !!! info 67 | The `Build5Nines.SharpVector` library, by default, supports local text vector generation. However, there is also additional support for both [OpenAI and Ollama embeddings models](../embeddings) for using higher quality, more robust vector generation. 
68 | -------------------------------------------------------------------------------- /docs/docs/get-started/metadata/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Metadata 3 | --- 4 | # :material-database-cog-outline: Metadata 5 | 6 | The `Build5Nines.SharpVector` vector database enables semantic search for `Text` that is stored in the database. Being able to semantically search text is an extremely useful way to lookup more information related to the text. For this purpose, `Metadata` is stored alongside the `Text` within the vector database. This way, when `Text` is found when performing a semantic search, then the matching `Metadata` is also retrieved. 7 | 8 | ## Adding Metadata 9 | 10 | The `.AddText` and `.AddTextAsync` methods accept 2 arguments: 11 | 12 | - `text`: The `Text` that is added to the vector database and has vector embeddings generated for. 13 | - `metadata`: This is additional data / information that is stored alongside the `Text`. 14 | 15 | ```csharp 16 | vdb.AddText(text, metadata); 17 | 18 | await vdb.AddTextAsync(text, metadata); 19 | ``` 20 | 21 | ## JSON and String Metadata 22 | 23 | When using the `BasicMemoryVectorDatabase` class, the `Metadata` values will always be of type `String`. This enables you to store a variety of values here, including: 24 | 25 | - **JSON data**: You can serialize any data to a JSON string for storage in the `Metadata` associated with a text item in the database. 26 | - **`String` value**: You can store any other string value as the `Metadata` associated with a text item in the database. This could be a URL, Filename, or other information. 27 | 28 | !!! info "OpenAI and Ollama Support" 29 | When working with the [OpenAI](../../embeddings/openai/index.md) `BasicOpenAIMemoryVectorDatabase` and [Ollama](../../embeddings/ollama/index.md) `BasicOllamaMemoryVectorDatabase`, the `Metadata` data type is also `String`. 
30 | 31 | Here are some examples of storing `string` metadata and retrieving it from the database: 32 | 33 | === "JSON data" 34 | 35 | ```csharp 36 | // create vector database 37 | var vdb = new BasicMemoryVectorDatabase(); 38 | 39 | // some text to store in the vector database 40 | var text = "some text value"; 41 | // serialize an object to json to store as metadata 42 | var json = JsonSerializer.Serialize(new MyMetadata{ 43 | Url = "https://build5nines.com", 44 | Author = "Chris Pietschmann" 45 | }); 46 | 47 | // Add text with metadata to vector database 48 | vdb.AddText(text, json); 49 | 50 | // perform semantic search 51 | var results = vdb.Search("something to search", pageCount: 5); 52 | 53 | // Loop through search results 54 | foreach(var item in results.Texts) { 55 | var text = item.Text; 56 | var json = item.Metadata; 57 | var metadata = JsonSerializer.Deserialize<MyMetadata>(json); 58 | 59 | // do something with results and metadata 60 | } 61 | ``` 62 | 63 | === "String value" 64 | 65 | ```csharp 66 | // create vector database 67 | var vdb = new BasicMemoryVectorDatabase(); 68 | 69 | // some text to store in the vector database 70 | var text = "some text value"; 71 | // some metadata to store 72 | var metadata = "https://build5nines.com"; 73 | 74 | // Add text with metadata to vector database 75 | vdb.AddText(text, metadata); 76 | 77 | // perform semantic search 78 | var results = vdb.Search("something to search", pageCount: 5); 79 | 80 | // Loop through search results 81 | foreach(var item in results.Texts) { 82 | var text = item.Text; 83 | var metadata = item.Metadata; 84 | 85 | // do something with results and metadata 86 | } 87 | ``` 88 | 89 | ## Custom Metadata Type 90 | 91 | The `MemoryVectorDatabase<TMetadata>` generic class allows you to create a vector database that uses your own custom class as the metadata by defining that class using generics. This enables you to store a native .NET object as the metadata alongside the text in the vector database. 
92 | 93 | Here's an example of using the `MemoryVectorDatabase<TMetadata>` with a .NET class for the `Metadata`: 94 | 95 | ```csharp 96 | // create vector database 97 | var vdb = new MemoryVectorDatabase<MyMetadata>(); 98 | 99 | // some text to store in the vector database 100 | var text = "some text value"; 101 | // an object to store as metadata 102 | var metadata = new MyMetadata{ 103 | Url = "https://build5nines.com", 104 | Author = "Chris Pietschmann" 105 | }; 106 | 107 | // Add text with metadata to vector database 108 | vdb.AddText(text, metadata); 109 | 110 | // perform semantic search 111 | var results = vdb.Search("something to search", pageCount: 5); 112 | 113 | // Loop through search results 114 | foreach(var item in results.Texts) { 115 | var text = item.Text; 116 | var metadata = item.Metadata; 117 | 118 | var url = metadata.Url; 119 | var author = metadata.Author; 120 | 121 | // do something with results and metadata 122 | } 123 | ``` 124 | 125 | This will offer better performance with scenarios that require more complex metadata since you no longer need to handle serialization to/from JSON. 126 | 127 | !!! info "OpenAI and Ollama Support" 128 | The `OpenAIMemoryVectorDatabase` and `OllamaMemoryVectorDatabase` generic classes can also be used to define your own `Metadata` type when working with [OpenAI and Ollama embeddings](../../embeddings/index.md). 129 | -------------------------------------------------------------------------------- /docs/docs/get-started/search/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Semantic Search 3 | --- 4 | # :material-file-search: Semantic Search 5 | 6 | Once text items and their associated metadata have been added to the vector database, the database can be used for semantic search to find matching text items for a given query. 
7 | 8 | The `BasicMemoryVectorDatabase` and `MemoryVectorDatabase<>` classes both contain `.Search` and `.SearchAsync` methods that can be used to perform semantic search on the database: 9 | 10 | === "Sync" 11 | 12 | ```csharp 13 | var query = "some text to search"; 14 | var results = vdb.Search(query); 15 | ``` 16 | 17 | === "Async" 18 | 19 | ```csharp 20 | var query = "some text to search"; 21 | var results = await vdb.SearchAsync(query); 22 | ``` 23 | 24 | ## Metadata Filters 25 | 26 | The `.Search` and `.SearchAsync` methods also include the ability to pre-filter the search results based on a boolean evaluation of the `Metadata` for the text item. This check is run before the vector similarity search is performed, and can help increase search performance on large datasets. 27 | 28 | Here are a couple examples of using the `filter` parameter to perform `Metadata` filtering when performing semantic searches: 29 | 30 | === "Sync" 31 | 32 | ```csharp 33 | var vdb = new BasicMemoryVectorDatabase(); 34 | 35 | // load text and metadata into database 36 | 37 | var query = "some text to search"; 38 | var results = vdb.Search( 39 | query, 40 | filter: (metadata) => { 41 | // perform some operation to check metadata 42 | // return true or false 43 | return metadata.Contains("B59"); 44 | } 45 | ); 46 | ``` 47 | 48 | === "Async" 49 | 50 | ```csharp 51 | var vdb = new MemoryVectorDatabase(); 52 | 53 | // load text and metadata into database 54 | 55 | var query = "some text to search"; 56 | var results = await vdb.SearchAsync( 57 | query, 58 | filter: async (metadata) => { 59 | // perform some operation to check metadata 60 | // return true or false 61 | return metadata.LastName == "Pietschmann"; 62 | } 63 | ); 64 | ``` 65 | 66 | !!! info "OpenAI and Ollama Support" 67 | 68 | This functionality works the same with both [:simple-openai: OpenAI and :simple-ollama: Ollama supported vector databases](../../embeddings/index.md) too. 
69 | 70 | ## Paging 71 | 72 | The `.Search` and `.SearchAsync` methods also include the ability to perform paging on the text items returned from the semantic search. This is performed after the similarity search and the `filter` has been applied to the search results. This is done using the optional `pageCount` and `pageIndex` parameters. 73 | 74 | Here are a couple examples of using the `pageCount` and `pageIndex` parameters to perform paging with the semantic search results: 75 | 76 | === "Sync" 77 | 78 | ```csharp 79 | var vdb = new BasicMemoryVectorDatabase(); 80 | 81 | // load text and metadata into database 82 | 83 | var query = "some text to search"; 84 | var results = vdb.Search( 85 | query, 86 | pageIndex: 0, // return first page of results (default: 0) 87 | pageCount: 6 // limit length of this page of results (default: unlimited) 88 | ); 89 | ``` 90 | 91 | === "Async" 92 | 93 | ```csharp 94 | var vdb = new MemoryVectorDatabase(); 95 | 96 | // load text and metadata into database 97 | 98 | var query = "some text to search"; 99 | var results = await vdb.SearchAsync( 100 | query, 101 | pageIndex: 0, // return first page of results (default: 0) 102 | pageCount: 6 // limit length of this page of results (default: unlimited) 103 | ); 104 | ``` 105 | 106 | The `pageIndex` and `pageCount` parameters are optional, and can be used individually or together. 
107 | -------------------------------------------------------------------------------- /docs/docs/images/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/docs/docs/images/favicon.png -------------------------------------------------------------------------------- /docs/docs/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/docs/docs/images/logo.png -------------------------------------------------------------------------------- /docs/docs/images/samples/build5nines-sharpvector-console-screenshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/docs/docs/images/samples/build5nines-sharpvector-console-screenshot.jpg -------------------------------------------------------------------------------- /docs/docs/license/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: License 3 | description: Review the MIT license terms for using and contributing to the SharpVector open-source project. 
4 | date: 2025-04-13 5 | --- 6 | 7 | # :octicons-file-badge-24: License 8 | 9 | ```text 10 | MIT License 11 | 12 | Copyright (c) 2025 Build5Nines LLC 13 | 14 | Permission is hereby granted, free of charge, to any person obtaining a copy 15 | of this software and associated documentation files (the "Software"), to deal 16 | in the Software without restriction, including without limitation the rights 17 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 18 | copies of the Software, and to permit persons to whom the Software is 19 | furnished to do so, subject to the following conditions: 20 | 21 | The above copyright notice and this permission notice shall be included in all 22 | copies or substantial portions of the Software. 23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | SOFTWARE. 31 | ``` 32 | -------------------------------------------------------------------------------- /docs/docs/persistence/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Persistence 3 | --- 4 | # :octicons-file-24: Data Persistence 5 | 6 | The `Build5Nines.SharpVector` library provides easy-to-use methods for saving a memory-based vector database to a file or stream and loading it again later. This is particularly useful for caching indexed content between runs, deploying pre-built vector stores, or shipping databases with your application. 
7 | 8 | ## :material-file: File Persistence 9 | 10 | `Build5Nines.SharpVector` supports persisting the vector database to a file. 11 | 12 | !!! info 13 | This functionality is implemented as methods available to both the `Build5Nines.SharpVector.BasicMemoryVectorDatabase` and `Build5Nines.SharpVector.OpenAI.BasicOpenAIMemoryVectorDatabase`. These methods are actually extensions on the base `IVectorDatabase` interface, so all implementations of this interface will have this capability. 14 | 15 | ### Save to File 16 | 17 | To persist your `BasicMemoryVectorDatabase` to disk, use the `SaveToFile` or `SaveToFileAsync` methods: 18 | 19 | ```csharp 20 | var vdb = new BasicMemoryVectorDatabase(); 21 | 22 | var filePath = "vectordata.b59vdb"; 23 | 24 | // persist vector database to file asynchronously 25 | await vdb.SaveToFileAsync(filePath); 26 | 27 | // -- or -- 28 | 29 | // persist vector database to file 30 | vdb.SaveToFile(filePath); 31 | ``` 32 | 33 | !!! info 34 | The file extension used in this example is `.b59vdb`, however this is arbitrary. The library doesn't look at the file extension. It only reads the binary contents of the file; which is actually in ZIP file format. 35 | 36 | ### Load from File 37 | 38 | To load a previously saved vector database from disk, use the `LoadFromFile` or `LoadFromFileAsync` methods: 39 | 40 | ```csharp 41 | var vdb = new BasicMemoryVectorDatabase(); 42 | 43 | var filePath = "vectordata.b59vdb"; 44 | 45 | // load vector database from file 46 | vdb.LoadFromFile(filePath); 47 | 48 | // -- or -- 49 | 50 | // load vector database from file asynchronously 51 | await vdb.LoadFromFileAsync(filePath); 52 | ``` 53 | 54 | ## :material-file-move: Persist to Stream 55 | 56 | The underlying methods used by `SaveToFile` and `LoadFromFile` methods for serializing the vector database to a `Stream` are available to use directly. 
This provides support for reading/writing to `MemoryStream` (or other streams) if the vector database needs to be persisted to something other than the local file system. 57 | 58 | !!! info 59 | These `SerializeToBinaryStream` and `DeserializeFromBinaryStream` methods are available in `v2.0.2` and later. 60 | 61 | ### Write to Stream 62 | 63 | To persist your `BasicMemoryVectorDatabase` to a JSON stream, use the `SerializeToBinaryStream` or `SerializeToBinaryStreamAsync` methods: 64 | 65 | ```csharp 66 | var vdb = new BasicMemoryVectorDatabase(); 67 | 68 | var stream = new MemoryStream(); 69 | 70 | // serialize to JSON stream 71 | vdb.SerializeToBinaryStream(stream); 72 | 73 | // -- or -- 74 | 75 | // serialize asynchronously to JSON stream 76 | await vdb.SerializeToBinaryStreamAsync(stream); 77 | ``` 78 | 79 | ### Read from Stream 80 | 81 | To load your `BasicMemoryVectorDatabase` from JSON stream, use the `DeserializeFromBinaryStream` and `DeserializeFromBinaryStreamAsync` methods: 82 | 83 | ```csharp 84 | // Be sure Stream position is at the start 85 | stream.Position = 0; 86 | 87 | // deserialize from JSON stream 88 | vdb.DeserializeFromBinaryStream(stream); 89 | 90 | // -- or --- 91 | 92 | // deserialize asynchronously from JSON stream 93 | await vdb.DeserializeFromBinaryStreamAsync(stream); 94 | ``` 95 | -------------------------------------------------------------------------------- /docs/docs/resources/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Resources 3 | description: Dive deeper with curated resources, links, and tools for working with vector databases, semantic search, and SharpVector. 
4 | --- 5 | 6 | # :octicons-link-24: Resources 7 | 8 | ## Tutorials 9 | 10 | Here's a couple helpful tutorial links with additional documentation and examples on using `Build5Nines.SharpVector` in your own projects: 11 | 12 | - [Perform Vector Database Similarity Search in .NET Apps using Build5Nines.SharpVector](https://build5nines.com/using-build5nines-sharpvector-for-vector-similarity-search-in-net-applications/?utm_source=github&utm_medium=sharpvector) by Chris Pietschmann 13 | - [Enhanced In-Memory Text Vector Search in .NET with SharpVector and OpenAI Embeddings](https://build5nines.com/enhanced-in-memory-text-vector-search-in-net-with-sharpvector-and-openai-embeddings/?utm_source=github&utm_medium=sharpvector) by Chris Pietschmann 14 | - [Build a Generative AI + RAG App in C# with Phi-3, ONNX, and SharpVector](https://build5nines.com/build-a-generative-ai-rag-app-in-c-with-phi-3-onnx-and-sharpvector/?utm_source=github&utm_medium=sharpvector) by Chris Pietschmann 15 | - [Implementing Local RAG using Phi-3 ONNX Runtime and Sidecar Pattern on Linux App Service](https://azure.github.io/AppService/2024/09/03/Phi3-vector.html) by Tulika Chaudharie (Principal Product Manager at Microsoft for Azure App Service) 16 | -------------------------------------------------------------------------------- /docs/docs/samples/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Samples 3 | description: Explore real-world code samples to see SharpVector in action. Build search engines, intelligent note apps, and more. 4 | --- 5 | 6 | # :material-run-fast: Samples 7 | 8 | ## Sample Console App 9 | 10 | The sample console app in this repo show example usage of `Build5Nines.SharpVector`. 11 | 12 | It loads a list of movie titles and descriptions from a JSON file, then allows the user to type in prompts to search the database and return the best matches. 
13 | 14 | [View Source](https://github.com/Build5Nines/SharpVector/blob/main/src/ConsoleTest/Program.cs) 15 | 16 | Here's a screenshot of the test console app running: 17 | 18 | ![Screenshot of sample console app in the terminal](../images/samples/build5nines-sharpvector-console-screenshot.jpg) 19 | 20 | ## Generative AI + RAG + ONNX Model 21 | 22 | This example takes an [ONNX](https://onnxruntime.ai/docs/get-started/with-csharp.html) Generative AI app and extends it to implement Retrieval Augmented Generation (RAG) using `Build5Nines.SharpVector`. 23 | 24 | [View Source](https://github.com/Build5Nines/SharpVector/blob/main/samples/genai-rag-onnx/Program.cs) 25 | 26 | Read the "[Build a Generative AI + RAG App in C# with Phi-3, ONNX, and SharpVector 27 | ](https://build5nines.com/build-a-generative-ai-rag-app-in-c-with-phi-3-onnx-and-sharpvector/)" article for a detailed explanation of building this sample app. 28 | -------------------------------------------------------------------------------- /docs/docs/text-chunking/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Text Chunking 3 | description: Learn how to break large documents into smaller chunks to improve vector search relevance and optimize embedding performance. 4 | --- 5 | # :material-script-text: Text Chunking 6 | 7 | **Text chunking** is the process of breaking up large documents into smaller segments ("chunks") before embedding and storing them in a vector database. This allows for more accurate semantic search and improves performance in applications that deal with large bodies of text. 8 | 9 | SharpVector supports several chunking strategies via the `TextDataLoader` class, making it easy to break down documents automatically. 10 | 11 | --- 12 | 13 | ## Why Chunk Text? 14 | 15 | Chunking large documents improves search quality by: 16 | 17 | - Isolating meaningful sections for embedding (e.g.
a paragraph or sentence) 18 | - Reducing noise and improving semantic match precision 19 | - Allowing matches on specific pieces of content rather than full documents 20 | 21 | --- 22 | 23 | ## Getting Started with `TextDataLoader` 24 | 25 | ### Add References 26 | 27 | Add the necessary namespaces for the `TextDataLoader` class: 28 | 29 | ```csharp 30 | using Build5Nines.SharpVector.Data; 31 | ``` 32 | 33 | ### Instantiate Vector Database 34 | 35 | Create a new vector database. This is the vector database you will be loading chunked text into. 36 | 37 | ```csharp 38 | using Build5Nines.SharpVector; 39 | 40 | var vdb = new BasicMemoryVectorDatabase(); 41 | ``` 42 | 43 | To use the `TextDataLoader`, you need to instantiate an instance of the class passing in the necessary types that match the `TId` and `TMetadata` of the `IVectorDatabase<>` interface. The `BasicMemoryVectorDatabase` class is set up with these types: 44 | 45 | - `TId` of `int`: This is the type for the internal ID for Text items in the vector database. 46 | - `TMetadata` of `string`: This is the type of the Metadata object stored along with the Text items in the vector database. 47 | 48 | !!! info 49 | In most cases you'll likely be using the `BasicMemoryVectorDatabase` class, but the library includes interfaces and base classes to allow for extensibility to use different `TId` and `TMetadata` types as necessary. 50 | 51 | ### Instantiate `TextDataLoader` 52 | 53 | ```csharp 54 | var loader = new TextDataLoader<int, string>(vdb); 55 | ``` 56 | 57 | --- 58 | 59 | ## Chunking Methods 60 | 61 | The `TextDataLoader` class can be used to load documents into the vector database with automatic chunking and metadata assignment. Each chunk must be associated with some metadata — even if it's just a minimal description — using the `RetrieveMetadata` function.
62 | 63 | === "Paragraph" 64 | 65 | Splits the text into logical paragraphs: 66 | 67 | ```csharp 68 | string document = LoadDocumentText(); 69 | loader.AddDocument(document, new TextChunkingOptions 70 | { 71 | Method = TextChunkingMethod.Paragraph, 72 | RetrieveMetadata = (chunk) => { 73 | return "{ \"chunkSize\": \"" + chunk.Length + "\" }"; 74 | } 75 | }); 76 | ``` 77 | 78 | === "Sentence" 79 | 80 | Breaks text into individual sentences using punctuation boundaries: 81 | 82 | ```csharp 83 | string document = LoadDocumentText(); 84 | loader.AddDocument(document, new TextChunkingOptions 85 | { 86 | Method = TextChunkingMethod.Sentence, 87 | RetrieveMetadata = (chunk) => { 88 | return "{ \"chunkSize\": \"" + chunk.Length + "\" }"; 89 | } 90 | }); 91 | ``` 92 | 93 | === "Fixed-Length" 94 | 95 | Divides the text into fixed character lengths, useful for very large documents or uniform sections: 96 | 97 | ```csharp 98 | string document = LoadDocumentText(); 99 | loader.AddDocument(document, new TextChunkingOptions 100 | { 101 | Method = TextChunkingMethod.FixedLength, 102 | ChunkSize = 150, 103 | RetrieveMetadata = (chunk) => { 104 | return "{ \"chunkSize\": \"" + chunk.Length + "\" }"; 105 | } 106 | }); 107 | ``` 108 | 109 | === "Overlapping Window" 110 | 111 | Splits the text into overlapping windows: 112 | 113 | ```csharp 114 | string document = LoadDocumentText(); 115 | loader.AddDocument(document, new TextChunkingOptions 116 | { 117 | Method = TextChunkingMethod.OverlappingWindow, 118 | ChunkSize = 150, 119 | // Number of words to overlap text chunks 120 | OverlapSize = 50, 121 | RetrieveMetadata = (chunk) => { 122 | return "{ \"chunkSize\": \"" + chunk.Length + "\" }"; 123 | } 124 | }); 125 | ``` 126 | 127 | !!! info "Optimization Tip" 128 | Use the chunking method and size that best align with your content type and retrieval goals. For larger documents, text chunking will be required to get the best semantic search results returned from the vector database.
129 | 130 | Some experimentation on your data set may be required to find the text chunking strategy that works best for your solution. 131 | 132 | --- 133 | 134 | ## Customize Metadata 135 | 136 | The `RetrieveMetadata` delegate allows you to generate metadata per chunk. The following example will store a JSON string as the metadata that contains the filename of the document and the date/time it was indexed into the vector database. 137 | 138 | ```csharp 139 | string filename = "document.txt"; 140 | string document = LoadDocumentText(filename); 141 | loader.AddDocument(document, new TextChunkingOptions 142 | { 143 | Method = TextChunkingMethod.Paragraph, 144 | RetrieveMetadata = (chunk) => { 145 | var json = JsonSerializer.Serialize(new { 146 | documentFileName = filename, 147 | timeIndexed = DateTime.UtcNow.ToString("o") 148 | }); 149 | return json; 150 | } 151 | }); 152 | ``` 153 | 154 | This metadata is stored alongside each vector and returned in search results, allowing context-aware interfaces. 155 | 156 | --- 157 | 158 | ## Summary 159 | 160 | Chunking text before indexing enhances SharpVector's ability to deliver relevant and focused semantic search results. With support for multiple chunking strategies and flexible metadata, it's easy to adapt to different content and application needs. 161 | 162 | Use `TextDataLoader` to simplify loading, chunking, and organizing your text data — and supercharge your vector search accuracy!
163 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Build5Nines.SharpVector 2 | site_url: https://sharpvector.build5nines.com 3 | site_author: Build5Nines LLC & Chris Pietschmann 4 | 5 | theme: 6 | name: material # null 7 | # custom_dir: themes/material 8 | custom_dir: overrides # add custom header scripts 9 | language: en 10 | favicon: images/favicon.png 11 | logo: images/logo.png 12 | icon: 13 | repo: fontawesome/brands/github 14 | features: 15 | - tabs 16 | - content.action.edit 17 | # - content.action.view 18 | # - navigation.instant 19 | - content.code.copy 20 | - content.code.annotate 21 | - navigation.expand # sidebar collapsible sections open 22 | - navigation.footer 23 | - navigation.tabs 24 | #- navigation.tabs.sticky # tabs stick when scrolling down page 25 | - navigation.path # add breadcrumbs 26 | - navigation.indexes # default 'index.md' in folder is section page 27 | - navigation.top 28 | - search.suggest 29 | - search.highlight 30 | - content.tabs.link # Ensures site-wide switch to same tab name 31 | - toc.follow 32 | - toc.integrate 33 | 34 | palette: 35 | - media: "(prefers-color-scheme)" 36 | toggle: 37 | icon: material/link 38 | name: Switch to light mode 39 | - media: "(prefers-color-scheme: light)" 40 | scheme: default 41 | primary: blue 42 | accent: blue 43 | toggle: 44 | icon: material/weather-sunny 45 | name: Switch to dark mode 46 | - media: "(prefers-color-scheme: dark)" 47 | scheme: slate 48 | primary: black 49 | accent: blue 50 | toggle: 51 | icon: material/weather-night 52 | name: Switch to system preference 53 | 54 | repo_name: Build5Nines/SharpVector 55 | repo_url: https://github.com/Build5Nines/SharpVector 56 | edit_uri: edit/main/docs/docs/ 57 | 58 | copyright: '© 2025 Build5Nines LLC' 59 | 60 | markdown_extensions: 61 | - abbr 62 | - admonition 63 | - attr_list 64 | - md_in_html 65 | 
- toc: 66 | permalink: true 67 | toc_depth: 2 68 | - pymdownx.critic 69 | - pymdownx.caret 70 | - pymdownx.keys 71 | - pymdownx.mark 72 | - pymdownx.tilde 73 | - pymdownx.details 74 | - pymdownx.superfences 75 | - pymdownx.tasklist: 76 | custom_checkbox: true 77 | - pymdownx.emoji: 78 | emoji_index: !!python/name:material.extensions.emoji.twemoji 79 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 80 | - pymdownx.highlight: 81 | auto_title: true 82 | linenums: true 83 | - pymdownx.snippets 84 | - pymdownx.tabbed: 85 | alternate_style: true 86 | slugify: !!python/object/apply:pymdownx.slugs.slugify 87 | kwds: 88 | case: lower 89 | 90 | plugins: 91 | - search: 92 | lang: 93 | - en 94 | - social 95 | - git-revision-date-localized: 96 | type: timeago 97 | locale: en 98 | - git-committers: # mkdocs-git-committers-plugin-2 plugin to show contributors on footer of 99 | repository: Build5Nines/SharpVector 100 | branch: main 101 | # - with-pdf: 102 | # cover_subtitle: "Lightweight, In-memory, Semantic Search, Text Vector Database to embed in any .NET Application" 103 | # author: Build5Nines LLC 104 | # output_path: build5nines-sharpvector.pdf 105 | 106 | 107 | extra: 108 | # consent: 109 | # title: "Cookies & Privacy" 110 | # description: "This website uses cookies to ensure you get the best experience on our website." 
111 | # policy: "https://build5nines.com/privacy-policy/" 112 | # actions: 113 | # accept: "Accept" 114 | # reject: "Decline" 115 | # manage: "Manage" 116 | analytics: 117 | provider: google 118 | property: G-7KST9RSCN3 119 | generator: false 120 | social: 121 | - icon: fontawesome/brands/github 122 | link: https://github.com/Build5Nines 123 | - icon: fontawesome/brands/bluesky 124 | link: https://bsky.app/profile/build5nines.bsky.social 125 | - icon: fontawesome/brands/twitter 126 | link: https://twitter.com/build5nines 127 | - icon: fontawesome/brands/linkedin 128 | link: https://www.linkedin.com/company/build5nines 129 | - icon: fontawesome/brands/youtube 130 | link: https://www.youtube.com/@build5nines 131 | 132 | nav: 133 | - Discover: 134 | - index.md 135 | - 🚀 Basic usage: /#basic-usage 136 | - 💡 Key Features: /#key-features 137 | - 🧠 Use Cases: /#use-cases 138 | - ⚙️ Local, OpenAI, or Ollama Embeddings: /#local-openai-or-ollama-embeddings-generation 139 | - 🙌 Contributing: /#contributing 140 | - Get Started: 141 | - get-started/index.md 142 | - Prerequisites: get-started/#prerequisites 143 | - Install Nuget Package: get-started/#install-nuget-package 144 | - Basic Example: get-started/#basic-example 145 | - Metadata: get-started/metadata/index.md 146 | - Semantic search: get-started/search/index.md 147 | - Data Management: get-started/data-management/index.md 148 | 149 | - Concepts: 150 | - concepts/index.md 151 | - What is a Vector Database?: concepts/#what-is-a-vector-database 152 | - Text Vectorization: concepts/#text-vectorization 153 | - Semantic Search: concepts/#semantic-search 154 | - Retrieval Augmented Generation: concepts/#retrieval-augmented-generation-rag 155 | - Text Chunking: 156 | - text-chunking/index.md 157 | - Why Chunk Text?: text-chunking/#why-chunk-text 158 | - Getting Started: text-chunking/#getting-started-with-textdataloader 159 | - Chunking Methods: text-chunking/#chunking-methods 160 | - Customize Metadata: 
text-chunking/#customize-metadata 161 | - Summary: text-chunking/#summary 162 | - Persistence: 163 | - persistence/index.md 164 | - File Persistence: persistence/#file-persistence 165 | - Persist to Stream: persistence/#persist-to-stream 166 | - Embeddings: 167 | - embeddings/index.md 168 | - OpenAI Embeddings: embeddings/openai/index.md 169 | - Ollama Embeddings: embeddings/ollama/index.md 170 | - Samples: 171 | - samples/index.md 172 | - Console App: samples/#sample-console-app 173 | - Generative AI + RAG + ONNX: samples/#generative-ai-rag-onnx-model 174 | - Resources: 175 | - resources/index.md 176 | - License: 177 | - license/index.md -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block scripts %} 4 | 5 | {{ super() }} 6 | 7 | 8 | 9 | 10 | 11 | {% endblock %} -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs 2 | mkdocs-material 3 | mkdocs-material[imaging] 4 | pymdown-extensions 5 | markdown-include 6 | mkdocs-git-committers-plugin-2 7 | mkdocs-git-revision-date-localized-plugin 8 | mkdocs-with-pdf 9 | -------------------------------------------------------------------------------- /docs/update-theme.sh: -------------------------------------------------------------------------------- 1 | rm -rf ./themes/material 2 | 3 | git clone https://github.com/yakworks/docmark.git ./themes/.github 4 | 5 | cp -r ./themes/.github/material ./themes/material 6 | 7 | rm -rf ./themes/.github 8 | -------------------------------------------------------------------------------- /samples/genai-rag-onnx/genai-rag-onnx.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | net8.0 6 | genai_rag_onnx 7 | 
enable 8 | enable 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/.vs/ProjectEvaluation/sharpvector.metadata.v6.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/.vs/ProjectEvaluation/sharpvector.metadata.v6.1 -------------------------------------------------------------------------------- /src/.vs/ProjectEvaluation/sharpvector.projects.v6.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/.vs/ProjectEvaluation/sharpvector.projects.v6.1 -------------------------------------------------------------------------------- /src/.vs/SharpVector/FileContentIndex/1f76313d-a8de-47ce-81a3-ac1fc7438030.vsidx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/.vs/SharpVector/FileContentIndex/1f76313d-a8de-47ce-81a3-ac1fc7438030.vsidx -------------------------------------------------------------------------------- /src/.vs/SharpVector/FileContentIndex/a904092f-4585-40f5-9f93-3556202611e1.vsidx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/.vs/SharpVector/FileContentIndex/a904092f-4585-40f5-9f93-3556202611e1.vsidx -------------------------------------------------------------------------------- /src/.vs/SharpVector/FileContentIndex/e534ca41-141b-4115-9099-c4b3a40cc99e.vsidx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/.vs/SharpVector/FileContentIndex/e534ca41-141b-4115-9099-c4b3a40cc99e.vsidx -------------------------------------------------------------------------------- /src/.vs/SharpVector/FileContentIndex/ece43a1c-28dc-443c-bc82-afc0d35267af.vsidx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/.vs/SharpVector/FileContentIndex/ece43a1c-28dc-443c-bc82-afc0d35267af.vsidx -------------------------------------------------------------------------------- /src/.vs/SharpVector/FileContentIndex/read.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/.vs/SharpVector/FileContentIndex/read.lock -------------------------------------------------------------------------------- /src/.vs/SharpVector/v17/.futdcache.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/.vs/SharpVector/v17/.futdcache.v2 -------------------------------------------------------------------------------- /src/.vs/SharpVector/v17/.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/.vs/SharpVector/v17/.suo -------------------------------------------------------------------------------- /src/.vs/SharpVector/v17/TestStore/0/000.testlog: -------------------------------------------------------------------------------- 1 | !!tItseT -------------------------------------------------------------------------------- /src/.vs/SharpVector/v17/TestStore/0/testlog.manifest: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/.vs/SharpVector/v17/TestStore/0/testlog.manifest -------------------------------------------------------------------------------- /src/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": ".NET Core Launch (console)", 9 | "type": "coreclr", 10 | "request": "launch", 11 | "WARNING01": "*********************************************************************************", 12 | "WARNING02": "The C# extension was unable to automatically decode projects in the current", 13 | "WARNING03": "workspace to create a runnable launch.json file. 
A template launch.json file has", 14 | "WARNING04": "been created as a placeholder.", 15 | "WARNING05": "", 16 | "WARNING06": "If the server is currently unable to load your project, you can attempt to", 17 | "WARNING07": "resolve this by restoring any missing project dependencies (example: run 'dotnet", 18 | "WARNING08": "restore') and by fixing any reported errors from building the projects in your", 19 | "WARNING09": "workspace.", 20 | "WARNING10": "If this allows the server to now load your project then --", 21 | "WARNING11": " * Delete this file", 22 | "WARNING12": " * Open the Visual Studio Code command palette (View->Command Palette)", 23 | "WARNING13": " * run the command: '.NET: Generate Assets for Build and Debug'.", 24 | "WARNING14": "", 25 | "WARNING15": "If your project requires a more complex launch configuration, you may wish to", 26 | "WARNING16": "delete this configuration and pick a different template using the 'Add", 27 | "WARNING17": "Configuration...' button at the bottom of this file.", 28 | "WARNING18": "*********************************************************************************", 29 | "preLaunchTask": "build", 30 | "program": "${workspaceFolder}/ConsoleTest/bin/Debug/net8.0/ConsoleTest.dll", 31 | "args": [], 32 | "cwd": "${workspaceFolder}", 33 | "console": "internalConsole", 34 | "stopAtEntry": false 35 | }, 36 | { 37 | "name": ".NET Core Attach", 38 | "type": "coreclr", 39 | "request": "attach" 40 | }, 41 | { 42 | "name": ".NET Tests", 43 | "type": "coreclr", 44 | "request": "launch", 45 | "preLaunchTask": "build", 46 | "program": "${workspaceFolder}/SharpVectorTest/bin/Debug/net8.0/SharpVectorTest.dll", 47 | "args": [], 48 | "cwd": "${workspaceFolder}", 49 | "stopAtEntry": false, 50 | "console": "internalConsole" 51 | } 52 | ] 53 | } -------------------------------------------------------------------------------- /src/.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // 
See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "label": "build", 8 | "command": "dotnet", 9 | "type": "shell", 10 | "args": [ 11 | "build", 12 | // Ask dotnet build to generate full paths for file names. 13 | "/property:GenerateFullPaths=true", 14 | // Do not generate summary otherwise it leads to duplicate errors in Problems panel 15 | "/consoleloggerparameters:NoSummary" 16 | ], 17 | "group": "build", 18 | "presentation": { 19 | "reveal": "silent" 20 | }, 21 | "problemMatcher": "$msCompile" 22 | } 23 | ] 24 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Ollama/BasicOllamaMemoryVectorDatabase.cs: -------------------------------------------------------------------------------- 1 | using Build5Nines.SharpVector.Embeddings; 2 | 3 | namespace Build5Nines.SharpVector.Ollama; 4 | 5 | /// 6 | /// A basic implementation of a vector database that uses an in-memory dictionary to store vectors generated using the specified Ollama embeddings model, with integer keys and string metadata values.
7 | /// 8 | public class BasicOllamaMemoryVectorDatabase : OllamaMemoryVectorDatabase 9 | { 10 | public BasicOllamaMemoryVectorDatabase(string model) 11 | : this( 12 | new Embeddings.OllamaEmbeddingsGenerator(model) 13 | ) 14 | { } 15 | 16 | public BasicOllamaMemoryVectorDatabase(string ollamaEndpoint, string model) 17 | : this( 18 | new Embeddings.OllamaEmbeddingsGenerator(ollamaEndpoint, model) 19 | ) 20 | { } 21 | 22 | public BasicOllamaMemoryVectorDatabase(IEmbeddingsGenerator embeddingsGenerator) 23 | : base(embeddingsGenerator) 24 | { } 25 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Ollama/Build5Nines.SharpVector.Ollama.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | net8.0 5 | enable 6 | enable 7 | true 8 | 9 | Build5Nines.SharpVector.Ollama 10 | https://sharpvector.build5nines.com 11 | https://github.com/Build5Nines/SharpVector 12 | 2.0.3 13 | Lightweight In-memory Vector Database to embed in any .NET Applications that integrates with Ollama Embedding models for vector generation. 
14 | Copyright (c) 2025 Build5Nines LLC 15 | README.md 16 | LICENSE 17 | Chris Pietschmann 18 | Build5Nines LLC 19 | vector;search;database;data;rag;ollama;embeddings;azure;microsoft; 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Ollama/Embeddings/OllamaEmbeddingsGenerator.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | using System.Text.Json; 3 | using System.Text.Json.Serialization; 4 | using Build5Nines.SharpVector.Embeddings; 5 | 6 | namespace Build5Nines.SharpVector.Ollama.Embeddings; 7 | 8 | public class OllamaEmbeddingsGenerator : IEmbeddingsGenerator 9 | { 10 | public string Model { get; set; } 11 | 12 | public string Endpoint { get; set; } 13 | 14 | /// 15 | /// Creates a new instance of the class. 16 | /// This constructor uses the default Ollama embeddings endpoint URL. 17 | /// 18 | /// Ollama embeddings model 19 | public OllamaEmbeddingsGenerator(string model) 20 | : this("http://localhost:11434/api/embeddings", model) 21 | { } 22 | 23 | /// 24 | /// Creates a new instance of the class. 25 | /// 26 | /// Ollama embeddings endpoint URL. 27 | /// Ollama embeddings model 28 | public OllamaEmbeddingsGenerator(string ollamaEndpoint, string model) 29 | { 30 | Endpoint = ollamaEndpoint; 31 | Model = model; 32 | } 33 | 34 | /// 35 | /// Generates embeddings for the given text using the specified Ollama model. 36 | /// 37 | /// The text to generate embeddings for. 38 | /// An array of floats representing the generated embeddings. 
39 | public async Task GenerateEmbeddingsAsync(string text) 40 | { 41 | var requestBody = new 42 | { 43 | model = Model, 44 | prompt = text 45 | }; 46 | 47 | var json = JsonSerializer.Serialize(requestBody); 48 | var content = new StringContent(json, Encoding.UTF8, "application/json"); 49 | 50 | var httpClient = new HttpClient(); 51 | var response = await httpClient.PostAsync(Endpoint, content); 52 | response.EnsureSuccessStatusCode(); 53 | 54 | var responseString = await response.Content.ReadAsStringAsync(); 55 | var embeddingResponse = JsonSerializer.Deserialize(responseString); 56 | 57 | return embeddingResponse?.Embedding ?? Array.Empty(); 58 | } 59 | 60 | private class OllamaEmbeddingResponse 61 | { 62 | [JsonPropertyName("embedding")] 63 | public float[]? Embedding { get; set; } 64 | } 65 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Ollama/OllamaMemoryVectorDatabase.cs: -------------------------------------------------------------------------------- 1 | using Build5Nines.SharpVector.Id; 2 | using Build5Nines.SharpVector.VectorCompare; 3 | using Build5Nines.SharpVector.VectorStore; 4 | using Build5Nines.SharpVector.Embeddings; 5 | 6 | namespace Build5Nines.SharpVector.Ollama; 7 | 8 | /// 9 | /// An interface for a vector database that uses Ollama for embedding generation. 10 | /// 11 | /// 12 | /// 13 | public interface IOllamaMemoryVectorDatabase : IVectorDatabase 14 | where TId : notnull 15 | { } 16 | 17 | /// 18 | /// A simple in-memory database for storing and querying vectorized text items. 19 | /// This database uses Ollama to generate embeddings, and performs Cosine similarity search. 20 | /// 21 | /// Defines the data type for the Metadata stored with the Text.
22 | public class OllamaMemoryVectorDatabase 23 | : MemoryVectorDatabaseBase< 24 | int, 25 | TMetadata, 26 | MemoryDictionaryVectorStore, 27 | IntIdGenerator, 28 | CosineSimilarityVectorComparer 29 | >, IOllamaMemoryVectorDatabase 30 | { 31 | public OllamaMemoryVectorDatabase(string model) 32 | : this( 33 | new Embeddings.OllamaEmbeddingsGenerator(model) 34 | ) 35 | { } 36 | 37 | public OllamaMemoryVectorDatabase(string ollamaEndpoint, string model) 38 | : this( 39 | new Embeddings.OllamaEmbeddingsGenerator(ollamaEndpoint, model) 40 | ) 41 | { } 42 | 43 | public OllamaMemoryVectorDatabase(IEmbeddingsGenerator embeddingsGenerator) 44 | : base( 45 | embeddingsGenerator, 46 | new MemoryDictionaryVectorStore() 47 | ) 48 | { } 49 | } 50 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Ollama/docs/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Build5Nines LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Ollama/docs/README.md: -------------------------------------------------------------------------------- 1 | Build5Nines.SharpVector.Ollama is the lightweight in-memory Vector Database for use in any .NET application that connects to an embeddings model running in Ollama for generating the text embeddings. 2 | 3 | The `Build5Nines.SharpVector.Ollama.BasicOllamaMemoryVectorDatabase` class uses an Ollama embeddings model with Cosine similarity search. 4 | 5 | [Read Documentation](https://sharpvector.build5nines.com) 6 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.OpenAI/BasicOpenAIMemoryVectorDatabase.cs: -------------------------------------------------------------------------------- 1 | using OpenAI.Embeddings; 2 | 3 | namespace Build5Nines.SharpVector.OpenAI 4 | { 5 | 6 | /// 7 | /// A basic implementation of a vector database that uses an in-memory dictionary to store vectors generated using the specified OpenAI embeddings client, with integer keys and string metadata values.
8 | /// 9 | public class BasicOpenAIMemoryVectorDatabase : OpenAIMemoryVectorDatabase 10 | { 11 | public BasicOpenAIMemoryVectorDatabase(EmbeddingClient embeddingClient) 12 | : base(embeddingClient) 13 | { } 14 | } 15 | 16 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.OpenAI/Build5Nines.SharpVector.OpenAI.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | net8.0 5 | enable 6 | enable 7 | true 8 | 9 | Build5Nines.SharpVector.OpenAI 10 | https://sharpvector.build5nines.com 11 | https://github.com/Build5Nines/SharpVector 12 | 2.0.3 13 | Lightweight In-memory Vector Database to embed in any .NET Applications that integrates with OpenAI Embedding model for vector generation. 14 | Copyright (c) 2025 Build5Nines LLC 15 | README.md 16 | LICENSE 17 | Chris Pietschmann 18 | Build5Nines LLC 19 | vector;search;database;data;rag;openai;embeddings;azure;microsoft; 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.OpenAI/Embeddings/OpenAIEmbeddingsGenerator.cs: -------------------------------------------------------------------------------- 1 | using Build5Nines.SharpVector.Embeddings; 2 | using OpenAI.Embeddings; 3 | 4 | namespace Build5Nines.SharpVector.OpenAI.Embeddings; 5 | 6 | public class OpenAIEmbeddingsGenerator : IEmbeddingsGenerator 7 | { 8 | protected EmbeddingClient EmbeddingClient { get; private set; } 9 | 10 | public OpenAIEmbeddingsGenerator(EmbeddingClient embeddingClient) 11 | { 12 | EmbeddingClient = embeddingClient; 13 | } 14 | public async Task GenerateEmbeddingsAsync(string text) 15 | { 16 | var result = await EmbeddingClient.GenerateEmbeddingAsync(text); 17 | var embedding = result.Value; 18 | var vector = embedding.ToFloats(); 19 | return vector.ToArray(); 20 | } 21 | } 
-------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.OpenAI/OpenAIMemoryVectorDatabase.cs: -------------------------------------------------------------------------------- 1 | using Build5Nines.SharpVector.Id; 2 | using Build5Nines.SharpVector.VectorCompare; 3 | using Build5Nines.SharpVector.VectorStore; 4 | using OpenAI.Embeddings; 5 | 6 | namespace Build5Nines.SharpVector.OpenAI; 7 | 8 | /// 9 | /// An interface for a vector database that uses OpenAI for embedding generation. 10 | /// 11 | /// 12 | /// 13 | public interface IOpenAIMemoryVectorDatabase : IVectorDatabase 14 | where TId : notnull 15 | { } 16 | 17 | /// 18 | /// A simple in-memory database for storing and querying vectorized text items. 19 | /// This database uses OpenAI to generate embeddings, and performs Cosine similarity search. 20 | /// 21 | /// Defines the data type for the Metadata stored with the Text. 22 | public class OpenAIMemoryVectorDatabase 23 | : OpenAIMemoryVectorDatabaseBase< 24 | int, 25 | TMetadata, 26 | MemoryDictionaryVectorStore, 27 | IntIdGenerator, 28 | CosineSimilarityVectorComparer 29 | > 30 | { 31 | public OpenAIMemoryVectorDatabase(EmbeddingClient embeddingClient) 32 | : base( 33 | embeddingClient, 34 | new MemoryDictionaryVectorStore() 35 | ) 36 | { } 37 | } 38 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.OpenAI/docs/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Build5Nines LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the 
Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.OpenAI/docs/README.md: -------------------------------------------------------------------------------- 1 | Build5Nines.SharpVector.OpenAI is the lightweight in-memory Vector Database for use in any .NET application that connects to an embeddings model running in Azure OpenAI for generating the text embeddings. 2 | 3 | The `Build5Nines.SharpVector.OpenAI.BasicOpenAIMemoryVectorDatabase` class uses an OpenAI Embeddings Client with Cosine similarity search. 
4 | 5 | [Read Documentation](https://sharpvector.build5nines.com) 6 | 7 | ## Tutorials 8 | 9 | - [Enhanced In-Memory Text Vector Search in .NET with SharpVector and OpenAI Embeddings](https://build5nines.com/enhanced-in-memory-text-vector-search-in-net-with-sharpvector-and-openai-embeddings/?utm_source=github&utm_medium=sharpvector) by Chris Pietschmann -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Build5Nines.SharpVector.Playground.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net8.0 5 | enable 6 | enable 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Components/App.razor: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Components/Layout/MainLayout.razor: -------------------------------------------------------------------------------- 1 | @inherits LayoutComponentBase 2 | 3 |
4 | @* *@ 7 | 8 |
9 |
10 |

Build5Nines.SharpVector Playground

11 | @* View Source *@ 12 |
13 | 14 |
15 | @Body 16 |
17 |
18 |
19 | 20 |
21 | An unhandled error has occurred. 22 | Reload 23 | 🗙 24 |
25 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Components/Layout/MainLayout.razor.css: -------------------------------------------------------------------------------- 1 | .page { 2 | position: relative; 3 | display: flex; 4 | flex-direction: column; 5 | } 6 | 7 | main { 8 | flex: 1; 9 | } 10 | 11 | .sidebar { 12 | background-image: linear-gradient(180deg, rgb(5, 39, 103) 0%, #3a0647 70%); 13 | } 14 | 15 | .top-row { 16 | background-color: #f7f7f7; 17 | border-bottom: 0.1em solid #d6d5d5; 18 | padding-left: 1em; 19 | height: 3.5rem; 20 | display: flex; 21 | align-items: center; 22 | } 23 | 24 | .top-row ::deep a, .top-row ::deep .btn-link { 25 | white-space: nowrap; 26 | margin-left: 1.5rem; 27 | text-decoration: none; 28 | } 29 | 30 | .top-row ::deep a:hover, .top-row ::deep .btn-link:hover { 31 | text-decoration: underline; 32 | } 33 | 34 | .top-row ::deep a:first-child { 35 | overflow: hidden; 36 | text-overflow: ellipsis; 37 | } 38 | 39 | @media (max-width: 640.98px) { 40 | .top-row { 41 | justify-content: space-between; 42 | } 43 | 44 | .top-row ::deep a, .top-row ::deep .btn-link { 45 | margin-left: 0; 46 | } 47 | } 48 | 49 | @media (min-width: 641px) { 50 | .page { 51 | flex-direction: row; 52 | } 53 | 54 | .sidebar { 55 | width: 250px; 56 | height: 100vh; 57 | position: sticky; 58 | top: 0; 59 | } 60 | 61 | .top-row { 62 | position: sticky; 63 | top: 0; 64 | z-index: 1; 65 | } 66 | 67 | .top-row.auth ::deep a:first-child { 68 | flex: 1; 69 | text-align: right; 70 | width: 0; 71 | } 72 | 73 | .top-row, article { 74 | padding-left: 2rem !important; 75 | padding-right: 1.5rem !important; 76 | } 77 | } 78 | 79 | #blazor-error-ui { 80 | background: lightyellow; 81 | bottom: 0; 82 | box-shadow: 0 -1px 2px rgba(0, 0, 0, 0.2); 83 | display: none; 84 | left: 0; 85 | padding: 0.6rem 1.25rem 0.7rem 1.25rem; 86 | position: fixed; 87 | width: 100%; 88 | z-index: 1000; 89 | } 90 | 91 | 
#blazor-error-ui .dismiss { 92 | cursor: pointer; 93 | position: absolute; 94 | right: 0.75rem; 95 | top: 0.5rem; 96 | } 97 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Components/Layout/NavMenu.razor: -------------------------------------------------------------------------------- 1 |  6 | 7 | 8 | 9 | 18 | 19 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Components/Layout/NavMenu.razor.css: -------------------------------------------------------------------------------- 1 | .navbar-toggler { 2 | appearance: none; 3 | cursor: pointer; 4 | width: 3.5rem; 5 | height: 2.5rem; 6 | color: white; 7 | position: absolute; 8 | top: 0.5rem; 9 | right: 1rem; 10 | border: 1px solid rgba(255, 255, 255, 0.1); 11 | background: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 30 30'%3e%3cpath stroke='rgba%28255, 255, 255, 0.55%29' stroke-linecap='round' stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3e%3c/svg%3e") no-repeat center/1.75rem rgba(255, 255, 255, 0.1); 12 | } 13 | 14 | .navbar-toggler:checked { 15 | background-color: rgba(255, 255, 255, 0.5); 16 | } 17 | 18 | .top-row { 19 | height: 3.5rem; 20 | background-color: rgba(0,0,0,0.4); 21 | } 22 | 23 | .navbar-brand { 24 | font-size: 1.1rem; 25 | } 26 | 27 | .bi { 28 | display: inline-block; 29 | position: relative; 30 | width: 1.25rem; 31 | height: 1.25rem; 32 | margin-right: 0.75rem; 33 | top: -1px; 34 | background-size: cover; 35 | } 36 | 37 | .bi-house-door-fill-nav-menu { 38 | background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='white' class='bi bi-house-door-fill' viewBox='0 0 16 16'%3E%3Cpath d='M6.5 14.5v-3.505c0-.245.25-.495.5-.495h2c.25 0 .5.25.5.5v3.5a.5.5 0 0 0 .5.5h4a.5.5 0 0 0 .5-.5v-7a.5.5 0 0 0-.146-.354L13 5.793V2.5a.5.5 0 0 0-.5-.5h-1a.5.5 0 0 
0-.5.5v1.293L8.354 1.146a.5.5 0 0 0-.708 0l-6 6A.5.5 0 0 0 1.5 7.5v7a.5.5 0 0 0 .5.5h4a.5.5 0 0 0 .5-.5Z'/%3E%3C/svg%3E"); 39 | } 40 | 41 | .bi-plus-square-fill-nav-menu { 42 | background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='white' class='bi bi-plus-square-fill' viewBox='0 0 16 16'%3E%3Cpath d='M2 0a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V2a2 2 0 0 0-2-2H2zm6.5 4.5v3h3a.5.5 0 0 1 0 1h-3v3a.5.5 0 0 1-1 0v-3h-3a.5.5 0 0 1 0-1h3v-3a.5.5 0 0 1 1 0z'/%3E%3C/svg%3E"); 43 | } 44 | 45 | .bi-list-nested-nav-menu { 46 | background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='white' class='bi bi-list-nested' viewBox='0 0 16 16'%3E%3Cpath fill-rule='evenodd' d='M4.5 11.5A.5.5 0 0 1 5 11h10a.5.5 0 0 1 0 1H5a.5.5 0 0 1-.5-.5zm-2-4A.5.5 0 0 1 3 7h10a.5.5 0 0 1 0 1H3a.5.5 0 0 1-.5-.5zm-2-4A.5.5 0 0 1 1 3h10a.5.5 0 0 1 0 1H1a.5.5 0 0 1-.5-.5z'/%3E%3C/svg%3E"); 47 | } 48 | 49 | .nav-item { 50 | font-size: 0.9rem; 51 | padding-bottom: 0.5rem; 52 | } 53 | 54 | .nav-item:first-of-type { 55 | padding-top: 1rem; 56 | } 57 | 58 | .nav-item:last-of-type { 59 | padding-bottom: 1rem; 60 | } 61 | 62 | .nav-item ::deep .nav-link { 63 | color: #d7d7d7; 64 | background: none; 65 | border: none; 66 | border-radius: 4px; 67 | height: 3rem; 68 | display: flex; 69 | align-items: center; 70 | line-height: 3rem; 71 | width: 100%; 72 | } 73 | 74 | .nav-item ::deep a.active { 75 | background-color: rgba(255,255,255,0.37); 76 | color: white; 77 | } 78 | 79 | .nav-item ::deep .nav-link:hover { 80 | background-color: rgba(255,255,255,0.1); 81 | color: white; 82 | } 83 | 84 | .nav-scrollable { 85 | display: none; 86 | } 87 | 88 | .navbar-toggler:checked ~ .nav-scrollable { 89 | display: block; 90 | } 91 | 92 | @media (min-width: 641px) { 93 | .navbar-toggler { 94 | display: none; 95 | } 96 | 97 | .nav-scrollable { 98 | /* Never collapse the sidebar for wide screens */ 99 | 
display: block; 100 | 101 | /* Allow sidebar to scroll for tall menus */ 102 | height: calc(100vh - 3.5rem); 103 | overflow-y: auto; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Components/Pages/Error.razor: -------------------------------------------------------------------------------- 1 | @page "/Error" 2 | @using System.Diagnostics 3 | 4 | Error 5 | 6 |

Error.

7 |

An error occurred while processing your request.

8 | 9 | @if (ShowRequestId) 10 | { 11 |

12 | Request ID: @RequestId 13 |

14 | } 15 | 16 |

Development Mode

17 |

18 | Swapping to Development environment will display more detailed information about the error that occurred. 19 |

20 |

21 | The Development environment shouldn't be enabled for deployed applications. 22 | It can result in displaying sensitive information from exceptions to end users. 23 | For local debugging, enable the Development environment by setting the ASPNETCORE_ENVIRONMENT environment variable to Development 24 | and restarting the app. 25 |

26 | 27 | @code{ 28 | [CascadingParameter] 29 | private HttpContext? HttpContext { get; set; } 30 | 31 | private string? RequestId { get; set; } 32 | private bool ShowRequestId => !string.IsNullOrEmpty(RequestId); 33 | 34 | protected override void OnInitialized() => 35 | RequestId = Activity.Current?.Id ?? HttpContext?.TraceIdentifier; 36 | } 37 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Components/Routes.razor: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Components/_Imports.razor: -------------------------------------------------------------------------------- 1 | @using System.Net.Http 2 | @using System.Net.Http.Json 3 | @using Microsoft.AspNetCore.Components.Forms 4 | @using Microsoft.AspNetCore.Components.Routing 5 | @using Microsoft.AspNetCore.Components.Web 6 | @using static Microsoft.AspNetCore.Components.Web.RenderMode 7 | @using Microsoft.AspNetCore.Components.Web.Virtualization 8 | @using Microsoft.JSInterop 9 | @using Build5Nines.SharpVector 10 | @using Build5Nines.SharpVector.Data; 11 | @using Build5Nines.SharpVector.Playground 12 | @using Build5Nines.SharpVector.Playground.Components 13 | @using BlazorMonaco 14 | @using BlazorMonaco.Editor 15 | @using BlazorMonaco.Languages -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Program.cs: -------------------------------------------------------------------------------- 1 | using Build5Nines.SharpVector.Playground.Components; 2 | 3 | var builder = WebApplication.CreateBuilder(args); 4 | 5 | // Add services to the container. 
6 | builder.Services.AddRazorComponents() 7 | .AddInteractiveServerComponents(); 8 | 9 | var app = builder.Build(); 10 | 11 | // Configure the HTTP request pipeline. 12 | if (!app.Environment.IsDevelopment()) 13 | { 14 | app.UseExceptionHandler("/Error", createScopeForErrors: true); 15 | // The default HSTS value is 30 days. You may want to change this for production scenarios, see https://aka.ms/aspnetcore-hsts. 16 | app.UseHsts(); 17 | } 18 | 19 | app.UseHttpsRedirection(); 20 | 21 | app.UseStaticFiles(); 22 | app.UseAntiforgery(); 23 | 24 | app.MapRazorComponents() 25 | .AddInteractiveServerRenderMode(); 26 | 27 | app.Run(); 28 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json.schemastore.org/launchsettings.json", 3 | "iisSettings": { 4 | "windowsAuthentication": false, 5 | "anonymousAuthentication": true, 6 | "iisExpress": { 7 | "applicationUrl": "http://localhost:55365", 8 | "sslPort": 44358 9 | } 10 | }, 11 | "profiles": { 12 | "http": { 13 | "commandName": "Project", 14 | "dotnetRunMessages": true, 15 | "launchBrowser": true, 16 | "applicationUrl": "http://localhost:5188", 17 | "environmentVariables": { 18 | "ASPNETCORE_ENVIRONMENT": "Development" 19 | } 20 | }, 21 | "https": { 22 | "commandName": "Project", 23 | "dotnetRunMessages": true, 24 | "launchBrowser": true, 25 | "applicationUrl": "https://localhost:7156;http://localhost:5188", 26 | "environmentVariables": { 27 | "ASPNETCORE_ENVIRONMENT": "Development" 28 | } 29 | }, 30 | "IIS Express": { 31 | "commandName": "IISExpress", 32 | "launchBrowser": true, 33 | "environmentVariables": { 34 | "ASPNETCORE_ENVIRONMENT": "Development" 35 | } 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- 
/src/Build5Nines.SharpVector.Playground/appsettings.Development.json: -------------------------------------------------------------------------------- 1 | { 2 | "Logging": { 3 | "LogLevel": { 4 | "Default": "Information", 5 | "Microsoft.AspNetCore": "Warning" 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/appsettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "Logging": { 3 | "LogLevel": { 4 | "Default": "Information", 5 | "Microsoft.AspNetCore": "Warning" 6 | } 7 | }, 8 | "AllowedHosts": "*" 9 | } 10 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/wwwroot/app.css: -------------------------------------------------------------------------------- 1 | html, body { 2 | font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; 3 | } 4 | 5 | a, .btn-link { 6 | color: #006bb7; 7 | } 8 | 9 | .btn-primary { 10 | color: #fff; 11 | background-color: #1b6ec2; 12 | border-color: #1861ac; 13 | } 14 | 15 | .btn:focus, .btn:active:focus, .btn-link.nav-link:focus, .form-control:focus, .form-check-input:focus { 16 | box-shadow: 0 0 0 0.1rem white, 0 0 0 0.25rem #258cfb; 17 | } 18 | 19 | .content { 20 | padding-top: 1.1rem; 21 | } 22 | 23 | h1:focus { 24 | outline: none; 25 | } 26 | 27 | .valid.modified:not([type=checkbox]) { 28 | outline: 1px solid #26b050; 29 | } 30 | 31 | .invalid { 32 | outline: 1px solid #e50000; 33 | } 34 | 35 | .validation-message { 36 | color: #e50000; 37 | } 38 | 39 | .blazor-error-boundary { 40 | background: 
url(data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNTYiIGhlaWdodD0iNDkiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgeG1sbnM6eGxpbms9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkveGxpbmsiIG92ZXJmbG93PSJoaWRkZW4iPjxkZWZzPjxjbGlwUGF0aCBpZD0iY2xpcDAiPjxyZWN0IHg9IjIzNSIgeT0iNTEiIHdpZHRoPSI1NiIgaGVpZ2h0PSI0OSIvPjwvY2xpcFBhdGg+PC9kZWZzPjxnIGNsaXAtcGF0aD0idXJsKCNjbGlwMCkiIHRyYW5zZm9ybT0idHJhbnNsYXRlKC0yMzUgLTUxKSI+PHBhdGggZD0iTTI2My41MDYgNTFDMjY0LjcxNyA1MSAyNjUuODEzIDUxLjQ4MzcgMjY2LjYwNiA1Mi4yNjU4TDI2Ny4wNTIgNTIuNzk4NyAyNjcuNTM5IDUzLjYyODMgMjkwLjE4NSA5Mi4xODMxIDI5MC41NDUgOTIuNzk1IDI5MC42NTYgOTIuOTk2QzI5MC44NzcgOTMuNTEzIDI5MSA5NC4wODE1IDI5MSA5NC42NzgyIDI5MSA5Ny4wNjUxIDI4OS4wMzggOTkgMjg2LjYxNyA5OUwyNDAuMzgzIDk5QzIzNy45NjMgOTkgMjM2IDk3LjA2NTEgMjM2IDk0LjY3ODIgMjM2IDk0LjM3OTkgMjM2LjAzMSA5NC4wODg2IDIzNi4wODkgOTMuODA3MkwyMzYuMzM4IDkzLjAxNjIgMjM2Ljg1OCA5Mi4xMzE0IDI1OS40NzMgNTMuNjI5NCAyNTkuOTYxIDUyLjc5ODUgMjYwLjQwNyA1Mi4yNjU4QzI2MS4yIDUxLjQ4MzcgMjYyLjI5NiA1MSAyNjMuNTA2IDUxWk0yNjMuNTg2IDY2LjAxODNDMjYwLjczNyA2Ni4wMTgzIDI1OS4zMTMgNjcuMTI0NSAyNTkuMzEzIDY5LjMzNyAyNTkuMzEzIDY5LjYxMDIgMjU5LjMzMiA2OS44NjA4IDI1OS4zNzEgNzAuMDg4N0wyNjEuNzk1IDg0LjAxNjEgMjY1LjM4IDg0LjAxNjEgMjY3LjgyMSA2OS43NDc1QzI2Ny44NiA2OS43MzA5IDI2Ny44NzkgNjkuNTg3NyAyNjcuODc5IDY5LjMxNzkgMjY3Ljg3OSA2Ny4xMTgyIDI2Ni40NDggNjYuMDE4MyAyNjMuNTg2IDY2LjAxODNaTTI2My41NzYgODYuMDU0N0MyNjEuMDQ5IDg2LjA1NDcgMjU5Ljc4NiA4Ny4zMDA1IDI1OS43ODYgODkuNzkyMSAyNTkuNzg2IDkyLjI4MzcgMjYxLjA0OSA5My41Mjk1IDI2My41NzYgOTMuNTI5NSAyNjYuMTE2IDkzLjUyOTUgMjY3LjM4NyA5Mi4yODM3IDI2Ny4zODcgODkuNzkyMSAyNjcuMzg3IDg3LjMwMDUgMjY2LjExNiA4Ni4wNTQ3IDI2My41NzYgODYuMDU0N1oiIGZpbGw9IiNGRkU1MDAiIGZpbGwtcnVsZT0iZXZlbm9kZCIvPjwvZz48L3N2Zz4=) no-repeat 1rem/1.8rem, #b32121; 41 | padding: 1rem 1rem 1rem 3.7rem; 42 | color: white; 43 | } 44 | 45 | .blazor-error-boundary::after { 46 | content: "An error has occurred." 
47 | } 48 | 49 | .darker-border-checkbox.form-check-input { 50 | border-color: #929292; 51 | } 52 | 53 | 54 | #playgroundEditor { 55 | height: 10em; 56 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector.Playground/wwwroot/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/Build5Nines.SharpVector.Playground/wwwroot/favicon.png -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/BasicMemoryVectorDatabase.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector; 2 | 3 | /// 4 | /// A basic implementation of a vector database that uses an in-memory dictionary to store vectors, with integer keys and string metadata values. 5 | /// 6 | public class BasicMemoryVectorDatabase : MemoryVectorDatabase 7 | { } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Build5Nines.SharpVector.csproj: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | 4 | net8.0 5 | enable 6 | enable 7 | true 8 | 9 | Build5Nines.SharpVector 10 | https://sharpvector.build5nines.com 11 | https://github.com/Build5Nines/SharpVector 12 | 2.1.1 13 | Lightweight In-memory Vector Database to embed in any .NET Applications 14 | Copyright (c) 2025 Build5Nines LLC 15 | README.md 16 | LICENSE 17 | Chris Pietschmann 18 | Build5Nines LLC 19 | vector;search;database;data;rag;search;llm;generative ai;ai;genai 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Data/TextChunkingMethod.cs: -------------------------------------------------------------------------------- 1 | namespace
Build5Nines.SharpVector.Data; 2 | 3 | public enum TextChunkingMethod 4 | { 5 | /// 6 | /// Split the text into paragraphs 7 | /// 8 | Paragraph, 9 | /// 10 | /// Split the text into sentences 11 | /// 12 | Sentence, 13 | /// 14 | /// Split the text into fixed length chunks 15 | /// 16 | FixedLength, 17 | /// 18 | /// Split the text into overlapping windows 19 | /// 20 | OverlappingWindow 21 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Data/TextChunkingOptions.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Data; 2 | 3 | public class TextChunkingOptions 4 | { 5 | public TextChunkingOptions() 6 | { 7 | Method = TextChunkingMethod.Paragraph; 8 | ChunkSize = 100; 9 | #pragma warning disable CS8603 // Possible null reference return. 10 | RetrieveMetadata = (chunk) => default; 11 | #pragma warning restore CS8603 // Possible null reference return. 12 | OverlapSize = 50; 13 | } 14 | 15 | /// 16 | /// The method to use for chunking the text. Default is Paragraph. 17 | /// 18 | public TextChunkingMethod Method { get; set; } 19 | 20 | /// 21 | /// The length in tokens (aka "words") of each chunk of text. Default is 100. 22 | /// Only used by TextChunkingMethod.FixedLength and TextChunkingMethod.OverlappingWindow. 23 | /// 24 | public int ChunkSize { get; set; } 25 | 26 | /// 27 | /// Lambda function to retrieve custom metadata for each chunk 28 | /// 29 | public Func RetrieveMetadata { get; set; } 30 | 31 | /// 32 | /// The number of words to overlap text chunks when using TextChunkingMethod.OverlappingWindow. Default is 50.
33 | /// 34 | public int OverlapSize { get; set; } 35 | } 36 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Data/TextDataLoader.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Data; 2 | 3 | using System.ComponentModel.DataAnnotations; 4 | using System.Text.RegularExpressions; 5 | using Build5Nines.SharpVector.Preprocessing; 6 | 7 | public class TextDataLoader 8 | where TId : notnull 9 | where TMetadata : notnull 10 | { 11 | public TextDataLoader(IVectorDatabase vectorDatabase) 12 | { 13 | VectorDatabase = vectorDatabase; 14 | } 15 | 16 | const string _space = " "; 17 | 18 | public IVectorDatabase VectorDatabase { get; private set; } 19 | 20 | public IEnumerable AddDocument(string document, TextChunkingOptions chunkingOptions) 21 | { 22 | if (chunkingOptions.RetrieveMetadata == null) 23 | throw new ValidationException("TextChunkingOptions.RetrieveMetadata must be set"); 24 | 25 | var chunks = ChunkText(document, chunkingOptions); 26 | var ids = new List(); 27 | 28 | foreach (var chunk in chunks) 29 | { 30 | var id = VectorDatabase.AddText(chunk, chunkingOptions.RetrieveMetadata.Invoke(chunk)); 31 | ids.Add(id); 32 | } 33 | 34 | return ids; 35 | } 36 | 37 | protected List ChunkText(string text, TextChunkingOptions chunkingOptions) 38 | { 39 | switch (chunkingOptions.Method) 40 | { 41 | case TextChunkingMethod.Paragraph: 42 | return SplitIntoParagraphs(text); 43 | case TextChunkingMethod.Sentence: 44 | return SplitIntoSentences(text); 45 | case TextChunkingMethod.FixedLength: 46 | return SplitIntoChunks(text, chunkingOptions.ChunkSize); 47 | case TextChunkingMethod.OverlappingWindow: 48 | return SplitIntoOverlappingWindows(text, chunkingOptions.ChunkSize, chunkingOptions.OverlapSize); 49 | default: 50 | throw new ArgumentException("Invalid chunking method"); 51 | } 52 | } 53 | 54 | protected static List SplitIntoParagraphs(string 
text) 55 | { 56 | return text.Split(new[] { "\r\n\r\n", "\n\n" }, StringSplitOptions.RemoveEmptyEntries).ToList(); 57 | } 58 | 59 | protected static List SplitIntoSentences(string text) 60 | { 61 | return Regex.Split(text, @"(?<=[\.!\?])\s+").ToList(); 62 | } 63 | 64 | protected static List SplitIntoChunks(string text, int chunkSize) 65 | { 66 | var words = SplitIntoTokens(text); 67 | var chunks = new List(); 68 | 69 | for (int i = 0; i < words.Length; i += chunkSize) 70 | { 71 | chunks.Add(JoinTokens(words.Skip(i).Take(chunkSize))); 72 | } 73 | 74 | return chunks; 75 | } 76 | 77 | protected static List SplitIntoOverlappingWindows(string text, int chunkSize, int overlap) 78 | { 79 | var tokens = SplitIntoTokens(text); 80 | var chunks = new List(); 81 | 82 | if (overlap >= chunkSize) 83 | throw new ArgumentException("Overlap must be smaller than chunk size"); 84 | 85 | // Calculate the step size 86 | int step = chunkSize - overlap; 87 | int tokenLength = tokens.Length; 88 | for (int i = 0; i < tokenLength; i += step) 89 | { 90 | var chunk = JoinTokens(tokens.Skip(i).Take(chunkSize)); 91 | if (!string.IsNullOrWhiteSpace(chunk)) 92 | chunks.Add(chunk); 93 | 94 | if (i + chunkSize >= tokenLength) 95 | break; 96 | } 97 | return chunks; 98 | } 99 | 100 | private static string JoinTokens(IEnumerable tokens) 101 | { 102 | if (tokens == null) return string.Empty; 103 | 104 | var fullText = new System.Text.StringBuilder(); 105 | foreach (var token in tokens) 106 | { 107 | if (IsChinese(token)) 108 | fullText.Append(token); 109 | else 110 | fullText.Append(_space + token); 111 | } 112 | return fullText.ToString().Trim(); 113 | } 114 | 115 | private static bool IsChinese(string token) 116 | { 117 | // Checks if the token consists entirely of Chinese (CJK Unified Ideograph) characters. 
118 | return System.Text.RegularExpressions.Regex.IsMatch(token, @"^\p{IsCJKUnifiedIdeographs}+$"); 119 | } 120 | 121 | protected static string[] SplitIntoTokens(string text) 122 | { 123 | var processor = new BasicTextPreprocessor(); 124 | return processor.TokenizeAndPreprocess(text).ToArray(); 125 | } 126 | 127 | public async Task> AddDocumentAsync(string document, TextChunkingOptions chunkingOptions) 128 | { 129 | if (chunkingOptions.RetrieveMetadata == null) 130 | throw new ValidationException("TextChunkingOptions.RetrieveMetadata must be set"); 131 | 132 | var chunks = await ChunkTextAsync(document, chunkingOptions); 133 | var ids = new List(); 134 | object _lock = new object(); 135 | await Parallel.ForEachAsync(chunks, async (chunk, cancellationToken) => 136 | { 137 | var id = await VectorDatabase.AddTextAsync(chunk, chunkingOptions.RetrieveMetadata.Invoke(chunk)); 138 | lock (_lock) { 139 | ids.Add(id); 140 | } 141 | }); 142 | 143 | return ids; 144 | } 145 | 146 | private async Task> ChunkTextAsync(string text, TextChunkingOptions chunkingOptions) 147 | { 148 | return await Task.Run(() => ChunkText(text, chunkingOptions)); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/DatabaseFileException.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector; 2 | 3 | public class DatabaseFileException : Exception 4 | { 5 | public DatabaseFileException() 6 | { 7 | } 8 | 9 | public DatabaseFileException(string message) 10 | : base(message) 11 | { 12 | } 13 | 14 | public DatabaseFileException(string message, Exception innerException) 15 | : base(message, innerException) 16 | { 17 | } 18 | } 19 | 20 | public class DatabaseFileInfoException : DatabaseFileException 21 | { 22 | public DatabaseFileInfoException() 23 | { 24 | } 25 | 26 | public DatabaseFileInfoException(string message) 27 | : base(message) 28 | { 29 | } 30 | 31 | 
public DatabaseFileInfoException(string message, Exception innerException) 32 | : base(message, innerException) 33 | { 34 | } 35 | } 36 | 37 | public class DatabaseFileSchemaException : DatabaseFileException 38 | { 39 | public DatabaseFileSchemaException() 40 | { 41 | } 42 | 43 | public DatabaseFileSchemaException(string message) 44 | : base(message) 45 | { 46 | } 47 | 48 | public DatabaseFileSchemaException(string message, Exception innerException) 49 | : base(message, innerException) 50 | { 51 | } 52 | } 53 | 54 | public class DatabaseFileVersionException : DatabaseFileException 55 | { 56 | public DatabaseFileVersionException() 57 | { 58 | } 59 | 60 | public DatabaseFileVersionException(string message) 61 | : base(message) 62 | { 63 | } 64 | 65 | public DatabaseFileVersionException(string message, Exception innerException) 66 | : base(message, innerException) 67 | { 68 | } 69 | } 70 | 71 | public class DatabaseFileClassTypeException : DatabaseFileException 72 | { 73 | public DatabaseFileClassTypeException() 74 | { 75 | } 76 | 77 | public DatabaseFileClassTypeException(string message) 78 | : base(message) 79 | { 80 | } 81 | 82 | public DatabaseFileClassTypeException(string message, Exception innerException) 83 | : base(message, innerException) 84 | { 85 | } 86 | } 87 | 88 | public class DatabaseFileMissingEntryException : DatabaseFileException 89 | { 90 | public DatabaseFileMissingEntryException(string message, string missingEntry) 91 | : base(message) 92 | { 93 | MissingEntry = missingEntry; 94 | } 95 | 96 | public string MissingEntry { get; private set; } 97 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/DatabaseInfo.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector; 2 | 3 | public class DatabaseInfo 4 | { 5 | internal const string SupportedVersion = "1.0.0"; 6 | internal const string SupportedSchema = 
"Build5Nines.SharpVector"; 7 | 8 | public DatabaseInfo() 9 | : this(null, null, null) 10 | { } 11 | public DatabaseInfo(string? classType) 12 | : this(SupportedSchema, SupportedVersion, classType) 13 | { } 14 | 15 | public DatabaseInfo(string? schema, string? version, string? classType) 16 | { 17 | Schema = schema; 18 | Version = version; 19 | ClassType = classType; 20 | } 21 | 22 | public string? Schema { get; set; } 23 | public string? Version { get; set; } 24 | public string? ClassType { get; set; } 25 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Embeddings/IEmbeddingsGenerator.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Embeddings; 2 | 3 | public interface IEmbeddingsGenerator 4 | { 5 | Task GenerateEmbeddingsAsync(string text); 6 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/IVectorDatabase.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection.Metadata; 2 | 3 | namespace Build5Nines.SharpVector; 4 | 5 | /// 6 | /// An interface for a vector database 7 | /// 8 | /// 9 | /// 10 | /// 11 | public interface IVectorDatabase 12 | : IEnumerable> 13 | where TId : notnull 14 | { 15 | /// 16 | /// Adds a new text with Metadata to the database and returns its ID 17 | /// 18 | /// 19 | /// 20 | /// 21 | TId AddText(TDocument text, TMetadata? metadata = default(TMetadata)); 22 | 23 | /// 24 | /// Adds a new text with Metadata to the database and returns its ID 25 | /// 26 | /// 27 | /// 28 | /// 29 | Task AddTextAsync(TDocument text, TMetadata? metadata = default(TMetadata)); 30 | 31 | /// 32 | /// Get all the Ids for each text the database. 
33 | /// 34 | /// 35 | IEnumerable GetIds(); 36 | 37 | /// 38 | /// Retrieves a Text by its ID 39 | /// 40 | /// 41 | /// 42 | /// 43 | IVectorTextItem GetText(TId id); 44 | 45 | /// 46 | /// Deletes a Text by its ID 47 | /// 48 | /// 49 | /// 50 | IVectorTextItem DeleteText(TId id); 51 | 52 | /// 53 | /// Updates a Text by its ID 54 | /// 55 | /// 56 | /// 57 | /// 58 | void UpdateText(TId id, TDocument text); 59 | 60 | /// 61 | /// Updates the Metadata of a Text by its ID 62 | /// 63 | /// 64 | /// 65 | /// 66 | void UpdateTextMetadata(TId id, TMetadata metadata); 67 | 68 | /// 69 | /// Updates a Text by its ID with new text and metadata values 70 | /// 71 | /// 72 | /// 73 | /// 74 | void UpdateTextAndMetadata(TId id, TDocument text, TMetadata metadata); 75 | 76 | /// 77 | /// Performs a vector search to find the top N most similar texts to the given text 78 | /// 79 | /// The query prompt to search by. 80 | /// The similarity threshold to filter by. 81 | /// The page index of the search results. Default is 0. 82 | /// The number of search results per page. Default is Null and returns all results. 83 | /// A filter function to apply to the metadata of the results. 84 | /// The search results as an IVectorTextResult object. 85 | IVectorTextResult Search(TDocument queryText, float? threshold = null, int pageIndex = 0, int? pageCount = null, Func? filter = null); 86 | 87 | /// 88 | /// Performs an asynchronous search vector search to find the top N most similar texts to the given text 89 | /// 90 | /// The query prompt to search by. 91 | /// The similarity threshold to filter by. 92 | /// The page index of the search results. Default is 0. 93 | /// The number of search results per page. Default is Null and returns all results. 94 | /// A filter function to apply to the metadata of the results. 95 | /// The search results as an IVectorTextResult object. 96 | Task> SearchAsync(TDocument queryText, float? threshold = null, int pageIndex = 0, int? 
pageCount = null, Func>? filter = null); 97 | 98 | 99 | [Obsolete("Use SerializeToBinaryStreamAsync Instead")] 100 | Task SerializeToJsonStreamAsync(Stream stream); 101 | 102 | [Obsolete("Use SerializeToBinaryStream Instead")] 103 | void SerializeToJsonStream(Stream stream); 104 | 105 | [Obsolete("Use DeserializeFromBinaryStreamAsync Instead")] 106 | Task DeserializeFromJsonStreamAsync(Stream stream); 107 | 108 | [Obsolete("Use DeserializeFromBinaryStream Instead")] 109 | void DeserializeFromJsonStream(Stream stream); 110 | 111 | 112 | 113 | /// 114 | /// Serializes the Database to a binary stream 115 | /// 116 | /// 117 | /// 118 | Task SerializeToBinaryStreamAsync(Stream stream); 119 | 120 | /// 121 | /// Serializes the Database to a binary stream 122 | /// 123 | /// 124 | /// 125 | void SerializeToBinaryStream(Stream stream); 126 | 127 | /// 128 | /// Deserializes the Database from a binary stream 129 | /// 130 | /// 131 | /// 132 | Task DeserializeFromBinaryStreamAsync(Stream stream); 133 | 134 | /// 135 | /// Deserializes the Database from a binary stream 136 | /// 137 | /// 138 | /// 139 | void DeserializeFromBinaryStream(Stream stream); 140 | } 141 | 142 | public interface IVectorDatabase 143 | : IVectorDatabase 144 | where TId : notnull 145 | { } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/IVectorDatabaseExtensions.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace Build5Nines.SharpVector; 4 | 5 | /// 6 | /// Save and Load extension methods for IVectorDatabase<> 7 | /// 8 | public static class IVectorDatabaseExtensions 9 | { 10 | public static async Task SaveToFileAsync(this IVectorDatabase vectorDatabase, string filePath) 11 | where TId : notnull 12 | { 13 | using (var stream = new FileStream(filePath, FileMode.Create, FileAccess.Write)) 14 | { 15 | await vectorDatabase.SerializeToBinaryStreamAsync(stream); 16 | } 17 | } 18 | 19 | 
public static void SaveToFile(this IVectorDatabase vectorDatabase, string filePath) 20 | where TId : notnull 21 | { 22 | using (var stream = new FileStream(filePath, FileMode.Create, FileAccess.Write)) 23 | { 24 | vectorDatabase.SerializeToBinaryStream(stream); 25 | } 26 | } 27 | 28 | public static async Task LoadFromFileAsync(this IVectorDatabase vectorDatabase, string filePath) 29 | where TId : notnull 30 | { 31 | using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read)) 32 | { 33 | await vectorDatabase.DeserializeFromBinaryStreamAsync(stream); 34 | } 35 | } 36 | 37 | public static void LoadFromFile(this IVectorDatabase vectorDatabase, string filePath) 38 | where TId : notnull 39 | { 40 | using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read)) 41 | { 42 | vectorDatabase.DeserializeFromBinaryStream(stream); 43 | } 44 | } 45 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Id/GuidIdGenerator.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Id; 2 | 3 | public class GuidIdGenerator : IIdGenerator 4 | { 5 | public Guid NewId() 6 | { 7 | return Guid.NewGuid(); 8 | } 9 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Id/IIdGenerator.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Id; 2 | 3 | public interface IIdGenerator 4 | where TId : notnull 5 | { 6 | /// 7 | /// Generates a new ID. 
8 | /// 9 | /// 10 | TId NewId(); 11 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Id/IntIdGenerator.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Id; 2 | 3 | public class IntIdGenerator : NumericIdGenerator 4 | { 5 | public IntIdGenerator() : base() 6 | { } 7 | 8 | public IntIdGenerator(int mostRecentId) : base(mostRecentId) 9 | { } 10 | } 11 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Id/NumericIdGenerator.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Id; 2 | 3 | public class NumericIdGenerator : IIdGenerator 4 | where TId : struct 5 | { 6 | public NumericIdGenerator() 7 | { } 8 | 9 | public NumericIdGenerator(TId mostRecentId) : this() 10 | { 11 | this._lastId = mostRecentId; 12 | } 13 | 14 | private readonly object _lock = new object(); 15 | private TId _lastId = default(TId); 16 | 17 | public TId NewId() { 18 | lock(_lock) { 19 | dynamic current = _lastId; 20 | current++; 21 | _lastId = current; 22 | return _lastId; 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/MemoryVectorDatabase.cs: -------------------------------------------------------------------------------- 1 | using Build5Nines.SharpVector.Vocabulary; 2 | using Build5Nines.SharpVector.Id; 3 | using Build5Nines.SharpVector.Preprocessing; 4 | using Build5Nines.SharpVector.Vectorization; 5 | using Build5Nines.SharpVector.VectorCompare; 6 | using Build5Nines.SharpVector.VectorStore; 7 | 8 | namespace Build5Nines.SharpVector; 9 | 10 | public interface IMemoryVectorDatabase : IVectorDatabase 11 | where TId : notnull 12 | { } 13 | 14 | /// 15 | /// A simple in-memory database for storing and querying vectorized text items. 
16 | /// This database uses a Bag of Words vectorization strategy, with Cosine similarity, a dictionary vocabulary store, and a basic text preprocessor. 17 | /// 18 | /// Defines the data type for the Metadata stored with the Text. 19 | public class MemoryVectorDatabase 20 | : MemoryVectorDatabaseBase< 21 | int, 22 | TMetadata, 23 | MemoryDictionaryVectorStoreWithVocabulary, string, int>, 24 | DictionaryVocabularyStore, 25 | string, int, 26 | IntIdGenerator, 27 | BasicTextPreprocessor, 28 | BagOfWordsVectorizer, 29 | CosineSimilarityVectorComparer 30 | >, IMemoryVectorDatabase, IVectorDatabase 31 | { 32 | public MemoryVectorDatabase() 33 | : base( 34 | new MemoryDictionaryVectorStoreWithVocabulary, string, int>( 35 | new DictionaryVocabularyStore() 36 | ) 37 | ) 38 | { } 39 | 40 | 41 | [Obsolete("Use DeserializeFromBinaryStreamAsync instead.")] 42 | public override async Task DeserializeFromJsonStreamAsync(Stream stream) 43 | { 44 | await DeserializeFromBinaryStreamAsync(stream); 45 | } 46 | 47 | [Obsolete("Use DeserializeFromBinaryStream instead.")] 48 | public override void DeserializeFromJsonStream(Stream stream) 49 | { 50 | DeserializeFromBinaryStream(stream); 51 | } 52 | 53 | /// 54 | /// Deserializes the database from a binary stream. 55 | /// 56 | /// 57 | /// 58 | public override async Task DeserializeFromBinaryStreamAsync(Stream stream) 59 | { 60 | await base.DeserializeFromBinaryStreamAsync(stream); 61 | 62 | // Re-initialize the IdGenerator with the max Id value from the VectorStore; fall back to 0 for an empty store because Max() throws on an empty sequence 63 | _idGenerator = new IntIdGenerator(VectorStore.GetIds().DefaultIfEmpty(0).Max()); 64 | } 65 | 66 | /// 67 | /// Deserializes the database from a binary stream. 
68 | /// 69 | /// 70 | public override void DeserializeFromBinaryStream(Stream stream) 71 | { 72 | base.DeserializeFromBinaryStream(stream); 73 | 74 | // Re-initialize the IdGenerator with the max Id value from the VectorStore; fall back to 0 for an empty store because Max() throws on an empty sequence 75 | _idGenerator = new IntIdGenerator(VectorStore.GetIds().DefaultIfEmpty(0).Max()); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Preprocessing/BasicTextPreprocessor.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Preprocessing; 2 | 3 | using System.Globalization; 4 | using System.Text; 5 | using System.Text.RegularExpressions; 6 | 7 | public class BasicTextPreprocessor : ITextPreprocessor 8 | { 9 | private const string space = " "; 10 | private const char charSpace = ' '; 11 | 12 | private const string regexChineseCharactersPattern = @"\p{IsCJKUnifiedIdeographs}"; 13 | private const string regexRemovePunctuation = @"[\p{P}$^`~=+|<>]"; // @"[\p{P}]"; 14 | // private const string regexTokenize = @"[\p{IsCJKUnifiedIdeographs}]|\p{So}\p{Sk}|[a-z0-9]+"; 15 | private const string regexWhitespacePattern = @"\s+"; 16 | private const string regexEmojiPattern = @"[\p{So}\uD83C-\uDBFF\uDC00-\uDFFF]"; 17 | 18 | public IEnumerable TokenizeAndPreprocess(string text) 19 | { 20 | if (string.IsNullOrWhiteSpace(text)) return Array.Empty(); 21 | 22 | // Tokens should always be lower case 23 | text = text.ToLower(); 24 | 25 | // Remove punctuation (excluding Chinese characters) 26 | text = Regex.Replace(text, regexRemovePunctuation, string.Empty); 27 | 28 | // Space pad special characters (Emoji and Chinese characters) 29 | text = SpacePadSpecialCharacters(text); 30 | 31 | // Remove extra whitespace characters 32 | text = Regex.Replace(text, regexWhitespacePattern, space).Trim(); 33 | 34 | // Split to Token array 35 | return text.Split(charSpace); 36 | 37 | 38 | // // Check if text contains Chinese characters using the 
CJK Unified Ideographs block 39 | // if (Regex.IsMatch(text, regexChineseCharactersPattern)) 40 | // { 41 | // if (Regex.IsMatch(text, regexEmojiPattern)) 42 | // { 43 | // // Has Emoji 44 | // text = SpacePadSpecialCharacters(text, new string[] { regexEmojiPattern, regexChineseCharactersPattern }); 45 | // // remove extra whitespace characters 46 | // text = Regex.Replace(text, regexWhitespacePattern, space).Trim(); 47 | // } else { 48 | // // No Emoji 49 | // // Tokenize either by matching individual Chinese characters or contiguous word tokens (for Latin letters/digits) 50 | // var tokens = Regex.Matches(text, regexTokenize) 51 | // .Cast() 52 | // .Select(m => m.Value); 53 | // return tokens; 54 | // } 55 | // } 56 | // else 57 | // { 58 | // // if text contains emojis 59 | // if (Regex.IsMatch(text, regexEmojiPattern)) 60 | // { 61 | // text = SpacePadSpecialCharacters(text, new string[] { regexEmojiPattern }); 62 | // } 63 | 64 | // // remove extra whitespace characters 65 | // text = Regex.Replace(text, regexWhitespacePattern, space).Trim(); 66 | // } 67 | 68 | // return text.Split(charSpace); 69 | } 70 | 71 | public async Task> TokenizeAndPreprocessAsync(string text) 72 | { 73 | return await Task.Run(() => TokenizeAndPreprocess(text)); 74 | } 75 | 76 | 77 | private static string SpacePadSpecialCharacters(string text) 78 | { 79 | var spacePadPatterns = new List(); 80 | 81 | // Contains Chinese characters? 82 | if (Regex.IsMatch(text, regexChineseCharactersPattern)) 83 | { 84 | // Space pad Chinese characters 85 | spacePadPatterns.Add(regexChineseCharactersPattern); 86 | } 87 | 88 | // Contains Emoji? 
89 | if (Regex.IsMatch(text, regexEmojiPattern)) 90 | { 91 | // Space pad Emoji characters 92 | spacePadPatterns.Add(regexEmojiPattern); 93 | } 94 | 95 | if (spacePadPatterns.Count > 0) 96 | { 97 | // Space pad special characters based on the patterns selected 98 | text = SpacePadSpecialCharacters(text, spacePadPatterns.ToArray()); 99 | } 100 | 101 | return text; 102 | } 103 | 104 | private static string SpacePadSpecialCharacters(string text, string[] regexPatterns) 105 | { 106 | var enumerator = StringInfo.GetTextElementEnumerator(text); 107 | StringBuilder sb = new StringBuilder(); 108 | int i; 109 | while(enumerator.MoveNext()) 110 | { 111 | var element = enumerator.GetTextElement(); 112 | 113 | for (i = 0; i < regexPatterns.Length; i++) 114 | { 115 | if (Regex.IsMatch(element, regexPatterns[i])) 116 | { 117 | element = space + element + space; 118 | break; 119 | } 120 | } 121 | 122 | sb.Append(element); 123 | } 124 | return sb.ToString(); 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Preprocessing/ITextPreprocessor.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Preprocessing; 2 | 3 | public interface ITextPreprocessor 4 | { 5 | IEnumerable TokenizeAndPreprocess(TToken text); 6 | Task> TokenizeAndPreprocessAsync(TToken text); 7 | } 8 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorCompare/CosineSimilarityVectorComparerAsync.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.InteropServices; 3 | 4 | namespace Build5Nines.SharpVector.VectorCompare; 5 | 6 | public class CosineSimilarityVectorComparer : IVectorComparer 7 | { 8 | /// 9 | /// Calculates the cosine similarity between two vectors. 
10 | /// 11 | /// Cosine Similarity is a metric used to measure how similar two vectors are. It calculates the cosine of the angle between two vectors projected in a multi-dimensional space. The result of the cosine similarity ranges from -1 to 1. 12 | /// 13 | /// 14 | /// 15 | /// 16 | /// 17 | public async Task CalculateAsync(float[] vectorA, float[] vectorB) 18 | { 19 | return await Task.Run(() => Calculate(vectorA, vectorB)); 20 | } 21 | 22 | /// 23 | /// Calculates the cosine similarity between two vectors. 24 | /// 25 | /// Cosine Similarity is a metric used to measure how similar two vectors are. It calculates the cosine of the angle between two vectors projected in a multi-dimensional space. The result of the cosine similarity ranges from -1 to 1. 26 | /// 27 | /// 28 | /// 29 | /// 30 | /// 31 | public float Calculate(float[] vectorA, float[] vectorB) 32 | { 33 | if (vectorA.Length != vectorB.Length) 34 | { 35 | throw new ArgumentException("Vectors must be of the same length."); 36 | } 37 | 38 | float dotProduct = 0; 39 | float magnitudeA = 0; 40 | float magnitudeB = 0; 41 | 42 | for (int i = 0; i < vectorA.Length; i++) 43 | { 44 | dotProduct += vectorA[i] * vectorB[i]; 45 | magnitudeA += vectorA[i] * vectorA[i]; 46 | magnitudeB += vectorB[i] * vectorB[i]; 47 | } 48 | 49 | magnitudeA = (float)Math.Sqrt(magnitudeA); 50 | magnitudeB = (float)Math.Sqrt(magnitudeB); 51 | 52 | if (magnitudeA == 0 || magnitudeB == 0) 53 | { 54 | return 0; 55 | } 56 | 57 | return dotProduct / (magnitudeA * magnitudeB); 58 | } 59 | 60 | public IEnumerable> Sort(IEnumerable> results) 61 | { 62 | return results.OrderByDescending(s => s.VectorComparison); 63 | } 64 | 65 | public async Task>> SortAsync(IEnumerable> results) 66 | { 67 | return await Task.Run(() => Sort(results)); 68 | } 69 | 70 | public bool IsWithinThreshold(float? threshold, float vectorComparisonValue) 71 | { 72 | if (threshold == null) 73 | { 74 | return true; 75 | } 76 | var thresholdToCompare = threshold ?? 
(float)0.0f; 77 | var thresholdIsEqual = Math.Abs(vectorComparisonValue - thresholdToCompare) < 1e-6f; // epsilon; 78 | return thresholdIsEqual || vectorComparisonValue > thresholdToCompare; 79 | } 80 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorCompare/EuclideanDistanceVectorComparerAsync.cs: -------------------------------------------------------------------------------- 1 | using System.Runtime.InteropServices; 2 | 3 | namespace Build5Nines.SharpVector.VectorCompare; 4 | 5 | public class EuclideanDistanceVectorComparer : IVectorComparer 6 | { 7 | /// 8 | /// Calculates the Euclidean distance between two vectors. 9 | /// 10 | /// 11 | /// 12 | /// 13 | /// 14 | public async Task CalculateAsync(float[] vectorA, float[] vectorB) 15 | { 16 | return await Task.Run(() => Calculate(vectorA, vectorB)); 17 | } 18 | 19 | /// 20 | /// Calculates the Euclidean distance between two vectors. 21 | /// 22 | /// 23 | /// 24 | /// 25 | /// 26 | public float Calculate(float[] vectorA, float[] vectorB) 27 | { 28 | if (vectorA.Length != vectorB.Length) 29 | { 30 | throw new ArgumentException("Vectors must be of the same length."); 31 | } 32 | 33 | float sumOfSquares = 0f; 34 | 35 | for (int i = 0; i < vectorA.Length; i++) 36 | { 37 | float difference = vectorA[i] - vectorB[i]; 38 | sumOfSquares += difference * difference; 39 | } 40 | 41 | return (float)Math.Sqrt(sumOfSquares); 42 | } 43 | 44 | public IEnumerable> Sort(IEnumerable> results) 45 | { 46 | return results.OrderBy(s => s.VectorComparison); 47 | } 48 | 49 | public async Task>> SortAsync(IEnumerable> results) 50 | { 51 | return await Task.Run(() => Sort(results)); 52 | } 53 | 54 | public bool IsWithinThreshold(float? threshold, float vectorComparisonValue) 55 | { 56 | if (threshold == null) 57 | { 58 | return true; 59 | } 60 | var thresholdToCompare = threshold ?? 
(float)0.0f; 61 | var thresholdIsEqual = Math.Abs(vectorComparisonValue - thresholdToCompare) < 1e-6f; // epsilon; 62 | return thresholdIsEqual || vectorComparisonValue < thresholdToCompare; 63 | } 64 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorCompare/IVectorComparer.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.VectorCompare; 2 | 3 | public interface IVectorComparer 4 | { 5 | /// 6 | /// Calculates a comparison between two vectors 7 | /// 8 | /// 9 | /// 10 | /// 11 | /// 12 | float Calculate(float[] vectorA, float[] vectorB); 13 | 14 | /// 15 | /// Sorts the results of a comparison 16 | /// 17 | /// 18 | /// 19 | /// 20 | IEnumerable> Sort(IEnumerable> results); 21 | 22 | /// 23 | /// Determines if the comparison is within threshold threshold 24 | /// 25 | /// 26 | /// 27 | /// 28 | bool IsWithinThreshold(float? threshold, float vectorComparisonValue); 29 | 30 | /// 31 | /// Calculates a comparison between two vectors asynchronously 32 | /// 33 | /// 34 | /// 35 | /// 36 | /// 37 | Task CalculateAsync(float[] vectorA, float[] vectorB); 38 | 39 | /// 40 | /// Sorts the results of a comparison asynchronously 41 | /// 42 | /// 43 | /// 44 | /// 45 | Task>> SortAsync(IEnumerable> results); 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorComparison.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector; 2 | 3 | //public record VectorComparison(TId Id, float vectorComparison); 4 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorStore/IVectorStore.cs: -------------------------------------------------------------------------------- 1 | using System.Collections; 2 | using System.Runtime.Serialization; 3 
| 4 | namespace Build5Nines.SharpVector.VectorStore; 5 | 6 | /// 7 | /// Interface for a vector store. 8 | /// 9 | /// 10 | /// 11 | /// 12 | public interface IVectorStore 13 | : IEnumerable>>, 14 | IReadOnlyCollection>>, 15 | IEnumerable, 16 | IAsyncEnumerable>> 17 | { 18 | /// 19 | /// Retrieves a text and metadata by its ID 20 | /// 21 | /// 22 | /// 23 | /// 24 | IVectorTextItem Get(TId id); 25 | 26 | /// 27 | /// Gets all the Ids for every text. 28 | /// 29 | /// 30 | public IEnumerable GetIds(); 31 | 32 | /// 33 | /// Sets the text item (text, metadata and vector) stored under the given ID 34 | /// 35 | /// 36 | /// 37 | /// 38 | void Set(TId id, VectorTextItem item); 39 | 40 | /// 41 | /// Sets the text item (text, metadata and vector) stored under the given ID asynchronously 42 | /// 43 | /// 44 | /// 45 | /// 46 | Task SetAsync(TId id, VectorTextItem item); 47 | 48 | /// 49 | /// Deletes a text by its ID 50 | /// 51 | /// 52 | /// The removed text item 53 | /// 54 | IVectorTextItem Delete(TId id); 55 | 56 | /// 57 | /// Checks if the database contains a key 58 | /// 59 | /// 60 | /// 61 | bool ContainsKey(TId id); 62 | 63 | /// 64 | /// Serializes the Vector Store to a JSON stream 65 | /// 66 | /// 67 | /// 68 | Task SerializeToJsonStreamAsync(Stream stream); 69 | 70 | /// 71 | /// Deserializes the Vector Store from a JSON stream 72 | /// 73 | /// 74 | /// 75 | Task DeserializeFromJsonStreamAsync(Stream stream); 76 | } 77 | 78 | public interface IVectorStore : IVectorStore 79 | { } 80 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorStore/IVectorStoreWithVocabulary.cs: -------------------------------------------------------------------------------- 1 | 2 | using Build5Nines.SharpVector.Vocabulary; 3 | 4 | namespace Build5Nines.SharpVector.VectorStore; 5 | 6 | /// 7 | /// Interface for a vector store with a vocabulary. 
8 | /// 9 | /// 10 | /// 11 | /// 12 | /// 13 | /// 14 | public interface IVectorStoreWithVocabulary 15 | : IVectorStore 16 | where TId : notnull 17 | where TVocabularyKey : notnull 18 | where TVocabularyStore : IVocabularyStore 19 | { 20 | /// 21 | /// The Vocabulary Store used to store the vocabulary of the database 22 | /// 23 | TVocabularyStore VocabularyStore { get; } 24 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorStore/MemoryDictionaryVectorStore.cs: -------------------------------------------------------------------------------- 1 | using System.Collections; 2 | using System.Collections.Concurrent; 3 | using System.Collections.Generic; 4 | using System.Text; 5 | using System.Text.Json; 6 | 7 | namespace Build5Nines.SharpVector.VectorStore; 8 | 9 | /// 10 | /// A thread safe simple in-memory database for storing and querying vectorized text items. 11 | /// 12 | /// 13 | /// 14 | public class MemoryDictionaryVectorStore : IVectorStore 15 | where TId : notnull 16 | { 17 | private ConcurrentDictionary> _database; 18 | 19 | /// 20 | /// The number of items in the database 21 | /// 22 | public int Count => _database.Count; 23 | 24 | public MemoryDictionaryVectorStore() { 25 | _database = new ConcurrentDictionary>(); 26 | } 27 | 28 | /// 29 | /// Retrieves a text and metadata by its ID 30 | /// 31 | /// 32 | /// 33 | /// 34 | public void Set(TId id, VectorTextItem item) 35 | { 36 | _database.AddOrUpdate(id, item, (key, oldValue) => item); 37 | } 38 | 39 | /// 40 | /// Gets all the Ids for every text. 
41 | /// 42 | /// 43 | public IEnumerable GetIds() 44 | { 45 | return _database.Keys; 46 | } 47 | 48 | /// 49 | /// Retrieves a text and metadata by its ID asynchronously 50 | /// 51 | /// 52 | /// 53 | /// 54 | public async Task SetAsync(TId id, VectorTextItem item) 55 | { 56 | await Task.Run(() => Set(id, item)); 57 | } 58 | 59 | /// 60 | /// Retrieves a text and metadata by its ID 61 | /// 62 | /// 63 | /// 64 | /// 65 | public IVectorTextItem Get(TId id) 66 | { 67 | if (_database.TryGetValue(id, out var entry)) 68 | { 69 | return entry; 70 | } 71 | throw new KeyNotFoundException($"Text with ID {id} not found."); 72 | } 73 | 74 | /// 75 | /// Deletes a text by its ID 76 | /// 77 | /// 78 | /// The removed text item 79 | /// 80 | public IVectorTextItem Delete(TId id) 81 | { 82 | if (_database.ContainsKey(id)) 83 | { 84 | VectorTextItem? itemRemoved; 85 | _database.Remove(id, out itemRemoved); 86 | #pragma warning disable CS8603 // Possible null reference return. 87 | return itemRemoved; 88 | #pragma warning restore CS8603 // Possible null reference return. 
89 | } 90 | else 91 | { 92 | throw new KeyNotFoundException($"Text with ID {id} not found."); 93 | } 94 | } 95 | 96 | /// 97 | /// Checks if the database contains a key 98 | /// 99 | /// 100 | /// 101 | public bool ContainsKey(TId id) => _database.ContainsKey(id); 102 | 103 | 104 | 105 | 106 | public IEnumerator>> GetEnumerator() 107 | { 108 | return _database.GetEnumerator(); 109 | } 110 | 111 | System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() 112 | { 113 | return _database.GetEnumerator(); 114 | } 115 | 116 | #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously 117 | public async IAsyncEnumerator>> GetAsyncEnumerator(CancellationToken cancellationToken = default) 118 | #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously 119 | { 120 | foreach (var item in _database) 121 | { 122 | yield return item; 123 | } 124 | } 125 | 126 | public virtual async Task SerializeToJsonStreamAsync(Stream stream) 127 | { 128 | if (stream == null) 129 | { 130 | throw new ArgumentNullException(nameof(stream)); 131 | } 132 | 133 | await JsonSerializer.SerializeAsync>>(stream, _database); 134 | } 135 | 136 | public virtual async Task DeserializeFromJsonStreamAsync(Stream stream) 137 | { 138 | if (stream == null) 139 | { 140 | throw new ArgumentNullException(nameof(stream)); 141 | } 142 | 143 | this._database = await JsonSerializer.DeserializeAsync>>(stream) ?? new ConcurrentDictionary>(); 144 | } 145 | } 146 | 147 | /// 148 | /// A thread safe simple in-memory database for storing and querying vectorized text items. 149 | /// This is a simplified version of the MemoryDictionaryVectorStore class that uses string as the Document type. 
150 | /// 151 | /// 152 | /// 153 | public class MemoryDictionaryVectorStore 154 | : MemoryDictionaryVectorStore 155 | where TId : notnull 156 | { } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorStore/MemoryDictionaryVectorStoreWithVocabulary.cs: -------------------------------------------------------------------------------- 1 | 2 | using Build5Nines.SharpVector.VectorStore; 3 | using Build5Nines.SharpVector.Vocabulary; 4 | 5 | /// 6 | /// A thread safe simple in-memory database for storing and querying vectorized text items with a vocabulary. 7 | /// 8 | /// 9 | /// 10 | /// 11 | /// 12 | /// 13 | public class MemoryDictionaryVectorStoreWithVocabulary 14 | : MemoryDictionaryVectorStore, IVectorStoreWithVocabulary 15 | where TId : notnull 16 | where TVocabularyKey : notnull 17 | where TVocabularyStore : IVocabularyStore 18 | { 19 | public TVocabularyStore VocabularyStore { get; } 20 | 21 | public MemoryDictionaryVectorStoreWithVocabulary(TVocabularyStore vocabularyStore) 22 | { 23 | VocabularyStore = vocabularyStore; 24 | } 25 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorTextDatabaseItem.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector; 2 | 3 | public interface IVectorTextDatabaseItem 4 | { 5 | TId Id { get; } 6 | TDocument Text { get; } 7 | TMetadata? Metadata { get; } 8 | float[] Vector { get; } 9 | } 10 | 11 | public class VectorTextDatabaseItem 12 | : IVectorTextDatabaseItem 13 | { 14 | public VectorTextDatabaseItem(TId id, TDocument text, TMetadata? metadata, float[] vector) 15 | { 16 | Id = id; 17 | Text = text; 18 | Metadata = metadata; 19 | Vector = vector; 20 | } 21 | 22 | public TId Id { get; private set; } 23 | public TDocument Text { get; private set; } 24 | public TMetadata? 
Metadata { get; private set; } 25 | public float[] Vector { get; private set; } 26 | } 27 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorTextItem.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector; 2 | 3 | /// 4 | /// An interface for storing a text with its metadata and vector data. 5 | /// 6 | /// 7 | /// 8 | public interface IVectorTextItem 9 | { 10 | TDocument Text { get; set; } 11 | TMetadata? Metadata { get; set; } 12 | float[] Vector { get; set; } 13 | } 14 | 15 | /// 16 | /// An interface for storing a text with its metadata and vector. 17 | /// 18 | /// 19 | public interface IVectorTextItem : IVectorTextItem 20 | { } 21 | 22 | /// 23 | /// A class for storing a text with its metadata and vector. 24 | /// 25 | /// 26 | /// 27 | public class VectorTextItem : IVectorTextItem 28 | { 29 | public VectorTextItem(TDocument text, TMetadata? metadata, float[] vector) 30 | { 31 | Text = text; 32 | Metadata = metadata; 33 | Vector = vector; 34 | } 35 | 36 | public TDocument Text { get; set; } 37 | public TMetadata? Metadata { get; set; } 38 | public float[] Vector { get; set; } 39 | } 40 | 41 | /// 42 | /// A class for storing a text with its metadata and vector data. 43 | /// 44 | /// 45 | public class VectorTextItem : VectorTextItem, IVectorTextItem 46 | { 47 | public VectorTextItem(string text, TMetadata? 
metadata, float[] vector) 48 | : base(text, metadata, vector) 49 | { } 50 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorTextResult.cs: -------------------------------------------------------------------------------- 1 | 2 | namespace Build5Nines.SharpVector; 3 | 4 | using System.Collections.Generic; 5 | using System.Linq; 6 | using System.Runtime.InteropServices; 7 | 8 | public interface IVectorTextResult 9 | { 10 | IEnumerable> Texts { get; } 11 | 12 | /// 13 | /// Returns true if the search returned no results. 14 | /// 15 | bool IsEmpty { get; } 16 | 17 | /// 18 | /// The total count of Texts found in the search results. 19 | /// 20 | int TotalCount { get; } 21 | 22 | /// 23 | /// The current page index of the search results. 24 | /// 25 | public int PageIndex { get; } 26 | 27 | /// 28 | /// The total number of pages of search results. 29 | /// 30 | public int TotalPages { get; } 31 | } 32 | 33 | public interface IVectorTextResult 34 | : IVectorTextResult 35 | { } 36 | 37 | public class VectorTextResult 38 | : IVectorTextResult 39 | { 40 | public VectorTextResult(int totalCount, int pageIndex, int totalPages, IEnumerable> texts) 41 | { 42 | Texts = texts; 43 | TotalCount = totalCount; 44 | PageIndex = pageIndex; 45 | TotalPages = totalPages; 46 | } 47 | 48 | /// 49 | /// The Texts found in the search results; IsEmpty is true when there are none. 50 | /// 51 | public IEnumerable> Texts { get; private set; } 52 | 53 | public bool IsEmpty { get => Texts == null || !Texts.Any(); } 54 | 55 | /// 56 | /// The total count of Texts found in the search results. 57 | /// 58 | public int TotalCount { get; private set; } 59 | 60 | /// 61 | /// The current page index of the search results. 62 | /// 63 | public int PageIndex { get; private set; } 64 | 65 | /// 66 | /// The total number of pages of search results. 
67 | /// 68 | public int TotalPages { get; private set; } 69 | } 70 | 71 | public class VectorTextResult 72 | : VectorTextResult, IVectorTextResult 73 | { 74 | public VectorTextResult(int totalCount, int pageIndex, int totalPages, IEnumerable> texts) 75 | : base(totalCount, pageIndex, totalPages, texts) 76 | { } 77 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/VectorTextResultItem.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Immutable; 2 | using Build5Nines.SharpVector.Id; 3 | 4 | namespace Build5Nines.SharpVector; 5 | 6 | public interface IVectorTextResultItem 7 | { 8 | TDocument Text{ get; } 9 | TMetadata? Metadata { get; } 10 | 11 | float VectorComparison { get; } 12 | } 13 | 14 | public interface IVectorTextResultItem 15 | : IVectorTextResultItem 16 | { 17 | TId Id { get; } 18 | } 19 | 20 | public interface IVectorTextResultItem 21 | : IVectorTextResultItem, IVectorTextResultItem 22 | { } 23 | 24 | public class VectorTextResultItem 25 | : IVectorTextResultItem, IVectorTextResultItem 26 | { 27 | private IVectorTextItem _item; 28 | private TId _id; 29 | 30 | public VectorTextResultItem(TId id, IVectorTextItem item, float vectorComparison) 31 | { 32 | _id = id; 33 | _item = item; 34 | VectorComparison = vectorComparison; 35 | } 36 | 37 | public TDocument Text { get => _item.Text; } 38 | public TMetadata? 
Metadata { get => _item.Metadata; } 39 | public TId Id { get => _id; } 40 | 41 | public ImmutableArray Vectors { get => ImmutableArray.Create(_item.Vector); } 42 | 43 | public float VectorComparison { get; private set; } 44 | } 45 | 46 | public class VectorTextResultItem 47 | : VectorTextResultItem, IVectorTextResultItem 48 | { 49 | public VectorTextResultItem(int id, IVectorTextItem item, float vectorComparison) 50 | : base(id, item, vectorComparison) 51 | { } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Vectorization/BagOfWordsVectorizer.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Vectorization; 2 | 3 | using Build5Nines.SharpVector.Vocabulary; 4 | 5 | /// 6 | /// A class that vectorizes a collection of tokens 7 | /// 8 | /// 9 | /// 10 | public class BagOfWordsVectorizer : IVectorizer 11 | where TVocabularyKey : notnull 12 | where TVocabularyValue : notnull 13 | { 14 | public async Task GenerateVectorFromTokensAsync(IVocabularyStore vocabularyStore, IEnumerable tokens) 15 | { 16 | return await Task.Run(() => GenerateVectorFromTokens(vocabularyStore, tokens)); 17 | } 18 | 19 | /// 20 | /// Generates vectors from tokens using the vocabulary. 
21 | /// 22 | /// The vocabulary store to use for vectorization 23 | /// The tokens to generate a vector from 24 | /// 25 | public float[] GenerateVectorFromTokens(IVocabularyStore vocabularyStore, IEnumerable tokens) 26 | { 27 | dynamic count = vocabularyStore.Count; 28 | var vector = new float[count]; 29 | 30 | foreach (var token in tokens) 31 | { 32 | if (vocabularyStore.TryGetValue(token, out var index)) 33 | { 34 | vector[index]++; 35 | } 36 | } 37 | 38 | return vector; 39 | } 40 | 41 | /// 42 | /// Method to normalize vectors to a specific length by padding or truncating 43 | /// 44 | /// 45 | /// 46 | /// 47 | public float[] NormalizeVector(float[] vector, TVocabularyValue length) 48 | { 49 | var intLength = Convert.ToInt32(length); 50 | float[] normalizedVector = new float[intLength]; 51 | Array.Copy(vector, normalizedVector, (long)Math.Min(vector.Length, intLength)); 52 | 53 | // Normalize the vector 54 | float magnitude = (float)Math.Sqrt(normalizedVector.Sum(v => v * v)); 55 | if (magnitude > 0) 56 | { 57 | for (int i = 0; i < normalizedVector.Length; i++) 58 | { 59 | normalizedVector[i] /= magnitude; 60 | } 61 | } 62 | // else 63 | // { 64 | // // If magnitude is zero, return the vector as it is 65 | // // or handle it as per your requirement 66 | // // For example, you can use a small value to avoid division by zero 67 | // for (int i = 0; i < normalizedVector.Length; i++) 68 | // { 69 | // //normalizedVector[i] = 0; // or 70 | // normalizedVector[i] = 1e-10f; 71 | // } 72 | // } 73 | 74 | return normalizedVector; 75 | } 76 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Vectorization/IVectorizer.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Vectorization; 2 | 3 | using Build5Nines.SharpVector.Vocabulary; 4 | 5 | /// 6 | /// An interface for classes that vectorizes a collection of tokens 7 | /// 8 | /// 9 | /// 
10 | public interface IVectorizer 11 | where TVocabularyKey : notnull 12 | where TVocabularyValue : notnull 13 | { 14 | /// <summary> 15 | /// Generates a vector from tokens using the vocabulary. 16 | /// </summary> 17 | /// <param name="vocabularyStore">The vocabulary store to use for vectorization</param> 18 | /// <param name="tokens">The tokens to generate a vector from</param> 19 | /// <returns>The generated vector</returns> 20 | float[] GenerateVectorFromTokens(IVocabularyStore vocabularyStore, IEnumerable tokens); 21 | 22 | /// <summary> 23 | /// Generates a vector from tokens using the vocabulary asynchronously. 24 | /// </summary> 25 | /// <param name="vocabularyStore">The vocabulary store to use for vectorization</param> 26 | /// <param name="tokens">The tokens to generate a vector from</param> 27 | /// <returns>A task representing the asynchronous vector generation</returns> 28 | Task GenerateVectorFromTokensAsync(IVocabularyStore vocabularyStore, IEnumerable tokens); 29 | 30 | /// <summary> 31 | /// Method to normalize vectors to a specific length by padding or truncating 32 | /// </summary> 33 | /// <param name="vector">The vector to normalize</param> 34 | /// <param name="length">The length the returned vector is padded or truncated to</param> 35 | /// <returns>The normalized vector</returns> 36 | float[] NormalizeVector(float[] vector, TVocabularyValue length); 37 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Vocabulary/DictionaryVocabularyStore.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Concurrent; 2 | using System.Text.Json; 3 | 4 | namespace Build5Nines.SharpVector.Vocabulary; 5 | 6 | /// 7 | /// A thread safe simple in-memory database for storing and querying vectorized text items. 
8 | /// 9 | /// 10 | public class DictionaryVocabularyStore : IVocabularyStore 11 | where TKey : notnull 12 | { 13 | private ConcurrentDictionary _vocabulary; 14 | 15 | public DictionaryVocabularyStore() 16 | { 17 | _vocabulary = new ConcurrentDictionary(); 18 | } 19 | 20 | private object _lock = new object(); 21 | 22 | public void Update(IEnumerable tokens) 23 | { 24 | lock(_lock) { 25 | foreach (var token in tokens) 26 | { 27 | if (!_vocabulary.ContainsKey(token)) 28 | { 29 | _vocabulary[token] = Count; 30 | } 31 | } 32 | } 33 | } 34 | 35 | public async Task UpdateAsync(IEnumerable tokens) 36 | { 37 | await Task.Run(() => Update(tokens)); 38 | } 39 | 40 | public int Count { get => _vocabulary.Count; } 41 | 42 | public bool TryGetValue(TKey token, out int index) 43 | { 44 | return _vocabulary.TryGetValue(token, out index); 45 | } 46 | 47 | public async Task SerializeToJsonStreamAsync(Stream stream) 48 | { 49 | if (stream == null) 50 | { 51 | throw new ArgumentNullException(nameof(stream)); 52 | } 53 | await JsonSerializer.SerializeAsync>(stream, _vocabulary); 54 | } 55 | 56 | public async Task DeserializeFromJsonStreamAsync(Stream stream) 57 | { 58 | if (stream == null) 59 | { 60 | throw new ArgumentNullException(nameof(stream)); 61 | } 62 | 63 | this._vocabulary = await JsonSerializer.DeserializeAsync>(stream) ?? 
new ConcurrentDictionary(); 64 | } 65 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/Vocabulary/IVocabularyStore.cs: -------------------------------------------------------------------------------- 1 | namespace Build5Nines.SharpVector.Vocabulary; 2 | 3 | 4 | public interface IVocabularyStore 5 | where TKey : notnull 6 | { 7 | /// 8 | /// Updates the vocabulary store 9 | /// 10 | /// 11 | /// 12 | void Update(IEnumerable tokens); 13 | 14 | /// 15 | /// Updates the vocabulary store asynchronously 16 | /// 17 | /// 18 | /// 19 | Task UpdateAsync(IEnumerable tokens); 20 | 21 | /// 22 | /// The number of items in the vocabulary store 23 | /// 24 | TValue Count { get; } 25 | 26 | /// 27 | /// Retrieves the index of a token 28 | /// 29 | /// 30 | /// 31 | /// 32 | bool TryGetValue(TKey token, out int index); 33 | 34 | /// 35 | /// Serializes the Vocabulary Store to a JSON stream 36 | /// 37 | /// 38 | /// 39 | Task SerializeToJsonStreamAsync(Stream stream); 40 | 41 | /// 42 | /// Deserializes the Vocabulary Store from a JSON stream 43 | /// 44 | /// 45 | /// 46 | Task DeserializeFromJsonStreamAsync(Stream stream); 47 | } -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/docs/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Build5Nines LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 
included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/Build5Nines.SharpVector/docs/README.md: -------------------------------------------------------------------------------- 1 | Build5Nines.SharpVector is the lightweight in-memory Vector Database for use in any .NET application. 2 | 3 | The `Build5Nines.SharpVector.BasicMemoryVectorDatabase` class uses a Bag of Words vectorization strategy, with Cosine similarity, a dictionary vocabulary store, and a basic text preprocessor. 4 | 5 | ### Example Usage: Load and Search Vector Database 6 | 7 | ```csharp 8 | // Create a Vector Database with metadata of type string 9 | var vdb = new BasicMemoryVectorDatabase(); 10 | // The Metadata is declared using generics, so you can store whatever data you need there. 11 | 12 | // Load Vector Database with some sample text data 13 | // Text is the movie description, and Metadata is the movie title with release year in this example 14 | vdb.AddText("Iron Man (2008) is a Marvel Studios action, adventure, and sci-fi movie about Tony Stark (Robert Downey Jr.), a billionaire inventor and weapons developer who is kidnapped by terrorists and forced to build a weapon. Instead, Tony uses his ingenuity to build a high-tech suit of armor and escape, becoming the superhero Iron Man. 
He then returns to the United States to refine the suit and use it to fight crime and terrorism.", "Iron Man (2008)"); 15 | vdb.AddText("The Lion King is a 1994 Disney animated film about a young lion cub named Simba who is the heir to the throne of an African savanna.", "The Lion King (1994)"); 16 | vdb.AddText("Aladdin is a 2019 live-action Disney adaptation of the 1992 animated classic of the same name about a street urchin who finds a magic lamp and uses a genie's wishes to become a prince so he can marry Princess Jasmine.", "Aladdin (2019)"); 17 | vdb.AddText("The Little Mermaid is a 2023 live-action adaptation of Disney's 1989 animated film of the same name. The movie is about Ariel, the youngest of King Triton's daughters, who is fascinated by the human world and falls in love with Prince Eric.", "The Little Mermaid"); 18 | vdb.AddText("Frozen is a 2013 Disney movie about a fearless optimist named Anna who sets off on a journey to find her sister Elsa, whose icy powers have trapped their kingdom in eternal winter.", "Frozen (2013)"); 19 | 20 | // Perform a Vector Search 21 | var result = vdb.Search(newPrompt, pageCount: 5); // return the first 5 results 22 | 23 | if (!result.IsEmpty) 24 | { 25 | Console.WriteLine("Similar Text Found:"); 26 | foreach (var item in result.Texts) 27 | { 28 | Console.WriteLine(item.Metadata); 29 | Console.WriteLine(item.Text); 30 | } 31 | } 32 | ``` 33 | 34 | ### Example Usage: Loading with Different Text Chunking Methods 35 | 36 | Also, the `TextDataLoader` can be used to help load text documents into the Vector Database with support for multiple different text chunking methods: 37 | 38 | ```csharp 39 | /// Paragraph Chunking 40 | var loader = new TextDataLoader(vdb); 41 | loader.AddDocument(document, new TextChunkingOptions 42 | { 43 | Method = TextChunkingMethod.Paragraph, 44 | RetrieveMetadata = (chunk) => { 45 | // add some basic metadata since this can't be null 46 | return "{ chunkSize: \"" + chunk.Length + "\" }"; 47 | 
} 48 | }); 49 | ``` 50 | 51 | ## Tutorials 52 | 53 | Here's a couple helpful tutorial links with additional documentation and examples on using `Build5Nines.SharpVector` in your own projects: 54 | 55 | - [Perform Vector Database Similarity Search in .NET Apps using Build5Nines.SharpVector](https://build5nines.com/using-build5nines-sharpvector-for-vector-similarity-search-in-net-applications/) by Chris Pietschmann 56 | - [Build a Generative AI + RAG App in C# with Phi-3, ONNX, and SharpVector](https://build5nines.com/build-a-generative-ai-rag-app-in-c-with-phi-3-onnx-and-sharpvector/) by Chris Pietschmann 57 | - [Implementing Local RAG using Phi-3 ONNX Runtime and Sidecar Pattern on Linux App Service](https://azure.github.io/AppService/2024/09/03/Phi3-vector.html) by Tulika Chaudharie (Principal Product Manager at Microsoft for Azure App Service) 58 | -------------------------------------------------------------------------------- /src/ConsoleTest/ConsoleTest.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Exe 9 | net8.0 10 | enable 11 | enable 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/OllamaConsoleTest/OllamaConsoleTest.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Exe 10 | net8.0 11 | enable 12 | enable 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/OllamaConsoleTest/Program.cs: -------------------------------------------------------------------------------- 1 | using Build5Nines.SharpVector; 2 | using Build5Nines.SharpVector.Ollama; 3 | using Build5Nines.SharpVector.Ollama.Embeddings; 4 | 5 | 6 | Console.WriteLine("Test OllamaEmbeddingsGenerator"); 7 | 8 | var generator = new OllamaEmbeddingsGenerator("nomic-embed-text"); 9 | var embeddings = await generator.GenerateEmbeddingsAsync("Hello 
World"); 10 | 11 | foreach (var item in embeddings) 12 | { 13 | Console.Write(item + ", "); 14 | } 15 | Console.WriteLine(""); 16 | 17 | Console.WriteLine("Test BasicOllamaMemoryVectorDatabase"); 18 | 19 | var vdb = new BasicOllamaMemoryVectorDatabase("nomic-embed-text"); //"http://localhost:11434/api/embeddings", "nomic-embed-text"); 20 | 21 | vdb.AddText("Hello World", "metadata"); 22 | vdb.AddText("Hola", "metadata2"); 23 | 24 | var result = vdb.Search("Hola Senior"); 25 | 26 | foreach (var item in result.Texts) 27 | { 28 | Console.WriteLine($"{item.Text} - {item.Metadata} - {item.VectorComparison}"); 29 | } 30 | -------------------------------------------------------------------------------- /src/OpenAIConsoleTest/OpenAIConsoleTest.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | net8.0 6 | enable 7 | enable 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/OpenAIConsoleTest/Program.cs: -------------------------------------------------------------------------------- 1 | // See https://aka.ms/new-console-template for more information 2 | 3 | using System.Diagnostics; 4 | using System.Text.Json; 5 | using Azure; 6 | using Azure.AI.OpenAI; 7 | using Build5Nines.SharpVector; 8 | using Build5Nines.SharpVector.OpenAI; 9 | 10 | Console.WriteLine("Hello, World!"); 11 | 12 | //var openAIUri = new Uri("https://api.openai.com/"); 13 | var openAIUri = new Uri("https://{name}.openai.azure.com/"); 14 | var openAIKey = "xxxxxxxxxx"; 15 | var modelName = "text-embedding-ada-002"; 16 | 17 | var openAIClient = new AzureOpenAIClient(openAIUri, new AzureKeyCredential(openAIKey)); 18 | 19 | var embeddingClient = openAIClient.GetEmbeddingClient(modelName); 20 | 21 | var vdb = new BasicOpenAIMemoryVectorDatabase(embeddingClient); 22 | 23 | 24 | var jsonString = await File.ReadAllTextAsync("movies.json"); 25 | 26 | var 
importTimer = new Stopwatch(); 27 | importTimer.Start(); 28 | 29 | 30 | 31 | using (JsonDocument document = JsonDocument.Parse(jsonString)) 32 | { 33 | JsonElement root = document.RootElement; 34 | JsonElement movies = root.GetProperty("movies"); 35 | 36 | await Parallel.ForEachAsync(movies.EnumerateArray(), async (movie, cancellationToken) => 37 | { 38 | Console.WriteLine($"Processing movie: {movie.GetProperty("title").GetString()}"); 39 | 40 | var text = movie.GetProperty("description").GetString(); 41 | var metadata = movie.GetProperty("title").GetString(); 42 | 43 | if (!string.IsNullOrWhiteSpace(text) && !string.IsNullOrWhiteSpace(metadata)) 44 | { 45 | await vdb.AddTextAsync(text, metadata); 46 | } 47 | }); 48 | 49 | // foreach (JsonElement movie in movies.EnumerateArray()) 50 | // { 51 | // var text = movie.GetProperty("description").GetString(); 52 | // var metadata = movie.GetProperty("title").GetString(); 53 | 54 | // if (!string.IsNullOrWhiteSpace(text) && !string.IsNullOrWhiteSpace(metadata)) 55 | // { 56 | // await vdb.AddTextAsync(text, metadata); 57 | // } 58 | // } 59 | } 60 | 61 | importTimer.Stop(); 62 | Console.WriteLine("Movie data imported into Vector Database."); 63 | Console.WriteLine($"Import took {importTimer.ElapsedMilliseconds} ms"); 64 | 65 | // Allow user to search for similar text 66 | Console.WriteLine("Type in prompt text, or type 'exit' to exit the app."); 67 | Console.WriteLine("What movie or TV show are you looking for? 
Try describing it in a few words."); 68 | 69 | 70 | while(true) { 71 | Console.Write("Prompt: "); 72 | var newPrompt = Console.ReadLine(); 73 | if (newPrompt == "exit") { 74 | break; 75 | } 76 | 77 | Console.WriteLine(string.Empty); 78 | 79 | if (newPrompt != null) { 80 | var timer = new Stopwatch(); 81 | timer.Start(); 82 | 83 | var pageSize = 3; 84 | // result = await vdb.Search(newPrompt, 85 | var result = await vdb.SearchAsync(newPrompt, 86 | threshold: 0.001f, // 0.2f, // Cosine Similarity - Only return results with similarity greater than this threshold 87 | // threshold: (float)1.4f, // Euclidean Distance - Only return results with distance less than this threshold 88 | 89 | //pageIndex: 0, // Page index of the search results (default is 0; the first page) 90 | pageCount: pageSize // Number of search results per page or max number to return 91 | ); 92 | 93 | timer.Stop(); 94 | Console.WriteLine($"Search took {timer.ElapsedMilliseconds} ms"); 95 | 96 | 97 | if (result == null || result.IsEmpty) 98 | { 99 | Console.WriteLine("No similar text found."); 100 | } else { 101 | Console.WriteLine("Similar Text Found!"); 102 | 103 | var firstItemIndex = result.PageIndex * pageSize + 1; 104 | var lastItemIndex = firstItemIndex + (pageSize > result.Texts.Count() ? 
result.Texts.Count() : pageSize) - 1; 105 | 106 | Console.WriteLine($"Page: {result.PageIndex + 1} (Showing {firstItemIndex} to {lastItemIndex} of Total {result.TotalCount})"); 107 | Console.WriteLine(string.Empty); 108 | foreach (var item in result.Texts) 109 | { 110 | Console.WriteLine($"Metadata: {item.Metadata}"); 111 | Console.WriteLine($"Vector Comparison: {item.VectorComparison}"); 112 | Console.WriteLine(item.Text); 113 | Console.WriteLine(string.Empty); 114 | } 115 | } 116 | } 117 | } -------------------------------------------------------------------------------- /src/SharpVector.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.0.31903.59 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{cae970b1-fe01-4c10-a66f-23b0383b50f5}") = "ConsoleTest", "ConsoleTest\ConsoleTest.csproj", "{B535888B-58C6-4EC5-B3E2-E900A2149065}" 7 | EndProject 8 | Project("{509f7238-2b33-467a-b94a-8a649d18df4b}") = "Build5Nines.SharpVector", "Build5Nines.SharpVector\Build5Nines.SharpVector.csproj", "{770C2E6B-4B00-4F4D-9D38-F43D299EC0E6}" 9 | EndProject 10 | Project("{23abc664-2b8b-4c01-80d2-145553f45972}") = "SharpVectorTest", "SharpVectorTest\SharpVectorTest.csproj", "{42ff2370-2d81-4384-be82-35fd11d7dab8}" 11 | EndProject 12 | Project("{c2585652-05bb-43a4-b96c-d45fee8bc629}") = "SharpVectorPerformance", "SharpVectorPerformance\SharpVectorPerformance.csproj", "{AFF76051-E043-45EB-9B5F-05D9C45D0DC7}" 13 | EndProject 14 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Build5Nines.SharpVector.OpenAI", "Build5Nines.SharpVector.OpenAI\Build5Nines.SharpVector.OpenAI.csproj", "{CABF1DBE-8FE1-4EDF-B5DD-B1BFB88D93C3}" 15 | EndProject 16 | Project("{d241a75f-12b7-476d-8ad0-3fc3eae491ea}") = "Build5Nines.SharpVector.Ollama", "Build5Nines.SharpVector.Ollama\Build5Nines.SharpVector.Ollama.csproj", 
"{f64a2af6-c0cd-41cf-879e-db5ef9c33375}" 17 | EndProject 18 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharpVectorOpenAITest", "SharpVectorOpenAITest\SharpVectorOpenAITest.csproj", "{04E08FA2-C4B4-47B4-ABB0-6FD57EA5FFFB}" 19 | EndProject 20 | Project("{953bc932-6273-4fcc-8789-50057e494d39}") = "OpenAIConsoleTest", "OpenAIConsoleTest\OpenAIConsoleTest.csproj", "{0B5B1D8C-9D1C-4779-880E-09B8F1BD1DD2}" 21 | EndProject 22 | Project("{619c7671-0831-4096-b1bf-39c6ee0595a4}") = "OllamaConsoleTest", "OllamaConsoleTest\OllamaConsoleTest.csproj", "{e211095e-33d5-4ba4-a9f7-9d6057d807a7}" 23 | EndProject 24 | Project("{b170972d-92ac-48a4-af26-e44dff4801aa}") = "Build5Nines.SharpVector.Playground", "Build5Nines.SharpVector.Playground\Build5Nines.SharpVector.Playground.csproj", "{DF4912BA-17B3-458B-B4D9-AD75287EAC45}" 25 | EndProject 26 | Global 27 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 28 | Debug|Any CPU = Debug|Any CPU 29 | Release|Any CPU = Release|Any CPU 30 | EndGlobalSection 31 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 32 | {B535888B-58C6-4EC5-B3E2-E900A2149065}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 33 | {B535888B-58C6-4EC5-B3E2-E900A2149065}.Debug|Any CPU.Build.0 = Debug|Any CPU 34 | {B535888B-58C6-4EC5-B3E2-E900A2149065}.Release|Any CPU.ActiveCfg = Release|Any CPU 35 | {B535888B-58C6-4EC5-B3E2-E900A2149065}.Release|Any CPU.Build.0 = Release|Any CPU 36 | {770C2E6B-4B00-4F4D-9D38-F43D299EC0E6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 37 | {770C2E6B-4B00-4F4D-9D38-F43D299EC0E6}.Debug|Any CPU.Build.0 = Debug|Any CPU 38 | {770C2E6B-4B00-4F4D-9D38-F43D299EC0E6}.Release|Any CPU.ActiveCfg = Release|Any CPU 39 | {770C2E6B-4B00-4F4D-9D38-F43D299EC0E6}.Release|Any CPU.Build.0 = Release|Any CPU 40 | {42ff2370-2d81-4384-be82-35fd11d7dab8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 41 | {42ff2370-2d81-4384-be82-35fd11d7dab8}.Debug|Any CPU.Build.0 = Debug|Any CPU 42 | {42ff2370-2d81-4384-be82-35fd11d7dab8}.Release|Any CPU.ActiveCfg = 
Release|Any CPU 43 | {42ff2370-2d81-4384-be82-35fd11d7dab8}.Release|Any CPU.Build.0 = Release|Any CPU 44 | {AFF76051-E043-45EB-9B5F-05D9C45D0DC7}.Release|Any CPU.ActiveCfg = Release|Any CPU 45 | {AFF76051-E043-45EB-9B5F-05D9C45D0DC7}.Release|Any CPU.Build.0 = Release|Any CPU 46 | {AFF76051-E043-45EB-9B5F-05D9C45D0DC7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 47 | {AFF76051-E043-45EB-9B5F-05D9C45D0DC7}.Debug|Any CPU.Build.0 = Debug|Any CPU 48 | {CABF1DBE-8FE1-4EDF-B5DD-B1BFB88D93C3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 49 | {CABF1DBE-8FE1-4EDF-B5DD-B1BFB88D93C3}.Debug|Any CPU.Build.0 = Debug|Any CPU 50 | {CABF1DBE-8FE1-4EDF-B5DD-B1BFB88D93C3}.Release|Any CPU.ActiveCfg = Release|Any CPU 51 | {CABF1DBE-8FE1-4EDF-B5DD-B1BFB88D93C3}.Release|Any CPU.Build.0 = Release|Any CPU 52 | {f64a2af6-c0cd-41cf-879e-db5ef9c33375}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 53 | {f64a2af6-c0cd-41cf-879e-db5ef9c33375}.Debug|Any CPU.Build.0 = Debug|Any CPU 54 | {f64a2af6-c0cd-41cf-879e-db5ef9c33375}.Release|Any CPU.ActiveCfg = Release|Any CPU 55 | {f64a2af6-c0cd-41cf-879e-db5ef9c33375}.Release|Any CPU.Build.0 = Release|Any CPU 56 | {04E08FA2-C4B4-47B4-ABB0-6FD57EA5FFFB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 57 | {04E08FA2-C4B4-47B4-ABB0-6FD57EA5FFFB}.Debug|Any CPU.Build.0 = Debug|Any CPU 58 | {04E08FA2-C4B4-47B4-ABB0-6FD57EA5FFFB}.Release|Any CPU.ActiveCfg = Release|Any CPU 59 | {04E08FA2-C4B4-47B4-ABB0-6FD57EA5FFFB}.Release|Any CPU.Build.0 = Release|Any CPU 60 | {0B5B1D8C-9D1C-4779-880E-09B8F1BD1DD2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 61 | {0B5B1D8C-9D1C-4779-880E-09B8F1BD1DD2}.Debug|Any CPU.Build.0 = Debug|Any CPU 62 | {0B5B1D8C-9D1C-4779-880E-09B8F1BD1DD2}.Release|Any CPU.ActiveCfg = Release|Any CPU 63 | {0B5B1D8C-9D1C-4779-880E-09B8F1BD1DD2}.Release|Any CPU.Build.0 = Release|Any CPU 64 | {DF4912BA-17B3-458B-B4D9-AD75287EAC45}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 65 | {DF4912BA-17B3-458B-B4D9-AD75287EAC45}.Debug|Any CPU.Build.0 = Debug|Any CPU 66 | 
{DF4912BA-17B3-458B-B4D9-AD75287EAC45}.Release|Any CPU.ActiveCfg = Release|Any CPU 67 | {DF4912BA-17B3-458B-B4D9-AD75287EAC45}.Release|Any CPU.Build.0 = Release|Any CPU 68 | {e211095e-33d5-4ba4-a9f7-9d6057d807a7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 69 | {e211095e-33d5-4ba4-a9f7-9d6057d807a7}.Debug|Any CPU.Build.0 = Debug|Any CPU 70 | {e211095e-33d5-4ba4-a9f7-9d6057d807a7}.Release|Any CPU.ActiveCfg = Release|Any CPU 71 | {e211095e-33d5-4ba4-a9f7-9d6057d807a7}.Release|Any CPU.Build.0 = Release|Any CPU 72 | EndGlobalSection 73 | GlobalSection(SolutionProperties) = preSolution 74 | HideSolutionNode = FALSE 75 | EndGlobalSection 76 | GlobalSection(ExtensibilityGlobals) = postSolution 77 | SolutionGuid = {861BA76B-E825-4CC4-81F7-97A00FA2BD48} 78 | EndGlobalSection 79 | EndGlobal 80 | -------------------------------------------------------------------------------- /src/SharpVectorOpenAITest/BasicOpenAIMemoryVectorDatabaseTest.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.VisualStudio.TestTools.UnitTesting; 2 | using Moq; 3 | using OpenAI; 4 | using OpenAI.Embeddings; 5 | using Build5Nines.SharpVector.OpenAI; 6 | using System.ClientModel; 7 | using System.Threading; 8 | using System.Threading.Tasks; 9 | using System.Collections.Generic; 10 | 11 | namespace Build5Nines.SharpVector.OpenAI.Tests 12 | { 13 | [TestClass] 14 | public class BasicMemoryVectorDatabaseTest 15 | { 16 | private Mock? _mockEmbeddingClient; 17 | private BasicOpenAIMemoryVectorDatabase? 
_database; 18 | 19 | [TestInitialize] 20 | public void Setup() 21 | { 22 | _mockEmbeddingClient = new Mock(); 23 | _database = new BasicOpenAIMemoryVectorDatabase(_mockEmbeddingClient.Object); 24 | } 25 | 26 | [TestMethod] 27 | public void TestInitialization() 28 | { 29 | Assert.IsNotNull(_database); 30 | } 31 | 32 | [TestMethod] 33 | public async Task Test_SaveLoad_01() 34 | { 35 | var filename = "openai_test_saveload_01.b59vdb"; 36 | #pragma warning disable CS8604 // Possible null reference argument. 37 | await _database.SaveToFileAsync(filename); 38 | #pragma warning restore CS8604 // Possible null reference argument. 39 | 40 | await _database.LoadFromFileAsync(filename); 41 | } 42 | 43 | } 44 | } -------------------------------------------------------------------------------- /src/SharpVectorOpenAITest/SharpVectorOpenAITest.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net8.0 5 | enable 6 | enable 7 | 8 | false 9 | true 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /src/SharpVectorOpenAITest/UnitTest1.cs: -------------------------------------------------------------------------------- 1 | namespace SharpVectorOpenAITest; 2 | 3 | [TestClass] 4 | public class UnitTest1 5 | { 6 | [TestMethod] 7 | public void TestMethod1() 8 | { 9 | } 10 | } -------------------------------------------------------------------------------- /src/SharpVectorPerformance/Program.cs: -------------------------------------------------------------------------------- 1 | // See https://aka.ms/new-console-template for more information 2 | using BenchmarkDotNet.Running; 3 | 4 | namespace SharpVectorPerformance; 5 | 6 | public class Program 7 | { 8 | public static void Main(string[] args) 9 | { 10 | BenchmarkRunner.Run(); 11 | } 12 | } 
--------------------------------------------------------------------------------
/src/SharpVectorPerformance/SharpVectorPerformance.csproj:
--------------------------------------------------------------------------------
1 |  2 | 3 | 4 | Exe 5 | net8.0 6 | enable 7 | enable 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
--------------------------------------------------------------------------------
/src/SharpVectorTest/Preprocessing/BasicTextPreprocessorTests.cs:
--------------------------------------------------------------------------------
namespace SharpVectorTest.Preprocessing;

using System.Diagnostics;
using System.Threading.Tasks;
using Build5Nines.SharpVector;
using Build5Nines.SharpVector.Embeddings;
using Build5Nines.SharpVector.Id;
using Build5Nines.SharpVector.Preprocessing;
using Build5Nines.SharpVector.VectorCompare;
using Build5Nines.SharpVector.Vectorization;
using Build5Nines.SharpVector.VectorStore;
using Build5Nines.SharpVector.Vocabulary;

/// <summary>
/// Tests for <c>BasicTextPreprocessor.TokenizeAndPreprocess</c> covering null, empty, whitespace,
/// punctuation, CJK and emoji input.
/// </summary>
[TestClass]
public class VectorDatabaseTests
{
    /// <summary>
    /// Asserts that <paramref name="tokens"/> begins with <paramref name="expectedTokens"/>, in order.
    /// Only the first <c>expectedTokens.Length</c> tokens are compared; any additional trailing
    /// tokens are not checked.
    /// </summary>
    /// <param name="tokens">Tokens produced by the preprocessor.</param>
    /// <param name="expectedTokens">Tokens expected at the start of the sequence.</param>
    /// <param name="appendTokensToMessage">
    /// When true, the joined token list is appended to the failure message.
    /// </param>
    private static void AssertTokensStartWith(
        IEnumerable<string> tokens,
        string[] expectedTokens,
        bool appendTokensToMessage = false)
    {
        // Materialize once so the sequence is not re-enumerated for every index.
        var actualTokens = tokens.ToList();

        for (var i = 0; i < expectedTokens.Length; i++)
        {
            var message = $"Index: {i} does not match";
            if (appendTokensToMessage)
            {
                message = $"Index: {i} does not match ::" + String.Join("-", actualTokens);
            }

            Assert.AreEqual(expectedTokens[i], actualTokens[i], message);
        }
    }

    [TestMethod]
    public void TokenizeAndPreprocess_Null()
    {
        var preprocessor = new BasicTextPreprocessor();
#pragma warning disable CS8625 // Cannot convert null literal to non-nullable reference type.
        var tokens = preprocessor.TokenizeAndPreprocess(null);
#pragma warning restore CS8625 // Cannot convert null literal to non-nullable reference type.

        Assert.AreEqual(0, tokens.Count());
    }

    [TestMethod]
    public void TokenizeAndPreprocess_Empty()
    {
        var preprocessor = new BasicTextPreprocessor();
        var tokens = preprocessor.TokenizeAndPreprocess(string.Empty);

        Assert.AreEqual(0, tokens.Count());
    }

    [TestMethod]
    public void TokenizeAndPreprocess_Whitespace()
    {
        var preprocessor = new BasicTextPreprocessor();
        var tokens = preprocessor.TokenizeAndPreprocess(" ");

        Assert.AreEqual(0, tokens.Count());
    }

    [TestMethod]
    public void TokenizeAndPreprocess_Punctuation_01()
    {
        var preprocessor = new BasicTextPreprocessor();
        var tokens = preprocessor.TokenizeAndPreprocess("Hello.!@#$%^&*()`~世-_=+ 界{}[]|:;\"',.<>/?!");

        AssertTokensStartWith(tokens, new[] { "hello", "世", "界" });
    }

    [TestMethod]
    public void TokenizeAndPreprocess_Punctuation_02()
    {
        var preprocessor = new BasicTextPreprocessor();
        var tokens = preprocessor.TokenizeAndPreprocess("Hello.!@#$%^&*()`~-_=+{}[]|:;\"',.<>/?");

        AssertTokensStartWith(tokens, new[] { "hello" });
    }

    [TestMethod]
    public void TokenizeAndPreprocess_Punctuation_03()
    {
        var preprocessor = new BasicTextPreprocessor();
        var tokens = preprocessor.TokenizeAndPreprocess("Hello.🔥!@#$%^&*()`~世-_=+ 界{}[]|:;\"',.<>/?");

        AssertTokensStartWith(tokens, new[] { "hello", "🔥", "世", "界" });
    }

    [TestMethod]
    public void TokenizeAndPreprocess_Punctuation_04()
    {
        var preprocessor = new BasicTextPreprocessor();
        var tokens = preprocessor.TokenizeAndPreprocess("Hello.!@#🔥$%^&*()`~-_=+{}[]|:;\"',.<>/?");

        AssertTokensStartWith(tokens, new[] { "hello", "🔥" });
    }

    [TestMethod]
    public void TokenizeAndPreprocess_01()
    {
        var preprocessor = new BasicTextPreprocessor();
        var tokens = preprocessor.TokenizeAndPreprocess("Hello, world! 你好,世界!");

        AssertTokensStartWith(tokens, new[] { "hello", "world", "你", "好", "世", "界" });
    }

    [TestMethod]
    public void TokenizeAndPreprocess_02()
    {
        var preprocessor = new BasicTextPreprocessor();
        var tokens = preprocessor.TokenizeAndPreprocess("Hello, World! How are you?");

        AssertTokensStartWith(tokens, new[] { "hello", "world", "how", "are", "you" });
    }

    [TestMethod]
    public void TokenizeAndPreprocess_03()
    {
        var preprocessor = new BasicTextPreprocessor();
        var tokens = preprocessor.TokenizeAndPreprocess("Hello, World! 👑🔥 How are you? 🔥.");

        AssertTokensStartWith(
            tokens,
            new[] { "hello", "world", "👑", "🔥", "how", "are", "you", "🔥" },
            appendTokensToMessage: true);
    }

    [TestMethod]
    public void TokenizeAndPreprocess_04()
    {
        var preprocessor = new BasicTextPreprocessor();
        var tokens = preprocessor.TokenizeAndPreprocess("Hello, world! 👑🔥你好,世界!👑 ");

        AssertTokensStartWith(
            tokens,
            new[] { "hello", "world", "👑", "🔥", "你", "好", "世", "界", "👑" },
            appendTokensToMessage: true);
    }
}
--------------------------------------------------------------------------------
/src/SharpVectorTest/Regression/RegressionTests.cs:
--------------------------------------------------------------------------------
namespace SharpVectorTest.Regression;

using System.Diagnostics;
using System.Threading.Tasks;
using Build5Nines.SharpVector;
using Build5Nines.SharpVector.Id;
using Build5Nines.SharpVector.Preprocessing;
using Build5Nines.SharpVector.VectorCompare;
using Build5Nines.SharpVector.Vectorization;
using Build5Nines.SharpVector.VectorStore;
using Build5Nines.SharpVector.Vocabulary;

/// <summary>
/// Regression tests that load the checked-in database file
/// <c>Regression/regression-vector-database-v2.0.2.b59vdb</c>.
/// </summary>
[TestClass]
public class RegressionTests
{
    /// <summary>
    /// Loads the saved database file, searches it for "Lion King" and checks the single result's
    /// text, metadata and comparison score.
    /// </summary>
    [TestMethod]
    public void VectorDatabaseVersion_2_0_2_001()
    {
        // NOTE(review): the generic type argument of MemoryVectorDatabase appears to have been
        // stripped from this copy of the file - confirm against the original source.
        var vdb = new MemoryVectorDatabase();

        vdb.LoadFromFile("Regression/regression-vector-database-v2.0.2.b59vdb");

        var results = vdb.Search("Lion King");

        Assert.AreEqual(1, results.Texts.Count());
        Assert.IsTrue(results.Texts.First().Text.Contains("Lion King"));
        Assert.AreEqual("{ value: \"JSON Metadata Value\" }", results.Texts.First().Metadata);
        Assert.AreEqual(0.3396831452846527, results.Texts.First().VectorComparison);
    }

    /// <summary>
    /// Reads the database info record from the saved database file and checks its schema,
    /// version and class type strings.
    /// </summary>
    [TestMethod]
    public async Task LoadVectorDatabaseInfo_2_0_2_001()
    {
        // Dispose the stream when the test ends so the file handle is released deterministically.
        using var file = new FileStream("Regression/regression-vector-database-v2.0.2.b59vdb", FileMode.Open, FileAccess.Read);
        var dbinfo = await DatabaseFile.LoadDatabaseInfoFromZipArchiveAsync(file);

        Assert.AreEqual("Build5Nines.SharpVector", dbinfo.Schema);
        Assert.AreEqual("1.0.0", dbinfo.Version);
        Assert.AreEqual("Build5Nines.SharpVector.MemoryVectorDatabase\u00601[[System.String, System.Private.CoreLib, Version=8.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e]]", dbinfo.ClassType);
    }
}
--------------------------------------------------------------------------------
/src/SharpVectorTest/Regression/regression-vector-database-v2.0.2.b59vdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Build5Nines/SharpVector/3334bac9d6ce0d421ee471ed78631e2c0f259aaf/src/SharpVectorTest/Regression/regression-vector-database-v2.0.2.b59vdb
--------------------------------------------------------------------------------
/src/SharpVectorTest/SharpVectorTest.csproj:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | net8.0 5 | enable 6 | enable 7 | 8 | false 9 | true 10 | 11 | 12 | 13 | 14 | runtime; build; native; contentfiles; analyzers; buildtransitive 15 | all 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | Always 34 | 35 | 36 | 37 | 38 |
--------------------------------------------------------------------------------
/src/SharpVectorTest/VectorStore/MemoryDictionaryVectorStoreTest.cs:
--------------------------------------------------------------------------------
using Build5Nines.SharpVector;
using Build5Nines.SharpVector.VectorStore;

namespace SharpVectorTest.VectorStore;

/// <summary>
/// Tests for <c>MemoryDictionaryVectorStore</c>: JSON stream round-trip, lookup by id and enumeration.
/// </summary>
// NOTE(review): the generic type arguments of MemoryDictionaryVectorStore and VectorTextItem appear
// to have been stripped from this copy of the file - confirm against the original source.
[TestClass]
public class MemoryDictionaryVectorStoreTests
{
    /// <summary>
    /// Serializes a four-item store to a JSON stream, deserializes it into a second store and
    /// checks the item count and every vector component.
    /// </summary>
    [TestMethod]
    public async Task SerializeDeserializeStream_001()
    {
        var vectorStore = new MemoryDictionaryVectorStore();
        vectorStore.Set(1, new VectorTextItem("key1", "1", new float[] { 1.0F, 2.0F, 3.0F }));
        vectorStore.Set(2, new VectorTextItem("key2", "2", new float[] { 4.0F, 5.0F, 6.0F }));
        vectorStore.Set(3, new VectorTextItem("key3", "3", new float[] { 7.0F, 8.0F, 9.0F }));
        vectorStore.Set(4, new VectorTextItem("key4", "4", new float[] { 10.0F, 11.0F, 12.0F }));

        using var stream = new MemoryStream();
        await vectorStore.SerializeToJsonStreamAsync(stream);

        stream.Position = 0; // move to beginning of stream

        var vectorStoreTwo = new MemoryDictionaryVectorStore();
        await vectorStoreTwo.DeserializeFromJsonStreamAsync(stream);

        Assert.AreEqual(4, vectorStoreTwo.Count());

        Assert.AreEqual(3, vectorStoreTwo.Get(1).Vector.Length);
        Assert.AreEqual(3, vectorStoreTwo.Get(2).Vector.Length);
        Assert.AreEqual(3, vectorStoreTwo.Get(3).Vector.Length);
        Assert.AreEqual(3, vectorStoreTwo.Get(4).Vector.Length);

        // Item n was stored with the vector { 3n-2, 3n-1, 3n }.
        for (var id = 1; id <= 4; id++)
        {
            for (var component = 0; component < 3; component++)
            {
                double expected = ((id - 1) * 3) + component + 1;
                Assert.AreEqual(expected, vectorStoreTwo.Get(id).Vector[component]);
            }
        }
    }

    /// <summary>
    /// Stores four items and checks that the item with id 2 is returned with its text.
    /// </summary>
    [TestMethod]
    public void MemoryVectorStore_001()
    {
        var vectorStore = new MemoryDictionaryVectorStore();
        vectorStore.Set(1, new VectorTextItem("key1", "1", new float[] { 1.0F, 2.0F, 3.0F }));
        vectorStore.Set(2, new VectorTextItem("key2", "2", new float[] { 4.0F, 5.0F, 6.0F }));
        vectorStore.Set(3, new VectorTextItem("key3", "3", new float[] { 7.0F, 8.0F, 9.0F }));
        vectorStore.Set(4, new VectorTextItem("key4", "4", new float[] { 10.0F, 11.0F, 12.0F }));

        var item = vectorStore.Get(2);
        Assert.AreEqual("key2", item.Text);
    }

    /// <summary>
    /// Stores four items and checks that enumeration yields a non-null value and a non-zero key
    /// for every entry.
    /// </summary>
    [TestMethod]
    public void MemoryVectorStore_002()
    {
        var vectorStore = new MemoryDictionaryVectorStore();
        vectorStore.Set(1, new VectorTextItem("key1", "1", new float[] { 1.0F, 2.0F, 3.0F }));
        vectorStore.Set(2, new VectorTextItem("key2", "2", new float[] { 4.0F, 5.0F, 6.0F }));
        vectorStore.Set(3, new VectorTextItem("key3", "3", new float[] { 7.0F, 8.0F, 9.0F }));
        vectorStore.Set(4, new VectorTextItem("key4", "4", new float[] { 10.0F, 11.0F, 12.0F }));

        foreach (var item in vectorStore)
        {
            Assert.IsNotNull(item.Value);
            Assert.AreNotEqual(0, item.Key);
        }
    }

}
--------------------------------------------------------------------------------
/src/build-release.sh:
--------------------------------------------------------------------------------
1 | dotnet build --configuration Release
--------------------------------------------------------------------------------
/src/run.sh:
--------------------------------------------------------------------------------
1 | dotnet run --project "ConsoleTest"
--------------------------------------------------------------------------------