├── .cargo └── config.toml ├── .editorconfig ├── .flake8 ├── .github ├── CODE_OF_CONDUCT.md ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ ├── do_chore.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── SECURITY.md └── workflows │ ├── publish-docs.yml │ └── quality-check.yml ├── .gitignore ├── .prettierrc.yml ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── docs ├── CNAME ├── assets │ ├── favicon64.png │ └── wordmark.png ├── blog │ ├── .authors.yml │ └── index.md ├── changelog.md ├── contributing.md ├── css │ └── style.css └── index.md ├── mkdocs.yml ├── protos └── database.proto ├── requirements.txt ├── rustfmt.toml └── src ├── cores ├── database.rs ├── index.rs ├── mod.rs └── storage.rs ├── main.rs ├── protos.rs ├── types ├── filter.rs ├── metric.rs ├── mod.rs ├── record.rs └── vector.rs └── utils ├── kmeans.rs └── mod.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [env] 2 | RUST_TEST_THREADS = "1" 3 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root=true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_style = space 6 | insert_final_newline = true 7 | trim_trailing_whitespace = true 8 | max_line_length = 80 9 | 10 | [*.{rs, py}] 11 | indent_size = 4 12 | 13 | [*.{yml, html, css, js, ts, md}] 14 | indent_size = 2 15 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .venv, target 3 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and 
leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | - Demonstrating empathy and kindness toward other people 21 | - Being respectful of differing opinions, viewpoints, and experiences 22 | - Giving and gracefully accepting constructive feedback 23 | - Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | - Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | - The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | - Trolling, insulting or derogatory comments, and personal or political attacks 33 | - Public or private harassment 34 | - Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | - Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 
45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official email address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | edwin@oasysai.com. All complaints will be reviewed and investigated promptly and 64 | fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. 
No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.1. The Community Impact Guidelines were inspired by [Mozilla's Code of 119 | Conduct Enforcement Ladder][mozilla_coc]. 120 | 121 | [homepage]: https://www.contributor-covenant.org 122 | [mozilla_coc]: https://github.com/mozilla/diversity 123 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐞 Report Bug 3 | about: Report an unexpected behavior or a malfunctioning feature. 
4 | title: "BUG: " 5 | labels: bug 6 | assignees: "" 7 | --- 8 | 9 | ### Short Description 10 | 11 | Please describe the issue you are experiencing in a few sentences. 12 | 13 | ### Error Message 14 | 15 | If you received an error message, please paste some parts of it here. 16 | 17 | ```txt 18 | 19 | ``` 20 | 21 | ### Steps to Reproduce 22 | 23 | What are the minimal steps to reproduce the behavior? 24 | 25 | Example: 26 | 27 | 1. Import the library in ... 28 | 2. Initialize the object with ... 29 | 3. Call the function ... 30 | 31 | ### Expected Behavior 32 | 33 | What do you expect to happen? 34 | 35 | ### Additional Context 36 | 37 | Add any other context about the problem here like error traces, etc. 38 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | 3 | contact_links: 4 | - name: ❓ Ask Question 5 | url: https://github.com/oasysai/oasysdb/discussions 6 | about: Ask general questions or share ideas on Discussions. 7 | 8 | - name: 💬 Join Discord 9 | url: https://discord.gg/bDhQrkqNP4 10 | about: Join the Discord server to help shape the future of OasysDB. 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/do_chore.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🧹 Do Chore 3 | about: Documentation updates, code refactoring, or other chores. 4 | title: "CHORE: " 5 | labels: chore 6 | assignees: "" 7 | --- 8 | 9 | ### Description 10 | 11 | Please describe the chore you suggest in a few sentences. 12 | 13 | Chore examples: 14 | 15 | - Updating documentation 16 | - Adding tests or examples 17 | - Refactoring parts of the codebase 18 | 19 | ### Context 20 | 21 | Why is this chore beneficial for the project and its community? 
22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🛠️ Feature Request 3 | about: Request a new feature or an improvement to an existing feature. 4 | title: "FEAT: " 5 | labels: enhancement 6 | assignees: "" 7 | --- 8 | 9 | ### Use Case 10 | 11 | What's the use case for this feature? How would you use it? 12 | 13 | ### Potential Solution 14 | 15 | At a high level, how would you like the feature to be implemented? 16 | 17 | ### Additional Context 18 | 19 | Add context about the feature like links to similar implementations. 20 | 21 | For example: 22 | 23 | - Link to a similar feature in another project 24 | - Screenshot of the feature functionality 25 | - Research papers or articles about the feature 26 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Purpose 2 | 3 | Describe the problem solved or feature added by this PR. 4 | 5 | ### Approach 6 | 7 | How does this PR solve the problem or add the feature? 8 | 9 | ### Testing 10 | 11 | - [ ] I have tested this PR locally. 12 | - [ ] If applicable, I added tests to cover my changes. 13 | 14 | How did you test this PR? How should the reviewer test this PR? 15 | 16 | ### Chore Checklist 17 | 18 | - [ ] I formatted my code according to the style and linter guidelines. 19 | - [ ] If applicable, I updated the documentation accordingly. 20 | -------------------------------------------------------------------------------- /.github/SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | Thank you for taking the time to report a security issue. We are trying our best 4 | to make this project safe for everyone.
We appreciate your efforts to disclose 5 | the issue responsibly and will make every effort to acknowledge your 6 | contributions. 7 | 8 | ## Reporting a vulnerability 9 | 10 | **Please do not report security vulnerabilities through public GitHub issues.** 11 | 12 | If you believe you have found a security vulnerability, please send an email to 13 | edwin@oasysai.com. Please include as many details as possible; these may 14 | include: 15 | 16 | - Impact of the vulnerability. 17 | - Steps to reproduce. 18 | - Possible solutions. 19 | - Location of the vulnerability, such as the file or line number. 20 | - If applicable, proof-of-concept or exploit code. 21 | -------------------------------------------------------------------------------- /.github/workflows/publish-docs.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docs 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | push: 7 | branches: 8 | - main 9 | 10 | paths: 11 | - "docs/**" 12 | - "mkdocs.yml" 13 | 14 | permissions: 15 | id-token: write 16 | pages: write 17 | contents: write 18 | 19 | jobs: 20 | build-docs: 21 | name: Build documentation 22 | runs-on: ubuntu-latest 23 | steps: 24 | - name: Checkout the code 25 | uses: actions/checkout@v4 26 | 27 | - name: Install Python 28 | uses: actions/setup-python@v5 29 | with: 30 | python-version: 3.x 31 | 32 | - name: Install dependencies 33 | run: pip install mkdocs-material 34 | 35 | - name: Publish the documentation 36 | run: | 37 | mkdocs gh-deploy --force --message "cd: deploy docs from {sha}" 38 | 39 | publish-docs: 40 | name: Publish documentation 41 | runs-on: ubuntu-latest 42 | needs: build-docs 43 | environment: 44 | name: Docs 45 | url: ${{ steps.deployment.outputs.page_url }} 46 | steps: 47 | - name: Checkout 48 | uses: actions/checkout@v4 49 | with: 50 | ref: gh-pages 51 | 52 | - name: Setup pages 53 | uses: actions/configure-pages@v5 54 | 55 | - name: Upload artifact 56 | uses: actions/upload-pages-artifact@v3 57
| with: 58 | path: "." 59 | 60 | - name: Deploy to GitHub Pages 61 | id: deployment 62 | uses: actions/deploy-pages@v4 63 | -------------------------------------------------------------------------------- /.github/workflows/quality-check.yml: -------------------------------------------------------------------------------- 1 | name: Quality Check 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | pull_request: 7 | paths-ignore: 8 | - "docs/**" 9 | - "clients/**" 10 | 11 | push: 12 | branches: 13 | - main 14 | paths-ignore: 15 | - "docs/**" 16 | - "clients/**" 17 | 18 | jobs: 19 | quality-check: 20 | name: Run All Checks 21 | runs-on: ubuntu-latest 22 | steps: 23 | - name: Checkout Code 24 | uses: actions/checkout@v4 25 | 26 | - name: Install Rust Toolchain 27 | uses: dtolnay/rust-toolchain@stable 28 | with: 29 | components: rustfmt, clippy 30 | 31 | - name: Install Protobuf Compiler 32 | run: | 33 | sudo apt update && sudo apt upgrade -y 34 | sudo apt install -y protobuf-compiler libprotobuf-dev 35 | 36 | - name: Run Formatter 37 | run: cargo fmt -- --check 38 | 39 | - name: Run Linter 40 | run: cargo clippy -- -D warnings 41 | 42 | - name: Run Tests 43 | run: cargo test --all-features -- --test-threads 1 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OasysDB tests. 2 | odb* 3 | oasysdb* 4 | 5 | # Rust stuff. 6 | debug 7 | target 8 | 9 | # Python stuff. 10 | __pycache__ 11 | .pytest_cache 12 | .venv 13 | *.so 14 | *.py[cod] 15 | 16 | # Benchmarking. 17 | *.ivecs 18 | *.fvecs 19 | 20 | # Misc. 21 | .vscode 22 | .ds_store 23 | 24 | # Environment variables. 
25 | .env 26 | .env.* 27 | !.env.example 28 | -------------------------------------------------------------------------------- /.prettierrc.yml: -------------------------------------------------------------------------------- 1 | bracketSpacing: true 2 | singleQuote: false 3 | trailingComma: "none" 4 | semi: false 5 | tabWidth: 2 6 | printWidth: 80 7 | proseWrap: "always" 8 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.22.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler" 16 | version = "1.0.2" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 19 | 20 | [[package]] 21 | name = "aho-corasick" 22 | version = "1.1.3" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 | dependencies = [ 26 | "memchr", 27 | ] 28 | 29 | [[package]] 30 | name = "allocator-api2" 31 | version = "0.2.18" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" 34 | 35 | [[package]] 36 | name = "anstream" 37 | version = "0.6.15" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" 40 | dependencies = [ 41 | "anstyle", 42 | "anstyle-parse", 43 | "anstyle-query", 44 | "anstyle-wincon", 45 | "colorchoice", 46 | 
"is_terminal_polyfill", 47 | "utf8parse", 48 | ] 49 | 50 | [[package]] 51 | name = "anstyle" 52 | version = "1.0.8" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" 55 | 56 | [[package]] 57 | name = "anstyle-parse" 58 | version = "0.2.5" 59 | source = "registry+https://github.com/rust-lang/crates.io-index" 60 | checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" 61 | dependencies = [ 62 | "utf8parse", 63 | ] 64 | 65 | [[package]] 66 | name = "anstyle-query" 67 | version = "1.1.1" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" 70 | dependencies = [ 71 | "windows-sys 0.52.0", 72 | ] 73 | 74 | [[package]] 75 | name = "anstyle-wincon" 76 | version = "3.0.4" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" 79 | dependencies = [ 80 | "anstyle", 81 | "windows-sys 0.52.0", 82 | ] 83 | 84 | [[package]] 85 | name = "anyhow" 86 | version = "1.0.86" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" 89 | 90 | [[package]] 91 | name = "async-stream" 92 | version = "0.3.5" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" 95 | dependencies = [ 96 | "async-stream-impl", 97 | "futures-core", 98 | "pin-project-lite", 99 | ] 100 | 101 | [[package]] 102 | name = "async-stream-impl" 103 | version = "0.3.5" 104 | source = "registry+https://github.com/rust-lang/crates.io-index" 105 | checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" 106 | dependencies = [ 107 | "proc-macro2", 108 | "quote", 109 | "syn", 110 | ] 111 | 112 | 
[[package]] 113 | name = "async-trait" 114 | version = "0.1.81" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" 117 | dependencies = [ 118 | "proc-macro2", 119 | "quote", 120 | "syn", 121 | ] 122 | 123 | [[package]] 124 | name = "atomic-waker" 125 | version = "1.1.2" 126 | source = "registry+https://github.com/rust-lang/crates.io-index" 127 | checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" 128 | 129 | [[package]] 130 | name = "autocfg" 131 | version = "1.3.0" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 134 | 135 | [[package]] 136 | name = "axum" 137 | version = "0.7.5" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" 140 | dependencies = [ 141 | "async-trait", 142 | "axum-core", 143 | "bytes", 144 | "futures-util", 145 | "http", 146 | "http-body", 147 | "http-body-util", 148 | "itoa", 149 | "matchit", 150 | "memchr", 151 | "mime", 152 | "percent-encoding", 153 | "pin-project-lite", 154 | "rustversion", 155 | "serde", 156 | "sync_wrapper 1.0.1", 157 | "tower", 158 | "tower-layer", 159 | "tower-service", 160 | ] 161 | 162 | [[package]] 163 | name = "axum-core" 164 | version = "0.4.3" 165 | source = "registry+https://github.com/rust-lang/crates.io-index" 166 | checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3" 167 | dependencies = [ 168 | "async-trait", 169 | "bytes", 170 | "futures-util", 171 | "http", 172 | "http-body", 173 | "http-body-util", 174 | "mime", 175 | "pin-project-lite", 176 | "rustversion", 177 | "sync_wrapper 0.1.2", 178 | "tower-layer", 179 | "tower-service", 180 | ] 181 | 182 | [[package]] 183 | name = "backtrace" 184 | version = "0.3.73" 185 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 186 | checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" 187 | dependencies = [ 188 | "addr2line", 189 | "cc", 190 | "cfg-if", 191 | "libc", 192 | "miniz_oxide", 193 | "object", 194 | "rustc-demangle", 195 | ] 196 | 197 | [[package]] 198 | name = "base64" 199 | version = "0.22.1" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" 202 | 203 | [[package]] 204 | name = "bincode" 205 | version = "1.3.3" 206 | source = "registry+https://github.com/rust-lang/crates.io-index" 207 | checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" 208 | dependencies = [ 209 | "serde", 210 | ] 211 | 212 | [[package]] 213 | name = "bitflags" 214 | version = "2.6.0" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" 217 | 218 | [[package]] 219 | name = "byteorder" 220 | version = "1.5.0" 221 | source = "registry+https://github.com/rust-lang/crates.io-index" 222 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 223 | 224 | [[package]] 225 | name = "bytes" 226 | version = "1.7.1" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" 229 | 230 | [[package]] 231 | name = "cc" 232 | version = "1.1.13" 233 | source = "registry+https://github.com/rust-lang/crates.io-index" 234 | checksum = "72db2f7947ecee9b03b510377e8bb9077afa27176fdbff55c51027e976fdcc48" 235 | dependencies = [ 236 | "shlex", 237 | ] 238 | 239 | [[package]] 240 | name = "cfg-if" 241 | version = "1.0.0" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 244 | 245 | [[package]] 246 | name = 
"clap" 247 | version = "4.5.16" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" 250 | dependencies = [ 251 | "clap_builder", 252 | ] 253 | 254 | [[package]] 255 | name = "clap_builder" 256 | version = "4.5.15" 257 | source = "registry+https://github.com/rust-lang/crates.io-index" 258 | checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" 259 | dependencies = [ 260 | "anstream", 261 | "anstyle", 262 | "clap_lex", 263 | "strsim", 264 | ] 265 | 266 | [[package]] 267 | name = "clap_lex" 268 | version = "0.7.2" 269 | source = "registry+https://github.com/rust-lang/crates.io-index" 270 | checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" 271 | 272 | [[package]] 273 | name = "colorchoice" 274 | version = "1.0.2" 275 | source = "registry+https://github.com/rust-lang/crates.io-index" 276 | checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" 277 | 278 | [[package]] 279 | name = "crossbeam-deque" 280 | version = "0.8.5" 281 | source = "registry+https://github.com/rust-lang/crates.io-index" 282 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" 283 | dependencies = [ 284 | "crossbeam-epoch", 285 | "crossbeam-utils", 286 | ] 287 | 288 | [[package]] 289 | name = "crossbeam-epoch" 290 | version = "0.9.18" 291 | source = "registry+https://github.com/rust-lang/crates.io-index" 292 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 293 | dependencies = [ 294 | "crossbeam-utils", 295 | ] 296 | 297 | [[package]] 298 | name = "crossbeam-utils" 299 | version = "0.8.20" 300 | source = "registry+https://github.com/rust-lang/crates.io-index" 301 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 302 | 303 | [[package]] 304 | name = "dotenv" 305 | version = "0.15.0" 306 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 307 | checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" 308 | 309 | [[package]] 310 | name = "either" 311 | version = "1.13.0" 312 | source = "registry+https://github.com/rust-lang/crates.io-index" 313 | checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" 314 | 315 | [[package]] 316 | name = "equivalent" 317 | version = "1.0.1" 318 | source = "registry+https://github.com/rust-lang/crates.io-index" 319 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" 320 | 321 | [[package]] 322 | name = "errno" 323 | version = "0.3.9" 324 | source = "registry+https://github.com/rust-lang/crates.io-index" 325 | checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" 326 | dependencies = [ 327 | "libc", 328 | "windows-sys 0.52.0", 329 | ] 330 | 331 | [[package]] 332 | name = "fastrand" 333 | version = "2.1.0" 334 | source = "registry+https://github.com/rust-lang/crates.io-index" 335 | checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" 336 | 337 | [[package]] 338 | name = "fixedbitset" 339 | version = "0.4.2" 340 | source = "registry+https://github.com/rust-lang/crates.io-index" 341 | checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" 342 | 343 | [[package]] 344 | name = "fnv" 345 | version = "1.0.7" 346 | source = "registry+https://github.com/rust-lang/crates.io-index" 347 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 348 | 349 | [[package]] 350 | name = "foldhash" 351 | version = "0.1.3" 352 | source = "registry+https://github.com/rust-lang/crates.io-index" 353 | checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" 354 | 355 | [[package]] 356 | name = "futures-channel" 357 | version = "0.3.30" 358 | source = "registry+https://github.com/rust-lang/crates.io-index" 359 | checksum = 
"eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" 360 | dependencies = [ 361 | "futures-core", 362 | ] 363 | 364 | [[package]] 365 | name = "futures-core" 366 | version = "0.3.30" 367 | source = "registry+https://github.com/rust-lang/crates.io-index" 368 | checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" 369 | 370 | [[package]] 371 | name = "futures-sink" 372 | version = "0.3.30" 373 | source = "registry+https://github.com/rust-lang/crates.io-index" 374 | checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" 375 | 376 | [[package]] 377 | name = "futures-task" 378 | version = "0.3.30" 379 | source = "registry+https://github.com/rust-lang/crates.io-index" 380 | checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" 381 | 382 | [[package]] 383 | name = "futures-util" 384 | version = "0.3.30" 385 | source = "registry+https://github.com/rust-lang/crates.io-index" 386 | checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" 387 | dependencies = [ 388 | "futures-core", 389 | "futures-task", 390 | "pin-project-lite", 391 | "pin-utils", 392 | ] 393 | 394 | [[package]] 395 | name = "getrandom" 396 | version = "0.2.15" 397 | source = "registry+https://github.com/rust-lang/crates.io-index" 398 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 399 | dependencies = [ 400 | "cfg-if", 401 | "libc", 402 | "wasi", 403 | ] 404 | 405 | [[package]] 406 | name = "gimli" 407 | version = "0.29.0" 408 | source = "registry+https://github.com/rust-lang/crates.io-index" 409 | checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" 410 | 411 | [[package]] 412 | name = "h2" 413 | version = "0.4.6" 414 | source = "registry+https://github.com/rust-lang/crates.io-index" 415 | checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" 416 | dependencies = [ 417 | "atomic-waker", 418 | "bytes", 419 | "fnv", 420 | 
"futures-core", 421 | "futures-sink", 422 | "http", 423 | "indexmap 2.4.0", 424 | "slab", 425 | "tokio", 426 | "tokio-util", 427 | "tracing", 428 | ] 429 | 430 | [[package]] 431 | name = "hashbrown" 432 | version = "0.12.3" 433 | source = "registry+https://github.com/rust-lang/crates.io-index" 434 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 435 | 436 | [[package]] 437 | name = "hashbrown" 438 | version = "0.14.5" 439 | source = "registry+https://github.com/rust-lang/crates.io-index" 440 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 441 | 442 | [[package]] 443 | name = "hashbrown" 444 | version = "0.15.0" 445 | source = "registry+https://github.com/rust-lang/crates.io-index" 446 | checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" 447 | dependencies = [ 448 | "allocator-api2", 449 | "equivalent", 450 | "foldhash", 451 | "rayon", 452 | "serde", 453 | ] 454 | 455 | [[package]] 456 | name = "heck" 457 | version = "0.5.0" 458 | source = "registry+https://github.com/rust-lang/crates.io-index" 459 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 460 | 461 | [[package]] 462 | name = "hermit-abi" 463 | version = "0.3.9" 464 | source = "registry+https://github.com/rust-lang/crates.io-index" 465 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 466 | 467 | [[package]] 468 | name = "http" 469 | version = "1.1.0" 470 | source = "registry+https://github.com/rust-lang/crates.io-index" 471 | checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" 472 | dependencies = [ 473 | "bytes", 474 | "fnv", 475 | "itoa", 476 | ] 477 | 478 | [[package]] 479 | name = "http-body" 480 | version = "1.0.1" 481 | source = "registry+https://github.com/rust-lang/crates.io-index" 482 | checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" 483 | dependencies = [ 484 | "bytes", 485 | "http", 486 | ] 487 
| 488 | [[package]] 489 | name = "http-body-util" 490 | version = "0.1.2" 491 | source = "registry+https://github.com/rust-lang/crates.io-index" 492 | checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" 493 | dependencies = [ 494 | "bytes", 495 | "futures-util", 496 | "http", 497 | "http-body", 498 | "pin-project-lite", 499 | ] 500 | 501 | [[package]] 502 | name = "httparse" 503 | version = "1.9.4" 504 | source = "registry+https://github.com/rust-lang/crates.io-index" 505 | checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" 506 | 507 | [[package]] 508 | name = "httpdate" 509 | version = "1.0.3" 510 | source = "registry+https://github.com/rust-lang/crates.io-index" 511 | checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" 512 | 513 | [[package]] 514 | name = "hyper" 515 | version = "1.4.1" 516 | source = "registry+https://github.com/rust-lang/crates.io-index" 517 | checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" 518 | dependencies = [ 519 | "bytes", 520 | "futures-channel", 521 | "futures-util", 522 | "h2", 523 | "http", 524 | "http-body", 525 | "httparse", 526 | "httpdate", 527 | "itoa", 528 | "pin-project-lite", 529 | "smallvec", 530 | "tokio", 531 | "want", 532 | ] 533 | 534 | [[package]] 535 | name = "hyper-timeout" 536 | version = "0.5.1" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" 539 | dependencies = [ 540 | "hyper", 541 | "hyper-util", 542 | "pin-project-lite", 543 | "tokio", 544 | "tower-service", 545 | ] 546 | 547 | [[package]] 548 | name = "hyper-util" 549 | version = "0.1.7" 550 | source = "registry+https://github.com/rust-lang/crates.io-index" 551 | checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" 552 | dependencies = [ 553 | "bytes", 554 | "futures-channel", 555 | "futures-util", 556 | "http", 557 | 
"http-body", 558 | "hyper", 559 | "pin-project-lite", 560 | "socket2", 561 | "tokio", 562 | "tower", 563 | "tower-service", 564 | "tracing", 565 | ] 566 | 567 | [[package]] 568 | name = "indexmap" 569 | version = "1.9.3" 570 | source = "registry+https://github.com/rust-lang/crates.io-index" 571 | checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" 572 | dependencies = [ 573 | "autocfg", 574 | "hashbrown 0.12.3", 575 | ] 576 | 577 | [[package]] 578 | name = "indexmap" 579 | version = "2.4.0" 580 | source = "registry+https://github.com/rust-lang/crates.io-index" 581 | checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" 582 | dependencies = [ 583 | "equivalent", 584 | "hashbrown 0.14.5", 585 | ] 586 | 587 | [[package]] 588 | name = "is_terminal_polyfill" 589 | version = "1.70.1" 590 | source = "registry+https://github.com/rust-lang/crates.io-index" 591 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 592 | 593 | [[package]] 594 | name = "itertools" 595 | version = "0.13.0" 596 | source = "registry+https://github.com/rust-lang/crates.io-index" 597 | checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 598 | dependencies = [ 599 | "either", 600 | ] 601 | 602 | [[package]] 603 | name = "itoa" 604 | version = "1.0.11" 605 | source = "registry+https://github.com/rust-lang/crates.io-index" 606 | checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" 607 | 608 | [[package]] 609 | name = "lazy_static" 610 | version = "1.5.0" 611 | source = "registry+https://github.com/rust-lang/crates.io-index" 612 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 613 | 614 | [[package]] 615 | name = "libc" 616 | version = "0.2.156" 617 | source = "registry+https://github.com/rust-lang/crates.io-index" 618 | checksum = "a5f43f184355eefb8d17fc948dbecf6c13be3c141f20d834ae842193a448c72a" 619 | 620 | [[package]] 621 | name = 
"linux-raw-sys" 622 | version = "0.4.14" 623 | source = "registry+https://github.com/rust-lang/crates.io-index" 624 | checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" 625 | 626 | [[package]] 627 | name = "log" 628 | version = "0.4.22" 629 | source = "registry+https://github.com/rust-lang/crates.io-index" 630 | checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" 631 | 632 | [[package]] 633 | name = "matchit" 634 | version = "0.7.3" 635 | source = "registry+https://github.com/rust-lang/crates.io-index" 636 | checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" 637 | 638 | [[package]] 639 | name = "memchr" 640 | version = "2.7.4" 641 | source = "registry+https://github.com/rust-lang/crates.io-index" 642 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 643 | 644 | [[package]] 645 | name = "mime" 646 | version = "0.3.17" 647 | source = "registry+https://github.com/rust-lang/crates.io-index" 648 | checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" 649 | 650 | [[package]] 651 | name = "miniz_oxide" 652 | version = "0.7.4" 653 | source = "registry+https://github.com/rust-lang/crates.io-index" 654 | checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" 655 | dependencies = [ 656 | "adler", 657 | ] 658 | 659 | [[package]] 660 | name = "mio" 661 | version = "1.0.2" 662 | source = "registry+https://github.com/rust-lang/crates.io-index" 663 | checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" 664 | dependencies = [ 665 | "hermit-abi", 666 | "libc", 667 | "wasi", 668 | "windows-sys 0.52.0", 669 | ] 670 | 671 | [[package]] 672 | name = "multimap" 673 | version = "0.10.0" 674 | source = "registry+https://github.com/rust-lang/crates.io-index" 675 | checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" 676 | 677 | [[package]] 678 | name = "nu-ansi-term" 679 | version = 
"0.46.0" 680 | source = "registry+https://github.com/rust-lang/crates.io-index" 681 | checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" 682 | dependencies = [ 683 | "overload", 684 | "winapi", 685 | ] 686 | 687 | [[package]] 688 | name = "oasysdb" 689 | version = "0.8.0" 690 | dependencies = [ 691 | "bincode", 692 | "clap", 693 | "dotenv", 694 | "hashbrown 0.15.0", 695 | "prost", 696 | "rand", 697 | "rayon", 698 | "serde", 699 | "simsimd", 700 | "tokio", 701 | "tonic", 702 | "tonic-build", 703 | "tracing", 704 | "tracing-subscriber", 705 | "uuid", 706 | ] 707 | 708 | [[package]] 709 | name = "object" 710 | version = "0.36.3" 711 | source = "registry+https://github.com/rust-lang/crates.io-index" 712 | checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" 713 | dependencies = [ 714 | "memchr", 715 | ] 716 | 717 | [[package]] 718 | name = "once_cell" 719 | version = "1.19.0" 720 | source = "registry+https://github.com/rust-lang/crates.io-index" 721 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 722 | 723 | [[package]] 724 | name = "overload" 725 | version = "0.1.1" 726 | source = "registry+https://github.com/rust-lang/crates.io-index" 727 | checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" 728 | 729 | [[package]] 730 | name = "percent-encoding" 731 | version = "2.3.1" 732 | source = "registry+https://github.com/rust-lang/crates.io-index" 733 | checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" 734 | 735 | [[package]] 736 | name = "petgraph" 737 | version = "0.6.5" 738 | source = "registry+https://github.com/rust-lang/crates.io-index" 739 | checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" 740 | dependencies = [ 741 | "fixedbitset", 742 | "indexmap 2.4.0", 743 | ] 744 | 745 | [[package]] 746 | name = "pin-project" 747 | version = "1.1.5" 748 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 749 | checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" 750 | dependencies = [ 751 | "pin-project-internal", 752 | ] 753 | 754 | [[package]] 755 | name = "pin-project-internal" 756 | version = "1.1.5" 757 | source = "registry+https://github.com/rust-lang/crates.io-index" 758 | checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" 759 | dependencies = [ 760 | "proc-macro2", 761 | "quote", 762 | "syn", 763 | ] 764 | 765 | [[package]] 766 | name = "pin-project-lite" 767 | version = "0.2.14" 768 | source = "registry+https://github.com/rust-lang/crates.io-index" 769 | checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" 770 | 771 | [[package]] 772 | name = "pin-utils" 773 | version = "0.1.0" 774 | source = "registry+https://github.com/rust-lang/crates.io-index" 775 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 776 | 777 | [[package]] 778 | name = "ppv-lite86" 779 | version = "0.2.20" 780 | source = "registry+https://github.com/rust-lang/crates.io-index" 781 | checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" 782 | dependencies = [ 783 | "zerocopy", 784 | ] 785 | 786 | [[package]] 787 | name = "prettyplease" 788 | version = "0.2.20" 789 | source = "registry+https://github.com/rust-lang/crates.io-index" 790 | checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" 791 | dependencies = [ 792 | "proc-macro2", 793 | "syn", 794 | ] 795 | 796 | [[package]] 797 | name = "proc-macro2" 798 | version = "1.0.86" 799 | source = "registry+https://github.com/rust-lang/crates.io-index" 800 | checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" 801 | dependencies = [ 802 | "unicode-ident", 803 | ] 804 | 805 | [[package]] 806 | name = "prost" 807 | version = "0.13.1" 808 | source = "registry+https://github.com/rust-lang/crates.io-index" 809 | 
checksum = "e13db3d3fde688c61e2446b4d843bc27a7e8af269a69440c0308021dc92333cc" 810 | dependencies = [ 811 | "bytes", 812 | "prost-derive", 813 | ] 814 | 815 | [[package]] 816 | name = "prost-build" 817 | version = "0.13.1" 818 | source = "registry+https://github.com/rust-lang/crates.io-index" 819 | checksum = "5bb182580f71dd070f88d01ce3de9f4da5021db7115d2e1c3605a754153b77c1" 820 | dependencies = [ 821 | "bytes", 822 | "heck", 823 | "itertools", 824 | "log", 825 | "multimap", 826 | "once_cell", 827 | "petgraph", 828 | "prettyplease", 829 | "prost", 830 | "prost-types", 831 | "regex", 832 | "syn", 833 | "tempfile", 834 | ] 835 | 836 | [[package]] 837 | name = "prost-derive" 838 | version = "0.13.1" 839 | source = "registry+https://github.com/rust-lang/crates.io-index" 840 | checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" 841 | dependencies = [ 842 | "anyhow", 843 | "itertools", 844 | "proc-macro2", 845 | "quote", 846 | "syn", 847 | ] 848 | 849 | [[package]] 850 | name = "prost-types" 851 | version = "0.13.1" 852 | source = "registry+https://github.com/rust-lang/crates.io-index" 853 | checksum = "cee5168b05f49d4b0ca581206eb14a7b22fafd963efe729ac48eb03266e25cc2" 854 | dependencies = [ 855 | "prost", 856 | ] 857 | 858 | [[package]] 859 | name = "quote" 860 | version = "1.0.36" 861 | source = "registry+https://github.com/rust-lang/crates.io-index" 862 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 863 | dependencies = [ 864 | "proc-macro2", 865 | ] 866 | 867 | [[package]] 868 | name = "rand" 869 | version = "0.8.5" 870 | source = "registry+https://github.com/rust-lang/crates.io-index" 871 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 872 | dependencies = [ 873 | "libc", 874 | "rand_chacha", 875 | "rand_core", 876 | ] 877 | 878 | [[package]] 879 | name = "rand_chacha" 880 | version = "0.3.1" 881 | source = "registry+https://github.com/rust-lang/crates.io-index" 882 | checksum = 
"e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 883 | dependencies = [ 884 | "ppv-lite86", 885 | "rand_core", 886 | ] 887 | 888 | [[package]] 889 | name = "rand_core" 890 | version = "0.6.4" 891 | source = "registry+https://github.com/rust-lang/crates.io-index" 892 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 893 | dependencies = [ 894 | "getrandom", 895 | ] 896 | 897 | [[package]] 898 | name = "rayon" 899 | version = "1.10.0" 900 | source = "registry+https://github.com/rust-lang/crates.io-index" 901 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 902 | dependencies = [ 903 | "either", 904 | "rayon-core", 905 | ] 906 | 907 | [[package]] 908 | name = "rayon-core" 909 | version = "1.12.1" 910 | source = "registry+https://github.com/rust-lang/crates.io-index" 911 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 912 | dependencies = [ 913 | "crossbeam-deque", 914 | "crossbeam-utils", 915 | ] 916 | 917 | [[package]] 918 | name = "regex" 919 | version = "1.10.6" 920 | source = "registry+https://github.com/rust-lang/crates.io-index" 921 | checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" 922 | dependencies = [ 923 | "aho-corasick", 924 | "memchr", 925 | "regex-automata", 926 | "regex-syntax", 927 | ] 928 | 929 | [[package]] 930 | name = "regex-automata" 931 | version = "0.4.7" 932 | source = "registry+https://github.com/rust-lang/crates.io-index" 933 | checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" 934 | dependencies = [ 935 | "aho-corasick", 936 | "memchr", 937 | "regex-syntax", 938 | ] 939 | 940 | [[package]] 941 | name = "regex-syntax" 942 | version = "0.8.4" 943 | source = "registry+https://github.com/rust-lang/crates.io-index" 944 | checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" 945 | 946 | [[package]] 947 | name = "rustc-demangle" 948 | version = "0.1.24" 949 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 950 | checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" 951 | 952 | [[package]] 953 | name = "rustix" 954 | version = "0.38.34" 955 | source = "registry+https://github.com/rust-lang/crates.io-index" 956 | checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" 957 | dependencies = [ 958 | "bitflags", 959 | "errno", 960 | "libc", 961 | "linux-raw-sys", 962 | "windows-sys 0.52.0", 963 | ] 964 | 965 | [[package]] 966 | name = "rustversion" 967 | version = "1.0.17" 968 | source = "registry+https://github.com/rust-lang/crates.io-index" 969 | checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" 970 | 971 | [[package]] 972 | name = "serde" 973 | version = "1.0.208" 974 | source = "registry+https://github.com/rust-lang/crates.io-index" 975 | checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" 976 | dependencies = [ 977 | "serde_derive", 978 | ] 979 | 980 | [[package]] 981 | name = "serde_derive" 982 | version = "1.0.208" 983 | source = "registry+https://github.com/rust-lang/crates.io-index" 984 | checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" 985 | dependencies = [ 986 | "proc-macro2", 987 | "quote", 988 | "syn", 989 | ] 990 | 991 | [[package]] 992 | name = "sharded-slab" 993 | version = "0.1.7" 994 | source = "registry+https://github.com/rust-lang/crates.io-index" 995 | checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" 996 | dependencies = [ 997 | "lazy_static", 998 | ] 999 | 1000 | [[package]] 1001 | name = "shlex" 1002 | version = "1.3.0" 1003 | source = "registry+https://github.com/rust-lang/crates.io-index" 1004 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 1005 | 1006 | [[package]] 1007 | name = "simsimd" 1008 | version = "5.0.1" 1009 | source = "registry+https://github.com/rust-lang/crates.io-index" 1010 | 
checksum = "9c7381de792480c6f8e1dff0012206413dfce923421bcdd4ba927f5c30681ad7" 1011 | dependencies = [ 1012 | "cc", 1013 | ] 1014 | 1015 | [[package]] 1016 | name = "slab" 1017 | version = "0.4.9" 1018 | source = "registry+https://github.com/rust-lang/crates.io-index" 1019 | checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" 1020 | dependencies = [ 1021 | "autocfg", 1022 | ] 1023 | 1024 | [[package]] 1025 | name = "smallvec" 1026 | version = "1.13.2" 1027 | source = "registry+https://github.com/rust-lang/crates.io-index" 1028 | checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" 1029 | 1030 | [[package]] 1031 | name = "socket2" 1032 | version = "0.5.7" 1033 | source = "registry+https://github.com/rust-lang/crates.io-index" 1034 | checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" 1035 | dependencies = [ 1036 | "libc", 1037 | "windows-sys 0.52.0", 1038 | ] 1039 | 1040 | [[package]] 1041 | name = "strsim" 1042 | version = "0.11.1" 1043 | source = "registry+https://github.com/rust-lang/crates.io-index" 1044 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 1045 | 1046 | [[package]] 1047 | name = "syn" 1048 | version = "2.0.75" 1049 | source = "registry+https://github.com/rust-lang/crates.io-index" 1050 | checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9" 1051 | dependencies = [ 1052 | "proc-macro2", 1053 | "quote", 1054 | "unicode-ident", 1055 | ] 1056 | 1057 | [[package]] 1058 | name = "sync_wrapper" 1059 | version = "0.1.2" 1060 | source = "registry+https://github.com/rust-lang/crates.io-index" 1061 | checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" 1062 | 1063 | [[package]] 1064 | name = "sync_wrapper" 1065 | version = "1.0.1" 1066 | source = "registry+https://github.com/rust-lang/crates.io-index" 1067 | checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" 1068 | 1069 | 
[[package]] 1070 | name = "tempfile" 1071 | version = "3.12.0" 1072 | source = "registry+https://github.com/rust-lang/crates.io-index" 1073 | checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" 1074 | dependencies = [ 1075 | "cfg-if", 1076 | "fastrand", 1077 | "once_cell", 1078 | "rustix", 1079 | "windows-sys 0.59.0", 1080 | ] 1081 | 1082 | [[package]] 1083 | name = "thread_local" 1084 | version = "1.1.8" 1085 | source = "registry+https://github.com/rust-lang/crates.io-index" 1086 | checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" 1087 | dependencies = [ 1088 | "cfg-if", 1089 | "once_cell", 1090 | ] 1091 | 1092 | [[package]] 1093 | name = "tokio" 1094 | version = "1.39.3" 1095 | source = "registry+https://github.com/rust-lang/crates.io-index" 1096 | checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" 1097 | dependencies = [ 1098 | "backtrace", 1099 | "bytes", 1100 | "libc", 1101 | "mio", 1102 | "pin-project-lite", 1103 | "socket2", 1104 | "tokio-macros", 1105 | "windows-sys 0.52.0", 1106 | ] 1107 | 1108 | [[package]] 1109 | name = "tokio-macros" 1110 | version = "2.4.0" 1111 | source = "registry+https://github.com/rust-lang/crates.io-index" 1112 | checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" 1113 | dependencies = [ 1114 | "proc-macro2", 1115 | "quote", 1116 | "syn", 1117 | ] 1118 | 1119 | [[package]] 1120 | name = "tokio-stream" 1121 | version = "0.1.15" 1122 | source = "registry+https://github.com/rust-lang/crates.io-index" 1123 | checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" 1124 | dependencies = [ 1125 | "futures-core", 1126 | "pin-project-lite", 1127 | "tokio", 1128 | ] 1129 | 1130 | [[package]] 1131 | name = "tokio-util" 1132 | version = "0.7.11" 1133 | source = "registry+https://github.com/rust-lang/crates.io-index" 1134 | checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" 1135 | 
dependencies = [ 1136 | "bytes", 1137 | "futures-core", 1138 | "futures-sink", 1139 | "pin-project-lite", 1140 | "tokio", 1141 | ] 1142 | 1143 | [[package]] 1144 | name = "tonic" 1145 | version = "0.12.1" 1146 | source = "registry+https://github.com/rust-lang/crates.io-index" 1147 | checksum = "38659f4a91aba8598d27821589f5db7dddd94601e7a01b1e485a50e5484c7401" 1148 | dependencies = [ 1149 | "async-stream", 1150 | "async-trait", 1151 | "axum", 1152 | "base64", 1153 | "bytes", 1154 | "h2", 1155 | "http", 1156 | "http-body", 1157 | "http-body-util", 1158 | "hyper", 1159 | "hyper-timeout", 1160 | "hyper-util", 1161 | "percent-encoding", 1162 | "pin-project", 1163 | "prost", 1164 | "socket2", 1165 | "tokio", 1166 | "tokio-stream", 1167 | "tower", 1168 | "tower-layer", 1169 | "tower-service", 1170 | "tracing", 1171 | ] 1172 | 1173 | [[package]] 1174 | name = "tonic-build" 1175 | version = "0.12.1" 1176 | source = "registry+https://github.com/rust-lang/crates.io-index" 1177 | checksum = "568392c5a2bd0020723e3f387891176aabafe36fd9fcd074ad309dfa0c8eb964" 1178 | dependencies = [ 1179 | "prettyplease", 1180 | "proc-macro2", 1181 | "prost-build", 1182 | "quote", 1183 | "syn", 1184 | ] 1185 | 1186 | [[package]] 1187 | name = "tower" 1188 | version = "0.4.13" 1189 | source = "registry+https://github.com/rust-lang/crates.io-index" 1190 | checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" 1191 | dependencies = [ 1192 | "futures-core", 1193 | "futures-util", 1194 | "indexmap 1.9.3", 1195 | "pin-project", 1196 | "pin-project-lite", 1197 | "rand", 1198 | "slab", 1199 | "tokio", 1200 | "tokio-util", 1201 | "tower-layer", 1202 | "tower-service", 1203 | "tracing", 1204 | ] 1205 | 1206 | [[package]] 1207 | name = "tower-layer" 1208 | version = "0.3.3" 1209 | source = "registry+https://github.com/rust-lang/crates.io-index" 1210 | checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" 1211 | 1212 | [[package]] 1213 | name = "tower-service" 
1214 | version = "0.3.3" 1215 | source = "registry+https://github.com/rust-lang/crates.io-index" 1216 | checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" 1217 | 1218 | [[package]] 1219 | name = "tracing" 1220 | version = "0.1.40" 1221 | source = "registry+https://github.com/rust-lang/crates.io-index" 1222 | checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" 1223 | dependencies = [ 1224 | "pin-project-lite", 1225 | "tracing-attributes", 1226 | "tracing-core", 1227 | ] 1228 | 1229 | [[package]] 1230 | name = "tracing-attributes" 1231 | version = "0.1.27" 1232 | source = "registry+https://github.com/rust-lang/crates.io-index" 1233 | checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" 1234 | dependencies = [ 1235 | "proc-macro2", 1236 | "quote", 1237 | "syn", 1238 | ] 1239 | 1240 | [[package]] 1241 | name = "tracing-core" 1242 | version = "0.1.32" 1243 | source = "registry+https://github.com/rust-lang/crates.io-index" 1244 | checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" 1245 | dependencies = [ 1246 | "once_cell", 1247 | "valuable", 1248 | ] 1249 | 1250 | [[package]] 1251 | name = "tracing-log" 1252 | version = "0.2.0" 1253 | source = "registry+https://github.com/rust-lang/crates.io-index" 1254 | checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" 1255 | dependencies = [ 1256 | "log", 1257 | "once_cell", 1258 | "tracing-core", 1259 | ] 1260 | 1261 | [[package]] 1262 | name = "tracing-subscriber" 1263 | version = "0.3.18" 1264 | source = "registry+https://github.com/rust-lang/crates.io-index" 1265 | checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" 1266 | dependencies = [ 1267 | "nu-ansi-term", 1268 | "sharded-slab", 1269 | "smallvec", 1270 | "thread_local", 1271 | "tracing-core", 1272 | "tracing-log", 1273 | ] 1274 | 1275 | [[package]] 1276 | name = "try-lock" 1277 | version = "0.2.5" 1278 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1279 | checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" 1280 | 1281 | [[package]] 1282 | name = "unicode-ident" 1283 | version = "1.0.12" 1284 | source = "registry+https://github.com/rust-lang/crates.io-index" 1285 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 1286 | 1287 | [[package]] 1288 | name = "utf8parse" 1289 | version = "0.2.2" 1290 | source = "registry+https://github.com/rust-lang/crates.io-index" 1291 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1292 | 1293 | [[package]] 1294 | name = "uuid" 1295 | version = "1.10.0" 1296 | source = "registry+https://github.com/rust-lang/crates.io-index" 1297 | checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" 1298 | dependencies = [ 1299 | "getrandom", 1300 | "serde", 1301 | ] 1302 | 1303 | [[package]] 1304 | name = "valuable" 1305 | version = "0.1.0" 1306 | source = "registry+https://github.com/rust-lang/crates.io-index" 1307 | checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" 1308 | 1309 | [[package]] 1310 | name = "want" 1311 | version = "0.3.1" 1312 | source = "registry+https://github.com/rust-lang/crates.io-index" 1313 | checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" 1314 | dependencies = [ 1315 | "try-lock", 1316 | ] 1317 | 1318 | [[package]] 1319 | name = "wasi" 1320 | version = "0.11.0+wasi-snapshot-preview1" 1321 | source = "registry+https://github.com/rust-lang/crates.io-index" 1322 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1323 | 1324 | [[package]] 1325 | name = "winapi" 1326 | version = "0.3.9" 1327 | source = "registry+https://github.com/rust-lang/crates.io-index" 1328 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1329 | dependencies = [ 1330 | "winapi-i686-pc-windows-gnu", 1331 | 
"winapi-x86_64-pc-windows-gnu", 1332 | ] 1333 | 1334 | [[package]] 1335 | name = "winapi-i686-pc-windows-gnu" 1336 | version = "0.4.0" 1337 | source = "registry+https://github.com/rust-lang/crates.io-index" 1338 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1339 | 1340 | [[package]] 1341 | name = "winapi-x86_64-pc-windows-gnu" 1342 | version = "0.4.0" 1343 | source = "registry+https://github.com/rust-lang/crates.io-index" 1344 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1345 | 1346 | [[package]] 1347 | name = "windows-sys" 1348 | version = "0.52.0" 1349 | source = "registry+https://github.com/rust-lang/crates.io-index" 1350 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 1351 | dependencies = [ 1352 | "windows-targets", 1353 | ] 1354 | 1355 | [[package]] 1356 | name = "windows-sys" 1357 | version = "0.59.0" 1358 | source = "registry+https://github.com/rust-lang/crates.io-index" 1359 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 1360 | dependencies = [ 1361 | "windows-targets", 1362 | ] 1363 | 1364 | [[package]] 1365 | name = "windows-targets" 1366 | version = "0.52.6" 1367 | source = "registry+https://github.com/rust-lang/crates.io-index" 1368 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1369 | dependencies = [ 1370 | "windows_aarch64_gnullvm", 1371 | "windows_aarch64_msvc", 1372 | "windows_i686_gnu", 1373 | "windows_i686_gnullvm", 1374 | "windows_i686_msvc", 1375 | "windows_x86_64_gnu", 1376 | "windows_x86_64_gnullvm", 1377 | "windows_x86_64_msvc", 1378 | ] 1379 | 1380 | [[package]] 1381 | name = "windows_aarch64_gnullvm" 1382 | version = "0.52.6" 1383 | source = "registry+https://github.com/rust-lang/crates.io-index" 1384 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1385 | 1386 | [[package]] 1387 | name = "windows_aarch64_msvc" 1388 | version = "0.52.6" 1389 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 1390 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1391 | 1392 | [[package]] 1393 | name = "windows_i686_gnu" 1394 | version = "0.52.6" 1395 | source = "registry+https://github.com/rust-lang/crates.io-index" 1396 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1397 | 1398 | [[package]] 1399 | name = "windows_i686_gnullvm" 1400 | version = "0.52.6" 1401 | source = "registry+https://github.com/rust-lang/crates.io-index" 1402 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 1403 | 1404 | [[package]] 1405 | name = "windows_i686_msvc" 1406 | version = "0.52.6" 1407 | source = "registry+https://github.com/rust-lang/crates.io-index" 1408 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1409 | 1410 | [[package]] 1411 | name = "windows_x86_64_gnu" 1412 | version = "0.52.6" 1413 | source = "registry+https://github.com/rust-lang/crates.io-index" 1414 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1415 | 1416 | [[package]] 1417 | name = "windows_x86_64_gnullvm" 1418 | version = "0.52.6" 1419 | source = "registry+https://github.com/rust-lang/crates.io-index" 1420 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1421 | 1422 | [[package]] 1423 | name = "windows_x86_64_msvc" 1424 | version = "0.52.6" 1425 | source = "registry+https://github.com/rust-lang/crates.io-index" 1426 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1427 | 1428 | [[package]] 1429 | name = "zerocopy" 1430 | version = "0.7.35" 1431 | source = "registry+https://github.com/rust-lang/crates.io-index" 1432 | checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" 1433 | dependencies = [ 1434 | "byteorder", 1435 | "zerocopy-derive", 1436 | ] 1437 | 1438 | [[package]] 1439 | name = "zerocopy-derive" 1440 | 
version = "0.7.35" 1441 | source = "registry+https://github.com/rust-lang/crates.io-index" 1442 | checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" 1443 | dependencies = [ 1444 | "proc-macro2", 1445 | "quote", 1446 | "syn", 1447 | ] 1448 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "oasysdb" 3 | version = "0.8.0" 4 | edition = "2021" 5 | authors = ["Edwin Kys"] 6 | 7 | [dependencies] 8 | tokio = { version = "1.39.3", features = ["rt-multi-thread", "macros"] } 9 | hashbrown = { version = "0.15.0", features = ["serde", "rayon"] } 10 | uuid = { version = "1.10.0", features = ["v4", "serde"] } 11 | clap = "4.5.16" 12 | 13 | # gRPC-related dependencies 14 | tonic = "0.12.1" 15 | prost = "0.13.1" 16 | 17 | # Serialization-related dependencies 18 | serde = { version = "1.0.208", features = ["derive"] } 19 | bincode = "1.3.3" 20 | 21 | # Parallelism-related dependencies 22 | simsimd = "5.0.1" 23 | rayon = "1.10.0" 24 | 25 | # Logging-related dependencies 26 | tracing = "0.1.40" 27 | tracing-subscriber = "0.3.18" 28 | 29 | # Utility dependencies 30 | rand = "0.8.5" 31 | dotenv = "0.15.0" 32 | 33 | [build-dependencies] 34 | tonic-build = "0.12" 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![OasysDB Use Case](https://odb-assets.s3.amazonaws.com/banners/0.7.0.png) 2 | 3 | [![GitHub Stars](https://img.shields.io/github/stars/oasysai/oasysdb?style=for-the-badge&logo=github&logoColor=%23000000&labelColor=%23fcd34d&color=%236b7280)](https://github.com/oasysai/oasysdb) 4 | [![Crates.io](https://img.shields.io/crates/d/oasysdb?style=for-the-badge&logo=rust&logoColor=%23000&label=crates.io&labelColor=%23fdba74&color=%236b7280)](https://crates.io/crates/oasysdb) 5 | 6 | ## Notice 7 | 8 | This repository is not currently maintained. I initially created this project to 9 | learn more about databases and Rust. As time went on, I actually learned from 10 | this project and the people who used it. Unfortunately, most open-source 11 | projects don't generate enough revenue to sustain themselves. 12 | 13 | I'm currently looking for a new opportunity to work as a **Software Engineer in 14 | AI Infrastructure**. If you have or know someone who has an open position, 15 | please let me know. I'm open to working remotely or anywhere in the United States. 16 | 17 | You can reach me via [LinkedIn](https://www.linkedin.com/in/edwinkys). 18 | 19 | If you're interested in taking over this project, please let me know. I'll be 20 | happy to discuss the details with you. Other than that, I'll just leave this 21 | project as is for historical purposes. 22 | 23 | Thank you all for your support and understanding. It's been a great journey!
24 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use tonic_build::compile_protos; 3 | 4 | fn main() -> Result<(), Box> { 5 | compile_protos("protos/database.proto")?; 6 | Ok(()) 7 | } 8 | -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | docs.oasysdb.com 2 | -------------------------------------------------------------------------------- /docs/assets/favicon64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edwinkys/oasysdb/9aebb4426dae35452e468967db50f6097b280363/docs/assets/favicon64.png -------------------------------------------------------------------------------- /docs/assets/wordmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edwinkys/oasysdb/9aebb4426dae35452e468967db50f6097b280363/docs/assets/wordmark.png -------------------------------------------------------------------------------- /docs/blog/.authors.yml: -------------------------------------------------------------------------------- 1 | authors: 2 | edwinkys: 3 | name: Edwin Kys 4 | description: Author of OasysDB 5 | avatar: https://avatars.githubusercontent.com/u/51223060?v=4 6 | -------------------------------------------------------------------------------- /docs/blog/index.md: -------------------------------------------------------------------------------- 1 | # Latest Posts 2 | 3 | Bite-sized blog posts about generative AI, machine learning, and more. 
4 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v0.7.2 4 | 5 | ### What's Changed 6 | 7 | This release includes a fix for the file system issue on Windows which 8 | occurs when the default temporary directory is in a different drive than the 9 | current working directory. This issue is fixed by creating a temporary directory 10 | in the root of the database directory. 11 | 12 | ### Contributors 13 | 14 | - @edwinkys 15 | 16 | ### Full Changelog 17 | 18 | [v0.7.1...v0.7.2](https://github.com/oasysai/oasysdb/compare/v0.7.1...v0.7.2) 19 | 20 | ## v0.7.1 21 | 22 | ### What's Changed 23 | 24 | This release includes a low-level CRUD API for the index implementation from the 25 | Database layer. Once the index is built, when necessary, you can use the CRUD 26 | API to manage the index data directly. This API allows you to perform the 27 | following operations: 28 | 29 | - Insert new records into the index. 30 | - Update existing records in the index. 31 | - Delete records from the index. 32 | 33 | ### Contributors 34 | 35 | - @edwinkys 36 | 37 | ### Full Changelog 38 | 39 | [v0.7.0...v0.7.1](https://github.com/oasysai/oasysdb/compare/v0.7.0...v0.7.1) 40 | 41 | ## v0.7.0 42 | 43 | ### What's Changed 44 | 45 | OasysDB v0.7.0 is a major release that includes a complete overhaul of the 46 | system. Instead of being a dedicated vector database, OasysDB is now a hybrid 47 | vector database that integrates with SQL databases such as SQLite and PostgreSQL 48 | which you can configure to store the vector records. This approach gives various 49 | advantages such as: 50 | 51 | - Reliability and durability of the data due to SQL database ACID properties. 52 | - Separation of vector storage and computation allowing you to scale the system 53 | independently.
54 | 55 | These are some of the key changes in this release: 56 | 57 | - **SQL Storage Layer**: OasysDB can be configured to source vector records from 58 | a SQL database such as SQLite or PostgreSQL. 59 | - **Multi-index Support**: OasysDB can support multiple indices for the same SQL 60 | table allowing users to improve the search performance. 61 | - **Pre-filtering**: OasysDB can pre-filter the vector records from SQL tables 62 | based on the metadata before inserting them into the index. 63 | - **Configurable Algorithm**: Each index in OasysDB can be configured with 64 | different algorithms and parameters to fit the performance requirements. 65 | 66 | ### Contributors 67 | 68 | - @edwinkys 69 | 70 | ### Full Changelog 71 | 72 | [v0.6.1...v0.7.0](https://github.com/oasysai/oasysdb/compare/v0.6.1...v0.7.0) 73 | 74 | ## v0.6.1 75 | 76 | ### What's Changed 77 | 78 | - Add support for boolean metadata type. This allows full compatibility with 79 | JSON-like object or dictionary metadata when storing vector records in the 80 | collection. 81 | - We optimize the database save and get collection operations performance by 82 | 10-20% by reducing the number of IO operations. Also, the save collection 83 | operation is now atomic which means that the collection is saved to the disk 84 | only when the operation is completed successfully. 85 | - We launch our own documentation website at 86 | [docs.oasysdb.com](https://docs.oasysdb.com) to provide a better user 87 | experience and more comprehensive documentation for the OasysDB library. It's 88 | still a work in progress and we will continue to improve the documentation 89 | over time. 
90 | 91 | ### Contributors 92 | 93 | - @edwinkys 94 | 95 | ### Full Changelog 96 | 97 | [v0.6.0...v0.6.1](https://github.com/oasysai/oasysdb/compare/v0.6.0...v0.6.1) 98 | 99 | ## v0.6.0 100 | 101 | ### What's Changed 102 | 103 | - **CONDITIONAL BREAKING CHANGE**: We remove support for dot distance metric and 104 | we replace cosine similarity with cosine distance metric. This change is made 105 | to make the distance metric consistent with the other distance metrics. 106 | - The default configuration for the collection (EF Construction and EF Search) 107 | is increased to a more sensible value according to the common real-world use 108 | cases. The default EF Construction is set to 128 and the default EF Search is 109 | set to 64. 110 | - We add a new script to measure the recall rate of the collection search 111 | functionality. And with this, we improve the search recall rate of OasysDB to 112 | match the recall rate of HNSWLib with the same configuration. 113 | 114 | ```sh 115 | cargo run --example measure-recall 116 | ``` 117 | 118 | - We add a new benchmark to measure the performance of saving and getting the 119 | collection. The benchmark can be run by running the command below. 120 | 121 | ```sh 122 | cargo bench 123 | ``` 124 | 125 | ### Contributors 126 | 127 | - @edwinkys 128 | 129 | ### Full Changelog 130 | 131 | [v0.5.1...v0.6.0](https://github.com/oasysai/oasysdb/compare/v0.5.1...v0.6.0) 132 | 133 | ## v0.5.1 134 | 135 | ### What's Changed 136 | 137 | We add a new method `Collection.filter` to filter the vector records based on 138 | the metadata. This method returns a HashMap of the filtered vector records and 139 | their corresponding vector IDs. This implementation performs a linear search 140 | through the collection and thus might be slow for large datasets. 141 | 142 | This implementation includes support for the following metadata to filter: 143 | 144 | - `String`: Stored value must include the filter string. 
145 | - `Float`: Stored value must be equal to the filter float. 146 | - `Integer`: Stored value must be equal to the filter integer. 147 | - `Object`: Stored value must match all the key-value pairs in the filter 148 | object. 149 | 150 | We currently don't support filtering based on the array type metadata because I 151 | am not sure of the best way to implement it. If you have any suggestions, please 152 | let me know. 153 | 154 | ### Contributors 155 | 156 | - @edwinkys 157 | 158 | ### Full Changelog 159 | 160 | [v0.5.0...v0.5.1](https://github.com/oasysai/oasysdb/compare/v0.5.0...v0.5.1) 161 | 162 | ## v0.5.0 163 | 164 | ### What's Changed 165 | 166 | - **BREAKING CHANGE**: Although there is no change in the database API, the 167 | underlying storage format has been changed to save the collection data to 168 | dedicated files directly. The details of the new persistent system and how to 169 | migrate from v0.4.x to v0.5.0 can be found in this migration guide. 170 | 171 | - By adding the feature `gen`, you can now use the `EmbeddingModel` trait and 172 | OpenAI's embedding models to generate vectors or records from text without 173 | external dependencies. This feature is optional and can be enabled by adding 174 | the feature to the `Cargo.toml` file. 175 | 176 | ```toml 177 | [dependencies] 178 | oasysdb = { version = "0.5.0", features = ["gen"] } 179 | ``` 180 | 181 | ### Contributors 182 | 183 | - @edwinkys 184 | 185 | ### Full Changelog 186 | 187 | [v0.4.5...v0.5.0](https://github.com/oasysai/oasysdb/compare/v0.4.5...v0.5.0) 188 | 189 | ## v0.4.5 190 | 191 | ### What's Changed 192 | 193 | - Add insert benchmark to measure the performance of inserting vectors into the 194 | collection. The benchmark can be run using the `cargo bench` command. 195 | - Fix the issue with large-size dirty IO buffers caused by the database 196 | operation. This issue is fixed by flushing the dirty IO buffers after the 197 | operation is completed. 
This operation can be done synchronously or 198 | asynchronously based on the user's preference since this operation might take 199 | some time to complete. 200 | 201 | ### Contributors 202 | 203 | - @edwinkys 204 | 205 | ### Full Changelog 206 | 207 | [v0.4.4...v0.4.5](https://github.com/oasysai/oasysdb/compare/v0.4.4...v0.4.5) 208 | 209 | ## v0.4.4 210 | 211 | ### What's Changed 212 | 213 | - Maximize compatibility with the standard library error types to allow users to 214 | convert OasysDB errors to most commonly used error handling libraries such as 215 | `anyhow`, `thiserror`, etc. 216 | - Add conversion methods to convert metadata to JSON value by `serde_json` and 217 | vice versa. This allows users to store JSON format metadata easily. 218 | - Add normalized cosine distance metric to the collection search functionality. 219 | Read more about the normalized cosine distance metric here. 220 | - Fix the search distance calculation to use the correct distance metric and 221 | sort it accordingly based on the collection configuration. 222 | - Add vector ID utility methods to the `VectorID` struct to make it easier to 223 | work with the vector ID. 224 | 225 | ### Additional Notes 226 | 227 | - Add a new benchmark to measure the true search AKA brute-force search 228 | performance of the collection. If possible, dealing with a small dataset, it 229 | is recommended to use the true search method for better accuracy. The 230 | benchmark can be run using the `cargo bench` command. 231 | - Improve the documentation to include more examples and explanations on how to 232 | use the library: Comprehensive Guide. 233 | 234 | ### Contributors 235 | 236 | - @edwinkys 237 | 238 | ### Full Changelog 239 | 240 | [v0.4.3...v0.4.4](https://github.com/oasysai/oasysdb/compare/v0.4.3...v0.4.4) 241 | 242 | ## v0.4.3 243 | 244 | ### What's Changed 245 | 246 | - Add SIMD acceleration to calculate the distance between vectors. 
This improves 247 | the performance of inserting and searching vectors in the collection. 248 | - Improve OasysDB native error type implementation to include the type/kind of 249 | error that occurred in addition to the error message. For example, 250 | `ErrorKind::CollectionError` is used to represent errors that occur during 251 | collection operations. 252 | - Fix the `Config.ml` default value from 0.3 to 0.2885 which is the optimal 253 | value for the HNSW with M of 32. The optimal value formula for ml is 254 | `1/ln(M)`. 255 | 256 | ### Contributors 257 | 258 | - @edwinkys 259 | 260 | ### Full Changelog 261 | 262 | [v0.4.2...v0.4.3](https://github.com/oasysai/oasysdb/compare/v0.4.2...v0.4.3) 263 | 264 | ## v0.4.2 265 | 266 | ### What's Changed 267 | 268 | Due to an issue (#62) with the Python release of v0.4.1, this patch version is 269 | released to fix the build wheels for Python users. The issue is caused by 270 | the new optional PyO3 feature for the v0.4.1 Rust crate release which excludes 271 | PyO3 dependencies from the build process. To solve this, the Python package 272 | build and deploy script now includes the `--features py` argument. 273 | 274 | For Rust users, this version doesn't offer any additional features or 275 | functionality compared to the v0.4.1 release. 276 | 277 | ### Full Changelog 278 | 279 | [v0.4.1...v0.4.2](https://github.com/oasysai/oasysdb/compare/v0.4.1...v0.4.2) 280 | 281 | ## v0.4.1 282 | 283 | ### What's Changed 284 | 285 | - Added quality of life improvements to the `VectorID` type interoperability. 286 | - Improved the `README.md` file with additional data points on the database 287 | performance. 288 | - Changed the `Collection.insert` method to return the new `VectorID` after 289 | inserting a new vector record. 290 | - Pyo3 dependencies are now hidden behind the `py` feature. This allows users to 291 | build the library without the Python bindings if they don't need them, which is 292 | probably all of them.
293 | 294 | ### Contributors 295 | 296 | - @dteare 297 | - @edwinkys 298 | - @noneback 299 | 300 | ### Full Changelog 301 | 302 | [v0.4.0...v0.4.1](https://github.com/oasysai/oasysdb/compare/v0.4.0...v0.4.1) 303 | 304 | ## v0.4.0 305 | 306 | ### What's Changed 307 | 308 | - **CONDITIONAL BREAKING CHANGE**: Add an option to configure distance for the 309 | vector collection via the `Config` struct. The new field `distance` can be set 310 | using the `Distance` enum. This includes Euclidean, Cosine, and Dot distance 311 | metrics. The default distance metric is Euclidean. This change is backward 312 | compatible if you are creating a config using the `Config::default()` method. 313 | Otherwise, you need to update the config to include the distance metric. 314 | 315 | ```rs 316 | let config = Config { 317 | ... 318 | distance: Distance::Cosine, 319 | }; 320 | ``` 321 | 322 | - With the new distance metric feature, now, you can set a `relevancy` threshold 323 | for the search results. This will filter out the results that are below or 324 | above the threshold depending on the distance metric used. This feature is 325 | disabled by default which is set to -1.0. To enable this feature, you can set 326 | the `relevancy` field in the `Collection` struct. 327 | 328 | ```rs 329 | ... 330 | let mut collection = Collection::new(&config)?; 331 | collection.relevancy = 3.0; 332 | ``` 333 | 334 | - Add a new method `Collection::insert_many` to insert multiple vector records 335 | into the collection at once. This method is more optimized than using the 336 | `Collection::insert` method in a loop. 337 | 338 | ### Contributors 339 | 340 | - @noneback 341 | - @edwinkys 342 | 343 | ### Full Changelog 344 | 345 | [v0.3.0...v0.4.0](https://github.com/oasysai/oasysdb/compare/v0.3.0...v0.4.0) 346 | 347 | ## v0.3.0 348 | 349 | This release introduces a BREAKING CHANGE to one of the methods of the 350 | `Database` struct.
The `Database::create_collection` method has been removed 351 | from the library due to redundancy. The `Database::save_collection` method can 352 | be used to create a new collection or update an existing one. This change is 353 | made to simplify the API and to make it more consistent with the other methods 354 | in the `Database` struct. 355 | 356 | ### What's Changed 357 | 358 | - **BREAKING CHANGE**: Removed the `Database::create_collection` method from the 359 | library. To replace this, you can use the code snippet below: 360 | 361 | ```rs 362 | // Before: this creates a new empty collection. 363 | db.create_collection("vectors", None, Some(records))?; 364 | 365 | // After: create new or build a collection then save it. 366 | // let collection = Collection::new(&config)?; 367 | let collection = Collection::build(&config, &records)?; 368 | db.save_collection("vectors", &collection)?; 369 | ``` 370 | 371 | - Added the `Collection::list` method to list all the vector records in the 372 | collection. 373 | - Created a full Python binding for OasysDB which is available on PyPI. This 374 | allows you to use OasysDB directly from Python. The Python binding is 375 | available at https://pypi.org/project/oasysdb. 376 | 377 | ### Contributors 378 | 379 | - @edwinkys 380 | - @Zelaren 381 | - @FebianFebian1 382 | 383 | ### Full Changelog 384 | 385 | [v0.2.1...v0.3.0](https://github.com/oasysai/oasysdb/compare/v0.2.1...v0.3.0) 386 | 387 | ## v0.2.1 388 | 389 | ### What's Changed 390 | 391 | - `Metadata` enum can now be accessed publicly using 392 | `oasysdb::metadata::Metadata`. This allows users to use `match` statements to 393 | extract the data from it. 394 | - Added a `prelude` module that re-exports the most commonly used types and 395 | traits. This makes it easier to use the library by importing the prelude 396 | module by `use oasysdb::prelude::*`. 
397 | 398 | ### Contributors 399 | 400 | - @edwinkys 401 | 402 | ### Full Changelog 403 | 404 | [v0.2.0...v0.2.1](https://github.com/oasysai/oasysdb/compare/v0.2.0...v0.2.1) 405 | 406 | ## v0.2.0 407 | 408 | ### What's Changed 409 | 410 | - For the `Collection` struct, the generic parameter `D` has been replaced with the 411 | `Metadata` enum which allows one collection to store different types of data 412 | as needed. 413 | - The `Vector` now uses `Vec<f32>` instead of `[f32; N]` which removes the `N` 414 | generic parameter from the `Vector` struct. Since there is a chance of using 415 | different vector dimensions in the same collection with this change, 416 | additional functionality is added to the `Collection` to make sure that the 417 | vector dimension is uniform. 418 | - The `M` generic parameter in the `Collection` struct has been replaced with a 419 | constant of 32. This removes the flexibility to tweak the indexing 420 | configuration for this value. But for most use cases, this value should be 421 | sufficient. 422 | - Added multiple utility functions to structs such as `Record`, `Vector`, and 423 | `Collection` to make it easier to work with the data. 424 | 425 | ### Contributors 426 | 427 | - @edwinkys 428 | 429 | ### Full Changelog 430 | 431 | [v0.1.0...v0.2.0](https://github.com/oasysai/oasysdb/compare/v0.1.0...v0.2.0) 432 | 433 | ## v0.1.0 434 | 435 | ### What's Changed 436 | 437 | - OasysDB is released as an embedded vector database available directly via the 438 | `cargo add oasysdb` command. 439 | - Using HNSW algorithm implementation for the collection indexing along with 440 | Euclidean distance metrics. 441 | - Incremental updates on the vector collections allowing inserts, deletes, and 442 | modifications without rebuilding the index. 443 | - Add a benchmark on the collection search functionality using the SIFT dataset that 444 | can be run using the `cargo bench` command.
445 | 446 | ### Contributors 447 | 448 | - @edwinkys 449 | 450 | ### Full Changelog 451 | 452 | [v0.1.0](https://github.com/oasysai/oasysdb/commits/v0.1.0) 453 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing to OasysDB 2 | 3 | First of all, thank you for considering to contribute to OasysDB! We welcome 4 | contributions from the community, and this documentation outlines the process to 5 | start contributing to our project. 6 | 7 | ## Code of Conduct 8 | 9 | We are committed to building an inclusive and welcoming community because we 10 | believe that it will lead to a more successful project and a better experience 11 | for everyone involved. To achieve that, any participant in our project is 12 | expected to act respectfully and to follow the Code of Conduct. 13 | 14 | ## Have questions or suggestions? 15 | 16 | [![Discord](https://img.shields.io/discord/1182432298382131200?logo=discord&logoColor=%23ffffff&label=Discord&labelColor=%235865F2&style=for-the-badge)][discord] 17 | 18 | There is no such thing as a stupid question. If you have a question, chances 19 | are, someone else does too. So, please feel free to ask questions whether it's 20 | on our [Discord][discord] server or by opening a new discussion on [GitHub 21 | Discussions][gh_discussions]. 22 | 23 | ## Encounter a bug? Have a feature request? 24 | 25 | If you encounter a bug or have a feature request, please open an issue on 26 | [GitHub Issues][gh_issues]. Please include enough information for us to 27 | understand the issue or the feature request. For this reason, we recommend you 28 | to follow the issue templates we have provided when creating a new issue. 29 | 30 | ## Want to contribute code? 
31 | 32 | **TLDR: Check or open an issue first before working on a PR.** 33 | 34 | Before you start working on a pull request, we encourage you to check out the 35 | existing issues and pull requests to make sure that the feature you want to work 36 | on is in our roadmap and is aligned with the project's vision. After all, we 37 | don't want you to waste your time working on something that might not be merged. 38 | 39 | We try to prioritize features and bug fixes that are on our roadmap or requested 40 | a lot by the community. If you want to work on a feature or a fix that isn't 41 | already in the issue tracker, please open an issue first to discuss it with the 42 | project maintainers and the community. 43 | 44 | For features, we try to prioritize features that are backed by real-world use 45 | cases. If you have a use case for a feature, please include it in the issue. 46 | We'd love to hear about it! 47 | 48 | ## Getting started 49 | 50 | OasysDB is written in Rust. So, you need to have Rust installed on your local 51 | machine. If you haven't installed Rust yet, you can install it by following the 52 | instructions on the [Rust Installation Guide][rustup]. 53 | 54 | After you have installed Rust, you can clone the repository into your local 55 | machine. Before you start making changes in the codebase, you should run the 56 | tests to make sure that everything is working as expected: 57 | 58 | ```sh 59 | cargo test 60 | ``` 61 | 62 | OasysDB uses a couple of third-party dependencies that might be useful for you 63 | to get familiar with. These are the most important ones along with their 64 | documentation: 65 | 66 | - [gRPC](https://grpc.io/) 67 | - [Tonic](https://github.com/hyperium/tonic) 68 | - [Tokio](https://tokio.rs/) 69 | 70 | ## Style guide 71 | 72 | We mostly use the default linting and style guide for Rust except for some 73 | linting changes listed in the rustfmt.toml file. 
For more information about the 74 | code style, see the [Rust Style Guide][style_guide]. 75 | 76 | For commit messages, we use the [Conventional Commits][conventional_commits] 77 | format. This allows us to maintain consistency and readability in our Git commit 78 | history making it easier to understand the changes made to the codebase at a 79 | high level. 80 | 81 | When commenting your code, please try your best to write comments that are clear 82 | and concise with proper English sentence capitalization and punctuation. This 83 | will help us and the community understand your code better and keep the codebase 84 | maintainable. 85 | 86 | ## Submitting a pull request 87 | 88 | Once you have made your changes, you can submit a pull request. We will review 89 | your pull request and provide feedback. If your pull request is accepted, we 90 | will merge it into the main branch. 91 | 92 | For organization purposes, we ask that you use the [Conventional 93 | Commits][conventional_commits] format for your pull request title in lowercase: 94 | 95 | ``` 96 | <type>: <description> 97 | ``` 98 | 99 | For example: 100 | 101 | ``` 102 | feat: add support ... 103 | fix: fix issue ... 104 | ``` 105 | 106 | ## Conclusion 107 | 108 | Thank you for taking the time to read this documentation. We look forward to 109 | your contributions! Another way to support this project is to star this project, 110 | share it with your circles, and join us on [Discord][discord]. 111 | 112 | Best regards,
Edwin Kys 113 | 114 | [discord]: https://discord.gg/bDhQrkqNP4 115 | [gh_issues]: https://github.com/oasysai/oasysdb/issues 116 | [gh_discussions]: https://github.com/oasysai/oasysdb/discussions 117 | [rustup]: https://www.rust-lang.org/tools/install 118 | [style_guide]: https://doc.rust-lang.org/beta/style-guide/index.html 119 | [conventional_commits]: https://www.conventionalcommits.org/en/v1.0.0/ 120 | -------------------------------------------------------------------------------- /docs/css/style.css: -------------------------------------------------------------------------------- 1 | h1, 2 | h2, 3 | h3 { 4 | font-weight: bold !important; 5 | } 6 | 7 | .odb-button { 8 | text-align: center; 9 | width: 100%; 10 | } 11 | 12 | .odb-button.disabled { 13 | opacity: 0.5; 14 | cursor: not-allowed; 15 | } 16 | 17 | /* Tables will be displayed at full width. */ 18 | 19 | .md-typeset__table { 20 | width: 100%; 21 | } 22 | 23 | .md-typeset__table table:not([class]) { 24 | display: table; 25 | } 26 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to OasysDB 🎉 2 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: OasysDB 2 | 3 | repo_name: oasysai/oasysdb 4 | repo_url: https://github.com/oasysai/oasysdb 5 | 6 | theme: 7 | name: material 8 | logo: assets/wordmark.png 9 | favicon: assets/favicon64.png 10 | 11 | icon: 12 | repo: fontawesome/brands/github 13 | 14 | palette: 15 | - media: "(prefers-color-scheme: light)" 16 | scheme: default 17 | primary: black 18 | toggle: 19 | name: Light Mode 20 | icon: material/brightness-7 21 | 22 | - media: "(prefers-color-scheme: dark)" 23 | scheme: slate 24 | primary: black 25 | toggle: 26 | name: Dark Mode 27 | icon: material/brightness-4 28 | 29 | font: 30 | 
text: Space Grotesk 31 | code: Space Mono 32 | 33 | features: 34 | - header.autohide 35 | - navigation.tabs 36 | - navigation.tabs.sticky 37 | - navigation.expand 38 | - navigation.footer 39 | - content.code.copy 40 | 41 | copyright: Copyright © 2024 OasysDB 42 | 43 | extra: 44 | generator: false 45 | 46 | social: 47 | - icon: fontawesome/brands/x-twitter 48 | link: https://x.com/oasysai 49 | 50 | - icon: fontawesome/brands/linkedin 51 | link: https://www.linkedin.com/company/oasysai 52 | 53 | - icon: fontawesome/brands/discord 54 | link: https://discord.gg/bDhQrkqNP4 55 | 56 | extra_css: 57 | - css/style.css 58 | 59 | nav: 60 | - Documentation: 61 | - Introduction: index.md 62 | 63 | - Other: 64 | - Changelog: changelog.md 65 | - Contributing: contributing.md 66 | 67 | - Blog: 68 | - blog/index.md 69 | 70 | markdown_extensions: 71 | - admonition 72 | - attr_list 73 | - md_in_html 74 | - pymdownx.details 75 | - pymdownx.inlinehilite 76 | - pymdownx.snippets 77 | - pymdownx.superfences 78 | 79 | - pymdownx.tabbed: 80 | alternate_style: true 81 | 82 | - pymdownx.emoji: 83 | emoji_index: !!python/name:material.extensions.emoji.twemoji 84 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 85 | 86 | - toc: 87 | permalink: "#" 88 | 89 | plugins: 90 | - blog: 91 | post_readtime: true 92 | post_excerpt: required 93 | authors: true 94 | categories_allowed: 95 | - Log 96 | - Rust 97 | -------------------------------------------------------------------------------- /protos/database.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package database; 3 | 4 | import "google/protobuf/empty.proto"; 5 | 6 | // OasysDB gRPC service definition. 7 | service Database { 8 | // Check if the connection to the database is alive. 9 | rpc Heartbeat(google.protobuf.Empty) returns (HeartbeatResponse); 10 | 11 | // Manually create a snapshot of the database. 
12 | rpc Snapshot(google.protobuf.Empty) returns (SnapshotResponse); 13 | 14 | // Insert a new record into the database. 15 | rpc Insert(InsertRequest) returns (InsertResponse); 16 | 17 | // Retrieve an existing record from the database. 18 | rpc Get(GetRequest) returns (GetResponse); 19 | 20 | // Delete a record from the database. 21 | rpc Delete(DeleteRequest) returns (google.protobuf.Empty); 22 | 23 | // Update a record metadata in the database. 24 | rpc Update(UpdateRequest) returns (google.protobuf.Empty); 25 | 26 | // Query the database for nearest neighbors. 27 | rpc Query(QueryRequest) returns (QueryResponse); 28 | } 29 | 30 | message HeartbeatResponse { 31 | string version = 1; 32 | } 33 | 34 | message SnapshotResponse { 35 | int32 count = 1; 36 | } 37 | 38 | message InsertRequest { 39 | Record record = 1; 40 | } 41 | 42 | message InsertResponse { 43 | string id = 1; 44 | } 45 | 46 | message GetRequest { 47 | string id = 1; 48 | } 49 | 50 | message GetResponse { 51 | Record record = 1; 52 | } 53 | 54 | message DeleteRequest { 55 | string id = 1; 56 | } 57 | 58 | message UpdateRequest { 59 | string id = 1; 60 | map metadata = 2; 61 | } 62 | 63 | message QueryRequest { 64 | Vector vector = 1; 65 | int32 k = 2; 66 | string filter = 3; 67 | QueryParameters params = 4; 68 | } 69 | 70 | message QueryParameters { 71 | int32 probes = 1; 72 | float radius = 2; 73 | } 74 | 75 | message QueryResponse { 76 | repeated QueryResult results = 1; 77 | } 78 | 79 | message QueryResult { 80 | string id = 1; 81 | map metadata = 2; 82 | float distance = 3; 83 | } 84 | 85 | // List shared types below. 
86 | 87 | message Record { 88 | Vector vector = 1; 89 | map metadata = 2; 90 | } 91 | 92 | message Vector { 93 | repeated float data = 1; 94 | } 95 | 96 | message Value { 97 | oneof value { 98 | string text = 1; 99 | double number = 2; 100 | bool boolean = 4; 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Documentation website. 2 | mkdocs-material==9.5.26 3 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | tab_spaces = 4 2 | reorder_imports = true 3 | max_width = 80 4 | use_small_heuristics = "Max" 5 | merge_derives = false 6 | -------------------------------------------------------------------------------- /src/cores/database.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use protos::database_server::Database as DatabaseService; 3 | use std::io::{BufReader, BufWriter}; 4 | use tonic::{Request, Response}; 5 | 6 | const TMP_DIR: &str = "tmp"; 7 | const PARAMS_FILE: &str = "odb_params"; 8 | const STORAGE_FILE: &str = "odb_storage"; 9 | const INDEX_FILE: &str = "odb_index"; 10 | 11 | /// Database parameters. 12 | /// 13 | /// Fields: 14 | /// - dimension: Vector dimension. 15 | /// - metric: Metric to calculate distance. 16 | /// - density: Max number of records per IVF cluster. 17 | #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] 18 | pub struct Parameters { 19 | pub dimension: usize, 20 | pub metric: Metric, 21 | pub density: usize, 22 | } 23 | 24 | /// Dynamic query-time parameters. 25 | /// 26 | /// Fields: 27 | /// - probes: Suggested number of clusters to visit. 28 | /// - radius: Maximum distance to include in the result. 
29 | #[derive(Debug, Clone, Copy, PartialEq)] 30 | pub struct QueryParameters { 31 | pub probes: usize, 32 | pub radius: f32, 33 | } 34 | 35 | impl Default for QueryParameters { 36 | /// Default query parameters: 37 | /// - probes: 32 38 | /// - radius: ∞ 39 | fn default() -> Self { 40 | QueryParameters { probes: 32, radius: f32::INFINITY } 41 | } 42 | } 43 | 44 | impl TryFrom for QueryParameters { 45 | type Error = Status; 46 | fn try_from(value: protos::QueryParameters) -> Result { 47 | Ok(QueryParameters { 48 | probes: value.probes as usize, 49 | radius: value.radius, 50 | }) 51 | } 52 | } 53 | 54 | /// Database snapshot statistics. 55 | /// 56 | /// The snapshot statistics include the information that might be useful 57 | /// for monitoring the state of the database. This stats will be returned 58 | /// by the `create_snapshot` method. 59 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 60 | pub struct SnapshotStats { 61 | pub count: usize, 62 | } 63 | 64 | impl From for protos::SnapshotResponse { 65 | fn from(value: SnapshotStats) -> Self { 66 | protos::SnapshotResponse { count: value.count as i32 } 67 | } 68 | } 69 | 70 | #[derive(Debug)] 71 | pub struct Database { 72 | dir: PathBuf, 73 | params: Parameters, 74 | index: RwLock, 75 | storage: RwLock, 76 | } 77 | 78 | impl Database { 79 | pub fn configure(params: &Parameters) { 80 | let index = Index::new() 81 | .with_metric(params.metric) 82 | .with_density(params.density); 83 | 84 | let db = Database { 85 | dir: Self::dir(), 86 | params: *params, 87 | index: RwLock::new(index), 88 | storage: RwLock::new(Storage::new()), 89 | }; 90 | 91 | if db.dir.join(PARAMS_FILE).exists() { 92 | let stdin = std::io::stdin(); 93 | let overwrite = { 94 | eprint!("Database is already configured. Overwrite? 
(y/n): "); 95 | let mut input = String::new(); 96 | stdin.read_line(&mut input).unwrap(); 97 | matches!(input.to_lowercase().trim(), "y") 98 | }; 99 | 100 | if !overwrite { 101 | return; 102 | } 103 | 104 | fs::remove_dir_all(&db.dir).expect("Failed to reset the database"); 105 | println!("The database has been reset successfully"); 106 | } 107 | 108 | db.setup_dir().expect("Failed to setup database directory"); 109 | } 110 | 111 | pub fn open() -> Result> { 112 | let dir = Self::dir(); 113 | let params = Self::load_binary(dir.join(PARAMS_FILE))?; 114 | let index = Self::load_binary(dir.join(INDEX_FILE))?; 115 | let storage: Storage = Self::load_binary(dir.join(STORAGE_FILE))?; 116 | 117 | let count = storage.count(); 118 | tracing::info!("Restored {count} record(s) from the disk"); 119 | 120 | Ok(Database { 121 | dir, 122 | params, 123 | index: RwLock::new(index), 124 | storage: RwLock::new(storage), 125 | }) 126 | } 127 | 128 | fn dir() -> PathBuf { 129 | match env::var("ODB_DIR") { 130 | Ok(dir) => PathBuf::from(dir), 131 | Err(_) => PathBuf::from("oasysdb"), 132 | } 133 | } 134 | 135 | fn setup_dir(&self) -> Result<(), Box> { 136 | if self.dir.try_exists()? { 137 | return Ok(()); 138 | } 139 | 140 | fs::create_dir_all(&self.dir)?; 141 | fs::create_dir_all(self.dir.join(TMP_DIR))?; 142 | 143 | self.create_snapshot()?; 144 | Ok(()) 145 | } 146 | 147 | fn load_binary( 148 | path: impl AsRef, 149 | ) -> Result> { 150 | let file = OpenOptions::new().read(true).open(path)?; 151 | let reader = BufReader::new(file); 152 | Ok(bincode::deserialize_from(reader)?) 
153 | } 154 | 155 | fn persist_as_binary( 156 | &self, 157 | path: impl AsRef, 158 | data: T, 159 | ) -> Result<(), Box> { 160 | let file_name = path.as_ref().file_name().unwrap(); 161 | let tmp_file = self.dir.join(TMP_DIR).join(file_name); 162 | let file = OpenOptions::new() 163 | .write(true) 164 | .create(true) 165 | .truncate(true) 166 | .open(&tmp_file)?; 167 | 168 | let writer = BufWriter::new(file); 169 | bincode::serialize_into(writer, &data)?; 170 | fs::rename(&tmp_file, &path)?; 171 | Ok(()) 172 | } 173 | 174 | pub fn create_snapshot(&self) -> Result> { 175 | self.persist_as_binary(self.dir.join(PARAMS_FILE), self.params)?; 176 | // Lock order: storage, then index (as in insert and query). 177 | let storage = self.storage.read().unwrap(); 178 | let index = self.index.read().unwrap(); 179 | 180 | self.persist_as_binary(self.dir.join(INDEX_FILE), &*index)?; 181 | self.persist_as_binary(self.dir.join(STORAGE_FILE), &*storage)?; 182 | 183 | let count = storage.count(); 184 | tracing::info!("Created a snapshot with {count} record(s)"); 185 | 186 | Ok(SnapshotStats { count }) 187 | } 188 | 189 | fn validate_dimension(&self, vector: &Vector) -> Result<(), Status> { 190 | if vector.len() != self.params.dimension { 191 | return Err(Status::invalid_argument(format!( 192 | "Invalid vector dimension: expected {}, got {}", 193 | self.params.dimension, 194 | vector.len() 195 | ))); 196 | } 197 | 198 | Ok(()) 199 | } 200 | } 201 | 202 | #[tonic::async_trait] 203 | impl DatabaseService for Arc { 204 | async fn heartbeat( 205 | &self, 206 | _request: Request<()>, 207 | ) -> Result, Status> { 208 | let response = protos::HeartbeatResponse { 209 | version: env!("CARGO_PKG_VERSION").to_string(), 210 | }; 211 | 212 | Ok(Response::new(response)) 213 | } 214 | 215 | async fn snapshot( 216 | &self, 217 | _request: Request<()>, 218 | ) -> Result, Status> { 219 | let stats = self.create_snapshot().map_err(|e| { 220 | let message = format!("Failed to create a snapshot: {e}"); 221 | Status::internal(message) 222 | })?; 223 | 224 | 
Ok(Response::new(stats.into())) 225 | } 226 | 227 | async fn insert( 228 | &self, 229 | request: Request, 230 | ) -> Result, Status> { 231 | let record = match request.into_inner().record { 232 | Some(record) => Record::try_from(record)?, 233 | None => { 234 | let message = "Record data is required for insertion"; 235 | return Err(Status::invalid_argument(message)); 236 | } 237 | }; 238 | 239 | self.validate_dimension(&record.vector)?; 240 | 241 | let id = RecordID::new(); 242 | 243 | // Insert the record into the storage. 244 | // This operation must be done before updating the index. Otherwise, 245 | // the index won't have access to the record data. 246 | let mut storage = self.storage.write().unwrap(); 247 | storage.insert(&id, &record)?; 248 | 249 | let mut index = self.index.write().unwrap(); 250 | index.insert(&id, &record, storage.records())?; 251 | 252 | tracing::info!("Inserted a new record with ID: {id}"); 253 | Ok(Response::new(protos::InsertResponse { id: id.to_string() })) 254 | } 255 | 256 | async fn get( 257 | &self, 258 | request: Request, 259 | ) -> Result, Status> { 260 | let request = request.into_inner(); 261 | let id = request.id.parse::()?; 262 | 263 | let storage = self.storage.read().unwrap(); 264 | let record = storage.get(&id)?.to_owned(); 265 | 266 | let response = protos::GetResponse { record: Some(record.into()) }; 267 | Ok(Response::new(response)) 268 | } 269 | 270 | async fn delete( 271 | &self, 272 | request: Request, 273 | ) -> Result, Status> { 274 | let request = request.into_inner(); 275 | let id = request.id.parse::()?; 276 | // Lock order: storage, then index (as in insert and query). 277 | let mut storage = self.storage.write().unwrap(); 278 | let mut index = self.index.write().unwrap(); 279 | 280 | index.delete(&id)?; 281 | storage.delete(&id)?; 282 | 283 | tracing::info!("Deleted a record with ID: {id}"); 284 | Ok(Response::new(())) 285 | } 286 | 287 | async fn update( 288 | &self, 289 | request: Request, 290 | ) -> Result, Status> { 291 | let request = request.into_inner(); 292 |
let id = request.id.parse::()?; 293 | 294 | let mut metadata = HashMap::new(); 295 | for (key, value) in request.metadata { 296 | metadata.insert(key, value.try_into()?); 297 | } 298 | 299 | let mut storage = self.storage.write().unwrap(); 300 | storage.update(&id, &metadata)?; 301 | 302 | tracing::info!("Updated metadata for a record: {id}"); 303 | Ok(Response::new(())) 304 | } 305 | 306 | async fn query( 307 | &self, 308 | request: Request, 309 | ) -> Result, Status> { 310 | let request = request.into_inner(); 311 | let vector = match request.vector { 312 | Some(vector) => Vector::try_from(vector)?, 313 | None => { 314 | let message = "Vector is required for query operation"; 315 | return Err(Status::invalid_argument(message)); 316 | } 317 | }; 318 | 319 | self.validate_dimension(&vector)?; 320 | 321 | let k = request.k as usize; 322 | if k == 0 { 323 | let message = "Invalid k value, k must be greater than 0"; 324 | return Err(Status::invalid_argument(message)); 325 | } 326 | 327 | let filter = Filters::try_from(request.filter.as_str())?; 328 | 329 | let params = match request.params { 330 | Some(params) => QueryParameters::try_from(params)?, 331 | None => QueryParameters::default(), 332 | }; 333 | 334 | let storage = self.storage.read().unwrap(); 335 | let records = storage.records(); 336 | 337 | let index = self.index.read().unwrap(); 338 | let results = index 339 | .query(&vector, k, &filter, ¶ms, records)? 
340 | .into_iter() 341 | .map(Into::into) 342 | .collect(); 343 | 344 | Ok(Response::new(protos::QueryResponse { results })) 345 | } 346 | } 347 | 348 | #[cfg(test)] 349 | mod tests { 350 | use super::*; 351 | use uuid::Uuid; 352 | 353 | #[test] 354 | fn test_open() { 355 | let db = setup_db(); 356 | assert_eq!(db.params, Parameters::default()); 357 | } 358 | 359 | #[tokio::test] 360 | async fn test_heartbeat() { 361 | let db = setup_db(); 362 | let request = Request::new(()); 363 | let response = db.heartbeat(request).await.unwrap(); 364 | assert_eq!(response.get_ref().version, env!("CARGO_PKG_VERSION")); 365 | } 366 | 367 | #[tokio::test] 368 | async fn test_insert() { 369 | let params = Parameters::default(); 370 | let db = setup_db(); 371 | 372 | let vector = Vector::random(params.dimension); 373 | let request = Request::new(protos::InsertRequest { 374 | record: Some(protos::Record { 375 | vector: Some(vector.into()), 376 | metadata: std::collections::HashMap::new(), 377 | }), 378 | }); 379 | 380 | let response = db.insert(request).await.unwrap(); 381 | assert!(response.get_ref().id.parse::().is_ok()); 382 | assert_eq!(db.storage.read().unwrap().records().len(), 1); 383 | } 384 | 385 | fn setup_db() -> Arc { 386 | if Database::dir().exists() { 387 | fs::remove_dir_all(Database::dir()).unwrap(); 388 | } 389 | 390 | let params = Parameters::default(); 391 | Database::configure(¶ms); 392 | Arc::new(Database::open().unwrap()) 393 | } 394 | 395 | impl Default for Parameters { 396 | fn default() -> Self { 397 | Parameters { 398 | dimension: 128, 399 | metric: Metric::Euclidean, 400 | density: 64, 401 | } 402 | } 403 | } 404 | } 405 | -------------------------------------------------------------------------------- /src/cores/index.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use std::cmp::{min, Ordering}; 3 | use std::collections::BinaryHeap; 4 | use std::rc::Rc; 5 | 6 | type ClusterIndex = usize; 7 | 8 | 
/// ANNS search result containing the metadata of the record. 9 | /// 10 | /// We exclude the vector data from the result because it doesn't provide 11 | /// any additional value on the search result. If users are interested in 12 | /// the vector data, they can use the get method to retrieve the record. 13 | #[derive(Debug, Clone)] 14 | pub struct QueryResult { 15 | pub id: RecordID, 16 | pub metadata: HashMap, 17 | pub distance: f32, 18 | } 19 | 20 | impl Eq for QueryResult {} 21 | 22 | impl PartialEq for QueryResult { 23 | /// Compare two query results based on their IDs. 24 | fn eq(&self, other: &Self) -> bool { 25 | self.id == other.id 26 | } 27 | } 28 | 29 | impl Ord for QueryResult { 30 | fn cmp(&self, other: &Self) -> Ordering { 31 | self.distance.partial_cmp(&other.distance).unwrap_or(Ordering::Equal) 32 | } 33 | } 34 | 35 | impl PartialOrd for QueryResult { 36 | /// Allow the query results to be sorted based on their distance. 37 | fn partial_cmp(&self, other: &Self) -> Option { 38 | Some(self.cmp(other)) 39 | } 40 | } 41 | 42 | impl From for protos::QueryResult { 43 | fn from(value: QueryResult) -> Self { 44 | let metadata = value 45 | .metadata 46 | .into_iter() 47 | .map(|(key, value)| (key, value.into())) 48 | .collect(); 49 | 50 | protos::QueryResult { 51 | id: value.id.to_string(), 52 | metadata, 53 | distance: value.distance, 54 | } 55 | } 56 | } 57 | 58 | /// ANNS Index interface. 59 | /// 60 | /// OasysDB uses a modified version of IVF index algorithm. This custom index 61 | /// implementation allows OasysDB to maintain a balanced index structure 62 | /// allowing the clusters to grow to accommodate data growth. 63 | #[repr(C)] 64 | #[derive(Debug, Serialize, Deserialize)] 65 | pub struct Index { 66 | centroids: Vec, 67 | clusters: Vec>, 68 | 69 | // Index parameters. 70 | metric: Metric, 71 | density: usize, 72 | } 73 | 74 | impl Index { 75 | /// Create a new index instance with default parameters. 
76 | /// 77 | /// Default parameters: 78 | /// - metric: Euclidean 79 | /// - density: 256 80 | pub fn new() -> Self { 81 | Index { 82 | centroids: vec![], 83 | clusters: vec![], 84 | metric: Metric::Euclidean, 85 | density: 256, 86 | } 87 | } 88 | 89 | /// Configure the metric used for distance calculations. 90 | pub fn with_metric(mut self, metric: Metric) -> Self { 91 | self.metric = metric; 92 | self 93 | } 94 | 95 | /// Configure the density of the index. 96 | pub fn with_density(mut self, density: usize) -> Self { 97 | self.density = density; 98 | self 99 | } 100 | 101 | /// Insert a new record into the index. 102 | /// 103 | /// This method required the reference to all the records because 104 | /// during the cluster splitting process, the record assignments 105 | /// will be re-calculated 106 | pub fn insert( 107 | &mut self, 108 | id: &RecordID, 109 | record: &Record, 110 | records: &HashMap, 111 | ) -> Result<(), Status> { 112 | let vector = &record.vector; 113 | let nearest_centroid = self.find_nearest_centroid(vector); 114 | 115 | // If the index is empty, the record's vector will be 116 | // the first centroid. 117 | if nearest_centroid.is_none() { 118 | let cluster_id = self.insert_centroid(vector); 119 | self.clusters[cluster_id].push(*id); 120 | return Ok(()); 121 | } 122 | 123 | let nearest_centroid = nearest_centroid.unwrap(); 124 | if self.clusters[nearest_centroid].len() < self.density { 125 | self.update_centroid(&nearest_centroid, vector); 126 | self.clusters[nearest_centroid].push(*id); 127 | } else { 128 | // If the cluster is full, insert the record into the cluster 129 | // and split the cluster with KMeans algorithm. 130 | self.clusters[nearest_centroid].push(*id); 131 | self.split_cluster(&nearest_centroid, records); 132 | } 133 | 134 | Ok(()) 135 | } 136 | 137 | /// Delete a record from the index by its ID. 138 | /// 139 | /// This method will iterate over all the clusters and remove the record 140 | /// from the cluster if it exists. 
This method doesn't update the value of 141 | /// the cluster's centroid. 142 | pub fn delete(&mut self, id: &RecordID) -> Result<(), Status> { 143 | // Find the cluster and record indices where the record is stored. 144 | let cluster_record_index = 145 | self.clusters.iter().enumerate().find_map(|(i, cluster)| { 146 | cluster.par_iter().position_first(|x| x == id).map(|x| (i, x)) 147 | }); 148 | 149 | if let Some((cluster_ix, record_ix)) = cluster_record_index { 150 | // If the cluster has only one record, remove the cluster and 151 | // centroid from the index. This won't happen often. 152 | if self.clusters[cluster_ix].len() == 1 { 153 | self.clusters.remove(cluster_ix); 154 | self.centroids.remove(cluster_ix); 155 | } else { 156 | self.clusters[cluster_ix].remove(record_ix); 157 | } 158 | } 159 | 160 | Ok(()) 161 | } 162 | 163 | /// Search for the nearest neighbors of a given vector. 164 | /// 165 | /// This method uses the IVF search algorithm to find the nearest neighbors 166 | /// of the query vector. The filtering process of the search is done within 167 | /// the boundaries of the nearest clusters to the query vector. 
168 | pub fn query( 169 | &self, 170 | vector: &Vector, 171 | k: usize, 172 | filters: &Filters, 173 | params: &QueryParameters, 174 | records: &HashMap, 175 | ) -> Result, Status> { 176 | let QueryParameters { probes, radius } = params.to_owned(); 177 | let probes = min(probes, self.centroids.len()); 178 | 179 | let nearest_clusters = self.sort_nearest_centroids(vector); 180 | let mut results = BinaryHeap::new(); 181 | 182 | for cluster_id in nearest_clusters.iter().take(probes) { 183 | for record_id in &self.clusters[*cluster_id] { 184 | let record = match records.get(record_id) { 185 | Some(record) => record, 186 | None => continue, 187 | }; 188 | 189 | let distance = self.metric.distance(&record.vector, vector); 190 | let distance = match distance { 191 | Some(distance) => distance as f32, 192 | None => continue, 193 | }; 194 | 195 | // Check if the record is within the search radius and 196 | // the record's metadata passes the filters. 197 | if distance > radius || !filters.apply(&record.metadata) { 198 | continue; 199 | } 200 | 201 | results.push(QueryResult { 202 | id: *record_id, 203 | metadata: record.metadata.clone(), 204 | distance, 205 | }); 206 | 207 | if results.len() > k { 208 | results.pop(); 209 | } 210 | } 211 | } 212 | 213 | Ok(results.into_sorted_vec()) 214 | } 215 | 216 | /// Insert a new centroid and cluster into the index. 217 | /// - vector: Centroid vector. 218 | fn insert_centroid(&mut self, vector: &Vector) -> ClusterIndex { 219 | self.centroids.push(vector.to_owned()); 220 | self.clusters.push(vec![]); 221 | self.centroids.len() - 1 222 | } 223 | 224 | /// Recalculate the centroid of a cluster with the new vector. 225 | /// 226 | /// This method must be called before inserting the new vector into the 227 | /// cluster because this method calculates the new centroid by taking the 228 | /// weighted average of the current centroid and adding the new vector 229 | /// before normalizing the result with the new cluster size. 
230 | fn update_centroid(&mut self, cluster_id: &ClusterIndex, vector: &Vector) { 231 | let count = self.clusters[*cluster_id].len() as f32; 232 | self.centroids[*cluster_id] = self.centroids[*cluster_id] 233 | .as_slice() 234 | .iter() 235 | .zip(vector.as_slice()) 236 | .map(|(a, b)| ((a * count) + b) / (count + 1.0)) 237 | .collect::>() 238 | .into(); 239 | } 240 | 241 | /// Find the nearest centroid to a given vector. 242 | /// 243 | /// If the index is empty, this method will return None. Otherwise, it will 244 | /// calculate the distance between the given vector and all centroids and 245 | /// return the index of the centroid with the smallest distance. 246 | fn find_nearest_centroid(&self, vector: &Vector) -> Option { 247 | self.centroids 248 | .par_iter() 249 | .map(|centroid| self.metric.distance(centroid, vector)) 250 | .enumerate() 251 | .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()) 252 | .map(|(index, _)| index) 253 | } 254 | 255 | /// Sort the centroids by their distance to a given vector. 256 | /// 257 | /// This method returns an array of cluster indices sorted by their 258 | /// distance to the vector. The first element will be the index of the 259 | /// nearest centroid. 260 | fn sort_nearest_centroids(&self, vector: &Vector) -> Vec { 261 | let mut distances = self 262 | .centroids 263 | .par_iter() 264 | .enumerate() 265 | .map(|(i, centroid)| (i, self.metric.distance(centroid, vector))) 266 | .collect::)>>(); 267 | 268 | // Sort the distances in ascending order. If the distance is NaN or 269 | // something else, it will be placed at the end. 270 | distances.sort_by(|(_, a), (_, b)| { 271 | a.partial_cmp(b).unwrap_or(Ordering::Greater) 272 | }); 273 | 274 | distances.iter().map(|(i, _)| *i).collect() 275 | } 276 | 277 | /// Split a cluster into two new clusters. 278 | /// 279 | /// The current cluster will be halved. 
The first half will be assigned to 280 | /// the current cluster, and the second half will be assigned to a new 281 | /// cluster with a new centroid. 282 | fn split_cluster( 283 | &mut self, 284 | cluster_id: &ClusterIndex, 285 | records: &HashMap, 286 | ) { 287 | let record_ids = &self.clusters[*cluster_id]; 288 | let vectors = record_ids 289 | .iter() 290 | .map(|id| &records.get(id).unwrap().vector) 291 | .collect::>(); 292 | 293 | let mut kmeans = KMeans::new(2).with_metric(self.metric); 294 | kmeans.fit(Rc::from(vectors)).unwrap(); 295 | 296 | let centroids = kmeans.centroids(); 297 | self.centroids[*cluster_id] = centroids[0].to_owned(); 298 | self.centroids.push(centroids[1].to_owned()); 299 | 300 | let mut clusters = [vec![], vec![]]; 301 | let assignments = kmeans.assignments(); 302 | for (i, cluster_id) in assignments.iter().enumerate() { 303 | clusters[*cluster_id].push(record_ids[i]); 304 | } 305 | 306 | self.clusters[*cluster_id] = clusters[0].to_vec(); 307 | self.clusters.push(clusters[1].to_vec()); 308 | } 309 | } 310 | 311 | #[cfg(test)] 312 | mod tests { 313 | use super::*; 314 | 315 | #[test] 316 | fn test_insert_many() { 317 | let params = Parameters::default(); 318 | let mut index = setup_index(&params); 319 | 320 | let mut records = HashMap::new(); 321 | for _ in 0..1000 { 322 | let id = RecordID::new(); 323 | let record = Record::random(params.dimension); 324 | records.insert(id, record); 325 | } 326 | 327 | for (id, record) in records.iter() { 328 | index.insert(id, record, &records).unwrap(); 329 | } 330 | 331 | assert!(index.centroids.len() > 20); 332 | } 333 | 334 | #[test] 335 | fn test_delete() { 336 | let params = Parameters::default(); 337 | let mut index = setup_index(&params); 338 | 339 | let mut ids = vec![]; 340 | for _ in 0..10 { 341 | let centroid = Vector::random(params.dimension); 342 | let mut cluster = vec![]; 343 | for _ in 0..10 { 344 | let id = RecordID::new(); 345 | cluster.push(id); 346 | ids.push(id); 347 | } 348 | 349 | 
index.centroids.push(centroid); 350 | index.clusters.push(cluster); 351 | } 352 | 353 | assert_eq!(ids.len(), 100); 354 | assert_eq!(index.centroids.len(), 10); 355 | 356 | index.delete(&ids[0]).unwrap(); 357 | for cluster in index.clusters.iter() { 358 | assert!(!cluster.contains(&ids[0])); 359 | } 360 | 361 | for i in 1..10 { 362 | index.delete(&ids[i]).unwrap(); 363 | } 364 | 365 | assert_eq!(index.centroids.len(), 9); 366 | } 367 | 368 | #[test] 369 | fn test_query() { 370 | let params = Parameters::default(); 371 | let mut index = setup_index(&params); 372 | 373 | // Populate the index with 1000 sequential records. 374 | // This allows us to predict the order of the results. 375 | let mut ids = vec![]; 376 | let mut records = HashMap::new(); 377 | for i in 0..1000 { 378 | let id = RecordID::new(); 379 | let vector = Vector::from(vec![i as f32; params.dimension]); 380 | 381 | let mut metadata = HashMap::new(); 382 | let value = Value::Number((1000 + i) as f64); 383 | metadata.insert("number".to_string(), value); 384 | 385 | let record = Record { vector, metadata }; 386 | records.insert(id, record); 387 | ids.push(id); 388 | } 389 | 390 | for (id, record) in records.iter() { 391 | index.insert(id, record, &records).unwrap(); 392 | } 393 | 394 | let query = Vector::from(vec![1.0; params.dimension]); 395 | let query_params = QueryParameters::default(); 396 | let result = index 397 | .query(&query, 10, &Filters::None, &query_params, &records) 398 | .unwrap(); 399 | 400 | assert_eq!(result.len(), 10); 401 | assert!(result.iter().any(|r| r.id == ids[0])); 402 | 403 | let metadata_filters = Filters::try_from("number > 1050").unwrap(); 404 | let result = index 405 | .query(&query, 10, &metadata_filters, &query_params, &records) 406 | .unwrap(); 407 | 408 | assert_eq!(result.len(), 10); 409 | assert!(result.iter().any(|r| r.id == ids[51])); 410 | } 411 | 412 | #[test] 413 | fn test_insert_centroid() { 414 | let params = Parameters::default(); 415 | let mut index = 
setup_index(&params); 416 | 417 | let vector = Vector::random(params.dimension); 418 | let cluster_id = index.insert_centroid(&vector); 419 | 420 | assert_eq!(index.centroids.len(), 1); 421 | assert_eq!(index.clusters.len(), 1); 422 | 423 | assert_eq!(index.centroids[0], vector); 424 | assert_eq!(cluster_id, 0); 425 | } 426 | 427 | #[test] 428 | fn test_update_centroid() { 429 | let params = Parameters::default(); 430 | let mut index = setup_index(&params); 431 | 432 | let initial_centroid = Vector::from(vec![0.0; params.dimension]); 433 | let cluster_id = index.insert_centroid(&initial_centroid); 434 | index.clusters[cluster_id].push(RecordID::new()); 435 | 436 | let vector = Vector::from(vec![1.0; params.dimension]); 437 | index.update_centroid(&cluster_id, &vector); 438 | 439 | let centroid = Vector::from(vec![0.5; params.dimension]); 440 | assert_eq!(index.centroids[cluster_id], centroid); 441 | } 442 | 443 | #[test] 444 | fn test_find_nearest_centroid_empty() { 445 | let params = Parameters::default(); 446 | let index = setup_index(&params); 447 | 448 | let query = Vector::random(params.dimension); 449 | assert_eq!(index.find_nearest_centroid(&query), None); 450 | } 451 | 452 | #[test] 453 | fn test_find_nearest_centroid() { 454 | let params = Parameters::default(); 455 | let mut index = setup_index(&params); 456 | 457 | for i in 1..5 { 458 | let centroid = Vector::from(vec![i as f32; params.dimension]); 459 | index.centroids.push(centroid); 460 | } 461 | 462 | let query = Vector::from(vec![0.0; params.dimension]); 463 | assert_eq!(index.find_nearest_centroid(&query), Some(0)); 464 | } 465 | 466 | #[test] 467 | fn test_split_cluster() { 468 | let params = Parameters::default(); 469 | let mut index = setup_index(&params); 470 | 471 | let mut ids = vec![]; 472 | let mut records = HashMap::new(); 473 | for i in 1..5 { 474 | let id = RecordID::new(); 475 | let vector = Vector::from(vec![i as f32; params.dimension]); 476 | let record = Record { vector, metadata: HashMap::new() }; 477
| 478 | ids.push(id); 479 | records.insert(id, record); 480 | } 481 | 482 | let centroid = Vector::from(vec![2.5; params.dimension]); 483 | index.centroids.push(centroid); 484 | index.clusters.push(ids); 485 | 486 | index.split_cluster(&0, &records); 487 | assert_eq!(index.centroids.len(), 2); 488 | } 489 | 490 | #[test] 491 | fn test_sort_nearest_centroids() { 492 | let params = Parameters::default(); 493 | let mut index = setup_index(¶ms); 494 | 495 | for i in 1..5 { 496 | let centroid = Vector::from(vec![i as f32; params.dimension]); 497 | index.centroids.push(centroid); 498 | } 499 | 500 | let query = Vector::from(vec![5.0; params.dimension]); 501 | let nearest = index.sort_nearest_centroids(&query); 502 | assert_eq!(nearest, vec![3, 2, 1, 0]); 503 | } 504 | 505 | fn setup_index(params: &Parameters) -> Index { 506 | let index = Index::new() 507 | .with_metric(params.metric) 508 | .with_density(params.density); 509 | 510 | index 511 | } 512 | } 513 | -------------------------------------------------------------------------------- /src/cores/mod.rs: -------------------------------------------------------------------------------- 1 | // Initialize the modules without making them public. 2 | mod database; 3 | mod index; 4 | mod storage; 5 | 6 | // Re-export types from the modules. 7 | pub use database::*; 8 | pub use index::*; 9 | pub use storage::*; 10 | 11 | // Import common dependencies below. 
12 | use crate::protos; 13 | use crate::types::*; 14 | use crate::utils::kmeans::KMeans; 15 | use hashbrown::HashMap; 16 | use rayon::prelude::*; 17 | use serde::de::DeserializeOwned; 18 | use serde::{Deserialize, Serialize}; 19 | use std::error::Error; 20 | use std::fs::OpenOptions; 21 | use std::path::{Path, PathBuf}; 22 | use std::sync::{Arc, RwLock}; 23 | use std::{env, fs}; 24 | use tonic::Status; 25 | -------------------------------------------------------------------------------- /src/cores/storage.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | /// Record storage interface. 4 | /// 5 | /// This interface wraps around Hashbrown's HashMap implementation to store 6 | /// the records. In the future, if needed, we can modify the storage 7 | /// implementation without changing the rest of the code. 8 | #[repr(C)] 9 | #[derive(Debug, Serialize, Deserialize)] 10 | pub struct Storage { 11 | count: usize, 12 | records: HashMap, 13 | } 14 | 15 | impl Storage { 16 | /// Create a new empty storage instance. 17 | pub fn new() -> Self { 18 | Storage { count: 0, records: HashMap::new() } 19 | } 20 | 21 | /// Insert a new record into the record storage. 22 | pub fn insert( 23 | &mut self, 24 | id: &RecordID, 25 | record: &Record, 26 | ) -> Result<(), Status> { 27 | self.records.insert(*id, record.to_owned()); 28 | self.count += 1; 29 | Ok(()) 30 | } 31 | 32 | /// Retrieve a record from the storage given its ID. 33 | pub fn get(&self, id: &RecordID) -> Result<&Record, Status> { 34 | let record = self.records.get(id); 35 | if record.is_none() { 36 | let message = "The specified record is not found"; 37 | return Err(Status::not_found(message)); 38 | } 39 | 40 | Ok(record.unwrap()) 41 | } 42 | 43 | /// Delete a record from the storage given its ID. 
44 | pub fn delete(&mut self, id: &RecordID) -> Result<(), Status> { 45 | self.records.remove(id); 46 | self.count -= 1; 47 | Ok(()) 48 | } 49 | 50 | /// Update a record metadata given its ID. 51 | /// 52 | /// Vector data should be immutable as it is tightly coupled with the 53 | /// semantic meaning of the record. If the vector data changes, users 54 | /// should create a new record instead. 55 | pub fn update( 56 | &mut self, 57 | id: &RecordID, 58 | metadata: &HashMap, 59 | ) -> Result<(), Status> { 60 | let record = match self.records.get_mut(id) { 61 | Some(record) => record, 62 | None => { 63 | let message = "The specified record is not found"; 64 | return Err(Status::not_found(message)); 65 | } 66 | }; 67 | 68 | record.metadata = metadata.to_owned(); 69 | Ok(()) 70 | } 71 | 72 | /// Return a reference to the records in the storage. 73 | pub fn records(&self) -> &HashMap { 74 | &self.records 75 | } 76 | 77 | /// Return the number of records in the storage. 78 | pub fn count(&self) -> usize { 79 | self.count 80 | } 81 | } 82 | 83 | #[cfg(test)] 84 | mod tests { 85 | use super::*; 86 | 87 | #[test] 88 | fn test_insert() { 89 | let mut storage = Storage::new(); 90 | 91 | let record = Record::random(128); 92 | let id = RecordID::new(); 93 | storage.insert(&id, &record).unwrap(); 94 | 95 | assert_eq!(storage.count, 1); 96 | assert_eq!(storage.count, storage.records.len()); 97 | } 98 | 99 | #[test] 100 | fn test_delete() { 101 | let mut storage = Storage::new(); 102 | 103 | let record = Record::random(128); 104 | let id = RecordID::new(); 105 | storage.insert(&id, &record).unwrap(); 106 | 107 | storage.delete(&id).unwrap(); 108 | assert_eq!(storage.count, 0); 109 | assert_eq!(storage.count, storage.records.len()); 110 | } 111 | 112 | #[test] 113 | fn test_update() { 114 | let mut storage = Storage::new(); 115 | 116 | let record = Record::random(128); 117 | let id = RecordID::new(); 118 | storage.insert(&id, &record).unwrap(); 119 | 120 | let mut metadata = 
HashMap::new(); 121 | metadata.insert("key".to_string(), Value::random()); 122 | storage.update(&id, &metadata).unwrap(); 123 | 124 | let updated_record = storage.records.get(&id).unwrap(); 125 | assert_eq!(updated_record.metadata, metadata); 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | mod cores; 2 | mod protos; 3 | mod types; 4 | mod utils; 5 | 6 | use clap::{arg, ArgMatches, Command}; 7 | use cores::{Database, Parameters}; 8 | use dotenv::dotenv; 9 | use protos::database_server::DatabaseServer; 10 | use std::sync::Arc; 11 | use std::thread; 12 | use std::time::Duration; 13 | use tonic::transport::Server; 14 | use types::Metric; 15 | 16 | const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(600); 17 | 18 | #[tokio::main] 19 | async fn main() { 20 | dotenv().ok(); 21 | tracing_subscriber::fmt::init(); 22 | 23 | let command = Command::new(env!("CARGO_PKG_NAME")) 24 | .version(env!("CARGO_PKG_VERSION")) 25 | .about("Interface to setup and manage OasysDB server") 26 | .arg_required_else_help(true) 27 | .subcommand(start()) 28 | .subcommand(configure()) 29 | .get_matches(); 30 | 31 | match command.subcommand() { 32 | Some(("start", args)) => start_handler(args).await, 33 | Some(("configure", args)) => configure_handler(args).await, 34 | _ => unreachable!(), 35 | } 36 | } 37 | 38 | fn start() -> Command { 39 | let arg_port = arg!(--port "Port to listen on") 40 | .default_value("2505") 41 | .value_parser(clap::value_parser!(u16)) 42 | .allow_negative_numbers(false); 43 | 44 | Command::new("start") 45 | .alias("run") 46 | .about("Start the database server") 47 | .arg(arg_port) 48 | } 49 | 50 | async fn start_handler(args: &ArgMatches) { 51 | // Unwrap is safe because Clap validates the arguments. 
52 | let port = args.get_one::("port").unwrap(); 53 | let addr = format!("[::]:{port}").parse().unwrap(); 54 | 55 | let db = Arc::new(Database::open().expect("Failed to open the database")); 56 | 57 | let db_clone = db.clone(); 58 | thread::spawn(move || loop { 59 | thread::sleep(SNAPSHOT_INTERVAL); 60 | db_clone.create_snapshot().expect("Failed to create a snapshot"); 61 | }); 62 | 63 | tracing::info!("Database server is ready on port {port}"); 64 | 65 | Server::builder() 66 | .add_service(DatabaseServer::new(db)) 67 | .serve(addr) 68 | .await 69 | .expect("Failed to start the database"); 70 | } 71 | 72 | fn configure() -> Command { 73 | let arg_dimension = arg!(--dim "Vector dimension") 74 | .required(true) 75 | .value_parser(clap::value_parser!(usize)) 76 | .allow_negative_numbers(false); 77 | 78 | // List optional arguments below. 79 | let arg_metric = arg!(--metric "Metric to calculate distance") 80 | .default_value(Metric::Euclidean.as_str()) 81 | .value_parser(clap::value_parser!(Metric)); 82 | 83 | let arg_density = arg!(--density "Density of the cluster") 84 | .default_value("256") 85 | .value_parser(clap::value_parser!(usize)) 86 | .allow_negative_numbers(false); 87 | 88 | Command::new("configure") 89 | .about("Configure the initial database parameters") 90 | .arg(arg_dimension) 91 | .arg(arg_metric) 92 | .arg(arg_density) 93 | } 94 | 95 | async fn configure_handler(args: &ArgMatches) { 96 | let dim = *args.get_one::("dim").unwrap(); 97 | let metric = *args.get_one::("metric").unwrap(); 98 | let density = *args.get_one::("density").unwrap(); 99 | 100 | let params = Parameters { dimension: dim, metric, density }; 101 | Database::configure(¶ms); 102 | } 103 | -------------------------------------------------------------------------------- /src/protos.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::all)] 2 | #![allow(non_snake_case)] 3 | tonic::include_proto!("database"); 4 | 
-------------------------------------------------------------------------------- /src/types/filter.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | /// Joined multiple filters operation with either AND or OR. 4 | /// 5 | /// At the moment, OasysDB only supports single-type join operations. This 6 | /// means that we can't use both AND and OR operations in the same filter. 7 | #[derive(Debug, Clone, PartialEq, PartialOrd)] 8 | pub enum Filters { 9 | None, 10 | And(Vec), 11 | Or(Vec), 12 | } 13 | 14 | impl Filters { 15 | /// Returns true if the record passes the filters. 16 | /// - metadata: Record metadata to check against the filters. 17 | /// 18 | /// Filters of NONE type will always return true. This is useful when 19 | /// no filters are provided and we want to include all records. 20 | pub fn apply(&self, metadata: &HashMap) -> bool { 21 | match self { 22 | Filters::None => true, 23 | Filters::And(filters) => filters.iter().all(|f| f.apply(metadata)), 24 | Filters::Or(filters) => filters.iter().any(|f| f.apply(metadata)), 25 | } 26 | } 27 | } 28 | 29 | impl TryFrom<&str> for Filters { 30 | type Error = Status; 31 | fn try_from(value: &str) -> Result { 32 | if value.is_empty() { 33 | return Ok(Filters::None); 34 | } 35 | 36 | const OR: &str = " OR "; 37 | const AND: &str = " AND "; 38 | 39 | // Check which join operator is used. 
40 | let or_count = value.matches(OR).count(); 41 | let and_count = value.matches(AND).count(); 42 | 43 | if or_count > 0 && and_count > 0 { 44 | let message = "Mixing AND and OR join operators is not supported"; 45 | return Err(Status::invalid_argument(message)); 46 | } 47 | 48 | let join = if or_count > 0 { OR } else { AND }; 49 | let filters = value 50 | .split(join) 51 | .map(TryInto::try_into) 52 | .collect::>()?; 53 | 54 | let filters = match join { 55 | OR => Filters::Or(filters), 56 | _ => Filters::And(filters), 57 | }; 58 | 59 | Ok(filters) 60 | } 61 | } 62 | 63 | /// Record metadata filter. 64 | /// 65 | /// Using the filter operator, the record metadata can be compared against 66 | /// a specific value to determine if it should be included in the results. 67 | #[derive(Debug, Clone, PartialEq, PartialOrd)] 68 | pub struct Filter { 69 | key: String, 70 | value: Value, 71 | operator: Operator, 72 | } 73 | 74 | impl Filter { 75 | fn apply(&self, metadata: &HashMap) -> bool { 76 | let value = match metadata.get(&self.key) { 77 | Some(value) => value, 78 | None => return false, 79 | }; 80 | 81 | match (value, &self.value) { 82 | (Value::Text(a), Value::Text(b)) => self.filter_text(a, b), 83 | (Value::Number(a), Value::Number(b)) => self.filter_number(a, b), 84 | (Value::Boolean(a), Value::Boolean(b)) => self.filter_boolean(a, b), 85 | _ => false, 86 | } 87 | } 88 | 89 | fn filter_text(&self, a: impl AsRef, b: impl AsRef) -> bool { 90 | let (a, b) = (a.as_ref(), b.as_ref()); 91 | match self.operator { 92 | Operator::Equal => a == b, 93 | Operator::NotEqual => a != b, 94 | Operator::Contains => a.contains(b), 95 | _ => false, 96 | } 97 | } 98 | 99 | fn filter_number(&self, a: &f64, b: &f64) -> bool { 100 | match self.operator { 101 | Operator::Equal => a == b, 102 | Operator::NotEqual => a != b, 103 | Operator::GreaterThan => a > b, 104 | Operator::GreaterThanOrEqual => a >= b, 105 | Operator::LessThan => a < b, 106 | Operator::LessThanOrEqual => a <= b, 107 | 
_ => false, 108 | } 109 | } 110 | 111 | fn filter_boolean(&self, a: &bool, b: &bool) -> bool { 112 | match self.operator { 113 | Operator::Equal => a == b, 114 | Operator::NotEqual => a != b, 115 | _ => false, 116 | } 117 | } 118 | } 119 | 120 | impl TryFrom<&str> for Filter { 121 | type Error = Status; 122 | fn try_from(value: &str) -> Result { 123 | if value.is_empty() { 124 | let message = "Filter string cannot be empty"; 125 | return Err(Status::invalid_argument(message)); 126 | } 127 | 128 | // Split the filter string into EXACTLY 3 parts. 129 | let parts = value 130 | .splitn(3, ' ') 131 | .map(|token| token.trim()) 132 | .collect::>(); 133 | 134 | let key = parts[0].to_string(); 135 | let operator = Operator::try_from(parts[1])?; 136 | let value = Value::from(parts[2]); 137 | 138 | let filter = Filter { key, value, operator }; 139 | Ok(filter) 140 | } 141 | } 142 | 143 | #[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd)] 144 | pub enum Operator { 145 | Equal, 146 | NotEqual, 147 | GreaterThan, 148 | GreaterThanOrEqual, 149 | LessThan, 150 | LessThanOrEqual, 151 | Contains, 152 | } 153 | 154 | impl TryFrom<&str> for Operator { 155 | type Error = Status; 156 | fn try_from(value: &str) -> Result { 157 | let operator = match value { 158 | "CONTAINS" => Operator::Contains, 159 | "=" => Operator::Equal, 160 | "!=" => Operator::NotEqual, 161 | ">" => Operator::GreaterThan, 162 | ">=" => Operator::GreaterThanOrEqual, 163 | "<" => Operator::LessThan, 164 | "<=" => Operator::LessThanOrEqual, 165 | _ => { 166 | let message = format!("Invalid filter operator: {value}"); 167 | return Err(Status::invalid_argument(message)); 168 | } 169 | }; 170 | 171 | Ok(operator) 172 | } 173 | } 174 | 175 | #[cfg(test)] 176 | mod tests { 177 | use super::*; 178 | use std::error::Error; 179 | 180 | #[test] 181 | fn test_filters_from_string() { 182 | let filters = Filters::try_from("name CONTAINS Ada").unwrap(); 183 | let expected = Filters::And(vec![Filter { 184 | key: 
"name".into(), 185 | value: "Ada".into(), 186 | operator: Operator::Contains, 187 | }]); 188 | 189 | assert_eq!(filters, expected); 190 | 191 | let filters = Filters::try_from("gpa >= 3.0 OR age < 21").unwrap(); 192 | let expected = { 193 | let filter_gpa = Filter { 194 | key: "gpa".into(), 195 | value: Value::Number(3.0), 196 | operator: Operator::GreaterThanOrEqual, 197 | }; 198 | 199 | let filter_age = Filter { 200 | key: "age".into(), 201 | value: Value::Number(21.0), 202 | operator: Operator::LessThan, 203 | }; 204 | 205 | Filters::Or(vec![filter_gpa, filter_age]) 206 | }; 207 | 208 | assert_eq!(filters, expected); 209 | } 210 | 211 | #[test] 212 | fn test_filters_apply() -> Result<(), Box> { 213 | let data = setup_metadata(); 214 | 215 | let filters = Filters::try_from("name CONTAINS Alice")?; 216 | assert!(filters.apply(&data)); 217 | 218 | let filters = Filters::try_from("name = Bob")?; 219 | assert!(!filters.apply(&data)); 220 | 221 | let filters = Filters::try_from("age >= 20 AND gpa < 4.0")?; 222 | assert!(filters.apply(&data)); 223 | 224 | let filters = Filters::try_from("age >= 20 AND gpa < 3.0")?; 225 | assert!(!filters.apply(&data)); 226 | 227 | let filters = Filters::try_from("active = true")?; 228 | assert!(filters.apply(&data)); 229 | 230 | Ok(()) 231 | } 232 | 233 | fn setup_metadata() -> HashMap { 234 | let keys = vec!["name", "age", "gpa", "active"]; 235 | let values: Vec = vec![ 236 | "Alice".into(), 237 | Value::Number(20.0), 238 | Value::Number(3.5), 239 | Value::Boolean(true), 240 | ]; 241 | 242 | let mut data = HashMap::new(); 243 | for (key, value) in keys.into_iter().zip(values.into_iter()) { 244 | data.insert(key.into(), value); 245 | } 246 | 247 | data 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /src/types/metric.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use simsimd::SpatialSimilarity; 3 | 4 | // Distance name 
constants. 5 | const EUCLIDEAN: &str = "euclidean"; 6 | const COSINE: &str = "cosine"; 7 | 8 | /// Distance formula for vector similarity calculations. 9 | /// 10 | /// ### Euclidean 11 | /// We use the squared Euclidean distance instead for a slight performance 12 | /// boost since we only use the distance for comparison. 13 | /// 14 | /// ### Cosine 15 | /// We use cosine distance instead of cosine similarity to be consistent with 16 | /// other distance metrics where a lower value indicates a closer match. 17 | #[allow(missing_docs)] 18 | #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] 19 | pub enum Metric { 20 | Euclidean, 21 | Cosine, 22 | } 23 | 24 | impl Metric { 25 | /// Calculate the distance between two vectors. 26 | pub fn distance(&self, a: &Vector, b: &Vector) -> Option { 27 | let (a, b) = (a.as_slice(), b.as_slice()); 28 | match self { 29 | Metric::Euclidean => f32::sqeuclidean(a, b), 30 | Metric::Cosine => f32::cosine(a, b), 31 | } 32 | } 33 | 34 | /// Return the metric name as a string slice. 
35 | pub fn as_str(&self) -> &str { 36 | match self { 37 | Metric::Euclidean => EUCLIDEAN, 38 | Metric::Cosine => COSINE, 39 | } 40 | } 41 | } 42 | 43 | impl From<&str> for Metric { 44 | fn from(value: &str) -> Self { 45 | let value = value.to_lowercase(); 46 | match value.as_str() { 47 | COSINE => Metric::Cosine, 48 | EUCLIDEAN => Metric::Euclidean, 49 | _ => panic!("Metric should be cosine or euclidean"), 50 | } 51 | } 52 | } 53 | 54 | impl From for Metric { 55 | fn from(value: String) -> Self { 56 | Metric::from(value.as_str()) 57 | } 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::*; 63 | 64 | #[test] 65 | fn test_distance() { 66 | let a = Vector::from(vec![1.0, 2.0, 3.0]); 67 | let b = Vector::from(vec![4.0, 5.0, 6.0]); 68 | 69 | let euclidean = Metric::Euclidean.distance(&a, &b).unwrap(); 70 | let cosine = Metric::Cosine.distance(&a, &b).unwrap(); 71 | 72 | assert_eq!(euclidean, 27.0); 73 | assert_eq!(cosine.round(), 0.0); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/types/mod.rs: -------------------------------------------------------------------------------- 1 | // Initialize modules without publicizing them. 2 | mod filter; 3 | mod metric; 4 | mod record; 5 | mod vector; 6 | 7 | // Re-export types from the modules. 8 | pub use filter::*; 9 | pub use metric::*; 10 | pub use record::*; 11 | pub use vector::*; 12 | 13 | // Import common dependencies below. 14 | use crate::protos; 15 | use hashbrown::HashMap; 16 | use serde::{Deserialize, Serialize}; 17 | use tonic::Status; 18 | -------------------------------------------------------------------------------- /src/types/record.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use std::fmt; 3 | use std::str::FromStr; 4 | use uuid::Uuid; 5 | 6 | /// Record identifier. 7 | /// 8 | /// OasysDB should be able to deal with a lot of writes and deletes. 
Using UUID 9 | /// version 4 to allow us to generate a lot of IDs with very low probability 10 | /// of collision. 11 | #[derive(Debug, Serialize, Deserialize, Clone, Copy)] 12 | #[derive(PartialOrd, Ord, PartialEq, Eq, Hash)] 13 | pub struct RecordID(Uuid); 14 | 15 | impl RecordID { 16 | /// Generate a new random record ID using UUID v4. 17 | pub fn new() -> Self { 18 | RecordID(Uuid::new_v4()) 19 | } 20 | } 21 | 22 | impl fmt::Display for RecordID { 23 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 24 | write!(f, "{}", self.0) 25 | } 26 | } 27 | 28 | impl FromStr for RecordID { 29 | type Err = Status; 30 | fn from_str(s: &str) -> Result { 31 | Ok(RecordID(Uuid::try_parse(s).map_err(|_| { 32 | let message = "Record ID should be a string-encoded UUID"; 33 | Status::invalid_argument(message) 34 | })?)) 35 | } 36 | } 37 | 38 | /// Metadata value. 39 | /// 40 | /// OasysDB doesn't support nested objects in metadata for performance reasons. 41 | /// We only need to support primitive types for metadata. 42 | #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd)] 43 | pub enum Value { 44 | Text(String), 45 | Number(f64), 46 | Boolean(bool), 47 | } 48 | 49 | impl From for Value { 50 | fn from(value: String) -> Self { 51 | Value::from(value.as_str()) 52 | } 53 | } 54 | 55 | impl From<&str> for Value { 56 | fn from(value: &str) -> Self { 57 | // Try to parse the value as a number. 58 | // This is must be prioritized over boolean parsing. 59 | if let Ok(float) = value.parse::() { 60 | return Value::Number(float); 61 | } 62 | 63 | if let Ok(boolean) = value.parse::() { 64 | return Value::Boolean(boolean); 65 | } 66 | 67 | // Remove quotes from the start and end of the string. 68 | // This ensures that we won't have to deal with quotes. 
69 | let match_quotes = |c: char| c == '\"' || c == '\''; 70 | let value = value 71 | .trim_start_matches(match_quotes) 72 | .trim_end_matches(match_quotes) 73 | .to_string(); 74 | 75 | Value::Text(value) 76 | } 77 | } 78 | 79 | impl From for protos::Value { 80 | fn from(value: Value) -> Self { 81 | type ProtoValue = protos::value::Value; 82 | let value = match value { 83 | Value::Text(text) => ProtoValue::Text(text), 84 | Value::Number(number) => ProtoValue::Number(number), 85 | Value::Boolean(boolean) => ProtoValue::Boolean(boolean), 86 | }; 87 | 88 | protos::Value { value: Some(value) } 89 | } 90 | } 91 | 92 | impl TryFrom for Value { 93 | type Error = Status; 94 | fn try_from(value: protos::Value) -> Result { 95 | type ProtoValue = protos::value::Value; 96 | match value.value { 97 | Some(ProtoValue::Text(text)) => Ok(Value::Text(text)), 98 | Some(ProtoValue::Number(number)) => Ok(Value::Number(number)), 99 | Some(ProtoValue::Boolean(boolean)) => Ok(Value::Boolean(boolean)), 100 | None => Err(Status::invalid_argument("Metadata value is required")), 101 | } 102 | } 103 | } 104 | 105 | /// OasysDB vector record. 106 | /// 107 | /// This is the main data structure for OasysDB. It contains the vector data 108 | /// and metadata of the record. Metadata is a key-value store that can be used 109 | /// to store additional information about the vector. 
110 | #[derive(Debug, Clone, Serialize, Deserialize)] 111 | pub struct Record { 112 | pub vector: Vector, 113 | pub metadata: HashMap, 114 | } 115 | 116 | impl From for protos::Record { 117 | fn from(value: Record) -> Self { 118 | let vector = value.vector.into(); 119 | let metadata = value 120 | .metadata 121 | .into_iter() 122 | .map(|(key, value)| (key, value.into())) 123 | .collect(); 124 | 125 | protos::Record { vector: Some(vector), metadata } 126 | } 127 | } 128 | 129 | impl TryFrom for Record { 130 | type Error = Status; 131 | fn try_from(value: protos::Record) -> Result { 132 | let vector = match value.vector { 133 | Some(vector) => Vector::try_from(vector)?, 134 | None => { 135 | let message = "Vector data should not be empty"; 136 | return Err(Status::invalid_argument(message)); 137 | } 138 | }; 139 | 140 | let metadata = value 141 | .metadata 142 | .into_iter() 143 | .map(|(k, v)| Ok((k, v.try_into()?))) 144 | .collect::, Self::Error>>()?; 145 | 146 | Ok(Record { vector, metadata }) 147 | } 148 | } 149 | 150 | #[cfg(test)] 151 | mod tests { 152 | use super::*; 153 | use rand::random; 154 | 155 | impl Value { 156 | pub fn random() -> Self { 157 | Value::Number(random::()) 158 | } 159 | } 160 | 161 | impl Record { 162 | pub fn random(dimension: usize) -> Self { 163 | let mut metadata = HashMap::new(); 164 | metadata.insert("key".to_string(), Value::random()); 165 | Record { vector: Vector::random(dimension), metadata } 166 | } 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/types/vector.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | /// Vector data structure. 4 | /// 5 | /// We use a boxed slice to store the vector data for a slight memory 6 | /// efficiency boost. The length of the vector is not checked, so a length 7 | /// validation should be performed before most operations. 
8 | #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd)] 9 | pub struct Vector(Box<[f32]>); 10 | 11 | impl Vector { 12 | /// Return the vector as a slice of floating-point numbers. 13 | pub fn as_slice(&self) -> &[f32] { 14 | self.0.as_ref() 15 | } 16 | 17 | /// Return as a vector of floating-point numbers. 18 | pub fn to_vec(&self) -> Vec { 19 | self.0.to_vec() 20 | } 21 | 22 | /// Return the length of the vector. 23 | pub fn len(&self) -> usize { 24 | self.0.len() 25 | } 26 | } 27 | 28 | // Vector conversion implementations. 29 | 30 | impl From> for Vector { 31 | fn from(value: Vec) -> Self { 32 | Vector(value.into_boxed_slice()) 33 | } 34 | } 35 | 36 | impl From for protos::Vector { 37 | fn from(value: Vector) -> Self { 38 | protos::Vector { data: value.to_vec() } 39 | } 40 | } 41 | 42 | impl TryFrom for Vector { 43 | type Error = Status; 44 | fn try_from(value: protos::Vector) -> Result { 45 | Ok(Vector(value.data.into_boxed_slice())) 46 | } 47 | } 48 | 49 | #[cfg(test)] 50 | mod tests { 51 | use super::*; 52 | 53 | #[test] 54 | fn test_random_vector() { 55 | let dim = 128; 56 | let vector = Vector::random(dim); 57 | assert_eq!(vector.len(), dim); 58 | } 59 | 60 | impl Vector { 61 | pub fn random(dimension: usize) -> Self { 62 | let vector = vec![0.0; dimension] 63 | .iter() 64 | .map(|_| rand::random::()) 65 | .collect::>(); 66 | 67 | Vector(vector.into_boxed_slice()) 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/utils/kmeans.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use rand::seq::SliceRandom; 3 | use rand::Rng; 4 | use std::cmp::min; 5 | use std::rc::Rc; 6 | 7 | type ClusterIndex = usize; 8 | 9 | /// A list of vectors. 10 | /// 11 | /// We use a reference-counted slice to store the vectors. This allows us to 12 | /// share the vectors around without having to actually clone the vectors. 
13 | type Vectors<'v> = Rc<[&'v Vector]>; 14 | 15 | /// K-means clustering algorithm. 16 | /// 17 | /// The K-means algorithm is a clustering algorithm that partitions a dataset 18 | /// into K clusters by iteratively assigning data points to the nearest cluster 19 | /// centroids and recalculating these centroids until they are stable. 20 | #[derive(Debug)] 21 | pub struct KMeans { 22 | assignments: Vec, 23 | centroids: Vec, 24 | 25 | // Algorithm parameters. 26 | metric: Metric, 27 | n_clusters: usize, 28 | max_iter: usize, 29 | } 30 | 31 | impl KMeans { 32 | /// Initialize the K-means algorithm with default parameters. 33 | /// 34 | /// Default parameters: 35 | /// - metric: Euclidean 36 | /// - max_iter: 100 37 | pub fn new(n_clusters: usize) -> Self { 38 | Self { 39 | n_clusters, 40 | metric: Metric::Euclidean, 41 | max_iter: 100, 42 | assignments: Vec::new(), 43 | centroids: Vec::with_capacity(n_clusters), 44 | } 45 | } 46 | 47 | /// Configure the metric used for distance calculations. 48 | pub fn with_metric(mut self, metric: Metric) -> Self { 49 | self.metric = metric; 50 | self 51 | } 52 | 53 | /// Configure the maximum number of iterations to run the algorithm. 54 | #[allow(dead_code)] 55 | pub fn with_max_iter(mut self, max_iter: usize) -> Self { 56 | self.max_iter = max_iter; 57 | self 58 | } 59 | 60 | /// Train the K-means algorithm with the given vectors. 61 | pub fn fit(&mut self, vectors: Vectors) -> Result<(), Box> { 62 | if self.n_clusters > vectors.len() { 63 | let message = "Dataset is smaller than cluster configuration."; 64 | return Err(message.into()); 65 | } 66 | 67 | self.centroids = self.initialize_centroids(vectors.clone()); 68 | self.assignments = vec![0; vectors.len()]; 69 | 70 | let mut no_improvement_count = 0; 71 | for _ in 0..self.max_iter { 72 | if no_improvement_count > 3 { 73 | break; 74 | } 75 | 76 | let assignments = self.assign_clusters(vectors.clone()); 77 | 78 | // Check at most 1000 assignments for convergence. 
79 | // This prevents checking the entire dataset for large datasets. 80 | let end = min(1000, assignments.len()); 81 | match assignments[0..end] == self.assignments[0..end] { 82 | true => no_improvement_count += 1, 83 | false => no_improvement_count = 0, 84 | } 85 | 86 | self.assignments = assignments; 87 | self.centroids = self.update_centroids(vectors.clone()); 88 | } 89 | 90 | Ok(()) 91 | } 92 | 93 | fn initialize_centroids(&self, vectors: Vectors) -> Vec { 94 | let mut rng = rand::thread_rng(); 95 | let mut centroids = Vec::with_capacity(self.n_clusters); 96 | 97 | // Pick the first centroid randomly. 98 | let first_centroid = vectors.choose(&mut rng).cloned().unwrap(); 99 | centroids.push(first_centroid.to_owned()); 100 | 101 | for _ in 1..self.n_clusters { 102 | let nearest_centroid_distance = |vector: &&Vector| { 103 | centroids 104 | .iter() 105 | .map(|centroid| self.metric.distance(vector, centroid)) 106 | .min_by(|a, b| a.partial_cmp(b).unwrap()) 107 | .unwrap() 108 | .unwrap() 109 | }; 110 | 111 | let distances = vectors 112 | .par_iter() 113 | .map(nearest_centroid_distance) 114 | .collect::>(); 115 | 116 | // Choose the next centroid with probability proportional 117 | // to the squared distance. 118 | let threshold = rng.gen::() * distances.iter().sum::(); 119 | let mut cumulative_sum = 0.0; 120 | 121 | for (i, distance) in distances.iter().enumerate() { 122 | cumulative_sum += distance; 123 | if cumulative_sum >= threshold { 124 | centroids.push(vectors[i].clone()); 125 | break; 126 | } 127 | } 128 | } 129 | 130 | centroids 131 | } 132 | 133 | fn update_centroids(&self, vectors: Vectors) -> Vec { 134 | let dimension = vectors[0].len(); 135 | let mut centroids = vec![vec![0.0; dimension]; self.n_clusters]; 136 | let mut cluster_count = vec![0; self.n_clusters]; 137 | 138 | // Sum up vectors assigned to the cluster into the centroid. 
139 | for (i, cluster_id) in self.assignments.iter().enumerate() { 140 | let cluster_id = *cluster_id; 141 | cluster_count[cluster_id] += 1; 142 | centroids[cluster_id] = centroids[cluster_id] 143 | .iter() 144 | .zip(vectors[i].as_slice().iter()) 145 | .map(|(a, b)| a + b) 146 | .collect(); 147 | } 148 | 149 | // Divide the sum by the number of vectors in the cluster. 150 | for i in 0..self.n_clusters { 151 | // If the cluster is empty, reinitialize the centroid. 152 | if cluster_count[i] == 0 { 153 | let mut rng = rand::thread_rng(); 154 | centroids[i] = vectors.choose(&mut rng).unwrap().to_vec(); 155 | continue; 156 | } 157 | 158 | centroids[i] = centroids[i] 159 | .iter() 160 | .map(|x| x / cluster_count[i] as f32) 161 | .collect(); 162 | } 163 | 164 | centroids.into_par_iter().map(|centroid| centroid.into()).collect() 165 | } 166 | 167 | /// Create cluster assignments for the vectors. 168 | fn assign_clusters(&self, vectors: Vectors) -> Vec { 169 | vectors 170 | .par_iter() 171 | .map(|vector| self.find_nearest_centroid(vector)) 172 | .collect() 173 | } 174 | 175 | /// Find the index of the nearest centroid from a vector. 176 | pub fn find_nearest_centroid(&self, vector: &Vector) -> ClusterIndex { 177 | self.centroids 178 | .par_iter() 179 | .enumerate() 180 | .map(|(i, centroid)| (i, self.metric.distance(vector, centroid))) 181 | .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()) 182 | .map(|(id, _)| id) 183 | .unwrap() 184 | } 185 | 186 | /// Returns index-mapped cluster assignment for each data point. 187 | /// 188 | /// The index corresponds to the data point index and the value corresponds 189 | /// to the cluster index. For example, given the following assignments: 190 | /// 191 | /// ```text 192 | /// [0, 1, 0, 1, 2] 193 | /// ``` 194 | /// 195 | /// This means: 196 | /// - Point 0 and 2 are assigned to cluster 0. 197 | /// - Point 1 and 3 are assigned to cluster 1. 198 | /// - Point 4 is assigned to cluster 2. 
199 | /// 200 | pub fn assignments(&self) -> &[ClusterIndex] { 201 | &self.assignments 202 | } 203 | 204 | /// Returns the centroids of each cluster. 205 | pub fn centroids(&self) -> &[Vector] { 206 | &self.centroids 207 | } 208 | } 209 | 210 | #[cfg(test)] 211 | mod tests { 212 | use super::*; 213 | 214 | #[test] 215 | fn test_kmeans_fit_1_to_1() { 216 | evaluate_kmeans(1, generate_vectors(1)); 217 | } 218 | 219 | #[test] 220 | fn test_kmeans_fit_10_to_5() { 221 | evaluate_kmeans(5, generate_vectors(10)); 222 | } 223 | 224 | #[test] 225 | fn test_kmeans_fit_100_to_10() { 226 | evaluate_kmeans(10, generate_vectors(100)); 227 | } 228 | 229 | fn evaluate_kmeans(n_cluster: usize, vectors: Vec) { 230 | let vectors: Vectors = { 231 | let vectors_ref: Vec<&Vector> = vectors.iter().collect(); 232 | Rc::from(vectors_ref.as_slice()) 233 | }; 234 | 235 | let mut kmeans = KMeans::new(n_cluster); 236 | kmeans.fit(vectors.clone()).unwrap(); 237 | assert_eq!(kmeans.centroids().len(), n_cluster); 238 | 239 | let mut correct_count = 0; 240 | for (i, clusted_id) in kmeans.assignments().iter().enumerate() { 241 | let vector = vectors[i]; 242 | let nearest_centroid = kmeans.find_nearest_centroid(vector); 243 | if clusted_id == &nearest_centroid { 244 | correct_count += 1; 245 | } 246 | } 247 | 248 | let accuracy = correct_count as f32 / vectors.len() as f32; 249 | assert!(accuracy > 0.99); 250 | } 251 | 252 | fn generate_vectors(n: usize) -> Vec { 253 | (0..n).map(|i| Vector::from(vec![i as f32; 3])).collect() 254 | } 255 | } 256 | -------------------------------------------------------------------------------- /src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod kmeans; 2 | 3 | // Import common dependencies below. 4 | use crate::types::{Metric, Vector}; 5 | use rayon::prelude::*; 6 | use std::error::Error; 7 | --------------------------------------------------------------------------------