├── .dockerignore ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── LICENSE ├── README.md ├── benches ├── gemm.rs ├── gemv.rs └── hamming_distance.rs ├── src ├── constants.rs ├── lib.rs ├── main.rs ├── main1.rs ├── math.rs ├── math │ ├── gemm.rs │ ├── gemv.rs │ └── hamming_distance.rs ├── services.rs ├── services │ ├── commit.rs │ ├── lock_service.rs │ ├── namespace_state.rs │ └── query.rs ├── structures.rs ├── structures │ ├── dense_vector_list.rs │ ├── filters.rs │ ├── inverted_index.rs │ ├── metadata_index.rs │ ├── mmap_tree.rs │ ├── mmap_tree │ │ ├── node.rs │ │ ├── serialization.rs │ │ └── storage.rs │ ├── tree.rs │ ├── tree │ │ ├── node.rs │ │ ├── serialization.rs │ │ └── storage.rs │ └── wal.rs ├── utils.rs └── utils │ └── quantization.rs └── tests ├── filters.rs ├── main.rs ├── math.rs └── trees.rs /.dockerignore: -------------------------------------------------------------------------------- 1 | /tests/data/ 2 | /target -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /tests/data/ -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.21.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler" 16 | version = "1.0.2" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 19 | 20 | [[package]] 21 | name = "aho-corasick" 22 | version = "1.1.3" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 | dependencies = [ 26 | "memchr", 27 | ] 28 | 29 | [[package]] 30 | name = "anes" 31 | version = "0.1.6" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 34 | 35 | [[package]] 36 | name = "anstream" 37 | version = "0.6.13" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" 40 | dependencies = [ 41 | "anstyle", 42 | "anstyle-parse", 43 | "anstyle-query", 44 | "anstyle-wincon", 45 | "colorchoice", 46 | "utf8parse", 47 | ] 48 | 49 | [[package]] 50 | name = "anstyle" 51 | version = "1.0.6" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" 54 | 55 | [[package]] 56 | name = "anstyle-parse" 57 | version = "0.2.3" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" 60 | dependencies = [ 61 | "utf8parse", 62 | ] 63 | 64 | [[package]] 65 | name = "anstyle-query" 66 | version = "1.0.2" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | 
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" 69 | dependencies = [ 70 | "windows-sys 0.52.0", 71 | ] 72 | 73 | [[package]] 74 | name = "anstyle-wincon" 75 | version = "3.0.2" 76 | source = "registry+https://github.com/rust-lang/crates.io-index" 77 | checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" 78 | dependencies = [ 79 | "anstyle", 80 | "windows-sys 0.52.0", 81 | ] 82 | 83 | [[package]] 84 | name = "autocfg" 85 | version = "1.2.0" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" 88 | 89 | [[package]] 90 | name = "backtrace" 91 | version = "0.3.71" 92 | source = "registry+https://github.com/rust-lang/crates.io-index" 93 | checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" 94 | dependencies = [ 95 | "addr2line", 96 | "cc", 97 | "cfg-if", 98 | "libc", 99 | "miniz_oxide", 100 | "object", 101 | "rustc-demangle", 102 | ] 103 | 104 | [[package]] 105 | name = "base64" 106 | version = "0.21.7" 107 | source = "registry+https://github.com/rust-lang/crates.io-index" 108 | checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" 109 | 110 | [[package]] 111 | name = "bitflags" 112 | version = "1.3.2" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 115 | 116 | [[package]] 117 | name = "block-buffer" 118 | version = "0.10.4" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" 121 | dependencies = [ 122 | "generic-array", 123 | ] 124 | 125 | [[package]] 126 | name = "bumpalo" 127 | version = "3.16.0" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" 130 | 131 
| [[package]] 132 | name = "byteorder" 133 | version = "1.5.0" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 136 | 137 | [[package]] 138 | name = "bytes" 139 | version = "1.6.0" 140 | source = "registry+https://github.com/rust-lang/crates.io-index" 141 | checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" 142 | 143 | [[package]] 144 | name = "cast" 145 | version = "0.3.0" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 148 | 149 | [[package]] 150 | name = "cc" 151 | version = "1.0.95" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "d32a725bc159af97c3e629873bb9f88fb8cf8a4867175f76dc987815ea07c83b" 154 | 155 | [[package]] 156 | name = "cfg-if" 157 | version = "1.0.0" 158 | source = "registry+https://github.com/rust-lang/crates.io-index" 159 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 160 | 161 | [[package]] 162 | name = "ciborium" 163 | version = "0.2.2" 164 | source = "registry+https://github.com/rust-lang/crates.io-index" 165 | checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 166 | dependencies = [ 167 | "ciborium-io", 168 | "ciborium-ll", 169 | "serde", 170 | ] 171 | 172 | [[package]] 173 | name = "ciborium-io" 174 | version = "0.2.2" 175 | source = "registry+https://github.com/rust-lang/crates.io-index" 176 | checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 177 | 178 | [[package]] 179 | name = "ciborium-ll" 180 | version = "0.2.2" 181 | source = "registry+https://github.com/rust-lang/crates.io-index" 182 | checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 183 | dependencies = [ 184 | "ciborium-io", 185 | "half", 186 | ] 187 | 188 | [[package]] 189 | name = "clap" 
190 | version = "4.5.4" 191 | source = "registry+https://github.com/rust-lang/crates.io-index" 192 | checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" 193 | dependencies = [ 194 | "clap_builder", 195 | ] 196 | 197 | [[package]] 198 | name = "clap_builder" 199 | version = "4.5.2" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" 202 | dependencies = [ 203 | "anstyle", 204 | "clap_lex", 205 | ] 206 | 207 | [[package]] 208 | name = "clap_lex" 209 | version = "0.7.0" 210 | source = "registry+https://github.com/rust-lang/crates.io-index" 211 | checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" 212 | 213 | [[package]] 214 | name = "colorchoice" 215 | version = "1.0.0" 216 | source = "registry+https://github.com/rust-lang/crates.io-index" 217 | checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" 218 | 219 | [[package]] 220 | name = "cpufeatures" 221 | version = "0.2.12" 222 | source = "registry+https://github.com/rust-lang/crates.io-index" 223 | checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" 224 | dependencies = [ 225 | "libc", 226 | ] 227 | 228 | [[package]] 229 | name = "criterion" 230 | version = "0.5.1" 231 | source = "registry+https://github.com/rust-lang/crates.io-index" 232 | checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" 233 | dependencies = [ 234 | "anes", 235 | "cast", 236 | "ciborium", 237 | "clap", 238 | "criterion-plot", 239 | "is-terminal", 240 | "itertools", 241 | "num-traits", 242 | "once_cell", 243 | "oorandom", 244 | "plotters", 245 | "rayon", 246 | "regex", 247 | "serde", 248 | "serde_derive", 249 | "serde_json", 250 | "tinytemplate", 251 | "walkdir", 252 | ] 253 | 254 | [[package]] 255 | name = "criterion-plot" 256 | version = "0.5.0" 257 | source = "registry+https://github.com/rust-lang/crates.io-index" 258 
| checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" 259 | dependencies = [ 260 | "cast", 261 | "itertools", 262 | ] 263 | 264 | [[package]] 265 | name = "crossbeam-deque" 266 | version = "0.8.5" 267 | source = "registry+https://github.com/rust-lang/crates.io-index" 268 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" 269 | dependencies = [ 270 | "crossbeam-epoch", 271 | "crossbeam-utils", 272 | ] 273 | 274 | [[package]] 275 | name = "crossbeam-epoch" 276 | version = "0.9.18" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 279 | dependencies = [ 280 | "crossbeam-utils", 281 | ] 282 | 283 | [[package]] 284 | name = "crossbeam-utils" 285 | version = "0.8.19" 286 | source = "registry+https://github.com/rust-lang/crates.io-index" 287 | checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" 288 | 289 | [[package]] 290 | name = "crunchy" 291 | version = "0.2.2" 292 | source = "registry+https://github.com/rust-lang/crates.io-index" 293 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" 294 | 295 | [[package]] 296 | name = "crypto-common" 297 | version = "0.1.6" 298 | source = "registry+https://github.com/rust-lang/crates.io-index" 299 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 300 | dependencies = [ 301 | "generic-array", 302 | "typenum", 303 | ] 304 | 305 | [[package]] 306 | name = "data-encoding" 307 | version = "2.6.0" 308 | source = "registry+https://github.com/rust-lang/crates.io-index" 309 | checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" 310 | 311 | [[package]] 312 | name = "digest" 313 | version = "0.10.7" 314 | source = "registry+https://github.com/rust-lang/crates.io-index" 315 | checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" 316 | dependencies = [ 317 
| "block-buffer", 318 | "crypto-common", 319 | ] 320 | 321 | [[package]] 322 | name = "either" 323 | version = "1.11.0" 324 | source = "registry+https://github.com/rust-lang/crates.io-index" 325 | checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" 326 | 327 | [[package]] 328 | name = "encoding_rs" 329 | version = "0.8.34" 330 | source = "registry+https://github.com/rust-lang/crates.io-index" 331 | checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" 332 | dependencies = [ 333 | "cfg-if", 334 | ] 335 | 336 | [[package]] 337 | name = "env_filter" 338 | version = "0.1.0" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" 341 | dependencies = [ 342 | "log", 343 | "regex", 344 | ] 345 | 346 | [[package]] 347 | name = "env_logger" 348 | version = "0.11.3" 349 | source = "registry+https://github.com/rust-lang/crates.io-index" 350 | checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" 351 | dependencies = [ 352 | "anstream", 353 | "anstyle", 354 | "env_filter", 355 | "humantime", 356 | "log", 357 | ] 358 | 359 | [[package]] 360 | name = "equivalent" 361 | version = "1.0.1" 362 | source = "registry+https://github.com/rust-lang/crates.io-index" 363 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" 364 | 365 | [[package]] 366 | name = "fnv" 367 | version = "1.0.7" 368 | source = "registry+https://github.com/rust-lang/crates.io-index" 369 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 370 | 371 | [[package]] 372 | name = "form_urlencoded" 373 | version = "1.2.1" 374 | source = "registry+https://github.com/rust-lang/crates.io-index" 375 | checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" 376 | dependencies = [ 377 | "percent-encoding", 378 | ] 379 | 380 | [[package]] 381 | name = "fs2" 382 | version = "0.4.3" 
383 | source = "registry+https://github.com/rust-lang/crates.io-index" 384 | checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" 385 | dependencies = [ 386 | "libc", 387 | "winapi", 388 | ] 389 | 390 | [[package]] 391 | name = "futures-channel" 392 | version = "0.3.30" 393 | source = "registry+https://github.com/rust-lang/crates.io-index" 394 | checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" 395 | dependencies = [ 396 | "futures-core", 397 | "futures-sink", 398 | ] 399 | 400 | [[package]] 401 | name = "futures-core" 402 | version = "0.3.30" 403 | source = "registry+https://github.com/rust-lang/crates.io-index" 404 | checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" 405 | 406 | [[package]] 407 | name = "futures-sink" 408 | version = "0.3.30" 409 | source = "registry+https://github.com/rust-lang/crates.io-index" 410 | checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" 411 | 412 | [[package]] 413 | name = "futures-task" 414 | version = "0.3.30" 415 | source = "registry+https://github.com/rust-lang/crates.io-index" 416 | checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" 417 | 418 | [[package]] 419 | name = "futures-util" 420 | version = "0.3.30" 421 | source = "registry+https://github.com/rust-lang/crates.io-index" 422 | checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" 423 | dependencies = [ 424 | "futures-core", 425 | "futures-sink", 426 | "futures-task", 427 | "pin-project-lite", 428 | "pin-utils", 429 | "slab", 430 | ] 431 | 432 | [[package]] 433 | name = "generic-array" 434 | version = "0.14.7" 435 | source = "registry+https://github.com/rust-lang/crates.io-index" 436 | checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" 437 | dependencies = [ 438 | "typenum", 439 | "version_check", 440 | ] 441 | 442 | [[package]] 443 | name = "getrandom" 444 | version = "0.2.14" 445 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 446 | checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" 447 | dependencies = [ 448 | "cfg-if", 449 | "libc", 450 | "wasi", 451 | ] 452 | 453 | [[package]] 454 | name = "gimli" 455 | version = "0.28.1" 456 | source = "registry+https://github.com/rust-lang/crates.io-index" 457 | checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" 458 | 459 | [[package]] 460 | name = "h2" 461 | version = "0.3.26" 462 | source = "registry+https://github.com/rust-lang/crates.io-index" 463 | checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" 464 | dependencies = [ 465 | "bytes", 466 | "fnv", 467 | "futures-core", 468 | "futures-sink", 469 | "futures-util", 470 | "http 0.2.12", 471 | "indexmap", 472 | "slab", 473 | "tokio", 474 | "tokio-util", 475 | "tracing", 476 | ] 477 | 478 | [[package]] 479 | name = "half" 480 | version = "2.4.1" 481 | source = "registry+https://github.com/rust-lang/crates.io-index" 482 | checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" 483 | dependencies = [ 484 | "cfg-if", 485 | "crunchy", 486 | ] 487 | 488 | [[package]] 489 | name = "hashbrown" 490 | version = "0.14.5" 491 | source = "registry+https://github.com/rust-lang/crates.io-index" 492 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 493 | 494 | [[package]] 495 | name = "haystackdb" 496 | version = "0.1.0" 497 | dependencies = [ 498 | "criterion", 499 | "env_logger", 500 | "fs2", 501 | "log", 502 | "memmap", 503 | "rayon", 504 | "serde", 505 | "serde_json", 506 | "tokio", 507 | "uuid", 508 | "warp", 509 | ] 510 | 511 | [[package]] 512 | name = "headers" 513 | version = "0.3.9" 514 | source = "registry+https://github.com/rust-lang/crates.io-index" 515 | checksum = "06683b93020a07e3dbcf5f8c0f6d40080d725bea7936fc01ad345c01b97dc270" 516 | dependencies = [ 517 | "base64", 518 | "bytes", 519 | "headers-core", 
520 | "http 0.2.12", 521 | "httpdate", 522 | "mime", 523 | "sha1", 524 | ] 525 | 526 | [[package]] 527 | name = "headers-core" 528 | version = "0.2.0" 529 | source = "registry+https://github.com/rust-lang/crates.io-index" 530 | checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" 531 | dependencies = [ 532 | "http 0.2.12", 533 | ] 534 | 535 | [[package]] 536 | name = "hermit-abi" 537 | version = "0.3.9" 538 | source = "registry+https://github.com/rust-lang/crates.io-index" 539 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 540 | 541 | [[package]] 542 | name = "http" 543 | version = "0.2.12" 544 | source = "registry+https://github.com/rust-lang/crates.io-index" 545 | checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" 546 | dependencies = [ 547 | "bytes", 548 | "fnv", 549 | "itoa", 550 | ] 551 | 552 | [[package]] 553 | name = "http" 554 | version = "1.1.0" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" 557 | dependencies = [ 558 | "bytes", 559 | "fnv", 560 | "itoa", 561 | ] 562 | 563 | [[package]] 564 | name = "http-body" 565 | version = "0.4.6" 566 | source = "registry+https://github.com/rust-lang/crates.io-index" 567 | checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" 568 | dependencies = [ 569 | "bytes", 570 | "http 0.2.12", 571 | "pin-project-lite", 572 | ] 573 | 574 | [[package]] 575 | name = "httparse" 576 | version = "1.8.0" 577 | source = "registry+https://github.com/rust-lang/crates.io-index" 578 | checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" 579 | 580 | [[package]] 581 | name = "httpdate" 582 | version = "1.0.3" 583 | source = "registry+https://github.com/rust-lang/crates.io-index" 584 | checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" 585 | 586 | [[package]] 587 | name = 
"humantime" 588 | version = "2.1.0" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 591 | 592 | [[package]] 593 | name = "hyper" 594 | version = "0.14.28" 595 | source = "registry+https://github.com/rust-lang/crates.io-index" 596 | checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" 597 | dependencies = [ 598 | "bytes", 599 | "futures-channel", 600 | "futures-core", 601 | "futures-util", 602 | "h2", 603 | "http 0.2.12", 604 | "http-body", 605 | "httparse", 606 | "httpdate", 607 | "itoa", 608 | "pin-project-lite", 609 | "socket2", 610 | "tokio", 611 | "tower-service", 612 | "tracing", 613 | "want", 614 | ] 615 | 616 | [[package]] 617 | name = "idna" 618 | version = "0.5.0" 619 | source = "registry+https://github.com/rust-lang/crates.io-index" 620 | checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" 621 | dependencies = [ 622 | "unicode-bidi", 623 | "unicode-normalization", 624 | ] 625 | 626 | [[package]] 627 | name = "indexmap" 628 | version = "2.2.6" 629 | source = "registry+https://github.com/rust-lang/crates.io-index" 630 | checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" 631 | dependencies = [ 632 | "equivalent", 633 | "hashbrown", 634 | ] 635 | 636 | [[package]] 637 | name = "is-terminal" 638 | version = "0.4.12" 639 | source = "registry+https://github.com/rust-lang/crates.io-index" 640 | checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" 641 | dependencies = [ 642 | "hermit-abi", 643 | "libc", 644 | "windows-sys 0.52.0", 645 | ] 646 | 647 | [[package]] 648 | name = "itertools" 649 | version = "0.10.5" 650 | source = "registry+https://github.com/rust-lang/crates.io-index" 651 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 652 | dependencies = [ 653 | "either", 654 | ] 655 | 656 | [[package]] 657 | name = "itoa" 658 | 
version = "1.0.11" 659 | source = "registry+https://github.com/rust-lang/crates.io-index" 660 | checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" 661 | 662 | [[package]] 663 | name = "js-sys" 664 | version = "0.3.69" 665 | source = "registry+https://github.com/rust-lang/crates.io-index" 666 | checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" 667 | dependencies = [ 668 | "wasm-bindgen", 669 | ] 670 | 671 | [[package]] 672 | name = "libc" 673 | version = "0.2.153" 674 | source = "registry+https://github.com/rust-lang/crates.io-index" 675 | checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" 676 | 677 | [[package]] 678 | name = "lock_api" 679 | version = "0.4.11" 680 | source = "registry+https://github.com/rust-lang/crates.io-index" 681 | checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" 682 | dependencies = [ 683 | "autocfg", 684 | "scopeguard", 685 | ] 686 | 687 | [[package]] 688 | name = "log" 689 | version = "0.4.21" 690 | source = "registry+https://github.com/rust-lang/crates.io-index" 691 | checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" 692 | 693 | [[package]] 694 | name = "memchr" 695 | version = "2.7.2" 696 | source = "registry+https://github.com/rust-lang/crates.io-index" 697 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" 698 | 699 | [[package]] 700 | name = "memmap" 701 | version = "0.7.0" 702 | source = "registry+https://github.com/rust-lang/crates.io-index" 703 | checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" 704 | dependencies = [ 705 | "libc", 706 | "winapi", 707 | ] 708 | 709 | [[package]] 710 | name = "mime" 711 | version = "0.3.17" 712 | source = "registry+https://github.com/rust-lang/crates.io-index" 713 | checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" 714 | 715 | [[package]] 716 | name = "mime_guess" 717 | version = 
"2.0.4" 718 | source = "registry+https://github.com/rust-lang/crates.io-index" 719 | checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" 720 | dependencies = [ 721 | "mime", 722 | "unicase", 723 | ] 724 | 725 | [[package]] 726 | name = "miniz_oxide" 727 | version = "0.7.2" 728 | source = "registry+https://github.com/rust-lang/crates.io-index" 729 | checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" 730 | dependencies = [ 731 | "adler", 732 | ] 733 | 734 | [[package]] 735 | name = "mio" 736 | version = "0.8.11" 737 | source = "registry+https://github.com/rust-lang/crates.io-index" 738 | checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" 739 | dependencies = [ 740 | "libc", 741 | "wasi", 742 | "windows-sys 0.48.0", 743 | ] 744 | 745 | [[package]] 746 | name = "multer" 747 | version = "2.1.0" 748 | source = "registry+https://github.com/rust-lang/crates.io-index" 749 | checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" 750 | dependencies = [ 751 | "bytes", 752 | "encoding_rs", 753 | "futures-util", 754 | "http 0.2.12", 755 | "httparse", 756 | "log", 757 | "memchr", 758 | "mime", 759 | "spin", 760 | "version_check", 761 | ] 762 | 763 | [[package]] 764 | name = "num-traits" 765 | version = "0.2.18" 766 | source = "registry+https://github.com/rust-lang/crates.io-index" 767 | checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" 768 | dependencies = [ 769 | "autocfg", 770 | ] 771 | 772 | [[package]] 773 | name = "num_cpus" 774 | version = "1.16.0" 775 | source = "registry+https://github.com/rust-lang/crates.io-index" 776 | checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" 777 | dependencies = [ 778 | "hermit-abi", 779 | "libc", 780 | ] 781 | 782 | [[package]] 783 | name = "object" 784 | version = "0.32.2" 785 | source = "registry+https://github.com/rust-lang/crates.io-index" 786 | checksum = 
"a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" 787 | dependencies = [ 788 | "memchr", 789 | ] 790 | 791 | [[package]] 792 | name = "once_cell" 793 | version = "1.19.0" 794 | source = "registry+https://github.com/rust-lang/crates.io-index" 795 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 796 | 797 | [[package]] 798 | name = "oorandom" 799 | version = "11.1.3" 800 | source = "registry+https://github.com/rust-lang/crates.io-index" 801 | checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" 802 | 803 | [[package]] 804 | name = "parking_lot" 805 | version = "0.12.1" 806 | source = "registry+https://github.com/rust-lang/crates.io-index" 807 | checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" 808 | dependencies = [ 809 | "lock_api", 810 | "parking_lot_core", 811 | ] 812 | 813 | [[package]] 814 | name = "parking_lot_core" 815 | version = "0.9.9" 816 | source = "registry+https://github.com/rust-lang/crates.io-index" 817 | checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" 818 | dependencies = [ 819 | "cfg-if", 820 | "libc", 821 | "redox_syscall", 822 | "smallvec", 823 | "windows-targets 0.48.5", 824 | ] 825 | 826 | [[package]] 827 | name = "percent-encoding" 828 | version = "2.3.1" 829 | source = "registry+https://github.com/rust-lang/crates.io-index" 830 | checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" 831 | 832 | [[package]] 833 | name = "pin-project" 834 | version = "1.1.5" 835 | source = "registry+https://github.com/rust-lang/crates.io-index" 836 | checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" 837 | dependencies = [ 838 | "pin-project-internal", 839 | ] 840 | 841 | [[package]] 842 | name = "pin-project-internal" 843 | version = "1.1.5" 844 | source = "registry+https://github.com/rust-lang/crates.io-index" 845 | checksum = 
"2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" 846 | dependencies = [ 847 | "proc-macro2", 848 | "quote", 849 | "syn", 850 | ] 851 | 852 | [[package]] 853 | name = "pin-project-lite" 854 | version = "0.2.14" 855 | source = "registry+https://github.com/rust-lang/crates.io-index" 856 | checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" 857 | 858 | [[package]] 859 | name = "pin-utils" 860 | version = "0.1.0" 861 | source = "registry+https://github.com/rust-lang/crates.io-index" 862 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 863 | 864 | [[package]] 865 | name = "plotters" 866 | version = "0.3.5" 867 | source = "registry+https://github.com/rust-lang/crates.io-index" 868 | checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" 869 | dependencies = [ 870 | "num-traits", 871 | "plotters-backend", 872 | "plotters-svg", 873 | "wasm-bindgen", 874 | "web-sys", 875 | ] 876 | 877 | [[package]] 878 | name = "plotters-backend" 879 | version = "0.3.5" 880 | source = "registry+https://github.com/rust-lang/crates.io-index" 881 | checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" 882 | 883 | [[package]] 884 | name = "plotters-svg" 885 | version = "0.3.5" 886 | source = "registry+https://github.com/rust-lang/crates.io-index" 887 | checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" 888 | dependencies = [ 889 | "plotters-backend", 890 | ] 891 | 892 | [[package]] 893 | name = "ppv-lite86" 894 | version = "0.2.17" 895 | source = "registry+https://github.com/rust-lang/crates.io-index" 896 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 897 | 898 | [[package]] 899 | name = "proc-macro2" 900 | version = "1.0.81" 901 | source = "registry+https://github.com/rust-lang/crates.io-index" 902 | checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" 903 | dependencies = [ 904 | 
"unicode-ident", 905 | ] 906 | 907 | [[package]] 908 | name = "quote" 909 | version = "1.0.36" 910 | source = "registry+https://github.com/rust-lang/crates.io-index" 911 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 912 | dependencies = [ 913 | "proc-macro2", 914 | ] 915 | 916 | [[package]] 917 | name = "rand" 918 | version = "0.8.5" 919 | source = "registry+https://github.com/rust-lang/crates.io-index" 920 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 921 | dependencies = [ 922 | "libc", 923 | "rand_chacha", 924 | "rand_core", 925 | ] 926 | 927 | [[package]] 928 | name = "rand_chacha" 929 | version = "0.3.1" 930 | source = "registry+https://github.com/rust-lang/crates.io-index" 931 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 932 | dependencies = [ 933 | "ppv-lite86", 934 | "rand_core", 935 | ] 936 | 937 | [[package]] 938 | name = "rand_core" 939 | version = "0.6.4" 940 | source = "registry+https://github.com/rust-lang/crates.io-index" 941 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 942 | dependencies = [ 943 | "getrandom", 944 | ] 945 | 946 | [[package]] 947 | name = "rayon" 948 | version = "1.10.0" 949 | source = "registry+https://github.com/rust-lang/crates.io-index" 950 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 951 | dependencies = [ 952 | "either", 953 | "rayon-core", 954 | ] 955 | 956 | [[package]] 957 | name = "rayon-core" 958 | version = "1.12.1" 959 | source = "registry+https://github.com/rust-lang/crates.io-index" 960 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 961 | dependencies = [ 962 | "crossbeam-deque", 963 | "crossbeam-utils", 964 | ] 965 | 966 | [[package]] 967 | name = "redox_syscall" 968 | version = "0.4.1" 969 | source = "registry+https://github.com/rust-lang/crates.io-index" 970 | checksum = 
"4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" 971 | dependencies = [ 972 | "bitflags", 973 | ] 974 | 975 | [[package]] 976 | name = "regex" 977 | version = "1.10.4" 978 | source = "registry+https://github.com/rust-lang/crates.io-index" 979 | checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" 980 | dependencies = [ 981 | "aho-corasick", 982 | "memchr", 983 | "regex-automata", 984 | "regex-syntax", 985 | ] 986 | 987 | [[package]] 988 | name = "regex-automata" 989 | version = "0.4.6" 990 | source = "registry+https://github.com/rust-lang/crates.io-index" 991 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" 992 | dependencies = [ 993 | "aho-corasick", 994 | "memchr", 995 | "regex-syntax", 996 | ] 997 | 998 | [[package]] 999 | name = "regex-syntax" 1000 | version = "0.8.3" 1001 | source = "registry+https://github.com/rust-lang/crates.io-index" 1002 | checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" 1003 | 1004 | [[package]] 1005 | name = "rustc-demangle" 1006 | version = "0.1.23" 1007 | source = "registry+https://github.com/rust-lang/crates.io-index" 1008 | checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" 1009 | 1010 | [[package]] 1011 | name = "ryu" 1012 | version = "1.0.17" 1013 | source = "registry+https://github.com/rust-lang/crates.io-index" 1014 | checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" 1015 | 1016 | [[package]] 1017 | name = "same-file" 1018 | version = "1.0.6" 1019 | source = "registry+https://github.com/rust-lang/crates.io-index" 1020 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 1021 | dependencies = [ 1022 | "winapi-util", 1023 | ] 1024 | 1025 | [[package]] 1026 | name = "scoped-tls" 1027 | version = "1.0.1" 1028 | source = "registry+https://github.com/rust-lang/crates.io-index" 1029 | checksum = 
"e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" 1030 | 1031 | [[package]] 1032 | name = "scopeguard" 1033 | version = "1.2.0" 1034 | source = "registry+https://github.com/rust-lang/crates.io-index" 1035 | checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 1036 | 1037 | [[package]] 1038 | name = "serde" 1039 | version = "1.0.198" 1040 | source = "registry+https://github.com/rust-lang/crates.io-index" 1041 | checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc" 1042 | dependencies = [ 1043 | "serde_derive", 1044 | ] 1045 | 1046 | [[package]] 1047 | name = "serde_derive" 1048 | version = "1.0.198" 1049 | source = "registry+https://github.com/rust-lang/crates.io-index" 1050 | checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9" 1051 | dependencies = [ 1052 | "proc-macro2", 1053 | "quote", 1054 | "syn", 1055 | ] 1056 | 1057 | [[package]] 1058 | name = "serde_json" 1059 | version = "1.0.116" 1060 | source = "registry+https://github.com/rust-lang/crates.io-index" 1061 | checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" 1062 | dependencies = [ 1063 | "itoa", 1064 | "ryu", 1065 | "serde", 1066 | ] 1067 | 1068 | [[package]] 1069 | name = "serde_urlencoded" 1070 | version = "0.7.1" 1071 | source = "registry+https://github.com/rust-lang/crates.io-index" 1072 | checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" 1073 | dependencies = [ 1074 | "form_urlencoded", 1075 | "itoa", 1076 | "ryu", 1077 | "serde", 1078 | ] 1079 | 1080 | [[package]] 1081 | name = "sha1" 1082 | version = "0.10.6" 1083 | source = "registry+https://github.com/rust-lang/crates.io-index" 1084 | checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" 1085 | dependencies = [ 1086 | "cfg-if", 1087 | "cpufeatures", 1088 | "digest", 1089 | ] 1090 | 1091 | [[package]] 1092 | name = "signal-hook-registry" 1093 | version = "1.4.2" 1094 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1095 | checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" 1096 | dependencies = [ 1097 | "libc", 1098 | ] 1099 | 1100 | [[package]] 1101 | name = "slab" 1102 | version = "0.4.9" 1103 | source = "registry+https://github.com/rust-lang/crates.io-index" 1104 | checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" 1105 | dependencies = [ 1106 | "autocfg", 1107 | ] 1108 | 1109 | [[package]] 1110 | name = "smallvec" 1111 | version = "1.13.2" 1112 | source = "registry+https://github.com/rust-lang/crates.io-index" 1113 | checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" 1114 | 1115 | [[package]] 1116 | name = "socket2" 1117 | version = "0.5.6" 1118 | source = "registry+https://github.com/rust-lang/crates.io-index" 1119 | checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" 1120 | dependencies = [ 1121 | "libc", 1122 | "windows-sys 0.52.0", 1123 | ] 1124 | 1125 | [[package]] 1126 | name = "spin" 1127 | version = "0.9.8" 1128 | source = "registry+https://github.com/rust-lang/crates.io-index" 1129 | checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" 1130 | 1131 | [[package]] 1132 | name = "syn" 1133 | version = "2.0.60" 1134 | source = "registry+https://github.com/rust-lang/crates.io-index" 1135 | checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" 1136 | dependencies = [ 1137 | "proc-macro2", 1138 | "quote", 1139 | "unicode-ident", 1140 | ] 1141 | 1142 | [[package]] 1143 | name = "thiserror" 1144 | version = "1.0.59" 1145 | source = "registry+https://github.com/rust-lang/crates.io-index" 1146 | checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" 1147 | dependencies = [ 1148 | "thiserror-impl", 1149 | ] 1150 | 1151 | [[package]] 1152 | name = "thiserror-impl" 1153 | version = "1.0.59" 1154 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1155 | checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" 1156 | dependencies = [ 1157 | "proc-macro2", 1158 | "quote", 1159 | "syn", 1160 | ] 1161 | 1162 | [[package]] 1163 | name = "tinytemplate" 1164 | version = "1.2.1" 1165 | source = "registry+https://github.com/rust-lang/crates.io-index" 1166 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 1167 | dependencies = [ 1168 | "serde", 1169 | "serde_json", 1170 | ] 1171 | 1172 | [[package]] 1173 | name = "tinyvec" 1174 | version = "1.6.0" 1175 | source = "registry+https://github.com/rust-lang/crates.io-index" 1176 | checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" 1177 | dependencies = [ 1178 | "tinyvec_macros", 1179 | ] 1180 | 1181 | [[package]] 1182 | name = "tinyvec_macros" 1183 | version = "0.1.1" 1184 | source = "registry+https://github.com/rust-lang/crates.io-index" 1185 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 1186 | 1187 | [[package]] 1188 | name = "tokio" 1189 | version = "1.37.0" 1190 | source = "registry+https://github.com/rust-lang/crates.io-index" 1191 | checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" 1192 | dependencies = [ 1193 | "backtrace", 1194 | "bytes", 1195 | "libc", 1196 | "mio", 1197 | "num_cpus", 1198 | "parking_lot", 1199 | "pin-project-lite", 1200 | "signal-hook-registry", 1201 | "socket2", 1202 | "tokio-macros", 1203 | "windows-sys 0.48.0", 1204 | ] 1205 | 1206 | [[package]] 1207 | name = "tokio-macros" 1208 | version = "2.2.0" 1209 | source = "registry+https://github.com/rust-lang/crates.io-index" 1210 | checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" 1211 | dependencies = [ 1212 | "proc-macro2", 1213 | "quote", 1214 | "syn", 1215 | ] 1216 | 1217 | [[package]] 1218 | name = "tokio-tungstenite" 1219 | version = "0.21.0" 1220 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1221 | checksum = "c83b561d025642014097b66e6c1bb422783339e0909e4429cde4749d1990bc38" 1222 | dependencies = [ 1223 | "futures-util", 1224 | "log", 1225 | "tokio", 1226 | "tungstenite", 1227 | ] 1228 | 1229 | [[package]] 1230 | name = "tokio-util" 1231 | version = "0.7.10" 1232 | source = "registry+https://github.com/rust-lang/crates.io-index" 1233 | checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" 1234 | dependencies = [ 1235 | "bytes", 1236 | "futures-core", 1237 | "futures-sink", 1238 | "pin-project-lite", 1239 | "tokio", 1240 | "tracing", 1241 | ] 1242 | 1243 | [[package]] 1244 | name = "tower-service" 1245 | version = "0.3.2" 1246 | source = "registry+https://github.com/rust-lang/crates.io-index" 1247 | checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" 1248 | 1249 | [[package]] 1250 | name = "tracing" 1251 | version = "0.1.40" 1252 | source = "registry+https://github.com/rust-lang/crates.io-index" 1253 | checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" 1254 | dependencies = [ 1255 | "log", 1256 | "pin-project-lite", 1257 | "tracing-core", 1258 | ] 1259 | 1260 | [[package]] 1261 | name = "tracing-core" 1262 | version = "0.1.32" 1263 | source = "registry+https://github.com/rust-lang/crates.io-index" 1264 | checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" 1265 | dependencies = [ 1266 | "once_cell", 1267 | ] 1268 | 1269 | [[package]] 1270 | name = "try-lock" 1271 | version = "0.2.5" 1272 | source = "registry+https://github.com/rust-lang/crates.io-index" 1273 | checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" 1274 | 1275 | [[package]] 1276 | name = "tungstenite" 1277 | version = "0.21.0" 1278 | source = "registry+https://github.com/rust-lang/crates.io-index" 1279 | checksum = "9ef1a641ea34f399a848dea702823bbecfb4c486f911735368f1f137cb8257e1" 1280 | dependencies = [ 1281 
| "byteorder", 1282 | "bytes", 1283 | "data-encoding", 1284 | "http 1.1.0", 1285 | "httparse", 1286 | "log", 1287 | "rand", 1288 | "sha1", 1289 | "thiserror", 1290 | "url", 1291 | "utf-8", 1292 | ] 1293 | 1294 | [[package]] 1295 | name = "typenum" 1296 | version = "1.17.0" 1297 | source = "registry+https://github.com/rust-lang/crates.io-index" 1298 | checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" 1299 | 1300 | [[package]] 1301 | name = "unicase" 1302 | version = "2.7.0" 1303 | source = "registry+https://github.com/rust-lang/crates.io-index" 1304 | checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" 1305 | dependencies = [ 1306 | "version_check", 1307 | ] 1308 | 1309 | [[package]] 1310 | name = "unicode-bidi" 1311 | version = "0.3.15" 1312 | source = "registry+https://github.com/rust-lang/crates.io-index" 1313 | checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" 1314 | 1315 | [[package]] 1316 | name = "unicode-ident" 1317 | version = "1.0.12" 1318 | source = "registry+https://github.com/rust-lang/crates.io-index" 1319 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 1320 | 1321 | [[package]] 1322 | name = "unicode-normalization" 1323 | version = "0.1.23" 1324 | source = "registry+https://github.com/rust-lang/crates.io-index" 1325 | checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" 1326 | dependencies = [ 1327 | "tinyvec", 1328 | ] 1329 | 1330 | [[package]] 1331 | name = "url" 1332 | version = "2.5.0" 1333 | source = "registry+https://github.com/rust-lang/crates.io-index" 1334 | checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" 1335 | dependencies = [ 1336 | "form_urlencoded", 1337 | "idna", 1338 | "percent-encoding", 1339 | ] 1340 | 1341 | [[package]] 1342 | name = "utf-8" 1343 | version = "0.7.6" 1344 | source = "registry+https://github.com/rust-lang/crates.io-index" 1345 | checksum = 
"09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" 1346 | 1347 | [[package]] 1348 | name = "utf8parse" 1349 | version = "0.2.1" 1350 | source = "registry+https://github.com/rust-lang/crates.io-index" 1351 | checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" 1352 | 1353 | [[package]] 1354 | name = "uuid" 1355 | version = "1.8.0" 1356 | source = "registry+https://github.com/rust-lang/crates.io-index" 1357 | checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" 1358 | dependencies = [ 1359 | "getrandom", 1360 | "serde", 1361 | ] 1362 | 1363 | [[package]] 1364 | name = "version_check" 1365 | version = "0.9.4" 1366 | source = "registry+https://github.com/rust-lang/crates.io-index" 1367 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 1368 | 1369 | [[package]] 1370 | name = "walkdir" 1371 | version = "2.5.0" 1372 | source = "registry+https://github.com/rust-lang/crates.io-index" 1373 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 1374 | dependencies = [ 1375 | "same-file", 1376 | "winapi-util", 1377 | ] 1378 | 1379 | [[package]] 1380 | name = "want" 1381 | version = "0.3.1" 1382 | source = "registry+https://github.com/rust-lang/crates.io-index" 1383 | checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" 1384 | dependencies = [ 1385 | "try-lock", 1386 | ] 1387 | 1388 | [[package]] 1389 | name = "warp" 1390 | version = "0.3.7" 1391 | source = "registry+https://github.com/rust-lang/crates.io-index" 1392 | checksum = "4378d202ff965b011c64817db11d5829506d3404edeadb61f190d111da3f231c" 1393 | dependencies = [ 1394 | "bytes", 1395 | "futures-channel", 1396 | "futures-util", 1397 | "headers", 1398 | "http 0.2.12", 1399 | "hyper", 1400 | "log", 1401 | "mime", 1402 | "mime_guess", 1403 | "multer", 1404 | "percent-encoding", 1405 | "pin-project", 1406 | "scoped-tls", 1407 | "serde", 1408 | "serde_json", 1409 | "serde_urlencoded", 
1410 | "tokio", 1411 | "tokio-tungstenite", 1412 | "tokio-util", 1413 | "tower-service", 1414 | "tracing", 1415 | ] 1416 | 1417 | [[package]] 1418 | name = "wasi" 1419 | version = "0.11.0+wasi-snapshot-preview1" 1420 | source = "registry+https://github.com/rust-lang/crates.io-index" 1421 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1422 | 1423 | [[package]] 1424 | name = "wasm-bindgen" 1425 | version = "0.2.92" 1426 | source = "registry+https://github.com/rust-lang/crates.io-index" 1427 | checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" 1428 | dependencies = [ 1429 | "cfg-if", 1430 | "wasm-bindgen-macro", 1431 | ] 1432 | 1433 | [[package]] 1434 | name = "wasm-bindgen-backend" 1435 | version = "0.2.92" 1436 | source = "registry+https://github.com/rust-lang/crates.io-index" 1437 | checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" 1438 | dependencies = [ 1439 | "bumpalo", 1440 | "log", 1441 | "once_cell", 1442 | "proc-macro2", 1443 | "quote", 1444 | "syn", 1445 | "wasm-bindgen-shared", 1446 | ] 1447 | 1448 | [[package]] 1449 | name = "wasm-bindgen-macro" 1450 | version = "0.2.92" 1451 | source = "registry+https://github.com/rust-lang/crates.io-index" 1452 | checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" 1453 | dependencies = [ 1454 | "quote", 1455 | "wasm-bindgen-macro-support", 1456 | ] 1457 | 1458 | [[package]] 1459 | name = "wasm-bindgen-macro-support" 1460 | version = "0.2.92" 1461 | source = "registry+https://github.com/rust-lang/crates.io-index" 1462 | checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" 1463 | dependencies = [ 1464 | "proc-macro2", 1465 | "quote", 1466 | "syn", 1467 | "wasm-bindgen-backend", 1468 | "wasm-bindgen-shared", 1469 | ] 1470 | 1471 | [[package]] 1472 | name = "wasm-bindgen-shared" 1473 | version = "0.2.92" 1474 | source = "registry+https://github.com/rust-lang/crates.io-index" 1475 | 
checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" 1476 | 1477 | [[package]] 1478 | name = "web-sys" 1479 | version = "0.3.69" 1480 | source = "registry+https://github.com/rust-lang/crates.io-index" 1481 | checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" 1482 | dependencies = [ 1483 | "js-sys", 1484 | "wasm-bindgen", 1485 | ] 1486 | 1487 | [[package]] 1488 | name = "winapi" 1489 | version = "0.3.9" 1490 | source = "registry+https://github.com/rust-lang/crates.io-index" 1491 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1492 | dependencies = [ 1493 | "winapi-i686-pc-windows-gnu", 1494 | "winapi-x86_64-pc-windows-gnu", 1495 | ] 1496 | 1497 | [[package]] 1498 | name = "winapi-i686-pc-windows-gnu" 1499 | version = "0.4.0" 1500 | source = "registry+https://github.com/rust-lang/crates.io-index" 1501 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1502 | 1503 | [[package]] 1504 | name = "winapi-util" 1505 | version = "0.1.7" 1506 | source = "registry+https://github.com/rust-lang/crates.io-index" 1507 | checksum = "134306a13c5647ad6453e8deaec55d3a44d6021970129e6188735e74bf546697" 1508 | dependencies = [ 1509 | "windows-sys 0.52.0", 1510 | ] 1511 | 1512 | [[package]] 1513 | name = "winapi-x86_64-pc-windows-gnu" 1514 | version = "0.4.0" 1515 | source = "registry+https://github.com/rust-lang/crates.io-index" 1516 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1517 | 1518 | [[package]] 1519 | name = "windows-sys" 1520 | version = "0.48.0" 1521 | source = "registry+https://github.com/rust-lang/crates.io-index" 1522 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" 1523 | dependencies = [ 1524 | "windows-targets 0.48.5", 1525 | ] 1526 | 1527 | [[package]] 1528 | name = "windows-sys" 1529 | version = "0.52.0" 1530 | source = "registry+https://github.com/rust-lang/crates.io-index" 1531 | checksum = 
"282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 1532 | dependencies = [ 1533 | "windows-targets 0.52.5", 1534 | ] 1535 | 1536 | [[package]] 1537 | name = "windows-targets" 1538 | version = "0.48.5" 1539 | source = "registry+https://github.com/rust-lang/crates.io-index" 1540 | checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" 1541 | dependencies = [ 1542 | "windows_aarch64_gnullvm 0.48.5", 1543 | "windows_aarch64_msvc 0.48.5", 1544 | "windows_i686_gnu 0.48.5", 1545 | "windows_i686_msvc 0.48.5", 1546 | "windows_x86_64_gnu 0.48.5", 1547 | "windows_x86_64_gnullvm 0.48.5", 1548 | "windows_x86_64_msvc 0.48.5", 1549 | ] 1550 | 1551 | [[package]] 1552 | name = "windows-targets" 1553 | version = "0.52.5" 1554 | source = "registry+https://github.com/rust-lang/crates.io-index" 1555 | checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" 1556 | dependencies = [ 1557 | "windows_aarch64_gnullvm 0.52.5", 1558 | "windows_aarch64_msvc 0.52.5", 1559 | "windows_i686_gnu 0.52.5", 1560 | "windows_i686_gnullvm", 1561 | "windows_i686_msvc 0.52.5", 1562 | "windows_x86_64_gnu 0.52.5", 1563 | "windows_x86_64_gnullvm 0.52.5", 1564 | "windows_x86_64_msvc 0.52.5", 1565 | ] 1566 | 1567 | [[package]] 1568 | name = "windows_aarch64_gnullvm" 1569 | version = "0.48.5" 1570 | source = "registry+https://github.com/rust-lang/crates.io-index" 1571 | checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" 1572 | 1573 | [[package]] 1574 | name = "windows_aarch64_gnullvm" 1575 | version = "0.52.5" 1576 | source = "registry+https://github.com/rust-lang/crates.io-index" 1577 | checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" 1578 | 1579 | [[package]] 1580 | name = "windows_aarch64_msvc" 1581 | version = "0.48.5" 1582 | source = "registry+https://github.com/rust-lang/crates.io-index" 1583 | checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" 1584 | 1585 | 
[[package]] 1586 | name = "windows_aarch64_msvc" 1587 | version = "0.52.5" 1588 | source = "registry+https://github.com/rust-lang/crates.io-index" 1589 | checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" 1590 | 1591 | [[package]] 1592 | name = "windows_i686_gnu" 1593 | version = "0.48.5" 1594 | source = "registry+https://github.com/rust-lang/crates.io-index" 1595 | checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" 1596 | 1597 | [[package]] 1598 | name = "windows_i686_gnu" 1599 | version = "0.52.5" 1600 | source = "registry+https://github.com/rust-lang/crates.io-index" 1601 | checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" 1602 | 1603 | [[package]] 1604 | name = "windows_i686_gnullvm" 1605 | version = "0.52.5" 1606 | source = "registry+https://github.com/rust-lang/crates.io-index" 1607 | checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" 1608 | 1609 | [[package]] 1610 | name = "windows_i686_msvc" 1611 | version = "0.48.5" 1612 | source = "registry+https://github.com/rust-lang/crates.io-index" 1613 | checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" 1614 | 1615 | [[package]] 1616 | name = "windows_i686_msvc" 1617 | version = "0.52.5" 1618 | source = "registry+https://github.com/rust-lang/crates.io-index" 1619 | checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" 1620 | 1621 | [[package]] 1622 | name = "windows_x86_64_gnu" 1623 | version = "0.48.5" 1624 | source = "registry+https://github.com/rust-lang/crates.io-index" 1625 | checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" 1626 | 1627 | [[package]] 1628 | name = "windows_x86_64_gnu" 1629 | version = "0.52.5" 1630 | source = "registry+https://github.com/rust-lang/crates.io-index" 1631 | checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" 1632 | 1633 | [[package]] 1634 | name = 
"windows_x86_64_gnullvm" 1635 | version = "0.48.5" 1636 | source = "registry+https://github.com/rust-lang/crates.io-index" 1637 | checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" 1638 | 1639 | [[package]] 1640 | name = "windows_x86_64_gnullvm" 1641 | version = "0.52.5" 1642 | source = "registry+https://github.com/rust-lang/crates.io-index" 1643 | checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" 1644 | 1645 | [[package]] 1646 | name = "windows_x86_64_msvc" 1647 | version = "0.48.5" 1648 | source = "registry+https://github.com/rust-lang/crates.io-index" 1649 | checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" 1650 | 1651 | [[package]] 1652 | name = "windows_x86_64_msvc" 1653 | version = "0.52.5" 1654 | source = "registry+https://github.com/rust-lang/crates.io-index" 1655 | checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" 1656 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "haystackdb" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [build] 7 | target = "x86_64-unknown-linux-gnu" 8 | flags = ["-C", "target-cpu=native"] 9 | 10 | [dependencies] 11 | warp = "0.3" 12 | tokio = { version = "1", features = ["full"] } 13 | serde = { version = "1.0", features = ["derive"] } 14 | rayon = "1.10.0" 15 | uuid = { version = "1.8.0", features = ["v4", "serde"] } 16 | memmap = "0.7.0" 17 | log = "0.4.14" 18 | fs2 = "0.4.0" 19 | env_logger = "0.11.3" 20 | serde_json = "1.0.68" 21 | 22 | [profile.release] 23 | opt-level = 3 24 | 25 | [profile.bench] 26 | opt-level = 3 27 | 28 | # [[bench]] 29 | # name = "hamming_distance" 30 | # harness = false 31 | 32 | # [[bench]] 33 | # name = "gemv" 34 | # harness = false 35 | 36 | # [[bench]] 37 | # name = "gemm" 38 | # harness = false 39 | 40 | 41 | [dev-dependencies] 42 | 
criterion = "0.5.1" 43 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 rust:slim-buster 2 | 3 | # Create a new empty shell project 4 | RUN USER=root cargo new --bin svc 5 | WORKDIR /svc 6 | 7 | # Copy over your manifests 8 | COPY ./Cargo.lock ./Cargo.lock 9 | COPY ./Cargo.toml ./Cargo.toml 10 | 11 | # This build step will cache your dependencies 12 | RUN cargo build --release 13 | RUN rm src/*.rs 14 | 15 | # Now copy your source code 16 | COPY ./src ./src 17 | 18 | # Build for release, reusing the cached dependencies 19 | RUN cargo build --release 20 | 21 | CMD ["./target/release/haystackdb"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2024 Carson Poole 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HaystackDB 2 | 3 | > Minimal but performant Vector DB 4 | 5 | ## Features 6 | 7 | - Binary embeddings by default (soon int8 reranking) 8 | - JSON filtering for queries 9 | - Scalable, distributed architecture for use with multi replica deployments 10 | - Durable (WAL), persistent data, mem mapped for fast access in the client 11 | 12 | ## Benchmarks 13 | 14 | > On a MacBook with an M2, 1024 dimension, binary quantized. 15 | 16 | > FAISS is using a flat index, so brute force, but it's in memory. Haystack is storing the data on disk, and also brute forces. 17 | 18 | TLDR is Haystack is ~10x faster despite being stored on disk. 
19 | 20 | ``` 21 | 100,000 Vectors 22 | Haystack — 3.44ms 23 | FAISS — 29.67ms 24 | 25 | 500,000 Vectors 26 | Haystack — 11.98ms 27 | FAISS - 146.50ms 28 | 29 | 1,000,000 Vectors 30 | Haystack — 22.65ms 31 | FAISS — 293.91ms 32 | ``` 33 | 34 | ## Roadmap 35 | 36 | - **Quickstart Guide** 37 | - **Quality benchmarks** (this is in progress) 38 | - Int8 reranking 39 | - ~~Better queries with more than simple equality~~ (this is done now) 40 | - Full text search 41 | - ~~Better insertion performance with batch B+Tree insertion~~ (could probably be further improved, but good for now) 42 | - ~~Point in time backups/rollback~~ 43 | - currently this is destructive (ie you cannot return forward after you go backwards), so a nondestructive version is next on the todo list. 44 | - Cursor based pagination 45 | - Schema migrations 46 | - Vector Kmeans clustering with centroid similarity for improved search perf 47 | -------------------------------------------------------------------------------- /benches/gemm.rs: -------------------------------------------------------------------------------- 1 | extern crate haystackdb; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use haystackdb::constants::VECTOR_SIZE; 5 | use haystackdb::math::gemm; 6 | 7 | fn criterion_benchmark(c: &mut Criterion) { 8 | let matrix_a = vec![[1.0f32; VECTOR_SIZE]; VECTOR_SIZE]; // Example matrix data for GEMV and GEMM 9 | let matrix_b = vec![[2.0f32; VECTOR_SIZE]; VECTOR_SIZE]; // Example matrix data for GEMV and GEMM 10 | let mut result_matrix = vec![[0f32; VECTOR_SIZE]; VECTOR_SIZE]; // Placeholder for GEMM output 11 | 12 | c.bench_function("gemm", |bencher| { 13 | bencher.iter(|| { 14 | gemm::gemm( 15 | black_box(&matrix_a), 16 | black_box(&matrix_b), 17 | black_box(&mut result_matrix), 18 | ) 19 | }) 20 | }); 21 | } 22 | 23 | fn custom_criterion() -> Criterion { 24 | Criterion::default() 25 | .warm_up_time(std::time::Duration::from_secs(2)) 26 | 
.measurement_time(std::time::Duration::from_secs(5)) // Increasing target time to 11 seconds 27 | } 28 | 29 | criterion_group! { 30 | name = benches; 31 | config = custom_criterion(); 32 | targets = criterion_benchmark 33 | } 34 | criterion_main!(benches); 35 | -------------------------------------------------------------------------------- /benches/gemv.rs: -------------------------------------------------------------------------------- 1 | extern crate haystackdb; 2 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 3 | use haystackdb::constants::VECTOR_SIZE; 4 | use haystackdb::math::gemv; 5 | 6 | fn criterion_benchmark(c: &mut Criterion) { 7 | let matrix_a = vec![[1.0f32; VECTOR_SIZE]; VECTOR_SIZE]; // Example matrix data for GEMV and GEMM 8 | let vector = [1.0f32; VECTOR_SIZE]; // Example vector data for GEMV 9 | 10 | c.bench_function("gemv", |bencher| { 11 | bencher.iter(|| gemv::gemv(black_box(&matrix_a), black_box(&vector))) 12 | }); 13 | } 14 | 15 | fn custom_criterion() -> Criterion { 16 | Criterion::default() 17 | .warm_up_time(std::time::Duration::from_secs(2)) 18 | .measurement_time(std::time::Duration::from_secs(5)) // Increasing target time to 11 seconds 19 | } 20 | 21 | criterion_group! 
{ 22 | name = benches; 23 | config = custom_criterion(); 24 | targets = criterion_benchmark 25 | } 26 | criterion_main!(benches); 27 | -------------------------------------------------------------------------------- /benches/hamming_distance.rs: -------------------------------------------------------------------------------- 1 | extern crate haystackdb; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 4 | use haystackdb::constants::QUANTIZED_VECTOR_SIZE; 5 | use haystackdb::math::hamming_distance::hamming_distance; 6 | 7 | fn criterion_benchmark(c: &mut Criterion) { 8 | let a = [0u8; QUANTIZED_VECTOR_SIZE]; // Example data for hamming_distance 9 | let b = [255u8; QUANTIZED_VECTOR_SIZE]; // Example data for hamming_distance 10 | 11 | c.bench_function("hamming_distance", |bencher| { 12 | bencher.iter(|| hamming_distance(black_box(&a), black_box(&b))) 13 | }); 14 | } 15 | 16 | fn custom_criterion() -> Criterion { 17 | Criterion::default() 18 | .warm_up_time(std::time::Duration::from_secs(2)) 19 | .measurement_time(std::time::Duration::from_secs(5)) // Increasing target time to 11 seconds 20 | } 21 | 22 | criterion_group! 
{ 23 | name = benches; 24 | config = custom_criterion(); 25 | targets = criterion_benchmark 26 | } 27 | criterion_main!(benches); 28 | -------------------------------------------------------------------------------- /src/constants.rs: -------------------------------------------------------------------------------- 1 | pub const VECTOR_SIZE: usize = 1024; 2 | pub const QUANTIZED_VECTOR_SIZE: usize = VECTOR_SIZE / 8; 3 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod constants; 2 | pub mod math; 3 | pub mod services; 4 | pub mod structures; 5 | pub mod utils; 6 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use env_logger::Builder; 2 | use haystackdb::constants::VECTOR_SIZE; 3 | use haystackdb::services::CommitService; 4 | use haystackdb::services::QueryService; 5 | use haystackdb::structures::filters::Filter as QueryFilter; 6 | use haystackdb::structures::metadata_index::KVPair; 7 | use log::info; 8 | use log::LevelFilter; 9 | use std::io::Write; 10 | use std::sync::{Arc, Mutex}; 11 | use std::{self, path::PathBuf}; 12 | use tokio::time::{interval, Duration}; 13 | 14 | use std::collections::HashMap; 15 | use tokio::sync::OnceCell; 16 | use warp::Filter; 17 | 18 | static ACTIVE_NAMESPACES: OnceCell>>>> = 19 | OnceCell::const_new(); 20 | 21 | #[tokio::main] 22 | async fn main() { 23 | let mut builder = Builder::new(); 24 | builder 25 | .format(|buf, record| writeln!(buf, "{}: {}", record.level(), record.args())) 26 | .filter(None, LevelFilter::Info) 27 | .init(); 28 | 29 | let active_namespaces = ACTIVE_NAMESPACES 30 | .get_or_init(|| async { Arc::new(Mutex::new(HashMap::new())) }) 31 | .await; 32 | 33 | let search_route = warp::path!("query" / String) 34 | .and(warp::post()) 35 | 
.and(warp::body::json()) 36 | .and(with_active_namespaces(active_namespaces.clone())) 37 | .then( 38 | |namespace_id: String, body: (Vec, QueryFilter, usize), active_namespaces| async move { 39 | let base_path = PathBuf::from(format!("/workspace/data/{}/current", namespace_id.clone())); 40 | ensure_namespace_initialized(&namespace_id, &active_namespaces, base_path.clone()) 41 | .await; 42 | 43 | let mut query_service = QueryService::new(base_path, namespace_id.clone()).unwrap(); 44 | let fvec = &body.0; 45 | let metadata = &body.1; 46 | let top_k = body.2; 47 | 48 | let mut vec: [f32; VECTOR_SIZE] = [0.0; VECTOR_SIZE]; 49 | fvec.iter() 50 | .enumerate() 51 | .for_each(|(i, &val)| vec[i] = val as f32); 52 | 53 | let start = std::time::Instant::now(); 54 | 55 | let search_result = query_service 56 | .query(&vec, metadata, top_k) 57 | .expect("Failed to query"); 58 | 59 | let duration = start.elapsed(); 60 | 61 | println!("Query took {:?} to complete", duration); 62 | warp::reply::json(&search_result) 63 | }, 64 | ); 65 | 66 | let add_vector_route = 67 | warp::path!("addVector" / String) 68 | .and(warp::post()) 69 | .and(warp::body::json()) 70 | .and(with_active_namespaces(active_namespaces.clone())) 71 | .then( 72 | |namespace_id: String, 73 | body: (Vec, Vec, String), 74 | active_namespaces| async move { 75 | let base_path = PathBuf::from(format!( 76 | "/workspace/data/{}/current", 77 | namespace_id.clone() 78 | )); 79 | 80 | ensure_namespace_initialized( 81 | &namespace_id, 82 | &active_namespaces, 83 | base_path.clone(), 84 | ) 85 | .await; 86 | 87 | let mut commit_service = 88 | CommitService::new(base_path, namespace_id.clone()).unwrap(); 89 | let fvec = &body.0; 90 | let metadata = &body.1; 91 | 92 | let mut vec: [f32; VECTOR_SIZE] = [0.0; VECTOR_SIZE]; 93 | fvec.iter() 94 | .enumerate() 95 | .for_each(|(i, &val)| vec[i] = val as f32); 96 | 97 | // let id = uuid::Uuid::from_str(id_str).unwrap(); 98 | commit_service.add_to_wal(vec![vec], 
vec![metadata.clone()]).expect("Failed to add to WAL"); 99 | warp::reply::json(&"Success") 100 | }, 101 | ); 102 | 103 | // add a PITR route 104 | let pitr_route = warp::path!("pitr" / String / String) 105 | .and(warp::get()) 106 | .and(with_active_namespaces(active_namespaces.clone())) 107 | .then( 108 | |namespace_id: String, timestamp: String, active_namespaces| async move { 109 | println!("PITR for namespace: {}", namespace_id); 110 | let base_path = 111 | PathBuf::from(format!("/workspace/data/{}/current", namespace_id.clone())); 112 | 113 | ensure_namespace_initialized(&namespace_id, &active_namespaces, base_path.clone()) 114 | .await; 115 | 116 | let mut commit_service = 117 | CommitService::new(base_path, namespace_id.clone()).unwrap(); 118 | 119 | let timestamp = timestamp.parse::().unwrap(); 120 | commit_service 121 | .recover_point_in_time(timestamp) 122 | .expect("Failed to PITR"); 123 | warp::reply::json(&"Success") 124 | }, 125 | ); 126 | 127 | let routes = search_route 128 | .or(add_vector_route) 129 | .or(pitr_route) 130 | .with(warp::cors().allow_any_origin()); 131 | warp::serve(routes).run(([0, 0, 0, 0], 8080)).await; 132 | } 133 | 134 | fn with_active_namespaces( 135 | active_namespaces: Arc>>>, 136 | ) -> impl Filter< 137 | Extract = (Arc>>>,), 138 | Error = std::convert::Infallible, 139 | > + Clone { 140 | warp::any().map(move || active_namespaces.clone()) 141 | } 142 | 143 | async fn ensure_namespace_initialized( 144 | namespace_id: &String, 145 | active_namespaces: &Arc>>>, 146 | base_path_for_async: PathBuf, 147 | ) { 148 | let mut namespaces = active_namespaces.lock().unwrap(); 149 | if !namespaces.contains_key(namespace_id) { 150 | let namespace_id_cloned = namespace_id.clone(); 151 | let handle = tokio::spawn(async move { 152 | let mut interval = interval(Duration::from_secs(10)); 153 | loop { 154 | interval.tick().await; 155 | println!("Committing for namespace {}", namespace_id_cloned); 156 | let start = std::time::Instant::now(); 157 | 
let commit_worker = std::sync::Arc::new(std::sync::Mutex::new( 158 | CommitService::new(base_path_for_async.clone(), namespace_id_cloned.clone()) 159 | .unwrap(), 160 | )); 161 | 162 | commit_worker 163 | .lock() 164 | .unwrap() 165 | .commit() 166 | .expect("Failed to commit"); 167 | let duration = start.elapsed(); 168 | info!("Commit worker took {:?} to complete", duration); 169 | } 170 | }); 171 | namespaces.insert(namespace_id.clone(), handle); 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/main1.rs: -------------------------------------------------------------------------------- 1 | extern crate haystackdb; 2 | use haystackdb::constants::VECTOR_SIZE; 3 | use haystackdb::services::commit::CommitService; 4 | use haystackdb::services::query::QueryService; 5 | use haystackdb::structures::metadata_index::KVPair; 6 | use std::fs; 7 | use std::path::PathBuf; 8 | use std::str::FromStr; 9 | use uuid; 10 | 11 | fn random_vec() -> [f32; VECTOR_SIZE] { 12 | return [0.0; VECTOR_SIZE]; 13 | } 14 | 15 | fn main() { 16 | let namespace_id = uuid::Uuid::new_v4().to_string(); 17 | let path = PathBuf::from_str("tests/data") 18 | .expect("Failed to create path") 19 | .join("namespaces") 20 | .join(namespace_id.clone()); 21 | fs::create_dir_all(&path).expect("Failed to create directory"); 22 | let mut commit_service = CommitService::new(path.clone(), namespace_id.clone()) 23 | .expect("Failed to create commit service"); 24 | 25 | let start = std::time::Instant::now(); 26 | // for _ in 0..20000 { 27 | // commit_service 28 | // .add_to_wal( 29 | // vec![random_vec()], 30 | // vec![vec![KVPair { 31 | // key: "key".to_string(), 32 | // value: "value".to_string(), 33 | // }]], 34 | // ) 35 | // .expect("Failed to add to WAL"); 36 | // } 37 | 38 | const NUM_VECTORS: usize = 100_000; 39 | 40 | let batch_vectors: Vec> = 41 | (0..NUM_VECTORS).map(|_| vec![random_vec()]).collect(); 42 | let batch_kvs: Vec>> = (0..NUM_VECTORS) 43 | 
.map(|_| { 44 | vec![vec![KVPair { 45 | key: "key".to_string(), 46 | value: "value".to_string(), 47 | }]] 48 | }) 49 | .collect(); 50 | 51 | println!("Batch creation took: {:?}", start.elapsed()); 52 | commit_service 53 | .batch_add_to_wal(batch_vectors, batch_kvs) 54 | .expect("Failed to add to WAL"); 55 | 56 | println!("Add to WAL took: {:?}", start.elapsed()); 57 | 58 | // commit_service 59 | // .add_to_wal( 60 | // vec![[0.0; VECTOR_SIZE]], 61 | // vec![vec![KVPair { 62 | // key: "key".to_string(), 63 | // value: "value".to_string(), 64 | // }]], 65 | // ) 66 | // .expect("Failed to add to WAL"); 67 | 68 | let start = std::time::Instant::now(); 69 | 70 | commit_service.commit().expect("Failed to commit"); 71 | 72 | println!("Commit took: {:?}", start.elapsed()); 73 | 74 | let mut query_service = 75 | QueryService::new(path.clone(), namespace_id).expect("Failed to create query service"); 76 | 77 | let _start = std::time::Instant::now(); 78 | 79 | const NUM_RUNS: usize = 100; 80 | 81 | let start = std::time::Instant::now(); 82 | 83 | for _ in 0..NUM_RUNS { 84 | let _ = query_service 85 | .query( 86 | &[0.0; VECTOR_SIZE], 87 | vec![KVPair { 88 | key: "key".to_string(), 89 | value: "value".to_string(), 90 | }], 91 | 1, 92 | ) 93 | .expect("Failed to query"); 94 | 95 | // println!("{:?}", result); 96 | } 97 | 98 | println!("Query took: {:?}", start.elapsed().div_f32(NUM_RUNS as f32)); 99 | 100 | // let result = query_service 101 | // .query( 102 | // &[0.0; VECTOR_SIZE], 103 | // vec![KVPair { 104 | // key: "key".to_string(), 105 | // value: "value".to_string(), 106 | // }], 107 | // 1, 108 | // ) 109 | // .expect("Failed to query"); 110 | 111 | // println!("{:?}", result); 112 | 113 | // println!("Query took: {:?}", start.elapsed()); 114 | } 115 | 116 | // fn main() { 117 | // let mut storage_manager: StorageManager = StorageManager::new( 118 | // PathBuf::from_str("tests/data/test.db").expect("Failed to create path"), 119 | // ) 120 | // .expect("Failed to create 
storage manager"); 121 | 122 | // let mut node: Node = Node::new_leaf(0); 123 | 124 | // for i in 0..2048 { 125 | // node.set_key_value(i, uuid::Uuid::new_v4().to_string()); 126 | // } 127 | 128 | // let serialized = Node::serialize(&node); 129 | // let deserialized = Node::deserialize(&serialized); 130 | 131 | // assert_eq!(node, deserialized); 132 | 133 | // let offset = storage_manager 134 | // .store_node(&mut node) 135 | // .expect("Failed to store node"); 136 | 137 | // node.offset = offset; 138 | 139 | // let mut loaded_node = storage_manager 140 | // .load_node(offset) 141 | // .expect("Failed to load node"); 142 | 143 | // loaded_node.offset = offset; 144 | 145 | // assert_eq!(loaded_node, node); 146 | // } 147 | -------------------------------------------------------------------------------- /src/math.rs: -------------------------------------------------------------------------------- 1 | pub mod gemm; 2 | pub mod gemv; 3 | pub mod hamming_distance; 4 | 5 | pub use gemm::gemm; 6 | pub use gemv::gemv; 7 | pub use hamming_distance::hamming_distance; 8 | -------------------------------------------------------------------------------- /src/math/gemm.rs: -------------------------------------------------------------------------------- 1 | use crate::constants::VECTOR_SIZE; 2 | 3 | pub fn gemm( 4 | a: &Vec<[f32; VECTOR_SIZE]>, 5 | b: &Vec<[f32; VECTOR_SIZE]>, 6 | result: &mut Vec<[f32; VECTOR_SIZE]>, 7 | ) { 8 | for i in 0..a.len() { 9 | for j in 0..VECTOR_SIZE { 10 | let mut sum = 0.0_f32; 11 | for k in 0..VECTOR_SIZE { 12 | sum += a[i][k] * b[k][j]; 13 | } 14 | result[i][j] = sum; 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/math/gemv.rs: -------------------------------------------------------------------------------- 1 | use crate::constants::VECTOR_SIZE; 2 | 3 | /* Matrix-vector product: result[i] = dot(matrix[i], vector). */ 4 | pub fn gemv(matrix: &Vec<[f32; VECTOR_SIZE]>, vector: &[f32; VECTOR_SIZE]) -> Vec { 5 | let mut result = vec![0f32; matrix.len()]; /* fix: was vec![0f32; VECTOR_SIZE] — the output needs one element per matrix ROW, so the old sizing panicked at result[i] whenever matrix.len() > VECTOR_SIZE and returned stale zero padding whenever it was smaller */ 6
| for (i, row) in matrix.iter().enumerate() { 6 | let mut sum = 0f32; 7 | for j in 0..VECTOR_SIZE { 8 | sum += row[j] * vector[j]; 9 | } 10 | result[i] = sum; 11 | } 12 | result 13 | } 14 | -------------------------------------------------------------------------------- /src/math/hamming_distance.rs: -------------------------------------------------------------------------------- 1 | use crate::constants::QUANTIZED_VECTOR_SIZE as ARRAY_SIZE; 2 | 3 | // #[cfg(not(target_arch = "aarch64"))] 4 | pub fn hamming_distance(a: &[u8; ARRAY_SIZE], b: &[u8; ARRAY_SIZE]) -> u16 { 5 | a.iter() 6 | .zip(b.iter()) 7 | .fold(0, |acc, (&x, &y)| acc + (x ^ y).count_ones() as u16) 8 | } 9 | -------------------------------------------------------------------------------- /src/services.rs: -------------------------------------------------------------------------------- 1 | pub mod commit; 2 | pub mod lock_service; 3 | pub mod namespace_state; 4 | pub mod query; 5 | 6 | pub use commit::CommitService; 7 | pub use lock_service::LockService; 8 | pub use namespace_state::NamespaceState; 9 | pub use query::QueryService; 10 | -------------------------------------------------------------------------------- /src/services/commit.rs: -------------------------------------------------------------------------------- 1 | use crate::constants::VECTOR_SIZE; 2 | use crate::structures::inverted_index::InvertedIndexItem; 3 | use crate::structures::metadata_index::{KVPair, MetadataIndexItem}; 4 | 5 | use super::namespace_state::NamespaceState; 6 | use std::collections::HashMap; 7 | 8 | use std::io; 9 | use std::os::unix::fs as unix_fs; 10 | use std::path::PathBuf; 11 | 12 | pub struct CommitService { 13 | pub state: NamespaceState, 14 | } 15 | 16 | impl CommitService { 17 | pub fn new(path: PathBuf, namespace_id: String) -> io::Result { 18 | let state = NamespaceState::new(path, namespace_id)?; 19 | Ok(CommitService { state }) 20 | } 21 | 22 | pub fn commit(&mut self) -> io::Result<()> { 23 | let commits = 
self.state.wal.get_uncommitted(100000)?; 24 | 25 | let commits_len = commits.len(); 26 | 27 | if commits.len() == 0 { 28 | return Ok(()); 29 | } 30 | 31 | println!("Commits: {:?}", commits_len); 32 | 33 | let mut processed = 0; 34 | 35 | let merged_commits = commits 36 | .iter() 37 | .fold((Vec::new(), Vec::new()), |mut items, commit| { 38 | let vectors = commit.vectors.clone(); 39 | let kvs = commit.kvs.clone(); 40 | 41 | items.0.extend(vectors); 42 | items.1.extend(kvs); 43 | 44 | items 45 | }); 46 | 47 | for (vectors, kvs) in vec![merged_commits] { 48 | // let vectors = commit.vectors; 49 | // let kvs = commit.kvs; 50 | 51 | if vectors.len() != kvs.len() { 52 | return Err(io::Error::new( 53 | io::ErrorKind::InvalidInput, 54 | "Quantized vectors length mismatch", 55 | )); 56 | } 57 | 58 | println!( 59 | "Processing commit: {} of {} with vectors of len: {}", 60 | processed, 61 | commits_len, 62 | vectors.len() 63 | ); 64 | 65 | processed += 1; 66 | 67 | // generate u128 ids 68 | 69 | let ids = (0..vectors.len()) 70 | .map(|_| uuid::Uuid::new_v4().as_u128()) 71 | .collect::>(); 72 | 73 | println!("Generated ids"); 74 | 75 | let vector_indices = self.state.vectors.batch_push(vectors)?; 76 | 77 | println!("Vector indices: {:?}", vector_indices); 78 | 79 | println!("Pushed vectors"); 80 | 81 | let mut inverted_index_items: HashMap> = HashMap::new(); 82 | 83 | // let mut metadata_index_items = Vec::new(); 84 | 85 | let mut batch_metadata_to_insert = Vec::new(); 86 | 87 | for (idx, kv) in kvs.iter().enumerate() { 88 | let metadata_index_item = MetadataIndexItem { 89 | id: ids[idx], 90 | kvs: kv.clone(), 91 | vector_index: vector_indices[idx], 92 | // namespaced_id: self.state.namespace_id.clone(), 93 | }; 94 | 95 | // println!("Inserting id: {}, {} of {}", ids[idx], idx, ids.len()); 96 | 97 | batch_metadata_to_insert.push((ids[idx], metadata_index_item)); 98 | 99 | // self.state 100 | // .metadata_index 101 | // .insert(ids[idx], metadata_index_item); 102 | 103 | for kv 
in kv { 104 | // let inverted_index_item = InvertedIndexItem { 105 | // indices: vec![vector_indices[idx]], 106 | // ids: vec![ids[idx]], 107 | // }; 108 | 109 | // self.state 110 | // .inverted_index 111 | // .insert_append(kv.clone(), inverted_index_item); 112 | 113 | inverted_index_items 114 | .entry(kv.clone()) 115 | .or_insert_with(Vec::new) 116 | .push((vector_indices[idx], ids[idx])); 117 | } 118 | } 119 | 120 | self.state 121 | .metadata_index 122 | .batch_insert(batch_metadata_to_insert); 123 | 124 | // self.state.metadata_index.batch_insert(metadata_index_items); 125 | 126 | for (kv, items) in inverted_index_items { 127 | let inverted_index_item = InvertedIndexItem { 128 | indices: items.iter().map(|(idx, _)| *idx).collect(), 129 | ids: items.iter().map(|(_, id)| *id).collect(), 130 | }; 131 | 132 | self.state 133 | .inverted_index 134 | .insert_append(kv, inverted_index_item); 135 | } 136 | } 137 | 138 | for commit in commits { 139 | self.state.wal.mark_commit_finished(commit.hash)?; 140 | } 141 | 142 | Ok(()) 143 | } 144 | 145 | pub fn recover_point_in_time(&mut self, timestamp: u64) -> io::Result<()> { 146 | println!("Recovering to timestamp: {}", timestamp); 147 | let versions: Vec = self.state.get_all_versions()?; 148 | let max_version = versions.iter().max().unwrap(); 149 | let new_version = max_version + 1; 150 | 151 | println!("Versions: {:?}", versions); 152 | 153 | println!("Creating new version: {}", new_version); 154 | 155 | let new_version_path = self 156 | .state 157 | .path 158 | .parent() 159 | .unwrap() 160 | .join(format!("v{}", new_version)); 161 | 162 | let mut fresh_state = 163 | NamespaceState::new(new_version_path.clone(), self.state.namespace_id.clone())?; 164 | 165 | let commits = self.state.wal.get_commits_before(timestamp)?; 166 | let commits_len = commits.len(); 167 | 168 | if commits.len() == 0 { 169 | return Ok(()); 170 | } 171 | 172 | println!("Commits to PITR: {:?}", commits_len); 173 | 174 | let mut processed = 0; 175 | 
176 | for commit in commits.iter() { 177 | let vectors = commit.vectors.clone(); 178 | let kvs = commit.kvs.clone(); 179 | 180 | if vectors.len() != kvs.len() { 181 | return Err(io::Error::new( 182 | io::ErrorKind::InvalidInput, 183 | "Quantized vectors length mismatch", 184 | )); 185 | } 186 | 187 | println!( 188 | "Processing commit: {} of {} with vectors of len: {}", 189 | processed, 190 | commits_len, 191 | vectors.len() 192 | ); 193 | 194 | processed += 1; 195 | 196 | // generate u128 ids 197 | let ids = (0..vectors.len()) 198 | .map(|_| uuid::Uuid::new_v4().as_u128()) 199 | .collect::>(); 200 | 201 | println!("Generated ids"); 202 | 203 | let vector_indices = fresh_state.vectors.batch_push(vectors)?; 204 | 205 | println!("Pushed vectors"); 206 | 207 | let mut inverted_index_items: HashMap> = HashMap::new(); 208 | 209 | let mut metadata_index_items = Vec::new(); 210 | 211 | for (idx, kv) in kvs.iter().enumerate() { 212 | let metadata_index_item = MetadataIndexItem { 213 | id: ids[idx], 214 | kvs: kv.clone(), 215 | vector_index: vector_indices[idx], 216 | // namespaced_id: self.state.namespace_id.clone(), 217 | }; 218 | 219 | // println!("Inserting id: {}, {} of {}", ids[idx], idx, ids.len()); 220 | 221 | metadata_index_items.push((ids[idx], metadata_index_item)); 222 | 223 | for kv in kv { 224 | inverted_index_items 225 | .entry(kv.clone()) 226 | .or_insert_with(Vec::new) 227 | .push((vector_indices[idx], ids[idx])); 228 | } 229 | } 230 | 231 | fresh_state 232 | .metadata_index 233 | .batch_insert(metadata_index_items); 234 | 235 | for (kv, items) in inverted_index_items { 236 | let inverted_index_item = InvertedIndexItem { 237 | indices: items.iter().map(|(idx, _)| *idx).collect(), 238 | ids: items.iter().map(|(_, id)| *id).collect(), 239 | }; 240 | 241 | fresh_state 242 | .inverted_index 243 | .insert_append(kv, inverted_index_item); 244 | } 245 | 246 | fresh_state.wal.mark_commit_finished(commit.hash)?; 247 | } 248 | 249 | // update symlink for /current 250 
| let current_path = self.state.path.clone(); 251 | 252 | println!("Removing current symlink: {:?}", current_path); 253 | 254 | std::fs::remove_file(¤t_path)?; 255 | unix_fs::symlink(&new_version_path, ¤t_path)?; 256 | 257 | Ok(()) 258 | } 259 | 260 | /* Appends one batch to the write-ahead log; every vector must have a matching kv entry. */ pub fn add_to_wal( 261 | &mut self, 262 | vectors: Vec<[f32; VECTOR_SIZE]>, 263 | kvs: Vec>, 264 | ) -> io::Result<()> { 265 | if vectors.len() != kvs.len() { /* fix: was `vectors.len() != vectors.len()` — a tautology, so the mismatch guard could never fire; compare against kvs.len() exactly as batch_add_to_wal below does */ 266 | return Err(io::Error::new( 267 | io::ErrorKind::InvalidInput, 268 | "Quantized vectors length mismatch", 269 | )); 270 | } 271 | 272 | // self.state.wal.commit(hash, quantized_vectors, kvs) 273 | self.state 274 | .wal 275 | .add_to_wal(vectors, kvs)?; /* fix: propagate the io::Error with `?` instead of panicking via .expect — the function already returns io::Result, and batch_add_to_wal propagates */ 276 | 277 | 278 | Ok(()) 279 | } 280 | 281 | pub fn batch_add_to_wal( 282 | &mut self, 283 | vectors: Vec>, 284 | kvs: Vec>>, 285 | ) -> io::Result<()> { 286 | if vectors.len() != kvs.len() { 287 | return Err(io::Error::new( 288 | io::ErrorKind::InvalidInput, 289 | "Quantized vectors length mismatch", 290 | )); 291 | } 292 | 293 | self.state.wal.batch_add_to_wal(vectors, kvs)?; 294 | 295 | Ok(()) 296 | } 297 | } 298 | -------------------------------------------------------------------------------- /src/services/lock_service.rs: -------------------------------------------------------------------------------- 1 | use fs2::FileExt; 2 | use std::collections::HashMap; 3 | use std::fs::{self, OpenOptions}; 4 | use std::path::PathBuf; 5 | 6 | pub struct LockService { 7 | path: PathBuf, 8 | } 9 | 10 | impl LockService { 11 | pub fn new(path: PathBuf) -> Self { 12 | LockService { path } 13 | } 14 | 15 | pub fn acquire(&self, key: String) -> std::io::Result<()> { 16 | let path = self.path.join(key); 17 | let file = OpenOptions::new() 18 | .read(true) 19 | .write(true) 20 | .create(true) 21 | .open(&path)?; 22 | file.lock_exclusive()?; 23 | Ok(()) 24 | } 25 | 26 | pub fn release(&self, key: String) -> std::io::Result<()> { 27 | let path = self.path.join(key); 28 | let file =
OpenOptions::new().read(true).write(true).open(&path)?; 29 | file.unlock()?; 30 | Ok(()) 31 | } 32 | 33 | // Function to return a map of keys to their lock status 34 | pub fn check_locks(&self) -> std::io::Result> { 35 | let mut status = HashMap::new(); 36 | for entry in fs::read_dir(&self.path)? { 37 | let entry = entry?; 38 | let path = entry.path(); 39 | if path.is_file() { 40 | let file_name = path 41 | .file_name() 42 | .unwrap_or_default() 43 | .to_string_lossy() 44 | .into_owned(); 45 | let file = OpenOptions::new().read(true).write(true).open(&path)?; 46 | 47 | // Try to acquire a shared lock without blocking 48 | match file.try_lock_shared() { 49 | Ok(_) => { 50 | // If we can lock it, then it's not locked by another process 51 | status.insert(file_name, false); 52 | file.unlock()?; // Unlock immediately since we were just checking 53 | } 54 | Err(_) => { 55 | // If we cannot lock it, it's already locked 56 | status.insert(file_name, true); 57 | } 58 | } 59 | } 60 | } 61 | Ok(status) 62 | } 63 | 64 | pub fn is_locked(&self, key: String) -> std::io::Result { 65 | let path = self.path.join(key); 66 | let file = OpenOptions::new().read(true).write(true).open(&path)?; 67 | match file.try_lock_shared() { 68 | Ok(_) => { 69 | file.unlock()?; 70 | Ok(false) 71 | } 72 | Err(_) => Ok(true), 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/services/namespace_state.rs: -------------------------------------------------------------------------------- 1 | use crate::structures::dense_vector_list::DenseVectorList; 2 | use crate::structures::inverted_index::InvertedIndex; 3 | use crate::structures::metadata_index::MetadataIndex; 4 | use crate::structures::wal::WAL; 5 | use std::fs; 6 | use std::io; 7 | use std::path::Path; 8 | use std::path::PathBuf; 9 | 10 | use super::LockService; 11 | 12 | pub struct NamespaceState { 13 | pub namespace_id: String, 14 | pub metadata_index: MetadataIndex, 15 | pub 
inverted_index: InvertedIndex, 16 | pub vectors: DenseVectorList, 17 | pub wal: WAL, 18 | pub locks: LockService, 19 | pub path: PathBuf, 20 | } 21 | 22 | fn get_all_versions(path: &Path) -> io::Result> { 23 | let mut versions = Vec::new(); 24 | for entry in fs::read_dir(&path)? { 25 | let entry = entry?; 26 | let path = entry.path(); 27 | if path.is_dir() { 28 | let version = path.file_name().unwrap().to_str().unwrap().to_string(); 29 | // parse as int without `v` to see if it's a version 30 | // must remove the v first 31 | if version.starts_with("v") { 32 | let version = version[1..].parse::(); 33 | if version.is_ok() { 34 | versions.push(version.unwrap()); 35 | } 36 | } 37 | } 38 | } 39 | Ok(versions) 40 | } 41 | 42 | impl NamespaceState { 43 | pub fn new(path: PathBuf, namespace_id: String) -> io::Result { 44 | // path should be .../current, which should be a symlink to the current version 45 | 46 | // println!("Creating namespace state with path: {:?}", path); 47 | 48 | if !path.exists() { 49 | fs::create_dir_all(&path.clone().parent().unwrap()) 50 | .expect("Failed to create directory"); 51 | } 52 | 53 | let versions = get_all_versions(path.clone().parent().unwrap())?; 54 | 55 | if versions.len() == 0 { 56 | // create v0 57 | // println!("Creating v0"); 58 | let version_path = path.clone().parent().unwrap().join("v0"); 59 | // println!("Creating version path: {:?}", version_path); 60 | fs::create_dir_all(&version_path).expect("Failed to create directory"); 61 | 62 | // create symlink 63 | 64 | std::os::unix::fs::symlink(&version_path, &path).expect("Failed to create symlink"); 65 | } 66 | 67 | let metadata_path = path.clone().join("metadata.bin"); 68 | let inverted_index_path = path.clone().join("inverted_index.bin"); 69 | let wal_path = path.clone().join("wal"); 70 | let locks_path = path.clone().join("locks"); 71 | 72 | fs::create_dir_all(&wal_path).expect("Failed to create directory"); 73 | 74 | fs::create_dir_all(&locks_path).expect("Failed to create 
directory"); 75 | 76 | let vectors_path = path.clone().join("vectors.bin"); 77 | 78 | let metadata_index = MetadataIndex::new(metadata_path); 79 | let inverted_index = InvertedIndex::new(inverted_index_path); 80 | let wal = WAL::new(wal_path, namespace_id.clone())?; 81 | let vectors = DenseVectorList::new(vectors_path, 100_000)?; 82 | let locks = LockService::new(locks_path); 83 | 84 | Ok(NamespaceState { 85 | namespace_id, 86 | metadata_index, 87 | inverted_index, 88 | vectors, 89 | wal, 90 | locks, 91 | path, 92 | }) 93 | } 94 | 95 | pub fn get_all_versions(&self) -> io::Result> { 96 | let mut versions = Vec::new(); 97 | for entry in fs::read_dir(&self.path.parent().unwrap())? { 98 | let entry = entry?; 99 | let path = entry.path(); 100 | if path.is_dir() { 101 | // println!("path: {:?}", path); 102 | let version = path.file_name().unwrap().to_str().unwrap().to_string(); 103 | // println!("version: {:?}", version); 104 | // parse as int without `v` to see if it's a version 105 | // must remove the v first 106 | if version.starts_with("v") { 107 | let version = version[1..].parse::(); 108 | if version.is_ok() { 109 | versions.push(version.unwrap()); 110 | } 111 | } 112 | } 113 | } 114 | // println!("versions: {:?}", versions); 115 | Ok(versions) 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/services/query.rs: -------------------------------------------------------------------------------- 1 | use rayon::prelude::*; 2 | 3 | use super::namespace_state::NamespaceState; 4 | use crate::constants::VECTOR_SIZE; 5 | use crate::math::hamming_distance; 6 | use crate::structures::filters::{Filter, Filters}; 7 | use crate::structures::metadata_index::KVPair; 8 | use crate::utils::quantize; 9 | use std::io; 10 | use std::path::PathBuf; 11 | 12 | pub struct QueryService { 13 | pub state: NamespaceState, 14 | } 15 | 16 | impl QueryService { 17 | pub fn new(path: PathBuf, namespace_id: String) -> io::Result { 18 | let 
impl QueryService {
    /// Open (or create on first use) the namespace rooted at `path` and wrap
    /// it behind a query-only facade.
    pub fn new(path: PathBuf, namespace_id: String) -> io::Result<QueryService> {
        let state = NamespaceState::new(path, namespace_id)?;
        Ok(QueryService { state })
    }

    /// Filtered top-k search: quantize the query vector, gather the row
    /// indices that match `filters`, rank the candidate vectors by Hamming
    /// distance, and return the metadata key/value sets of the best `top_k`.
    pub fn query(
        &mut self,
        query_vector: &[f32; VECTOR_SIZE],
        filters: &Filter,
        top_k: usize,
    ) -> io::Result<Vec<Vec<KVPair>>> {
        let quantized_query_vector = quantize(query_vector);

        let (indices, ids) =
            Filters::evaluate(filters, &mut self.state.inverted_index).get_indices();

        // Group runs of consecutive indices so each run can be fetched with a
        // single contiguous mmap read.
        // NOTE(review): this grouping only produces correct runs if `indices`
        // is sorted ascending — confirm `get_indices` guarantees that.
        let mut batch_indices: Vec<Vec<usize>> = Vec::new();

        let mut current_batch = Vec::new();

        for index in indices {
            if current_batch.len() == 0 {
                current_batch.push(index);
            } else {
                let last_index = current_batch[current_batch.len() - 1];
                if index == last_index + 1 {
                    current_batch.push(index);
                } else {
                    batch_indices.push(current_batch);
                    current_batch = Vec::new();
                    current_batch.push(index);
                }
            }
        }

        // NOTE(review): only the FINAL batch gets sorted/deduped here —
        // earlier batches were pushed as-is above. Either every batch needs
        // this or none does; verify the intent.
        current_batch.sort();
        current_batch.dedup();

        if current_batch.len() > 0 {
            batch_indices.push(current_batch);
        }

        let mut top_k_indices = Vec::new();

        // Never ask for more results than there are candidate ids.
        let top_k_to_use = top_k.min(ids.len());

        for batch in batch_indices {
            let vectors = self.state.vectors.get_contiguous(batch[0], batch.len())?;
            // Parallel fold: every rayon worker keeps its own best-k list,
            // then the reduce step merges and truncates them.
            top_k_indices.extend(
                vectors
                    .par_iter()
                    .enumerate()
                    .fold(
                        || Vec::new(),
                        |mut acc, (idx, vector)| {
                            let distance = hamming_distance(&quantized_query_vector, vector);

                            // NOTE(review): `idx` is the position within THIS
                            // batch, but `ids` is indexed by global candidate
                            // position — for every batch after the first this
                            // appears to pair distances with the wrong ids.
                            // Verify against callers/tests.
                            if acc.len() < top_k_to_use {
                                acc.push((ids[idx], distance));
                                acc.sort();
                            } else {
                                // NOTE(review): `acc.sort()` orders tuples by
                                // id first, then distance, so the last entry
                                // is not necessarily the worst distance —
                                // confirm this is intended.
                                let worst_best_distance = acc[acc.len() - 1].1;
                                if distance < worst_best_distance {
                                    acc.pop();
                                    acc.push((ids[idx], distance));
                                    acc.sort();
                                }
                            }

                            acc
                        },
                    )
                    .reduce(
                        || Vec::new(),
                        |mut a, mut b| {
                            // Merge two workers' partial results, rank by
                            // distance and keep only the best k.
                            a.append(&mut b);
                            a.sort_by_key(|&(_, dist)| dist);
                            a.truncate(top_k_to_use);
                            a
                        },
                    ),
            );
        }

        // Resolve the surviving ids to their metadata key/value pairs.
        let mut kvs = Vec::new();

        for (id, _) in top_k_indices {
            let r = self.state.metadata_index.get(id);
            match r {
                Some(item) => {
                    kvs.push(item.kvs);
                }
                None => {
                    println!("Metadata not found");
                    continue;
                }
            }
        }

        Ok(kvs)
    }
}
impl DenseVectorList {
    /// Open (or create) the backing file at `path` and memory-map it.
    /// The first `HEADER_SIZE` bytes hold the used-space counter as a
    /// little-endian u64; vector data follows. `elements` only sizes a
    /// freshly created file.
    pub fn new(path: PathBuf, elements: u64) -> io::Result<DenseVectorList> {
        let exists = path.exists();
        let file = OpenOptions::new()
            .read(true)
            .write(true)
            .create(!exists)
            .open(path.clone())?;

        if !exists {
            // Pre-size the file: room for `elements` vectors plus the header.
            file.set_len(elements * (QUANTIZED_VECTOR_SIZE as u64) + HEADER_SIZE as u64)?;
        }

        // map_mut is unsafe because the mapping's validity depends on no
        // other process truncating the file underneath us.
        let mut mmap = unsafe { MmapMut::map_mut(&file)? };

        let used_space = if exists && file.metadata().unwrap().len() as usize > HEADER_SIZE {
            // Existing file: recover the used-space counter from the header.
            let used_bytes = &mmap[0..HEADER_SIZE];
            u64::from_le_bytes(used_bytes.try_into().unwrap()) as usize
        } else {
            0 // No data written yet, or file did not exist
        };

        if !exists {
            // Initialize the header if the file is newly created.
            mmap[0..HEADER_SIZE].copy_from_slice(&(used_space as u64).to_le_bytes());
        }

        Ok(DenseVectorList {
            mmap,
            used_space,
            path,
        })
    }

    /// Append one vector, growing the mapping (doubling) when needed.
    /// Returns the index of the vector just written.
    pub fn push(&mut self, vector: [u8; QUANTIZED_VECTOR_SIZE]) -> io::Result<usize> {
        let offset = self.used_space + HEADER_SIZE;
        let required_space = offset + QUANTIZED_VECTOR_SIZE;

        if required_space > self.mmap.len() {
            self.resize_mmap(required_space * 2)?;
        }

        self.mmap[offset..required_space].copy_from_slice(&vector);
        self.used_space += QUANTIZED_VECTOR_SIZE;
        // Persist the used-space counter in the header.
        self.mmap[0..HEADER_SIZE].copy_from_slice(&(self.used_space as u64).to_le_bytes());

        Ok(self.used_space / QUANTIZED_VECTOR_SIZE - 1)
    }

    /// Grow the backing file to `new_len` bytes and re-map it.
    fn resize_mmap(&mut self, new_len: usize) -> io::Result<()> {
        println!("Resizing mmap in DenseVectorList");

        let file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(self.path.clone())?;

        file.set_len(new_len as u64)?;

        // The old mapping is dropped when the field is overwritten.
        self.mmap = unsafe { MmapMut::map_mut(&file)? };
        Ok(())
    }

    /// Append many vectors at once; returns the range of indices assigned
    /// to them, in order.
    pub fn batch_push(
        &mut self,
        vectors: Vec<[u8; QUANTIZED_VECTOR_SIZE]>,
    ) -> io::Result<Vec<usize>> {
        let start_offset = self.used_space + HEADER_SIZE;
        let total_size = vectors.len() * QUANTIZED_VECTOR_SIZE;
        let required_space = start_offset + total_size;

        if required_space > self.mmap.len() {
            self.resize_mmap(required_space * 2)?;
        }

        for (i, vector) in vectors.iter().enumerate() {
            let offset = start_offset + i * QUANTIZED_VECTOR_SIZE;
            self.mmap[offset..offset + QUANTIZED_VECTOR_SIZE].copy_from_slice(vector);
        }

        self.used_space += total_size;
        // Persist the used-space counter in the header.
        self.mmap[0..HEADER_SIZE].copy_from_slice(&(self.used_space as u64).to_le_bytes());

        Ok((((start_offset - HEADER_SIZE) / QUANTIZED_VECTOR_SIZE)
            ..(self.used_space / QUANTIZED_VECTOR_SIZE))
            .collect())
    }

    /// Borrow the vector at `index`; errors when `index` is past the
    /// used region.
    pub fn get(&self, index: usize) -> io::Result<&[u8; QUANTIZED_VECTOR_SIZE]> {
        let offset = HEADER_SIZE + index * QUANTIZED_VECTOR_SIZE;
        let end = offset + QUANTIZED_VECTOR_SIZE;

        if end > self.used_space + HEADER_SIZE {
            println!("Offset: {}", offset);
            println!("End: {}", end);
            println!("Used space: {}", self.used_space);

            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "Index out of bounds",
            ));
        }

        let bytes = &self.mmap[offset..end];
        let val = bytes.try_into().unwrap();
        Ok(val)
    }

    /// Borrow `num_elements` consecutive vectors starting at `index` as a
    /// single slice view into the mapping (no copy).
    pub fn get_contiguous(
        &self,
        index: usize,
        num_elements: usize,
    ) -> io::Result<&[[u8; QUANTIZED_VECTOR_SIZE]]> {
        let start = HEADER_SIZE + index * QUANTIZED_VECTOR_SIZE;
        let end = start + num_elements * QUANTIZED_VECTOR_SIZE;

        if end > self.used_space + HEADER_SIZE {
            println!("start: {}", start);
            println!("End: {}", end);
            println!("Used space: {}", self.used_space);
            println!("Num elements: {}", num_elements);
            println!("Index: {}", index);
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "Index out of bounds",
            ));
        }

        // SAFETY: the bounds check above keeps [start, end) inside the used
        // region of the mapping, and [u8; N] has alignment 1, so any byte
        // offset is validly aligned. The returned slice borrows &self, so the
        // mapping outlives it.
        let vectors: &[[u8; QUANTIZED_VECTOR_SIZE]] = unsafe {
            std::slice::from_raw_parts(
                self.mmap.as_ptr().add(start) as *const [u8; QUANTIZED_VECTOR_SIZE],
                num_elements,
            )
        };

        Ok(vectors)
    }

    /// Number of vectors currently stored.
    pub fn len(&self) -> usize {
        self.used_space / QUANTIZED_VECTOR_SIZE
    }

    /// Overwrite the vector slot at `index`, growing the mapping if needed.
    /// NOTE(review): unlike push/batch_push this never updates `used_space`,
    /// so a write past the current end stays invisible to `get`/`len` —
    /// confirm whether that is intended.
    pub fn insert(&mut self, index: usize, vector: [u8; QUANTIZED_VECTOR_SIZE]) -> io::Result<()> {
        let offset = HEADER_SIZE + index * QUANTIZED_VECTOR_SIZE;
        let end = offset + QUANTIZED_VECTOR_SIZE;

        if end > self.used_space + HEADER_SIZE {
            self.resize_mmap(end * 2)?;
        }

        self.mmap[offset..end].copy_from_slice(&vector);

        Ok(())
    }
}
/// A working set of row indices and record ids produced while evaluating a
/// filter expression. Indices address rows in the dense vector list; ids are
/// the corresponding record ids.
pub struct Filters {
    pub current_indices: Vec<usize>,
    pub current_ids: Vec<u128>,
}

impl Filters {
    pub fn new(indices: Vec<usize>, current_ids: Vec<u128>) -> Self {
        Filters {
            current_indices: indices,
            current_ids,
        }
    }

    /// Clone out the current (indices, ids) pair.
    pub fn get_indices(&self) -> (Vec<usize>, Vec<u128>) {
        (self.current_indices.clone(), self.current_ids.clone())
    }

    /// Replace both lists wholesale.
    pub fn set_indices(&mut self, indices: Vec<usize>, ids: Vec<u128>) {
        self.current_indices = indices;
        self.current_ids = ids;
    }

    /// Set intersection, computed independently on indices and ids.
    /// Keeps `self`'s order.
    pub fn intersection(&self, other: &Filters) -> Filters {
        // Hash `other` once instead of running a linear `Vec::contains` per
        // element (was O(n*m)).
        let idx_set: HashSet<usize> = other.current_indices.iter().copied().collect();
        let id_set: HashSet<u128> = other.current_ids.iter().copied().collect();

        let intersection_indices: Vec<usize> = self
            .current_indices
            .iter()
            .copied()
            .filter(|x| idx_set.contains(x))
            .collect();

        let intersection_ids: Vec<u128> = self
            .current_ids
            .iter()
            .copied()
            .filter(|x| id_set.contains(x))
            .collect();

        Filters::new(intersection_indices, intersection_ids)
    }

    /// Set union: concatenate, sort and dedup both lists.
    pub fn union(&self, other: &Filters) -> Filters {
        let mut union_indices = self.current_indices.clone();
        union_indices.extend(other.current_indices.iter().cloned());
        union_indices.sort_unstable();
        union_indices.dedup();

        let mut union_ids = self.current_ids.clone();
        union_ids.extend(other.current_ids.iter().cloned());
        union_ids.sort_unstable();
        union_ids.dedup();

        Filters::new(union_indices, union_ids)
    }
}
other.current_indices.iter().collect(); 77 | let difference_indices = self 78 | .current_indices 79 | .iter() 80 | .filter(|&x| !other_indices_set.contains(x)) 81 | .cloned() 82 | .collect::>(); 83 | 84 | let other_ids_set: HashSet<_> = other.current_ids.iter().collect(); 85 | let difference_ids = self 86 | .current_ids 87 | .iter() 88 | .filter(|&x| !other_ids_set.contains(x)) 89 | .cloned() 90 | .collect::>(); 91 | 92 | Filters::new(difference_indices, difference_ids) 93 | } 94 | 95 | pub fn is_subset(&self, other: &Filters) -> bool { 96 | self.current_indices 97 | .par_iter() 98 | .all(|x| other.current_indices.contains(x)) 99 | && self 100 | .current_ids 101 | .par_iter() 102 | .all(|x| other.current_ids.contains(x)) 103 | } 104 | 105 | pub fn is_superset(&self, other: &Filters) -> bool { 106 | other.is_subset(self) 107 | } 108 | 109 | pub fn from_index(index: &mut InvertedIndex, key: &KVPair) -> Self { 110 | match index.get(key.clone()) { 111 | Some(item) => Filters::new(item.indices, item.ids), 112 | None => Filters::new(vec![], vec![]), 113 | } 114 | } 115 | 116 | // Evaluate a Filter and return the resulting Filters object 117 | pub fn evaluate(filter: &Filter, index: &mut InvertedIndex) -> Filters { 118 | match filter { 119 | Filter::And(filters) => { 120 | let mut result = Filters::new(vec![], vec![]); // Start with an empty set or universal set if applicable 121 | for f in filters.iter() { 122 | let current = Filters::evaluate(f, index); 123 | if result.current_indices.is_empty() && result.current_ids.is_empty() { 124 | result = current; 125 | } else { 126 | result = result.intersection(¤t); 127 | } 128 | } 129 | result 130 | } 131 | Filter::Or(filters) => { 132 | let mut result = Filters::new(vec![], vec![]); 133 | for f in filters.iter() { 134 | let current = Filters::evaluate(f, index); 135 | result = result.union(¤t); 136 | } 137 | result 138 | } 139 | Filter::In(key, values) => { 140 | let mut result = Filters::new(vec![], vec![]); 141 | for value 
in values.iter() { 142 | let kv_pair = KVPair::new(key.clone(), value.clone()); // Ensure correct KVPair creation 143 | let current = Filters::from_index(index, &kv_pair); 144 | result = result.union(¤t); 145 | } 146 | result 147 | } 148 | Filter::Eq(key, value) => { 149 | println!( 150 | "Evaluating EQ filter for key: {:?}, value: {:?}", 151 | key, value 152 | ); // Debug output 153 | let kv_pair = KVPair::new(key.clone(), value.clone()); // Ensure correct KVPair creation 154 | Filters::from_index(index, &kv_pair) 155 | } 156 | } 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/structures/inverted_index.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | use std::path::PathBuf; 3 | 4 | use serde::{Deserialize, Serialize}; 5 | 6 | use crate::structures::mmap_tree::Tree; 7 | 8 | use super::metadata_index::KVPair; 9 | use super::mmap_tree::serialization::{TreeDeserialization, TreeSerialization}; 10 | 11 | #[derive(Debug, Serialize, Deserialize, Clone)] 12 | pub struct InvertedIndexItem { 13 | pub indices: Vec, 14 | pub ids: Vec, 15 | } 16 | 17 | impl Display for InvertedIndexItem { 18 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 19 | write!(f, "InvertedIndexItem {{ ... 
}}") 20 | } 21 | } 22 | 23 | impl TreeSerialization for InvertedIndexItem { 24 | fn serialize(&self) -> Vec { 25 | let mut serialized = Vec::new(); 26 | 27 | serialized.extend_from_slice(self.indices.len().to_le_bytes().as_ref()); 28 | 29 | let len_of_index_bytes: usize = 8; 30 | 31 | serialized.extend_from_slice(len_of_index_bytes.to_le_bytes().as_ref()); 32 | 33 | for index in &self.indices { 34 | serialized.extend_from_slice(index.to_le_bytes().as_ref()); 35 | } 36 | 37 | serialized.extend_from_slice(self.ids.len().to_le_bytes().as_ref()); 38 | 39 | let len_of_id_bytes: usize = 16; 40 | 41 | serialized.extend_from_slice(len_of_id_bytes.to_le_bytes().as_ref()); 42 | 43 | for id in &self.ids { 44 | serialized.extend_from_slice(id.to_le_bytes().as_ref()); 45 | } 46 | 47 | serialized 48 | } 49 | } 50 | 51 | impl TreeDeserialization for InvertedIndexItem { 52 | fn deserialize(data: &[u8]) -> Self { 53 | let mut offset = 0; 54 | 55 | let indices_len = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize; 56 | offset += 8; 57 | // let mut indices = Vec::new(); 58 | let len_of_index_bytes = usize::from_le_bytes(data[offset..offset + 8].try_into().unwrap()); 59 | offset += 8; 60 | 61 | let start = offset; 62 | let end = start + indices_len * len_of_index_bytes; 63 | 64 | let indices_bytes = &data[start..end]; 65 | 66 | let indices_chunks = indices_bytes.chunks(len_of_index_bytes); 67 | 68 | // for chunk in indices_chunks { 69 | // let index = usize::from_le_bytes(chunk.try_into().unwrap()); 70 | // indices.push(index); 71 | // } 72 | 73 | let indices = indices_chunks 74 | .map(|chunk| usize::from_le_bytes(chunk.try_into().unwrap())) 75 | .collect(); 76 | 77 | offset = end; 78 | 79 | let ids_len = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize; 80 | offset += 8; 81 | // let mut ids = Vec::new(); 82 | let len_of_id_bytes = usize::from_le_bytes(data[offset..offset + 8].try_into().unwrap()); 83 | offset += 8; 84 | 85 | // get 
/// Run-length encode a sorted list of indices as flat
/// `[start, count, start, count, …]` pairs, where each pair describes the
/// consecutive run `start..start + count`.
pub fn compress_indices(indices: Vec<usize>) -> Vec<usize> {
    let mut compressed = Vec::new();
    if indices.is_empty() {
        return compressed;
    }

    let mut current_start = indices[0];
    let mut count = 1;

    // Iterate by reference instead of re-indexing (no per-step bounds checks).
    for &idx in &indices[1..] {
        if idx == current_start + count {
            count += 1; // extends the current run
        } else {
            compressed.push(current_start);
            compressed.push(count);
            current_start = idx;
            count = 1;
        }
    }
    compressed.push(current_start);
    compressed.push(count);

    compressed
}

/// Inverse of `compress_indices`: expand `[start, count, …]` pairs back into
/// the full index list. A trailing odd element (malformed input) is ignored
/// instead of panicking on an out-of-bounds read.
pub fn decompress_indices(compressed: Vec<usize>) -> Vec<usize> {
    let mut decompressed = Vec::new();

    for pair in compressed.chunks_exact(2) {
        let (start, count) = (pair[0], pair[1]);
        // Extend directly from the range — the old code collected the range
        // into a temporary Vec first for no benefit.
        decompressed.extend(start..start + count);
    }

    decompressed
}
compressed_indices = compress_indices(value.indices); 159 | let value = InvertedIndexItem { 160 | indices: compressed_indices, 161 | ids: value.ids, 162 | }; 163 | self.tree.insert(key, value).expect("Failed to insert"); 164 | } else { 165 | self.tree.insert(key, value).expect("Failed to insert"); 166 | } 167 | // let compressed_indices = compress_indices(value.indices); 168 | // let value = InvertedIndexItem { 169 | // indices: compressed_indices, 170 | // ids: value.ids, 171 | // }; 172 | // self.tree.insert(key, value).expect("Failed to insert"); 173 | } 174 | 175 | pub fn get(&mut self, key: KVPair) -> Option { 176 | // println!("Getting key: {:?}", key); 177 | match self.tree.search(key) { 178 | Ok(v) => { 179 | // decompress the indices 180 | match v { 181 | Some(mut item) => { 182 | println!("Search result: {:?}", item); // Add this 183 | 184 | item.indices = decompress_indices(item.indices); 185 | println!("Decompressed indices: {:?}", item.indices); // Check output 186 | 187 | Some(item) 188 | } 189 | None => None, 190 | } 191 | } 192 | Err(_) => None, 193 | } 194 | } 195 | 196 | pub fn insert_append(&mut self, key: KVPair, mut value: InvertedIndexItem) { 197 | match self.get(key.clone()) { 198 | Some(mut v) => { 199 | // v.indices.extend(value.indices); 200 | v.ids.extend(value.ids); 201 | 202 | let mut decompressed = v.indices.clone(); 203 | 204 | // binary search to insert all of the ones to append 205 | for index in value.indices { 206 | let idx = decompressed.binary_search(&index).unwrap_or_else(|x| x); 207 | decompressed.insert(idx, index); 208 | } 209 | 210 | decompressed.sort_unstable(); 211 | decompressed.dedup(); 212 | 213 | // println!("Before compression: {:?}", decompressed); 214 | 215 | v.indices = compress_indices(decompressed); 216 | 217 | // println!("After compression: {:?}", v.indices); 218 | 219 | self.insert(key, v, true); 220 | } 221 | None => { 222 | value.indices = compress_indices(value.indices); 223 | // println!("Compressed: 
{:?}", value.indices); 224 | self.insert(key, value, true); 225 | } 226 | } 227 | } 228 | } 229 | -------------------------------------------------------------------------------- /src/structures/metadata_index.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | use std::hash::Hash; 3 | use std::path::PathBuf; 4 | 5 | use serde::{Deserialize, Serialize}; 6 | 7 | use crate::structures::mmap_tree::Tree; 8 | 9 | use super::mmap_tree::serialization::{TreeDeserialization, TreeSerialization}; 10 | 11 | #[derive(Debug, Serialize, Deserialize, Clone, Hash)] 12 | pub struct KVPair { 13 | pub key: String, 14 | pub value: String, 15 | } 16 | 17 | impl KVPair { 18 | pub fn new(key: String, value: String) -> Self { 19 | KVPair { key, value } 20 | } 21 | } 22 | 23 | impl PartialEq for KVPair { 24 | fn eq(&self, other: &Self) -> bool { 25 | self.key == other.key && self.value == other.value 26 | } 27 | } 28 | 29 | impl Eq for KVPair {} 30 | 31 | impl PartialOrd for KVPair { 32 | fn partial_cmp(&self, other: &Self) -> Option { 33 | Some(self.cmp(other)) 34 | } 35 | } 36 | 37 | impl Ord for KVPair { 38 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 39 | self.key 40 | .cmp(&other.key) 41 | .then_with(|| self.value.cmp(&other.value)) 42 | } 43 | } 44 | 45 | impl Display for KVPair { 46 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 47 | write!(f, "KVPair {{ key: {}, value: {} }}", self.key, self.value) 48 | } 49 | } 50 | 51 | impl TreeSerialization for KVPair { 52 | fn serialize(&self) -> Vec { 53 | let mut serialized = Vec::new(); 54 | 55 | serialized.extend_from_slice(self.key.len().to_le_bytes().as_ref()); 56 | serialized.extend_from_slice(self.key.as_bytes()); 57 | serialized.extend_from_slice(self.value.len().to_le_bytes().as_ref()); 58 | serialized.extend_from_slice(self.value.as_bytes()); 59 | 60 | serialized 61 | } 62 | } 63 | 64 | impl TreeDeserialization for KVPair { 65 | fn 
deserialize(data: &[u8]) -> Self { 66 | let mut offset = 0; 67 | 68 | let key_len = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize; 69 | offset += 8; 70 | let key = String::from_utf8(data[offset..offset + key_len].to_vec()).unwrap(); 71 | offset += key_len; 72 | 73 | let value_len = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize; 74 | offset += 8; 75 | let value = String::from_utf8(data[offset..offset + value_len].to_vec()).unwrap(); 76 | // offset += value_len; 77 | 78 | KVPair { key, value } 79 | } 80 | } 81 | 82 | #[derive(Debug, Serialize, Deserialize, Clone)] 83 | pub struct MetadataIndexItem { 84 | pub kvs: Vec, 85 | pub id: u128, 86 | pub vector_index: usize, 87 | // pub namespaced_id: String, 88 | } 89 | 90 | impl Display for MetadataIndexItem { 91 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 92 | write!( 93 | f, 94 | "MetadataIndexItem {{ kvs: {:?}, id: {}, vector_index: {}, namespaced_id: }}", 95 | self.kvs, self.id, self.vector_index 96 | ) 97 | } 98 | } 99 | 100 | impl TreeSerialization for MetadataIndexItem { 101 | fn serialize(&self) -> Vec { 102 | let mut serialized = Vec::new(); 103 | 104 | serialized.extend_from_slice(self.kvs.len().to_le_bytes().as_ref()); 105 | // for kv in &self.kvs { 106 | // serialized.extend_from_slice(kv.key.len().to_le_bytes().as_ref()); 107 | // serialized.extend_from_slice(kv.key.as_bytes()); 108 | // serialized.extend_from_slice(kv.value.len().to_le_bytes().as_ref()); 109 | // serialized.extend_from_slice(kv.value.as_bytes()); 110 | // } 111 | for kv in &self.kvs { 112 | let serialized_kv = TreeSerialization::serialize(kv); 113 | serialized.extend_from_slice(serialized_kv.len().to_le_bytes().as_ref()); 114 | serialized.extend_from_slice(serialized_kv.as_ref()); 115 | } 116 | 117 | // serialized.extend_from_slice(self.id.len().to_le_bytes().as_ref()); 118 | serialized.extend_from_slice(self.id.to_le_bytes().as_ref()); 119 | 120 | 
impl TreeSerialization for MetadataIndexItem {
    /// Wire format (all integers little-endian):
    ///   [kvs_len: u64]
    ///   repeated kvs_len times: [kv_bytes_len: u64][KVPair bytes]
    ///   [id: u128, 16 bytes]
    ///   [vector_index: u64, 8 bytes]
    fn serialize(&self) -> Vec<u8> {
        let mut serialized = Vec::new();

        serialized.extend_from_slice(self.kvs.len().to_le_bytes().as_ref());
        for kv in &self.kvs {
            // Each KVPair is length-prefixed so deserialize can slice it out
            // without understanding its internal layout.
            let serialized_kv = TreeSerialization::serialize(kv);
            serialized.extend_from_slice(serialized_kv.len().to_le_bytes().as_ref());
            serialized.extend_from_slice(serialized_kv.as_ref());
        }

        serialized.extend_from_slice(self.id.to_le_bytes().as_ref());

        serialized.extend_from_slice(self.vector_index.to_le_bytes().as_ref());

        serialized
    }
}

impl TreeDeserialization for MetadataIndexItem {
    /// Inverse of `serialize` above; panics on truncated input
    /// (out-of-bounds slice indexing).
    fn deserialize(data: &[u8]) -> Self {
        let mut offset = 0;

        let kvs_len = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize;
        offset += 8;

        let mut kvs = Vec::new();
        for _ in 0..kvs_len {
            // Read the length prefix, then hand exactly that many bytes to
            // the KVPair deserializer.
            let kv_len =
                usize::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize;
            offset += 8;

            let kv = TreeDeserialization::deserialize(&data[offset..offset + kv_len]);
            offset += kv_len;

            kvs.push(kv);
        }

        let id = u128::from_le_bytes(data[offset..offset + 16].try_into().unwrap());
        offset += 16;

        let vector_index = usize::from_le_bytes(data[offset..offset + 8].try_into().unwrap());

        MetadataIndexItem {
            kvs,
            id,
            vector_index,
        }
    }
}
String::from_utf8(data[offset..offset + namespaced_id_len].to_vec()).unwrap(); 178 | // offset += namespaced_id_len; 179 | 180 | MetadataIndexItem { 181 | kvs, 182 | id, 183 | vector_index, 184 | // namespaced_id, 185 | } 186 | } 187 | } 188 | 189 | impl TreeSerialization for u128 { 190 | fn serialize(&self) -> Vec { 191 | self.to_le_bytes().to_vec() 192 | } 193 | } 194 | 195 | impl TreeDeserialization for u128 { 196 | fn deserialize(data: &[u8]) -> Self { 197 | u128::from_le_bytes(data.try_into().unwrap()) 198 | } 199 | } 200 | 201 | pub struct MetadataIndex { 202 | pub path: PathBuf, 203 | pub tree: Tree, 204 | } 205 | 206 | impl MetadataIndex { 207 | pub fn new(path: PathBuf) -> Self { 208 | let tree = Tree::new(path.clone()).expect("Failed to create tree"); 209 | MetadataIndex { path, tree } 210 | } 211 | 212 | pub fn insert(&mut self, key: u128, value: MetadataIndexItem) { 213 | // self.tree.insert(key, value).expect("Failed to insert"); 214 | self.tree.insert(key, value).expect("Failed to insert"); 215 | } 216 | 217 | pub fn batch_insert(&mut self, items: Vec<(u128, MetadataIndexItem)>) { 218 | self.tree 219 | .batch_insert(items) 220 | .expect("Failed to batch insert"); 221 | } 222 | 223 | pub fn get(&mut self, key: u128) -> Option { 224 | match self.tree.search(key) { 225 | Ok(v) => v, 226 | Err(_) => None, 227 | } 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /src/structures/mmap_tree.rs: -------------------------------------------------------------------------------- 1 | pub mod node; 2 | pub mod serialization; 3 | pub mod storage; 4 | 5 | use std::fmt::{Debug, Display}; 6 | use std::io; 7 | use std::path::PathBuf; 8 | 9 | use node::{Node, NodeType}; 10 | use serialization::{TreeDeserialization, TreeSerialization}; 11 | use storage::StorageManager; 12 | 13 | pub struct Tree { 14 | pub b: usize, 15 | pub storage_manager: storage::StorageManager, 16 | } 17 | 18 | impl Tree 19 | where 20 | K: Clone + 
impl<K, V> Tree<K, V>
where
    K: Clone + Ord + TreeSerialization + TreeDeserialization + Debug + Display,
    V: Clone + TreeSerialization + TreeDeserialization,
{
    /// Open a tree at `path`, bootstrapping an empty root leaf when the
    /// backing storage is brand new (zero used space).
    pub fn new(path: PathBuf) -> io::Result<Self> {
        let mut storage_manager = StorageManager::<K, V>::new(path)?;

        if storage_manager.used_space() == 0 {
            let root_offset: usize;
            let mut root = Node::new_leaf(0);
            root.is_root = true;
            root_offset = storage_manager.store_node(&mut root)?;
            storage_manager.set_root_offset(root_offset);
        }

        Ok(Tree {
            storage_manager,
            b: 32, // branching factor
        })
    }

    /// Insert `key` -> `value`. A full root is split pre-emptively so the
    /// descent below always has room.
    pub fn insert(&mut self, key: K, value: V) -> Result<(), io::Error> {
        let mut root = self
            .storage_manager
            .load_node(self.storage_manager.root_offset())?;

        if root.is_full() {
            // Split the root: the median moves up into a fresh internal root
            // whose two children are the old root and its new sibling.
            let mut new_root = Node::new_internal(0);
            new_root.is_root = true;
            let (median, mut sibling) = root.split(self.b)?;
            root.is_root = false;
            self.storage_manager.store_node(&mut root)?;
            let sibling_offset = self.storage_manager.store_node(&mut sibling)?;
            new_root.keys.push(median);
            // NOTE(review): this records the root offset as it was BEFORE
            // `root` was re-stored above; if store_node can relocate a node
            // (assign it a new offset), this child pointer is stale — confirm
            // store_node's contract.
            new_root.children.push(self.storage_manager.root_offset()); // old root offset
            new_root.children.push(sibling_offset); // new sibling offset
            new_root.is_root = true;
            self.storage_manager.store_node(&mut new_root)?;
            self.storage_manager.set_root_offset(new_root.offset);

            // Re-store both children so their parent pointers are persisted.
            root.parent_offset = Some(new_root.offset);
            sibling.parent_offset = Some(new_root.offset);
            self.storage_manager.store_node(&mut root)?;
            self.storage_manager.store_node(&mut sibling)?;
        }
        self.insert_non_full(self.storage_manager.root_offset(), key, value, 0)?;

        Ok(())
    }

    /// Recursive descent into a node known not to be full, splitting full
    /// children on the way down.
    fn insert_non_full(
        &mut self,
        node_offset: usize,
        key: K,
        value: V,
        depth: usize,
    ) -> Result<(), io::Error> {
        // NOTE(review): this guard silently DROPS the insert once depth
        // exceeds 100 — it returns Ok, so callers cannot tell the data was
        // lost. If trees can legitimately get this deep it is a data-loss
        // bug; if they cannot, this should be an error instead.
        if depth > 100 {
            println!("Recursion depth limit reached: {}", depth);
            return Ok(());
        }

        let mut node = self.storage_manager.load_node(node_offset)?;

        if node.node_type == NodeType::Leaf {
            // binary_search yields Err(insertion_point) for a missing key;
            // unwrap_or_else collapses both cases into a usable index.
            let idx = node.keys.binary_search(&key).unwrap_or_else(|x| x);

            if node.keys.get(idx) == Some(&key) {
                // Key already present: overwrite the value in place.
                node.values[idx] = Some(value);

                self.storage_manager.store_node(&mut node)?;
                if node.is_root {
                    // Keep the root pointer in sync in case store_node
                    // updated this node's offset — presumably it can; confirm.
                    self.storage_manager.set_root_offset(node.offset);
                }
            } else {
                node.keys.insert(idx, key);
                node.values.insert(idx, Some(value));

                self.storage_manager.store_node(&mut node)?;
                if node.is_root {
                    self.storage_manager.set_root_offset(node.offset);
                }
            }
        } else {
            let idx = node.keys.binary_search(&key).unwrap_or_else(|x| x); // Find the child to go to
            let child_offset = node.children[idx];
            let mut child = self.storage_manager.load_node(child_offset)?;

            if child.is_full() {
                // Split the child before descending so the recursion always
                // lands in a node with room.
                // NOTE(review): `child` is mutated by split() but never
                // re-stored here — unless split() persists it internally,
                // the on-disk child still holds its pre-split keys; verify.
                let (median, mut sibling) = child.split(self.b)?;
                let sibling_offset = self.storage_manager.store_node(&mut sibling)?;

                node.keys.insert(idx, median.clone());
                node.children.insert(idx + 1, sibling_offset);
                self.storage_manager.store_node(&mut node)?;

                // The median decides which half receives the new key.
                if key < median {
                    self.insert_non_full(child_offset, key, value, depth + 1)?;
                } else {
                    self.insert_non_full(sibling_offset, key, value, depth + 1)?;
                }
            } else {
                self.insert_non_full(child_offset, key, value, depth + 1)?;
            }
        }

        Ok(())
    }

    /// Look up `key`, returning its value when present.
    pub fn search(&mut self, key: K) -> Result<Option<V>, io::Error> {
        self.search_node(self.storage_manager.root_offset(), key)
    }
}
Err(_) => Ok(None), 191 | }, 192 | } 193 | } 194 | 195 | pub fn has_key(&mut self, key: K) -> Result { 196 | self.has_key_node(self.storage_manager.root_offset(), key) 197 | } 198 | 199 | pub fn has_key_node(&mut self, node_offset: usize, key: K) -> Result { 200 | let node = self.storage_manager.load_node(node_offset)?; 201 | 202 | match node.node_type { 203 | NodeType::Internal => { 204 | let idx = node.keys.binary_search(&key).unwrap_or_else(|x| x); // Find the child to go to 205 | self.has_key_node(node.children[idx], key) 206 | } 207 | NodeType::Leaf => Ok(node.keys.binary_search(&key).into_iter().next().is_some()), 208 | } 209 | } 210 | 211 | pub fn get_range(&mut self, start: K, end: K) -> Result, io::Error> { 212 | let mut result = Vec::new(); 213 | self.get_range_node(self.storage_manager.root_offset(), start, end, &mut result)?; 214 | Ok(result) 215 | } 216 | 217 | fn get_range_node( 218 | &mut self, 219 | node_offset: usize, 220 | start: K, 221 | end: K, 222 | result: &mut Vec<(K, V)>, 223 | ) -> Result<(), io::Error> { 224 | let node = self.storage_manager.load_node(node_offset)?; 225 | 226 | match node.node_type { 227 | NodeType::Internal => { 228 | let mut idx = node 229 | .keys 230 | .binary_search(&start.clone()) 231 | .unwrap_or_else(|x| x); 232 | if idx == node.keys.len() { 233 | idx -= 1; 234 | } 235 | 236 | self.get_range_node(node.children[idx], start.clone(), end.clone(), result)?; 237 | 238 | while idx < node.keys.len() && node.keys[idx] < end { 239 | self.get_range_node( 240 | node.children[idx + 1], 241 | start.clone(), 242 | end.clone(), 243 | result, 244 | )?; 245 | idx += 1; 246 | } 247 | } 248 | NodeType::Leaf => { 249 | let mut idx = node.keys.binary_search(&start).unwrap_or_else(|x| x); 250 | if node.keys.len() == 0 { 251 | return Ok(()); 252 | } 253 | if idx == node.keys.len() { 254 | idx -= 1; 255 | } 256 | 257 | while idx < node.keys.len() && node.keys[idx] < end { 258 | if node.keys[idx] >= start { 259 | 
result.push((node.keys[idx].clone(), node.values[idx].clone().unwrap())); 260 | } 261 | idx += 1; 262 | } 263 | } 264 | } 265 | 266 | Ok(()) 267 | } 268 | 269 | pub fn batch_insert(&mut self, entries: Vec<(K, V)>) -> Result<(), io::Error> { 270 | if entries.is_empty() { 271 | println!("No entries to insert"); 272 | return Ok(()); 273 | } 274 | 275 | let mut entries = entries; 276 | entries.sort_by(|a, b| a.0.cmp(&b.0)); 277 | 278 | let entrypoint = self.find_entrypoint(entries[0].0.clone())?; 279 | 280 | let mut current_offset = entrypoint; 281 | let mut node = self.storage_manager.load_node(current_offset)?; 282 | 283 | for (key, value) in entries.iter() { 284 | while node.node_type == NodeType::Internal { 285 | // We should only be operating on leaf nodes in this loop 286 | let idx = node.keys.binary_search(key).unwrap_or_else(|x| x); 287 | current_offset = node.children[idx]; 288 | node = self.storage_manager.load_node(current_offset)?; 289 | } 290 | 291 | if node.is_full() { 292 | let (median, mut sibling) = node.split(self.b)?; 293 | let sibling_offset = self.storage_manager.store_node(&mut sibling)?; 294 | self.storage_manager.store_node(&mut node)?; // Store changes to the original node after splitting 295 | 296 | if node.is_root { 297 | // println!("Creating new root"); 298 | // Create a new root if the current node is the root 299 | let mut new_root = Node::new_internal(0); 300 | new_root.is_root = true; 301 | new_root.keys.push(median.clone()); 302 | new_root.children.push(current_offset); // old root offset 303 | new_root.children.push(sibling_offset); // new sibling offset 304 | new_root.parent_offset = None; 305 | let new_root_offset = self.storage_manager.store_node(&mut new_root)?; 306 | self.storage_manager.set_root_offset(new_root_offset); 307 | node.is_root = false; 308 | node.parent_offset = Some(new_root_offset); 309 | sibling.parent_offset = Some(new_root_offset); 310 | // println!("New root offset: {}", new_root_offset); 311 | 
self.storage_manager.store_node(&mut node)?; 312 | self.storage_manager.store_node(&mut sibling)?; 313 | } else { 314 | // Update the parent node with the new median 315 | let parent_offset = node.parent_offset.unwrap(); 316 | // println!("Parent offset: {}", parent_offset); 317 | let mut parent = self.storage_manager.load_node(parent_offset)?; 318 | let idx = parent 319 | .keys 320 | .binary_search(&median.clone()) 321 | .unwrap_or_else(|x| x); 322 | parent.keys.insert(idx, median.clone()); 323 | parent.children.insert(idx + 1, sibling_offset); 324 | self.storage_manager.store_node(&mut parent)?; 325 | } 326 | 327 | // Decide which node to continue insertion 328 | if *key >= median { 329 | current_offset = sibling_offset; 330 | node = sibling; 331 | } 332 | } 333 | 334 | // Insert the key into the correct leaf node 335 | let position = node.keys.binary_search(key).unwrap_or_else(|x| x); 336 | node.keys.insert(position, key.clone()); 337 | node.values.insert(position, Some(value.clone())); 338 | self.storage_manager.store_node(&mut node)?; // Store changes after each insertion 339 | } 340 | 341 | Ok(()) 342 | } 343 | 344 | fn find_entrypoint(&mut self, key: K) -> Result { 345 | let mut current_offset = self.storage_manager.root_offset(); 346 | let mut node = self.storage_manager.load_node(current_offset)?; 347 | 348 | while node.node_type == NodeType::Internal { 349 | let idx = node.keys.binary_search(&key).unwrap_or_else(|x| x); 350 | current_offset = node.children[idx]; 351 | node = self.storage_manager.load_node(current_offset)?; 352 | } 353 | 354 | Ok(current_offset) 355 | } 356 | } 357 | -------------------------------------------------------------------------------- /src/structures/mmap_tree/node.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, io}; 2 | 3 | use super::serialization::{TreeDeserialization, TreeSerialization}; 4 | 5 | #[derive(Debug, PartialEq, Clone)] 6 | pub enum NodeType { 7 | 
Leaf, 8 | Internal, 9 | } 10 | 11 | const MAX_KEYS: usize = 32; 12 | 13 | pub fn serialize_node_type(node_type: &NodeType) -> [u8; 1] { 14 | match node_type { 15 | NodeType::Leaf => [0], 16 | NodeType::Internal => [1], 17 | } 18 | } 19 | 20 | pub fn deserialize_node_type(data: &[u8]) -> NodeType { 21 | match data[0] { 22 | 0 => NodeType::Leaf, 23 | 1 => NodeType::Internal, 24 | _ => panic!("Invalid node type"), 25 | } 26 | } 27 | 28 | #[derive(Debug, PartialEq, Clone)] 29 | pub struct Node { 30 | pub keys: Vec, 31 | pub values: Vec>, // Option for handling deletion in COW 32 | pub children: Vec, // Offsets into the memmap file 33 | pub max_keys: usize, // Maximum number of keys a node can hold 34 | pub node_type: NodeType, 35 | pub offset: usize, // Offset into the memmap file 36 | pub is_root: bool, 37 | pub parent_offset: Option, // Offset of the parent node 38 | } 39 | 40 | impl Node 41 | where 42 | K: Clone + Ord + TreeSerialization + TreeDeserialization, 43 | V: Clone + TreeSerialization + TreeDeserialization, 44 | { 45 | pub fn new_leaf(offset: usize) -> Self { 46 | Node { 47 | keys: Vec::new(), 48 | values: Vec::new(), 49 | children: Vec::new(), 50 | max_keys: MAX_KEYS, // Assuming a small number for testing purposes 51 | node_type: NodeType::Leaf, 52 | offset, 53 | is_root: false, 54 | parent_offset: Some(0), 55 | } 56 | } 57 | 58 | pub fn new_internal(offset: usize) -> Self { 59 | Node { 60 | keys: Vec::new(), 61 | values: Vec::new(), 62 | children: Vec::new(), 63 | max_keys: MAX_KEYS, 64 | node_type: NodeType::Internal, 65 | offset, 66 | is_root: false, 67 | parent_offset: Some(0), 68 | } 69 | } 70 | 71 | pub fn split(&mut self, b: usize) -> Result<(K, Node), io::Error> { 72 | // println!("Splitting node: {:?}", self.keys); 73 | 74 | match self.node_type { 75 | NodeType::Internal => { 76 | if b <= 1 || b > self.keys.len() { 77 | return Err(io::Error::new( 78 | io::ErrorKind::Other, 79 | "Invalid split point for internal node", 80 | )); 81 | } 82 | let mut 
sibling_keys = self.keys.split_off(b - 1); 83 | let median_key = sibling_keys.remove(0); 84 | 85 | let sibling_children = self.children.split_off(b); 86 | 87 | let sibling = Node { 88 | keys: sibling_keys, 89 | values: Vec::new(), 90 | children: sibling_children, 91 | max_keys: self.max_keys, 92 | node_type: NodeType::Internal, 93 | offset: 0, // This should be set when the node is stored 94 | is_root: false, 95 | parent_offset: self.parent_offset, 96 | }; 97 | 98 | // println!( 99 | // "Internal node split: median_key = {}, sibling_keys = {:?}", 100 | // median_key, sibling.keys 101 | // ); 102 | Ok((median_key, sibling)) 103 | } 104 | NodeType::Leaf => { 105 | if b < 1 || b >= self.keys.len() { 106 | return Err(io::Error::new( 107 | io::ErrorKind::Other, 108 | "Invalid split point for leaf node", 109 | )); 110 | } 111 | let sibling_keys = self.keys.split_off(b); 112 | let median_key = self.keys.get(b - 1).unwrap().clone(); 113 | let sibling_values = self.values.split_off(b); 114 | 115 | let sibling = Node { 116 | keys: sibling_keys, 117 | values: sibling_values, 118 | children: Vec::new(), 119 | max_keys: self.max_keys, 120 | node_type: NodeType::Leaf, 121 | offset: 0, // This should be set when the node is stored 122 | is_root: false, 123 | parent_offset: self.parent_offset, 124 | }; 125 | 126 | // println!( 127 | // "Leaf node split: median_key = {}, sibling_keys = {:?}", 128 | // median_key, sibling.keys 129 | // ); 130 | Ok((median_key, sibling)) 131 | } 132 | } 133 | } 134 | 135 | pub fn is_full(&self) -> bool { 136 | let b = self.max_keys; 137 | return self.keys.len() >= (2 * b - 1); 138 | } 139 | 140 | pub fn serialize(&self) -> Vec { 141 | let mut serialized = Vec::with_capacity(1_000_000); 142 | serialized.extend_from_slice(&serialize_node_type(&self.node_type)); 143 | serialized.push(self.is_root as u8); 144 | serialized.extend_from_slice(&(self.parent_offset.unwrap_or(0) as u64).to_le_bytes()); 145 | serialize_length(&mut serialized, self.keys.len() as 
u32); 146 | serialize_length(&mut serialized, self.values.len() as u32); 147 | serialize_length(&mut serialized, self.children.len() as u32); 148 | 149 | for key in &self.keys { 150 | let serialized_key = key.serialize(); 151 | serialized.extend_from_slice(&serialize_length( 152 | &mut Vec::new(), 153 | serialized_key.len() as u32, 154 | )); 155 | serialized.extend_from_slice(&serialized_key); 156 | } 157 | 158 | for value in &self.values { 159 | match value { 160 | Some(value) => { 161 | let serialized_value = value.serialize(); 162 | serialized.extend_from_slice(&serialize_length( 163 | &mut Vec::new(), 164 | serialized_value.len() as u32, 165 | )); 166 | serialized.extend_from_slice(&serialized_value); 167 | } 168 | None => serialized.extend_from_slice(&0u32.to_le_bytes()), 169 | } 170 | } 171 | 172 | for child in &self.children { 173 | serialized.extend_from_slice(&child.to_le_bytes()); 174 | } 175 | 176 | serialized 177 | } 178 | 179 | pub fn deserialize(data: &[u8]) -> Self { 180 | let mut offset = 0; 181 | 182 | let node_type = deserialize_node_type(&data[offset..offset + 1]); 183 | offset += 1; 184 | let is_root = data[offset] == 1; 185 | offset += 1; 186 | let parent_offset = 187 | u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize; 188 | offset += 8; 189 | let keys_len = read_length(&data[offset..offset + 4]); 190 | offset += 4; 191 | let values_len = read_length(&data[offset..offset + 4]); 192 | offset += 4; 193 | let num_children = read_length(&data[offset..offset + 4]); 194 | offset += 4; 195 | 196 | let mut keys = Vec::with_capacity(keys_len); 197 | for _ in 0..keys_len { 198 | let key_size = read_length(&data[offset..offset + 4]) as usize; 199 | offset += 4; 200 | let key = K::deserialize(&data[offset..offset + key_size]); 201 | offset += key_size; 202 | keys.push(key); 203 | } 204 | 205 | let mut values = Vec::with_capacity(values_len); 206 | for _ in 0..values_len { 207 | let value_size = read_length(&data[offset..offset + 
4]) as usize; 208 | offset += 4; 209 | let value = if value_size > 0 { 210 | Some(V::deserialize(&data[offset..offset + value_size])) 211 | } else { 212 | None 213 | }; 214 | offset += value_size; 215 | values.push(value); 216 | } 217 | 218 | let mut children = Vec::with_capacity(num_children); 219 | for _ in 0..num_children { 220 | let child_offset = usize::from_le_bytes(data[offset..offset + 8].try_into().unwrap()); 221 | offset += 8; 222 | children.push(child_offset); 223 | } 224 | 225 | Node { 226 | keys, 227 | values, 228 | children, 229 | max_keys: MAX_KEYS, 230 | node_type, 231 | offset: 0, 232 | is_root, 233 | parent_offset: Some(parent_offset), 234 | } 235 | } 236 | 237 | pub fn set_key_value(&mut self, key: K, value: V) { 238 | let idx = self.keys.binary_search(&key).unwrap_or_else(|x| x); 239 | self.keys.insert(idx, key); 240 | self.values.insert(idx, Some(value)); 241 | } 242 | 243 | pub fn get_value(&self, key: K) -> Option { 244 | match self.keys.binary_search(&key) { 245 | Ok(idx) => self.values[idx].clone(), 246 | Err(_) => None, 247 | } 248 | } 249 | } 250 | impl Default for Node { 251 | fn default() -> Self { 252 | Node { 253 | keys: Vec::new(), 254 | values: Vec::new(), 255 | children: Vec::new(), 256 | max_keys: 0, // Adjust this as necessary 257 | node_type: NodeType::Leaf, // Or another appropriate default NodeType 258 | offset: 0, 259 | is_root: false, 260 | parent_offset: None, 261 | } 262 | } 263 | } 264 | 265 | fn serialize_length(buffer: &mut Vec, length: u32) -> &Vec { 266 | buffer.extend_from_slice(&length.to_le_bytes()); 267 | 268 | // Return the buffer to allow chaining 269 | buffer 270 | } 271 | 272 | fn read_length(data: &[u8]) -> usize { 273 | u32::from_le_bytes(data.try_into().unwrap()) as usize 274 | } 275 | -------------------------------------------------------------------------------- /src/structures/mmap_tree/serialization.rs: -------------------------------------------------------------------------------- 1 | pub trait 
/// Byte-level encoding contract for B-tree keys and values.
pub trait TreeSerialization {
    fn serialize(&self) -> Vec<u8>;
}

/// Decoding counterpart of `TreeSerialization`.
pub trait TreeDeserialization {
    fn deserialize(data: &[u8]) -> Self
    where
        Self: Sized;
}

impl TreeDeserialization for i32 {
    /// Reads a little-endian i32 from the first 4 bytes of `data`.
    /// Panics (slice-index) if `data` is shorter than 4 bytes, exactly as
    /// the previous manual buffer copy did.
    fn deserialize(data: &[u8]) -> Self {
        i32::from_le_bytes(data[..4].try_into().unwrap())
    }
}

impl TreeSerialization for i32 {
    fn serialize(&self) -> Vec<u8> {
        self.to_le_bytes().to_vec()
    }
}

impl TreeDeserialization for String {
    /// Decodes a u32 little-endian length prefix followed by that many
    /// UTF-8 bytes. Panics on truncated data or invalid UTF-8.
    fn deserialize(data: &[u8]) -> Self {
        if data.len() < 4 {
            panic!("Data too short to contain length prefix");
        }
        let len = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; // read length
        if data.len() < 4 + len {
            panic!("Data too short for specified string length");
        }
        let string_data = &data[4..4 + len]; // extract string data
        String::from_utf8(string_data.to_vec()).unwrap()
    }
}

impl TreeSerialization for String {
    /// Length-prefixed UTF-8: u32 little-endian byte count, then the bytes.
    fn serialize(&self) -> Vec<u8> {
        // Exact size is known up front, so reserve it once.
        let mut data = Vec::with_capacity(4 + self.len());
        data.extend_from_slice(&(self.len() as u32).to_le_bytes()); // write length
        data.extend_from_slice(self.as_bytes()); // write string data
        data
    }
}
/// Block-structured node storage inside a memory-mapped file.
///
/// File layout: a header of `[used_space: usize][root_offset: usize]`,
/// followed by 4096-byte blocks. Each block holds: 1 tag byte (1 =
/// primary block of a node, 0 = overflow continuation), the node's total
/// serialized length (usize), up to `BLOCK_DATA_SIZE` payload bytes, and
/// a trailing overflow pointer (0 terminates the chain).
pub struct StorageManager<K, V> {
    pub mmap: MmapMut,
    // NOTE(review): this field is written once in `new` but reads go
    // through `used_space()`, which reads the mmap header instead.
    pub used_space: usize,
    path: PathBuf,
    phantom: std::marker::PhantomData<(K, V)>,
    locks: LockService,
}

pub const SIZE_OF_USIZE: usize = std::mem::size_of::<usize>();
pub const HEADER_SIZE: usize = SIZE_OF_USIZE * 2; // used space + root offset
pub const BLOCK_SIZE: usize = 4096;
pub const OVERFLOW_POINTER_SIZE: usize = SIZE_OF_USIZE;
pub const BLOCK_HEADER_SIZE: usize = SIZE_OF_USIZE + 1; // one byte for if it is the primary block or overflow block
pub const BLOCK_DATA_SIZE: usize = BLOCK_SIZE - OVERFLOW_POINTER_SIZE - BLOCK_HEADER_SIZE;

impl<K, V> StorageManager<K, V>
where
    K: Clone + Ord + TreeSerialization + TreeDeserialization + Debug,
    V: Clone + TreeSerialization + TreeDeserialization,
{
    /// Open (or create) the backing file and its lock directory.
    ///
    /// New files are sized to 1 MB up front; `resize_mmap` doubles the
    /// mapping whenever an allocation would overflow it.
    pub fn new(path: PathBuf) -> io::Result<Self> {
        let exists = path.exists();
        let file = OpenOptions::new()
            .read(true)
            .write(true)
            .create(!exists)
            .open(path.clone())?;

        if !exists {
            file.set_len(1_000_000)?;
        }

        let mmap = unsafe { MmapMut::map_mut(&file)? };

        // Derive the lock directory name: strip the file extension and
        // append "_locks".
        // NOTE(review): `rfind('.').unwrap()` panics for extension-less
        // paths, and `to_str().unwrap()` for non-UTF-8 paths.
        let mut locks_path = path.clone().to_str().unwrap().to_string();
        let last_dot = locks_path.rfind('.').unwrap();
        locks_path.replace_range(last_dot.., "_locks");

        fs::create_dir_all(&locks_path).expect("Failed to create directory");

        let mut manager = StorageManager {
            mmap,
            used_space: 0,
            path,
            phantom: std::marker::PhantomData,
            locks: LockService::new(locks_path.into()),
        };

        // Pre-existing files keep their persisted used-space counter;
        // fresh files start at 0.
        let used_space = if exists && manager.mmap.len() > HEADER_SIZE {
            manager.used_space()
        } else {
            0
        };

        manager.set_used_space(used_space);

        Ok(manager)
    }

    /// Serialize `node` and write it across one or more blocks, reusing
    /// the node's existing block chain when it is long enough and
    /// allocating additional overflow blocks otherwise.
    ///
    /// Side effect: assigns `node.offset` on first store. Returns the
    /// node's (primary block) offset.
    pub fn store_node(&mut self, node: &mut Node<K, V>) -> io::Result<usize> {
        let serialized = node.serialize();

        let serialized_len = serialized.len();

        // Ceiling division: payload blocks needed for this serialization.
        let num_blocks_required = (serialized_len + BLOCK_DATA_SIZE - 1) / BLOCK_DATA_SIZE;

        let mut needs_new_blocks = true;

        let mut prev_num_blocks_required = 0;

        if node.offset == 0 {
            // First store: allocate the primary block.
            node.offset = self.increment_and_allocate_block()?;
        } else {
            // Re-store: read the previous serialized length (stored right
            // after the 1-byte tag) to learn how many blocks the existing
            // chain already has.
            let prev_serialized_len = usize::from_le_bytes(
                self.read_from_offset(node.offset + 1, SIZE_OF_USIZE)
                    .try_into()
                    .unwrap(),
            );
            prev_num_blocks_required =
                (prev_serialized_len + BLOCK_DATA_SIZE - 1) / BLOCK_DATA_SIZE;
            needs_new_blocks = num_blocks_required > prev_num_blocks_required;
        }

        let mut current_block_offset = node.offset.clone();

        let original_offset = current_block_offset.clone();

        let mut remaining_bytes_to_write = serialized_len;

        let mut serialized_bytes_written = 0;

        // First block in the chain is tagged 1 (primary), the rest 0.
        let mut is_primary = 1u8;

        let mut blocks_written = 0;

        self.acquire_block_lock(original_offset)?;

        while remaining_bytes_to_write > 0 {
            let bytes_to_write = std::cmp::min(remaining_bytes_to_write, BLOCK_DATA_SIZE);

            // Block header: tag byte + total serialized length (the total
            // is repeated in every block of the chain).
            self.write_to_offset(current_block_offset, is_primary.to_le_bytes().as_ref());

            current_block_offset += 1; // one for the primary byte

            self.write_to_offset(current_block_offset, &serialized_len.to_le_bytes());

            current_block_offset += SIZE_OF_USIZE;
            self.write_to_offset(
                current_block_offset,
                &serialized[serialized_bytes_written..serialized_bytes_written + bytes_to_write],
            );

            blocks_written += 1;
            serialized_bytes_written += bytes_to_write;

            remaining_bytes_to_write -= bytes_to_write;
            // Skip to the overflow-pointer slot at the end of the block
            // (even when fewer than BLOCK_DATA_SIZE bytes were written).
            current_block_offset += BLOCK_DATA_SIZE;

            if remaining_bytes_to_write > 0 {
                let next_block_offset: usize;

                // Reuse the existing chain while it lasts; allocate fresh
                // overflow blocks only past its end.
                if needs_new_blocks && blocks_written >= prev_num_blocks_required {
                    next_block_offset = self.increment_and_allocate_block()?;

                    self.write_to_offset(current_block_offset, &next_block_offset.to_le_bytes());
                } else {
                    next_block_offset = usize::from_le_bytes(
                        self.read_from_offset(current_block_offset, SIZE_OF_USIZE)
                            .try_into()
                            .unwrap(),
                    );
                }

                current_block_offset = next_block_offset;
            } else {
                // Terminate the chain. NOTE(review): surplus blocks from a
                // previously longer chain are orphaned, not freed.
                self.write_to_offset(current_block_offset, &0u64.to_le_bytes());
            }

            is_primary = 0;
        }

        self.release_block_lock(original_offset)?;

        Ok(node.offset)
    }

    /// Read the block chain starting at `offset`, reassemble the
    /// serialized bytes and deserialize the node. The node's `offset`
    /// field is restored from the argument.
    pub fn load_node(&mut self, offset: usize) -> io::Result<Node<K, V>> {
        let original_offset = offset.clone();
        let mut offset = offset.clone();

        let mut serialized = Vec::new();

        let mut is_primary;

        let mut serialized_len;

        let mut bytes_read = 0;

        self.acquire_block_lock(original_offset)?;

        loop {
            let block_is_primary =
                u8::from_le_bytes(self.read_from_offset(offset, 1).try_into().unwrap());

            if block_is_primary == 0 {
                is_primary = false;
            } else if block_is_primary == 1 {
                is_primary = true;
            } else {
                // NOTE(review): early returns here and below skip
                // `release_block_lock`, leaking the block lock on error.
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "Invalid block type",
                ));
            }

            // The chain must start with a primary block.
            if !is_primary && bytes_read == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "Primary block not found",
                ));
            }

            offset += 1; // one for the primary byte

            serialized_len = usize::from_le_bytes(
                self.read_from_offset(offset, SIZE_OF_USIZE)
                    .try_into()
                    .unwrap(),
            );

            offset += SIZE_OF_USIZE;

            // Last block of the chain may be partially filled.
            let bytes_to_read = std::cmp::min(serialized_len - bytes_read, BLOCK_DATA_SIZE);

            bytes_read += bytes_to_read;

            serialized.extend_from_slice(&self.read_from_offset(offset, bytes_to_read));

            offset += BLOCK_DATA_SIZE;

            // Follow the overflow pointer; 0 terminates the chain.
            let next_block_offset = usize::from_le_bytes(
                self.read_from_offset(offset, SIZE_OF_USIZE)
                    .try_into()
                    .unwrap(),
            );

            if next_block_offset == 0 {
                break;
            }

            offset = next_block_offset;
        }

        self.release_block_lock(original_offset)?;

        let mut node = Node::deserialize(&serialized);
        node.offset = original_offset;

        Ok(node)
    }

    /// Double the backing file and remap it.
    fn resize_mmap(&mut self) -> io::Result<()> {
        let current_len = self.mmap.len();
        let new_len = current_len * 2;

        let file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(self.path.clone())?;

        file.set_len(new_len as u64)?;

        self.mmap = unsafe { MmapMut::map_mut(&file)? };
        Ok(())
    }

    /// Bytes allocated to blocks so far (persisted in the file header).
    pub fn used_space(&self) -> usize {
        usize::from_le_bytes(self.read_from_offset(0, SIZE_OF_USIZE).try_into().unwrap())
    }

    pub fn set_used_space(&mut self, used_space: usize) {
        self.write_to_offset(0, &used_space.to_le_bytes());
    }

    /// Offset of the tree's root node (second header field).
    pub fn root_offset(&self) -> usize {
        usize::from_le_bytes(
            self.read_from_offset(SIZE_OF_USIZE, SIZE_OF_USIZE)
                .try_into()
                .unwrap(),
        )
    }

    pub fn set_root_offset(&mut self, root_offset: usize) {
        self.write_to_offset(SIZE_OF_USIZE, &root_offset.to_le_bytes());
    }

    /// Reserve one BLOCK_SIZE block, growing the mmap if needed, and
    /// return the new block's absolute file offset.
    pub fn increment_and_allocate_block(&mut self) -> io::Result<usize> {
        let used_space = self.used_space();
        self.set_used_space(used_space + BLOCK_SIZE);
        let out = used_space + HEADER_SIZE;

        if out + BLOCK_SIZE > self.mmap.len() {
            self.resize_mmap()?;
        }

        Ok(out)
    }

    // Raw mmap access; writes are not flushed explicitly — the OS persists
    // dirty pages (and Drop flushes the mapping).
    fn write_to_offset(&mut self, offset: usize, data: &[u8]) {
        self.mmap[offset..offset + data.len()].copy_from_slice(data);
    }

    fn read_from_offset(&self, offset: usize, len: usize) -> &[u8] {
        &self.mmap[offset..offset + len]
    }

    // Per-block advisory locks keyed by the block's primary offset.
    fn acquire_block_lock(&self, offset: usize) -> io::Result<()> {
        self.locks.acquire(offset.to_string())?;
        Ok(())
    }

    fn release_block_lock(&self, offset: usize) -> io::Result<()> {
        self.locks.release(offset.to_string())?;
        Ok(())
    }
}
use std::fmt::{Debug, Display};
use std::io;

use node::{Node, NodeType};
use serialization::{TreeDeserialization, TreeSerialization};

/// In-memory B-tree with heap-allocated nodes; the persistent variant
/// lives in `mmap_tree`.
pub struct Tree<K, V> {
    pub root: Box<Node<K, V>>,
    pub b: usize,
}

impl<K, V> Tree<K, V>
where
    K: Clone + Ord + TreeSerialization + TreeDeserialization + Display + Debug + Copy,
    V: Clone + TreeSerialization + TreeDeserialization + Display + Debug,
{
    /// New tree whose root starts out as an empty leaf.
    pub fn new() -> Self {
        Tree {
            root: Box::new(Node::new_leaf()), // Initially the root is a leaf node
            b: 4,
        }
    }

    /// Insert `key` -> `value`, splitting a full root first so descent
    /// never has to grow the tree upward.
    pub fn insert(&mut self, key: K, value: V) -> Result<(), io::Error> {
        // Temporarily take ownership of the root so it can be restructured
        // without fighting the borrow checker; it is put back below.
        let mut root = std::mem::replace(&mut self.root, Box::new(Node::new_leaf()));
        if self.is_node_full(&root)? {
            let mut new_root = Node::new_internal();
            let (median, sibling) = root.split()?;
            new_root.keys.push(median);
            new_root.children.push(root);
            new_root.children.push(Box::new(sibling));
            root = Box::new(new_root);
        }
        self.insert_non_full(&mut *root, key, value)?;
        self.root = root;
        Ok(())
    }

    /// Recursive descent insert into a node known not to be full.
    fn insert_non_full(
        &mut self,
        node: &mut Node<K, V>,
        key: K,
        value: V,
    ) -> Result<(), io::Error> {
        match &mut node.node_type {
            NodeType::Leaf => {
                // NOTE(review): unlike the mmap tree, an existing key is
                // not overwritten here — duplicates are inserted alongside.
                let idx = node.keys.binary_search(&key).unwrap_or_else(|x| x);
                node.keys.insert(idx, key);
                node.values.insert(idx, Some(value));
                Ok(())
            }
            NodeType::Internal => {
                let idx = node.keys.binary_search(&key).unwrap_or_else(|x| x);
                // On an exact separator match descend to the right child.
                let child_idx = if idx == node.keys.len() || key < node.keys[idx] {
                    idx
                } else {
                    idx + 1
                };

                if self.is_node_full(&node.children[child_idx])? {
                    let (median, sibling) = node.children[child_idx].split()?;
                    // NOTE(review): the median is inserted at `idx` while
                    // the new child goes to `child_idx + 1`; when
                    // idx != child_idx these slots can disagree — verify
                    // against the tree tests.
                    node.keys.insert(idx, median);
                    node.children.insert(child_idx + 1, Box::new(sibling));
                    if key >= node.keys[idx] {
                        self.insert_non_full(&mut *node.children[child_idx + 1], key, value)
                    } else {
                        self.insert_non_full(&mut *node.children[child_idx], key, value)
                    }
                } else {
                    self.insert_non_full(&mut *node.children[child_idx], key, value)
                }
            }
        }
    }

    /// A node is full when it holds exactly `max_keys` keys.
    fn is_node_full(&self, node: &Node<K, V>) -> Result<bool, io::Error> {
        Ok(node.keys.len() == node.max_keys)
    }

    /// Point lookup; `Ok(None)` when the key is absent.
    pub fn search(&self, key: K) -> Result<Option<V>, io::Error> {
        self.search_node(&*self.root, key)
    }

    fn search_node(&self, node: &Node<K, V>, key: K) -> Result<Option<V>, io::Error> {
        match node.node_type {
            NodeType::Internal => {
                let idx = node.keys.binary_search(&key).unwrap_or_else(|x| x);
                if idx < node.keys.len() && node.keys[idx] == key {
                    // Exact match on a separator key: descend right of it.
                    self.search_node(&node.children[idx + 1], key)
                } else {
                    self.search_node(&node.children[idx], key)
                }
            }
            NodeType::Leaf => match node.keys.binary_search(&key) {
                Ok(idx) => Ok(node.values.get(idx).expect("could not get value").clone()),
                Err(_) => Ok(None),
            },
        }
    }
}
// --- src/structures/tree/serialization.rs (reconstructed) ---

/// Byte-serialization used by the tree for keys and values.
pub trait TreeSerialization {
    fn serialize(&self) -> Vec<u8>;
}

/// Inverse of [`TreeSerialization`].
pub trait TreeDeserialization {
    fn deserialize(data: &[u8]) -> Self
    where
        Self: Sized;
}

impl TreeDeserialization for i32 {
    fn deserialize(data: &[u8]) -> Self {
        // Reads exactly the first 4 bytes; trailing bytes are ignored.
        let mut bytes = [0; 4];
        bytes.copy_from_slice(&data[..4]);
        i32::from_le_bytes(bytes)
    }
}

impl TreeSerialization for i32 {
    fn serialize(&self) -> Vec<u8> {
        self.to_le_bytes().to_vec()
    }
}

impl TreeSerialization for String {
    /// Layout: [len: i32 LE][len utf-8 bytes].
    fn serialize(&self) -> Vec<u8> {
        let mut data = Vec::with_capacity(4 + self.len());
        data.extend_from_slice(&(self.len() as i32).to_le_bytes());
        data.extend_from_slice(self.as_bytes());
        data
    }
}

impl TreeDeserialization for String {
    fn deserialize(data: &[u8]) -> Self {
        // FIX: made this the exact inverse of `serialize`. The original
        // skipped the first 4 bytes and then re-read [len][bytes] frames
        // starting at offset 4, so `deserialize(serialize(s))` returned
        // garbage for every non-empty string.
        let len = i32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
        String::from_utf8(data[4..4 + len].to_vec()).unwrap()
    }
}

// --- src/structures/tree/node.rs (reconstructed) ---

use std::fmt::{Debug, Display};
use std::io;

#[derive(Debug, PartialEq, Clone)]
pub enum NodeType {
    Leaf,
    Internal,
}

/// Fixed fan-out used by every freshly constructed node.
const MAX_KEYS: usize = 10;

pub struct Node<K, V> {
    pub keys: Vec<K>,
    // Option so deletions can tombstone a slot without shifting (COW).
    pub values: Vec<Option<V>>,
    pub children: Vec<Box<Node<K, V>>>,
    pub max_keys: usize,
    pub node_type: NodeType,
}

impl<K, V> Node<K, V>
where
    K: Clone + Ord + TreeSerialization + TreeDeserialization + Display + Debug + Copy,
    V: Clone + TreeSerialization + TreeDeserialization + Display + Debug,
{
    /// Empty leaf with the default fan-out.
    pub fn new_leaf() -> Self {
        Node {
            keys: Vec::new(),
            values: Vec::new(),
            children: Vec::new(),
            max_keys: MAX_KEYS,
            node_type: NodeType::Leaf,
        }
    }

    /// Empty internal node with the default fan-out.
    pub fn new_internal() -> Self {
        Node {
            keys: Vec::new(),
            values: Vec::new(),
            children: Vec::new(),
            max_keys: MAX_KEYS,
            node_type: NodeType::Internal,
        }
    }

    /// Split a full node in half, returning `(separator_key, right_sibling)`.
    ///
    /// Internal nodes push the median up (it is removed from both halves);
    /// leaves keep the median as the first key of the right sibling, so the
    /// separator still routes correctly.
    pub fn split(&mut self) -> Result<(K, Node<K, V>), io::Error> {
        match self.node_type {
            NodeType::Internal => {
                let split_index = (self.keys.len() + 1) / 2;
                let median_key = self.keys[split_index].clone();

                let sibling_keys = self.keys.split_off(split_index + 1);
                let sibling_children = self.children.split_off(split_index + 1);

                let sibling = Node {
                    keys: sibling_keys,
                    values: Vec::new(),
                    children: sibling_children,
                    max_keys: self.max_keys,
                    node_type: NodeType::Internal,
                };

                // Drop the median from the left half; the caller re-inserts
                // it into the parent.
                self.keys.pop();

                Ok((median_key, sibling))
            }
            NodeType::Leaf => {
                let split_index = (self.keys.len() + 1) / 2;
                let median_key = self.keys[split_index].clone();

                let sibling_keys = self.keys.split_off(split_index);
                let sibling_values = self.values.split_off(split_index);

                let sibling = Node {
                    keys: sibling_keys,
                    values: sibling_values,
                    children: Vec::new(),
                    max_keys: self.max_keys,
                    node_type: NodeType::Leaf,
                };

                Ok((median_key, sibling))
            }
        }
    }
}

/// Deep clone of the subtree.
///
/// FIX: this replaces the original inherent `pub fn clone`, which shadowed
/// the `Clone` trait and dragged in serialization bounds it never used; a
/// proper trait impl keeps `node.clone()` call sites working and lets
/// containers of nodes derive `Clone`.
impl<K: Clone, V: Clone> Clone for Node<K, V> {
    fn clone(&self) -> Self {
        Node {
            keys: self.keys.clone(),
            values: self.values.clone(),
            children: self.children.clone(),
            max_keys: self.max_keys,
            node_type: self.node_type.clone(),
        }
    }
}

impl<K, V> Default for Node<K, V> {
    fn default() -> Self {
        Node {
            keys: Vec::new(),
            values: Vec::new(),
            children: Vec::new(),
            // A default node is inert: zero capacity until configured.
            max_keys: 0,
            node_type: NodeType::Leaf,
        }
    }
}
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/carsonpo/haystackdb/147f8f28238f4b2720fa3bf9c3aef6a2021c31e0/src/structures/tree/storage.rs -------------------------------------------------------------------------------- /src/structures/wal.rs: -------------------------------------------------------------------------------- 1 | use crate::constants::{QUANTIZED_VECTOR_SIZE, VECTOR_SIZE}; 2 | 3 | use super::{ 4 | metadata_index::KVPair, 5 | mmap_tree::{ 6 | serialization::{TreeDeserialization, TreeSerialization}, 7 | Tree, 8 | }, 9 | }; 10 | use crate::utils::quantize; 11 | use std::hash::{Hash, Hasher}; 12 | use std::{ 13 | fmt::Display, 14 | hash::DefaultHasher, 15 | io, 16 | path::PathBuf, 17 | time::{SystemTime, UNIX_EPOCH}, 18 | }; 19 | 20 | #[derive(Debug, Clone)] 21 | pub struct CommitListItem { 22 | pub hash: u64, 23 | pub timestamp: u64, 24 | pub vectors: Vec<[u8; QUANTIZED_VECTOR_SIZE]>, 25 | pub kvs: Vec>, 26 | } 27 | 28 | impl Display for CommitListItem { 29 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 30 | write!( 31 | f, 32 | "CommitListItem {{ hash: {}, timestamp: {}}}", 33 | self.hash, self.timestamp 34 | ) 35 | } 36 | } 37 | 38 | impl TreeSerialization for CommitListItem { 39 | fn serialize(&self) -> Vec { 40 | let mut serialized = Vec::new(); 41 | 42 | serialized.extend_from_slice(self.hash.to_le_bytes().as_ref()); 43 | serialized.extend_from_slice(self.timestamp.to_le_bytes().as_ref()); 44 | 45 | serialized.extend_from_slice(self.vectors.len().to_le_bytes().as_ref()); 46 | for vector in &self.vectors { 47 | serialized.extend_from_slice(vector.as_ref()); 48 | } 49 | 50 | serialized.extend_from_slice(self.kvs.len().to_le_bytes().as_ref()); 51 | for sub_kvs in &self.kvs { 52 | serialized.extend_from_slice(sub_kvs.len().to_le_bytes().as_ref()); 53 | for kv in sub_kvs { 54 | serialized.extend_from_slice(&kv.serialize()); 55 | } 56 | } 57 | 58 | serialized 59 | } 
}

/// Decodes the byte layout produced by `TreeSerialization for CommitListItem`:
/// [hash u64][timestamp u64][n_vectors u64][vectors...]
/// [n_rows u64]([row_len u64][row kvs...])*.
impl TreeDeserialization for CommitListItem {
    fn deserialize(data: &[u8]) -> Self {
        let mut offset = 0;

        let hash = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap());
        offset += 8;
        let timestamp = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap());
        offset += 8;

        let vectors_len = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize;
        offset += 8;

        let mut vectors = Vec::with_capacity(vectors_len);
        for _ in 0..vectors_len {
            let mut vector = [0; QUANTIZED_VECTOR_SIZE];
            vector.copy_from_slice(&data[offset..offset + QUANTIZED_VECTOR_SIZE]);
            offset += QUANTIZED_VECTOR_SIZE;
            vectors.push(vector);
        }

        let kvs_len = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize;
        offset += 8;

        let mut kvs = Vec::with_capacity(kvs_len);
        for _ in 0..kvs_len {
            let sub_kvs_len =
                u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize;
            offset += 8;
            let mut sub_kvs = Vec::with_capacity(sub_kvs_len);
            for _ in 0..sub_kvs_len {
                let kv = KVPair::deserialize(&data[offset..]);
                // NOTE(review): advancing by re-serializing is O(len) extra
                // work per pair; a `serialized_len()` on KVPair would avoid
                // it. Kept as-is to preserve the wire behavior.
                offset += kv.serialize().len();
                sub_kvs.push(kv);
            }
            kvs.push(sub_kvs);
        }

        CommitListItem {
            hash,
            timestamp,
            kvs,
            vectors,
        }
    }
}

impl TreeSerialization for bool {
    fn serialize(&self) -> Vec<u8> {
        vec![*self as u8]
    }
}

impl TreeDeserialization for bool {
    fn deserialize(data: &[u8]) -> Self {
        data[0] == 1
    }
}

impl TreeSerialization for u64 {
    fn serialize(&self) -> Vec<u8> {
        self.to_le_bytes().to_vec()
    }
}

impl TreeDeserialization for u64 {
    fn deserialize(data: &[u8]) -> Self {
        // FIX: slice to exactly 8 bytes. The original `data.try_into()`
        // panics whenever the caller hands over a buffer with trailing
        // bytes, unlike every other integer impl in this module.
        u64::from_le_bytes(data[..8].try_into().unwrap())
    }
}

impl TreeSerialization for Vec<u64> {
    fn serialize(&self) -> Vec<u8> {
        let mut serialized = Vec::with_capacity(8 + 8 * self.len());
        // FIX: write the length as an explicit u64 so the layout matches
        // `deserialize` on every pointer width (usize is not always 8 bytes).
        serialized.extend_from_slice(&(self.len() as u64).to_le_bytes());
        for val in self {
            serialized.extend_from_slice(&val.to_le_bytes());
        }
        serialized
    }
}

impl TreeDeserialization for Vec<u64> {
    fn deserialize(data: &[u8]) -> Self {
        let mut offset = 0;
        let len = u64::from_le_bytes(data[offset..offset + 8].try_into().unwrap()) as usize;
        offset += 8;

        let mut vals = Vec::with_capacity(len);
        for _ in 0..len {
            vals.push(u64::from_le_bytes(
                data[offset..offset + 8].try_into().unwrap(),
            ));
            offset += 8;
        }
        vals
    }
}

/// Write-ahead log for a namespace, backed by three on-disk trees:
/// - `commit_list`: commit hash → full commit payload
/// - `timestamps`: unix second → hashes recorded in that second
/// - `commit_finish`: commit hash → "flushed" flag
pub struct WAL {
    pub commit_list: Tree<u64, CommitListItem>,
    pub timestamps: Tree<u64, Vec<u64>>,
    pub commit_finish: Tree<u64, bool>,
    pub path: PathBuf,
    pub namespace_id: String,
}

impl WAL {
    /// Open (or create) the WAL trees under `path`.
    pub fn new(path: PathBuf, namespace_id: String) -> io::Result<Self> {
        // `Path::join` borrows, so no clones of `path` are needed.
        let commit_list = Tree::<u64, CommitListItem>::new(path.join("commit_list.bin"))?;
        let timestamps = Tree::<u64, Vec<u64>>::new(path.join("timestamps.bin"))?;
        let commit_finish = Tree::<u64, bool>::new(path.join("commit_finish.bin"))?;

        Ok(WAL {
            commit_list,
            path,
            namespace_id,
            timestamps,
            commit_finish,
        })
    }

    /// Record a commit payload under `hash`, stamped with the current time.
    pub fn add_to_commit_list(
        &mut self,
        hash: u64,
        vectors: Vec<[u8; QUANTIZED_VECTOR_SIZE]>,
        kvs: Vec<Vec<KVPair>>,
    ) -> Result<(), io::Error> {
        let timestamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        let commit_list_item = CommitListItem {
            hash,
            timestamp,
            vectors,
            kvs,
        };

        self.commit_list.insert(hash, commit_list_item)?;

        Ok(())
    }

    /// True if `hash` is present in the commit list; lookup errors are
    /// treated as "not committed".
    pub fn has_been_committed(&mut self, hash: u64) -> Result<bool, io::Error> {
        Ok(self.commit_list.has_key(hash).unwrap_or(false))
    }

    /// Every commit currently recorded, in hash order.
    pub fn get_commits(&mut self) -> Result<Vec<CommitListItem>, io::Error> {
        // FIX: propagate the error through the declared Result instead of
        // panicking with `expect`.
        let commits = self.commit_list.get_range(0, u64::MAX)?;
        Ok(commits.into_iter().map(|(_, v)| v).collect())
    }

    /// Look up a single commit; lookup errors map to `None`.
    pub fn get_commit(&mut self, hash: u64) -> Result<Option<CommitListItem>, io::Error> {
        Ok(self.commit_list.search(hash).unwrap_or(None))
    }

    /// All commits recorded at or before `timestamp` (best effort: missing
    /// or unreadable entries are skipped).
    pub fn get_commits_before(&mut self, timestamp: u64) -> Result<Vec<CommitListItem>, io::Error> {
        let buckets = self.timestamps.get_range(0, timestamp)?;

        let mut commits = Vec::new();
        for (_, hashes) in buckets {
            for hash in hashes {
                if let Ok(Some(commit)) = self.commit_list.search(hash) {
                    commits.push(commit);
                }
            }
        }

        Ok(commits)
    }

    /// Commits recorded within the last `last_seconds` that were never
    /// marked finished.
    pub fn get_uncommitted(&mut self, last_seconds: u64) -> Result<Vec<CommitListItem>, io::Error> {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
        // FIX: saturate instead of underflowing (which panics in debug
        // builds) when `last_seconds` exceeds the current unix time.
        let start = now.saturating_sub(last_seconds);
        let end = now + 1;

        let all_hashes = self.timestamps.get_range(start, end)?;

        let mut commits = Vec::new();
        for (_, hashes) in all_hashes {
            for hash in hashes {
                // FIX: a failed `has_key` lookup now counts as "not
                // finished" so recovery never silently drops a commit
                // (the original skipped it entirely on error).
                let finished = self.commit_finish.has_key(hash).unwrap_or(false);
                if !finished {
                    if let Ok(Some(commit)) = self.commit_list.search(hash) {
                        commits.push(commit);
                    }
                }
            }
        }

        Ok(commits)
    }

    /// Order-sensitive hash of a batch (vectors + metadata rows).
    pub fn compute_hash(
        &self,
        vectors: &[[u8; QUANTIZED_VECTOR_SIZE]],
        kvs: &[Vec<KVPair>],
    ) -> u64 {
        // Slice parameters accept `&Vec<_>` callers via deref coercion and
        // hash identically to the original `&Vec` parameters.
        let mut hasher = DefaultHasher::default();
        vectors.hash(&mut hasher);
        kvs.hash(&mut hasher);
        hasher.finish()
    }

    /// Quantize `vectors`, stamp them into the current one-second timestamp
    /// bucket, and record the commit payload.
    pub fn add_to_wal(
        &mut self,
        vectors: Vec<[f32; VECTOR_SIZE]>,
        kvs: Vec<Vec<KVPair>>,
    ) -> io::Result<()> {
        if vectors.len() != kvs.len() {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "Quantized vectors length mismatch",
            ));
        }

        let quantized_vectors: Vec<[u8; QUANTIZED_VECTOR_SIZE]> =
            vectors.iter().map(|v| quantize(v)).collect();

        let hash = self.compute_hash(&quantized_vectors, &kvs);

        let current_timestamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        // Append to this second's bucket; a missing bucket or a lookup
        // error both mean "start a new one".
        let mut bucket = self
            .timestamps
            .search(current_timestamp)
            .unwrap_or(None)
            .unwrap_or_default();
        bucket.push(hash);
        self.timestamps.insert(current_timestamp, bucket)?;

        self.add_to_commit_list(hash, quantized_vectors, kvs)?;

        Ok(())
    }

    /// Batch variant of `add_to_wal`: one commit per (vectors, kvs) pair,
    /// all stamped into the same timestamp bucket.
    pub fn batch_add_to_wal(
        &mut self,
        vectors: Vec<Vec<[f32; VECTOR_SIZE]>>,
        kvs: Vec<Vec<Vec<KVPair>>>,
    ) -> io::Result<()> {
        if vectors.len() != kvs.len() {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "Quantized vectors length mismatch",
            ));
        }

        let quantized_vectors: Vec<Vec<[u8; QUANTIZED_VECTOR_SIZE]>> = vectors
            .iter()
            .map(|batch| batch.iter().map(|v| quantize(v)).collect())
            .collect();

        let current_timestamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        let mut bucket = self
            .timestamps
            .search(current_timestamp)
            .unwrap_or(None)
            .unwrap_or_default();

        let hashes: Vec<u64> = quantized_vectors
            .iter()
            .zip(kvs.iter())
            .map(|(v, k)| self.compute_hash(v, k))
            .collect();
        bucket.extend(hashes.iter().copied());

        self.timestamps.insert(current_timestamp, bucket)?;

        for (hash, (v, k)) in hashes.iter().zip(quantized_vectors.iter().zip(kvs.iter())) {
            self.add_to_commit_list(*hash, v.clone(), k.clone())?;
        }

        Ok(())
    }

    /// Flag `hash` as durably flushed out of the WAL.
    pub fn mark_commit_finished(&mut self, hash: u64) -> io::Result<()> {
        self.commit_finish.insert(hash, true)?;
        Ok(())
    }
}

// --- src/utils.rs ---
pub mod quantization;

pub use quantization::{dequantize,
quantize};

// --- src/utils/quantization.rs ---
use crate::constants::{QUANTIZED_VECTOR_SIZE, VECTOR_SIZE};

/// Pack a full-precision vector into a binary sign vector: one bit per
/// component, with the bit set iff the component is non-negative.
pub fn quantize(vec: &[f32; VECTOR_SIZE]) -> [u8; QUANTIZED_VECTOR_SIZE] {
    let mut packed = [0u8; QUANTIZED_VECTOR_SIZE];
    for (byte_idx, byte) in packed.iter_mut().enumerate() {
        let base = byte_idx * 8;
        for bit in 0..8 {
            // Non-negative components (including +0.0) map to a 1 bit.
            *byte |= ((vec[base + bit] >= 0.0) as u8) << bit;
        }
    }
    packed
}

/// Expand a binary sign vector back to floats: set bits become 1.0 and
/// clear bits become -1.0 (magnitude information is not recoverable).
pub fn dequantize(vec: &[u8; QUANTIZED_VECTOR_SIZE]) -> [f32; VECTOR_SIZE] {
    let mut unpacked = [0.0f32; VECTOR_SIZE];
    for (byte_idx, &byte) in vec.iter().enumerate() {
        let base = byte_idx * 8;
        for bit in 0..8 {
            unpacked[base + bit] = if byte & (1 << bit) != 0 { 1.0 } else { -1.0 };
        }
    }
    unpacked
}

// --- tests/filters.rs ---
extern crate haystackdb;

use haystackdb::structures::filters::{Filter, Filters};
use haystackdb::structures::inverted_index::{
    compress_indices, decompress_indices, InvertedIndex, InvertedIndexItem,
};
use haystackdb::structures::metadata_index::KVPair;
use serde::{Deserialize, Serialize};
use serde_json;
use std::fs;
use std::path::PathBuf;
use uuid;

/// JSON envelope for a filter expression: {"filters": {...}}.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Query {
    filters: Filter,
}

/// Parse a JSON query string and evaluate its filter tree against `index`.
fn handle_query(json_query: &str, index: &mut InvertedIndex) -> Filters {
    let query: Query = serde_json::from_str(json_query).expect("Failed to parse query");
    Filters::evaluate(&query.filters, index)
}

#[cfg(test)]
mod filters_tests {

    use super::*;

    fn setup_inverted_index() -> InvertedIndex {
        let path = PathBuf::from("tests/data")
            .join(uuid::Uuid::new_v4().to_string())
.join("inverted_index.bin"); 33 | fs::create_dir_all(path.parent().unwrap()).unwrap(); 34 | let mut index = InvertedIndex::new(path); 35 | // Insert some test data 36 | index.insert( 37 | KVPair::new("page_id".to_string(), "page1".to_string()), 38 | InvertedIndexItem { 39 | indices: vec![1, 2], 40 | ids: vec![0, 0], 41 | }, 42 | ); 43 | index.insert( 44 | KVPair::new("page_id".to_string(), "page2".to_string()), 45 | InvertedIndexItem { 46 | indices: vec![3], 47 | ids: vec![0], 48 | }, 49 | ); 50 | index.insert( 51 | KVPair::new("public".to_string(), "1".to_string()), 52 | InvertedIndexItem { 53 | indices: vec![1, 3], 54 | ids: vec![0, 0], 55 | }, 56 | ); 57 | index.insert( 58 | KVPair::new("permission_id".to_string(), "3iQK2VC4".to_string()), 59 | InvertedIndexItem { 60 | indices: vec![2], 61 | ids: vec![0], 62 | }, 63 | ); 64 | index.insert( 65 | KVPair::new("permission_id".to_string(), "wzw8zpnQ".to_string()), 66 | InvertedIndexItem { 67 | indices: vec![3], 68 | ids: vec![0], 69 | }, 70 | ); 71 | index 72 | } 73 | 74 | #[test] 75 | fn test_compression_decompression() { 76 | let indices = vec![1, 2, 3, 6, 7, 8]; 77 | let compressed = compress_indices(indices.clone()); 78 | let decompressed = decompress_indices(compressed); 79 | 80 | assert_eq!( 81 | indices, decompressed, 82 | "Decompression did not match the original indices" 83 | ); 84 | } 85 | 86 | #[test] 87 | fn test_basic_and_query() { 88 | let mut index = setup_inverted_index(); 89 | let json_query = r#"{"filters":{"type":"Eq","args":["public","1"]}}"#; 90 | let result = handle_query(json_query, &mut index); 91 | assert_eq!(result.get_indices().0, vec![1, 3]); // Expected indices 92 | } 93 | 94 | #[test] 95 | fn test_empty_query() { 96 | let mut index = setup_inverted_index(); 97 | let json_query = r#"{"filters":{"type":"And","args":[]}}"#; 98 | let result = handle_query(json_query, &mut index); 99 | assert!(result.get_indices().0.is_empty()); // Should handle empty AND gracefully 100 | } 101 | 102 | 
#[test] 103 | fn test_nonexistent_key() { 104 | let mut index = setup_inverted_index(); 105 | let json_query = r#"{"filters":{"type":"Eq","args":["nonexistent","value"]}}"#; 106 | let result = handle_query(json_query, &mut index); 107 | assert!(result.get_indices().0.is_empty()); // No crash, just empty result 108 | } 109 | 110 | #[test] 111 | fn test_single_eq() { 112 | let mut index = setup_inverted_index(); 113 | let json_query = r#"{"filters":{"type":"Eq","args":["public","1"]}}"#; 114 | let result = handle_query(json_query, &mut index); 115 | assert_eq!(result.get_indices().0, vec![1, 3]); 116 | } 117 | 118 | #[test] 119 | fn test_single_in() { 120 | let mut index = setup_inverted_index(); 121 | let json_query = r#"{"filters":{"type":"In","args":["page_id",["page1","page2"]]}}"#; 122 | let result = handle_query(json_query, &mut index); 123 | assert_eq!(result.get_indices().0, vec![1, 2, 3]); 124 | } 125 | 126 | #[test] 127 | fn test_combined_and() { 128 | let mut index = setup_inverted_index(); 129 | let json_query = r#" 130 | { 131 | "filters": { 132 | "type": "And", 133 | "args": [ 134 | { 135 | "type": "In", 136 | "args": ["page_id", ["page1"]] 137 | }, 138 | { 139 | "type": "Eq", 140 | "args": ["public", "1"] 141 | } 142 | ] 143 | } 144 | } 145 | "#; 146 | let result = handle_query(json_query, &mut index); 147 | assert_eq!(result.get_indices().0, vec![1]); 148 | } 149 | 150 | #[test] 151 | fn test_complex_or() { 152 | let mut index = setup_inverted_index(); 153 | let json_query = r#"{"filters":{"type":"Or","args":[{"type":"Eq","args":["public","1"]},{"type":"In","args":["permission_id",["wzw8zpnQ"]]}]}}"#; 154 | let result = handle_query(json_query, &mut index); 155 | assert_eq!(result.get_indices().0, vec![1, 3]); // Should be the union of [1, 3] and [3] 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /tests/main.rs: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/carsonpo/haystackdb/147f8f28238f4b2720fa3bf9c3aef6a2021c31e0/tests/main.rs -------------------------------------------------------------------------------- /tests/math.rs: -------------------------------------------------------------------------------- 1 | extern crate haystackdb; 2 | 3 | use haystackdb::constants::{QUANTIZED_VECTOR_SIZE, VECTOR_SIZE}; 4 | use haystackdb::math::gemm; 5 | use haystackdb::math::gemv; 6 | use haystackdb::math::hamming_distance::hamming_distance; 7 | 8 | #[cfg(test)] 9 | mod math_tests { 10 | 11 | use super::*; 12 | 13 | // Test the basic functionality of the hamming_distance function 14 | #[test] 15 | fn test_hamming_distance() { 16 | let a: [u8; QUANTIZED_VECTOR_SIZE] = [0xFF; QUANTIZED_VECTOR_SIZE]; // All bits set 17 | let b: [u8; QUANTIZED_VECTOR_SIZE] = [0x00; QUANTIZED_VECTOR_SIZE]; // All bits unset 18 | assert_eq!(hamming_distance(&a, &b), QUANTIZED_VECTOR_SIZE as u16 * 8); 19 | } 20 | 21 | // Test the zero case for hamming_distance 22 | #[test] 23 | fn test_zero_hamming_distance() { 24 | let a: [u8; QUANTIZED_VECTOR_SIZE] = [0xFF; QUANTIZED_VECTOR_SIZE]; // All bits set 25 | let b: [u8; QUANTIZED_VECTOR_SIZE] = [0xFF; QUANTIZED_VECTOR_SIZE]; // All bits set 26 | assert_eq!(hamming_distance(&a, &b), 0); 27 | } 28 | 29 | // Helper function to create identity matrix for testing GEMV and GEMM 30 | fn identity_matrix(size: usize) -> Vec<[f32; VECTOR_SIZE]> { 31 | let mut matrix = vec![[0.0; VECTOR_SIZE]; size]; 32 | for i in 0..size { 33 | matrix[i][i] = 1.0; 34 | } 35 | matrix 36 | } 37 | 38 | // Helper function to create zero matrix for testing GEMV and GEMM 39 | fn zero_matrix(size: usize) -> Vec<[f32; VECTOR_SIZE]> { 40 | vec![[0.0; VECTOR_SIZE]; size] 41 | } 42 | 43 | // Helper function to create a matrix of ones for testing GEMV and GEMM 44 | fn one_matrix(size: usize) -> Vec<[f32; VECTOR_SIZE]> { 45 | vec![[1.0; VECTOR_SIZE]; size] 46 | } 47 | 48 | // Test GEMV with identity matrix 49 | 
#[test] 50 | fn test_gemv_identity() { 51 | let matrix = identity_matrix(VECTOR_SIZE); 52 | let vector = [1.0; VECTOR_SIZE]; 53 | let result = gemv::gemv(&matrix, &vector); 54 | assert_eq!(result, [1.0; VECTOR_SIZE]); 55 | } 56 | 57 | // Test GEMM with identity matrix 58 | #[test] 59 | fn test_gemm_identity() { 60 | let matrix_a = identity_matrix(VECTOR_SIZE); 61 | let matrix_b = identity_matrix(VECTOR_SIZE); 62 | let mut result_matrix = zero_matrix(VECTOR_SIZE); 63 | gemm::gemm(&matrix_a, &matrix_b, &mut result_matrix); 64 | assert_eq!(result_matrix, identity_matrix(VECTOR_SIZE)); 65 | } 66 | 67 | // Test GEMV with zero matrix 68 | #[test] 69 | fn test_gemv_zero() { 70 | let matrix = zero_matrix(VECTOR_SIZE); 71 | let vector = [1.0; VECTOR_SIZE]; 72 | let result = gemv::gemv(&matrix, &vector); 73 | assert_eq!(result, [0.0; VECTOR_SIZE]); 74 | } 75 | 76 | // Test GEMM with zero matrix 77 | #[test] 78 | fn test_gemm_zero() { 79 | let matrix_a = zero_matrix(VECTOR_SIZE); 80 | let matrix_b = zero_matrix(VECTOR_SIZE); 81 | let mut result_matrix = zero_matrix(VECTOR_SIZE); 82 | gemm::gemm(&matrix_a, &matrix_b, &mut result_matrix); 83 | assert_eq!(result_matrix, zero_matrix(VECTOR_SIZE)); 84 | } 85 | 86 | // Test GEMV with one matrix 87 | #[test] 88 | fn test_gemv_ones() { 89 | let matrix = one_matrix(VECTOR_SIZE); 90 | let vector = [1.0; VECTOR_SIZE]; 91 | let result = gemv::gemv(&matrix, &vector); 92 | let expected_result: [f32; VECTOR_SIZE] = [VECTOR_SIZE as f32; VECTOR_SIZE]; 93 | assert_eq!(result, expected_result); 94 | } 95 | 96 | // Test GEMM with one matrix 97 | #[test] 98 | fn test_gemm_ones() { 99 | let matrix_a = one_matrix(VECTOR_SIZE); 100 | let matrix_b = one_matrix(VECTOR_SIZE); 101 | let mut result_matrix = zero_matrix(VECTOR_SIZE); 102 | gemm::gemm(&matrix_a, &matrix_b, &mut result_matrix); 103 | let expected_result: Vec<[f32; VECTOR_SIZE]> = 104 | vec![[VECTOR_SIZE as f32; VECTOR_SIZE]; VECTOR_SIZE]; 105 | assert_eq!(result_matrix, expected_result); 
}
}
--------------------------------------------------------------------------------
/tests/trees.rs:
--------------------------------------------------------------------------------
extern crate haystackdb;

#[cfg(test)]
mod tests {
    use std::path::PathBuf;
    use std::str::FromStr;

    use haystackdb::structures::mmap_tree::node::{Node, NodeType};
    use haystackdb::structures::mmap_tree::storage::{StorageManager, HEADER_SIZE};
    use haystackdb::structures::mmap_tree::Tree;
    use std::fs;
    use uuid;

    /// Build a unique `tests/data/<uuid>/test.bin` path and create its parent
    /// directory, so every test operates on isolated on-disk state.
    fn fresh_path() -> PathBuf {
        let path = PathBuf::from_str("tests/data")
            .unwrap()
            .join(uuid::Uuid::new_v4().to_string())
            .join("test.bin");
        fs::create_dir_all(path.parent().unwrap()).expect("Failed to create directory");
        path
    }

    #[test]
    fn test_store_and_load_node() {
        let mut storage_manager: StorageManager<i32, String> =
            StorageManager::new(fresh_path()).unwrap();

        let mut node = Node::new_leaf(0);
        node.keys.push(1);
        node.values.push(Some("one".to_string()));

        // Store the node; the first node must land right after the file header.
        let offset = storage_manager.store_node(&mut node).unwrap();
        assert_eq!(offset, HEADER_SIZE);

        // Round-trip: loading from the returned offset yields the same data.
        let loaded_node = storage_manager.load_node(offset).unwrap();
        assert_eq!(loaded_node.keys, vec![1]);
        assert_eq!(loaded_node.values, vec![Some("one".to_string())]);
    }

    #[test]
    fn test_store_multiple_nodes() {
        let mut storage_manager: StorageManager<i32, String> =
            StorageManager::new(fresh_path()).unwrap();

        let mut node1 = Node::new_leaf(0);
        node1.keys.push(1);
        node1.values.push(Some("one".to_string()));

        let mut node2 = Node::new_leaf(0);
        node2.keys.push(2);
        node2.values.push(Some("two".to_string()));

        // Store the first node
        let offset1 = storage_manager.store_node(&mut node1).unwrap();
        assert_eq!(offset1, HEADER_SIZE);

        // Store the second node
        let offset2 = storage_manager.store_node(&mut node2).unwrap();
        assert!(offset2 > offset1); // Ensure that the second node is stored after the first

        // Load the first node
        let loaded_node1 = storage_manager.load_node(offset1).unwrap();
        assert_eq!(loaded_node1.keys, vec![1]);
        assert_eq!(loaded_node1.values, vec![Some("one".to_string())]);

        // Load the second node
        let loaded_node2 = storage_manager.load_node(offset2).unwrap();
        assert_eq!(loaded_node2.keys, vec![2]);
        assert_eq!(loaded_node2.values, vec![Some("two".to_string())]);
    }

    // #[test]
    // fn test_resize_storage() {
    //     let path = fresh_path();
    //     let mut storage_manager: StorageManager<i32, String> =
    //         StorageManager::new(path.clone()).unwrap();

    //     let mut large_node = Node::new_leaf(0);
    //     for i in 0..1000 {
    //         large_node.keys.push(i);
    //         large_node.values.push(Some(format!("value_{}", i)));
    //     }

    //     // Store the large node
    //     let offset = storage_manager.store_node(&mut large_node).unwrap();
    //     assert_eq!(offset, HEADER_SIZE);

    //     // Load the large node
    //     let loaded_node = storage_manager.load_node(offset).unwrap();
    //     assert_eq!(loaded_node.keys.len(), 1000);
    //     assert_eq!(loaded_node.values.len(), 1000);
    // }

    #[test]
    fn test_new_leaf() {
        // A freshly created leaf holds no keys, values, or children.
        let node: Node<i32, String> = Node::new_leaf(0);
        assert!(node.keys.is_empty());
        assert!(node.values.is_empty());
        assert!(node.children.is_empty());
        assert_eq!(node.node_type, NodeType::Leaf);
    }

    #[test]
    fn test_search_in_leaf() {
        let mut tree = Tree::new(fresh_path()).expect("Failed to create tree");
        tree.insert(1, "one".to_string()).unwrap();
        tree.insert(2, "two".to_string()).unwrap();
        assert_eq!(tree.search(1).unwrap(), Some("one".to_string()));
        assert_eq!(tree.search(2).unwrap(), Some("two".to_string()));
        assert_eq!(tree.search(3).unwrap(), None);
    }

    #[test]
    fn test_complex_tree_operations() {
        let mut tree = Tree::new(fresh_path()).expect("Failed to create tree");
        for i in 0..10 {
            tree.insert(i, format!("value_{}", i)).unwrap();
        }
        assert_eq!(tree.search(5).unwrap(), Some("value_5".to_string()));
        assert_eq!(tree.search(9).unwrap(), Some("value_9".to_string()));
        assert_eq!(tree.search(10).unwrap(), None);
    }

    #[test]
    fn test_serialization_and_deserialization() {
        let mut node: Node<i32, String> = Node::new_leaf(0);
        node.set_key_value(0, "value_0".to_string());
        node.set_key_value(1, "value_1".to_string());
        let serialized = node.serialize();
        let deserialized: Node<i32, String> = Node::deserialize(&serialized);

        // Serialize/deserialize must be a lossless round trip.
        assert_eq!(node.keys, deserialized.keys);
        assert_eq!(node.values, deserialized.values);
        assert_eq!(node.children, deserialized.children);
    }

    #[test]
    fn test_tree_initialization() {
        let tree: Result<Tree<i32, String>, std::io::Error> = Tree::new(fresh_path());
        assert!(tree.is_ok());
    }

    #[test]
    fn test_insert_search_leaf() {
        let mut tree = Tree::new(fresh_path()).expect("Failed to create tree");

        tree.insert(1, "one".to_string()).unwrap();
        tree.insert(2, "two".to_string()).unwrap();

        assert_eq!(tree.search(1).unwrap(), Some("one".to_string()));
        assert_eq!(tree.search(2).unwrap(), Some("two".to_string()));
        assert_eq!(tree.search(3).unwrap(), None);
    }

    // Edge Cases

    #[test]
    fn test_insert_duplicate_keys() {
        let mut tree = Tree::new(fresh_path()).expect("Failed to create tree");

        tree.insert(1, "one".to_string()).unwrap();
        tree.insert(1, "one_duplicate".to_string()).unwrap(); // Assuming overwrite behavior

        assert_eq!(tree.search(1).unwrap(), Some("one_duplicate".to_string()));
    }

    #[test]
    fn test_search_non_existent_key() {
        let mut tree: Tree<i32, String> =
            Tree::new(fresh_path()).expect("Failed to create tree");

        assert_eq!(tree.search(999).unwrap(), None);
    }
    // Complex Operations

    #[test]
    fn test_complex_insertions() {
        let mut tree = Tree::new(fresh_path()).expect("Failed to create tree");

        for i in 0..100 {
            tree.insert(i, format!("value_{}", i))
                .unwrap_or_else(|e| panic!("Failed to insert {}: {:?}", i, e));
        }

        for i in 0..100 {
            assert_eq!(tree.search(i).unwrap(), Some(format!("value_{}", i)));
        }
    }

    #[test]
    fn test_large_scale_insert_search() {
        let mut tree = Tree::new(fresh_path()).unwrap();

        let num_items = 1000;
        for i in 0..num_items {
            tree.insert(i, format!("value_{}", i))
                .unwrap_or_else(|e| panic!("Failed to insert {}: {:?}", i, e));
        }

        for i in 0..num_items {
            assert_eq!(tree.search(i).unwrap(), Some(format!("value_{}", i)));
        }
    }

    #[test]
    fn test_repeated_insertions_same_key() {
        let mut tree = Tree::new(fresh_path()).unwrap();

        tree.insert(1, "one".to_string()).unwrap();
        tree.insert(1, "still_one".to_string()).unwrap(); // Try inserting the same key

        // A later insert with the same key overwrites the earlier value.
        assert_eq!(tree.search(1).unwrap(), Some("still_one".to_string()));
    }

    // #[test]
    // fn test_insertion_order_independence() {
    //     let path = PathBuf::from_str("tests/data")
    //         .unwrap()
    //         .join(uuid::Uuid::new_v4().to_string());
    //     let mut tree = Tree::new(path.clone()).unwrap();
    //     let mut tree_reverse = Tree::new(path).unwrap();

    //     let keys = vec![3, 1, 4, 1, 5, 9, 2];
    //     let values = vec!["three", "one", "four", "one", "five", "nine", "two"];

    //     for (&k, &v) in keys.iter().zip(values.iter()) {
    //         tree.insert(k, v.to_string()).unwrap();
    //     }

    //     for (&k, &v) in keys.iter().zip(values.iter()).rev() {
    //         tree_reverse.insert(k, v.to_string()).unwrap();
    //     }

    //     for &k in &keys {
    //         assert_eq!(tree.search(k).unwrap(), tree_reverse.search(k).unwrap());
    //     }
    // }

    #[test]
    fn test_search_non_existent_keys() {
        let mut tree: Tree<i32, String> = Tree::new(fresh_path()).unwrap();

        assert_eq!(tree.search(999).unwrap(), None);
    }

    #[test]
    fn test_insert_search_edge_integers() {
        let mut tree = Tree::new(fresh_path()).unwrap();

        // Exercise the extremes of the key domain.
        let min_int = i32::MIN;
        let max_int = i32::MAX;

        tree.insert(min_int, "minimum".to_string()).unwrap();
        tree.insert(max_int, "maximum".to_string()).unwrap();

        assert_eq!(tree.search(min_int).unwrap(), Some("minimum".to_string()));
        assert_eq!(tree.search(max_int).unwrap(), Some("maximum".to_string()));
    }

    #[test]
    fn test_batch_insert() {
        let mut tree: Tree<i32, String> =
            Tree::new(fresh_path()).expect("Failed to create tree");

        const NUM_ITEMS: usize = 10_000;

        // First: one-at-a-time inserts as the baseline.
        for i in 0..NUM_ITEMS {
            tree.insert(i as i32, format!("value_{}", i))
                .unwrap_or_else(|e| panic!("Failed to insert {}: {:?}", i, e));
        }

        for i in 0..NUM_ITEMS {
            assert_eq!(tree.search(i as i32).unwrap(), Some(format!("value_{}", i)));
        }

        // Second: the same data through batch_insert must be equivalent.
        let mut tree: Tree<i32, String> =
            Tree::new(fresh_path()).expect("Failed to create tree");

        let entries: Vec<(i32, String)> = (0..NUM_ITEMS)
            .map(|i| (i as i32, format!("value_{}", i)))
            .collect();

        tree.batch_insert(entries).expect("Failed to batch insert");

        for i in 0..NUM_ITEMS {
            assert_eq!(tree.search(i as i32).unwrap(), Some(format!("value_{}", i)));
        }
    }
}
--------------------------------------------------------------------------------