├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .gitmodules ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benchmarks ├── bench.py └── requirements.txt ├── docs └── index.md ├── pyproject.toml ├── requirements.txt ├── src ├── conv.rs └── lib.rs ├── test.py └── tests ├── __init__.py └── test_milli_index.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: [push] 3 | jobs: 4 | build: 5 | strategy: 6 | fail-fast: false 7 | matrix: 8 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 9 | os: ["ubuntu-latest", "windows-latest", "macos-latest"] 10 | runs-on: ${{ matrix.os }} 11 | steps: 12 | - uses: actions/checkout@v3 13 | with: 14 | submodules: true 15 | 16 | # Select Rust toolchain 17 | - name: Select Rust toolchain 18 | run: | 19 | rustup set auto-self-update disable 20 | rustup toolchain install stable --profile minimal 21 | 22 | # Install and cache Cargo dependencies 23 | - uses: Swatinem/rust-cache@v2 24 | - name: Check package 25 | run: | 26 | cargo check 27 | 28 | # Install and cache Python dependencies 29 | - name: Set up Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v4 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | cache: 'pip' 34 | - name: Add Python user scripts to PATH 35 | if: matrix.os == 'windows-latest' 36 | run: | 37 | $PYTHON_USER_SCRIPTS = (python -c "import os,sysconfig;print(sysconfig.get_path('scripts',f'{os.name}_user'))") 38 | $env:PATH = "$PYTHON_USER_SCRIPTS;$env:PATH" 39 | 40 | # Build package 41 | - name: Build package 42 | run: | 43 | pip3 install maturin 44 | maturin build -i ${{ matrix.python-version }} 45 | 46 | # Install package 47 | - name: Install package 48 | run: | 49 | pip3 install --no-index --find-links=target/wheels milli 50 | 51 | # Test package 52 | - name: Test package 53 | run: | 54 | python3 test.py 55 | 56 | # Publish package 57 | - name: Publish package 58 | env: 59 | 
MATURIN_PASSWORD: ${{ secrets.PYPI_TOKEN }} 60 | run: | 61 | maturin publish --skip-existing --username __token__ 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__ 3 | 4 | # Binaries 5 | target 6 | 7 | # Tests 8 | index 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "meilisearch"] 2 | path = meilisearch 3 | url = https://github.com/meilisearch/meilisearch 4 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "ahash" 13 | version = "0.8.11" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" 16 | dependencies = [ 17 | "cfg-if", 18 | "once_cell", 19 | "version_check", 20 | "zerocopy", 21 | ] 22 | 23 | [[package]] 24 | name = "aho-corasick" 25 | version = "1.1.3" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 28 | dependencies = [ 29 | "memchr", 30 | ] 31 | 32 | [[package]] 33 | name = "allocator-api2" 34 | version = "0.2.18" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" 37 | 38 | 
[[package]] 39 | name = "anyhow" 40 | version = "1.0.86" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" 43 | 44 | [[package]] 45 | name = "anymap2" 46 | version = "0.13.0" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c" 49 | 50 | [[package]] 51 | name = "arroy" 52 | version = "0.2.0" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "efddeb1e7c32a551cc07ef4c3e181e3cd5478fdaf4f0bd799983171c1f6efe57" 55 | dependencies = [ 56 | "bytemuck", 57 | "byteorder", 58 | "heed", 59 | "log", 60 | "memmap2 0.9.4", 61 | "ordered-float", 62 | "rand", 63 | "rayon", 64 | "roaring", 65 | "tempfile", 66 | "thiserror", 67 | ] 68 | 69 | [[package]] 70 | name = "atomic-polyfill" 71 | version = "0.1.11" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = "e3ff7eb3f316534d83a8a2c3d1674ace8a5a71198eba31e2e2b597833f699b28" 74 | dependencies = [ 75 | "critical-section", 76 | ] 77 | 78 | [[package]] 79 | name = "autocfg" 80 | version = "1.1.0" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 83 | 84 | [[package]] 85 | name = "base64" 86 | version = "0.13.1" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" 89 | 90 | [[package]] 91 | name = "base64" 92 | version = "0.21.7" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" 95 | 96 | [[package]] 97 | name = "base64" 98 | version = "0.22.1" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = 
"72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" 101 | 102 | [[package]] 103 | name = "bimap" 104 | version = "0.6.3" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" 107 | dependencies = [ 108 | "serde", 109 | ] 110 | 111 | [[package]] 112 | name = "bincode" 113 | version = "1.3.3" 114 | source = "registry+https://github.com/rust-lang/crates.io-index" 115 | checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" 116 | dependencies = [ 117 | "serde", 118 | ] 119 | 120 | [[package]] 121 | name = "bit-set" 122 | version = "0.5.3" 123 | source = "registry+https://github.com/rust-lang/crates.io-index" 124 | checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" 125 | dependencies = [ 126 | "bit-vec", 127 | ] 128 | 129 | [[package]] 130 | name = "bit-vec" 131 | version = "0.6.3" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" 134 | 135 | [[package]] 136 | name = "bitflags" 137 | version = "1.3.2" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 140 | 141 | [[package]] 142 | name = "bitflags" 143 | version = "2.5.0" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" 146 | dependencies = [ 147 | "serde", 148 | ] 149 | 150 | [[package]] 151 | name = "block-buffer" 152 | version = "0.10.4" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" 155 | dependencies = [ 156 | "generic-array", 157 | ] 158 | 159 | [[package]] 160 | name = "bstr" 161 | version = "1.9.1" 162 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" 164 | dependencies = [ 165 | "memchr", 166 | "regex-automata", 167 | "serde", 168 | ] 169 | 170 | [[package]] 171 | name = "bytecount" 172 | version = "0.6.3" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" 175 | 176 | [[package]] 177 | name = "bytemuck" 178 | version = "1.16.0" 179 | source = "registry+https://github.com/rust-lang/crates.io-index" 180 | checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" 181 | dependencies = [ 182 | "bytemuck_derive", 183 | ] 184 | 185 | [[package]] 186 | name = "bytemuck_derive" 187 | version = "1.4.0" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "1aca418a974d83d40a0c1f0c5cba6ff4bc28d8df099109ca459a2118d40b6322" 190 | dependencies = [ 191 | "proc-macro2", 192 | "quote", 193 | "syn 1.0.107", 194 | ] 195 | 196 | [[package]] 197 | name = "byteorder" 198 | version = "1.5.0" 199 | source = "registry+https://github.com/rust-lang/crates.io-index" 200 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 201 | 202 | [[package]] 203 | name = "bytes" 204 | version = "1.4.0" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" 207 | 208 | [[package]] 209 | name = "candle-core" 210 | version = "0.4.1" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "6f1b20174c1707e20f4cb364a355b449803c03e9b0c9193324623cf9787a4e00" 213 | dependencies = [ 214 | "byteorder", 215 | "gemm", 216 | "half", 217 | "memmap2 0.9.4", 218 | "num-traits", 219 | "num_cpus", 220 | "rand", 221 | "rand_distr", 222 | "rayon", 223 | "safetensors", 224 | "thiserror", 225 | "yoke", 226 | "zip", 227 | ] 
228 | 229 | [[package]] 230 | name = "candle-nn" 231 | version = "0.4.1" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "66a27533c8edfc915a6459f9850641ef523a829fa1a181c670766c1f752d873a" 234 | dependencies = [ 235 | "candle-core", 236 | "half", 237 | "num-traits", 238 | "rayon", 239 | "safetensors", 240 | "serde", 241 | "thiserror", 242 | ] 243 | 244 | [[package]] 245 | name = "candle-transformers" 246 | version = "0.4.1" 247 | source = "registry+https://github.com/rust-lang/crates.io-index" 248 | checksum = "b5847699f0643da05e57fc473672566e93dc36d82c1b7eeb970c6154d3434fe1" 249 | dependencies = [ 250 | "byteorder", 251 | "candle-core", 252 | "candle-nn", 253 | "num-traits", 254 | "rand", 255 | "rayon", 256 | "serde", 257 | "serde_json", 258 | "serde_plain", 259 | "tracing", 260 | ] 261 | 262 | [[package]] 263 | name = "cc" 264 | version = "1.0.99" 265 | source = "registry+https://github.com/rust-lang/crates.io-index" 266 | checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" 267 | 268 | [[package]] 269 | name = "cfg-if" 270 | version = "1.0.0" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 273 | 274 | [[package]] 275 | name = "charabia" 276 | version = "0.8.11" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "11a09ae38cfcc153f01576c3f579dfd916e0320f1b474f298c8d680b2dd92eb6" 279 | dependencies = [ 280 | "aho-corasick", 281 | "cow-utils", 282 | "csv", 283 | "deunicode", 284 | "either", 285 | "fst", 286 | "irg-kvariants", 287 | "litemap", 288 | "once_cell", 289 | "serde", 290 | "slice-group-by", 291 | "unicode-normalization", 292 | "whatlang", 293 | "zerovec", 294 | ] 295 | 296 | [[package]] 297 | name = "concat-arrays" 298 | version = "0.1.2" 299 | source = "registry+https://github.com/rust-lang/crates.io-index" 300 | checksum = 
"1df715824eb382e34b7afb7463b0247bf41538aeba731fba05241ecdb5dc3747" 301 | dependencies = [ 302 | "proc-macro2", 303 | "quote", 304 | "syn 1.0.107", 305 | ] 306 | 307 | [[package]] 308 | name = "console" 309 | version = "0.15.8" 310 | source = "registry+https://github.com/rust-lang/crates.io-index" 311 | checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" 312 | dependencies = [ 313 | "encode_unicode", 314 | "lazy_static", 315 | "libc", 316 | "unicode-width", 317 | "windows-sys 0.52.0", 318 | ] 319 | 320 | [[package]] 321 | name = "convert_case" 322 | version = "0.6.0" 323 | source = "registry+https://github.com/rust-lang/crates.io-index" 324 | checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" 325 | dependencies = [ 326 | "unicode-segmentation", 327 | ] 328 | 329 | [[package]] 330 | name = "cow-utils" 331 | version = "0.1.2" 332 | source = "registry+https://github.com/rust-lang/crates.io-index" 333 | checksum = "79bb3adfaf5f75d24b01aee375f7555907840fa2800e5ec8fa3b9e2031830173" 334 | 335 | [[package]] 336 | name = "cpufeatures" 337 | version = "0.2.6" 338 | source = "registry+https://github.com/rust-lang/crates.io-index" 339 | checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" 340 | dependencies = [ 341 | "libc", 342 | ] 343 | 344 | [[package]] 345 | name = "crc32fast" 346 | version = "1.4.2" 347 | source = "registry+https://github.com/rust-lang/crates.io-index" 348 | checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" 349 | dependencies = [ 350 | "cfg-if", 351 | ] 352 | 353 | [[package]] 354 | name = "critical-section" 355 | version = "1.1.1" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "6548a0ad5d2549e111e1f6a11a6c2e2d00ce6a3dafe22948d67c2b443f775e52" 358 | 359 | [[package]] 360 | name = "crossbeam-channel" 361 | version = "0.5.13" 362 | source = "registry+https://github.com/rust-lang/crates.io-index" 363 | checksum = 
"33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" 364 | dependencies = [ 365 | "crossbeam-utils", 366 | ] 367 | 368 | [[package]] 369 | name = "crossbeam-deque" 370 | version = "0.8.2" 371 | source = "registry+https://github.com/rust-lang/crates.io-index" 372 | checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" 373 | dependencies = [ 374 | "cfg-if", 375 | "crossbeam-epoch", 376 | "crossbeam-utils", 377 | ] 378 | 379 | [[package]] 380 | name = "crossbeam-epoch" 381 | version = "0.9.13" 382 | source = "registry+https://github.com/rust-lang/crates.io-index" 383 | checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" 384 | dependencies = [ 385 | "autocfg", 386 | "cfg-if", 387 | "crossbeam-utils", 388 | "memoffset 0.7.1", 389 | "scopeguard", 390 | ] 391 | 392 | [[package]] 393 | name = "crossbeam-queue" 394 | version = "0.3.8" 395 | source = "registry+https://github.com/rust-lang/crates.io-index" 396 | checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" 397 | dependencies = [ 398 | "cfg-if", 399 | "crossbeam-utils", 400 | ] 401 | 402 | [[package]] 403 | name = "crossbeam-utils" 404 | version = "0.8.20" 405 | source = "registry+https://github.com/rust-lang/crates.io-index" 406 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 407 | 408 | [[package]] 409 | name = "crunchy" 410 | version = "0.2.2" 411 | source = "registry+https://github.com/rust-lang/crates.io-index" 412 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" 413 | 414 | [[package]] 415 | name = "crypto-common" 416 | version = "0.1.6" 417 | source = "registry+https://github.com/rust-lang/crates.io-index" 418 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 419 | dependencies = [ 420 | "generic-array", 421 | "typenum", 422 | ] 423 | 424 | [[package]] 425 | name = "csv" 426 | version = "1.3.0" 427 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 428 | checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" 429 | dependencies = [ 430 | "csv-core", 431 | "itoa", 432 | "ryu", 433 | "serde", 434 | ] 435 | 436 | [[package]] 437 | name = "csv-core" 438 | version = "0.1.11" 439 | source = "registry+https://github.com/rust-lang/crates.io-index" 440 | checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" 441 | dependencies = [ 442 | "memchr", 443 | ] 444 | 445 | [[package]] 446 | name = "darling" 447 | version = "0.14.4" 448 | source = "registry+https://github.com/rust-lang/crates.io-index" 449 | checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" 450 | dependencies = [ 451 | "darling_core", 452 | "darling_macro", 453 | ] 454 | 455 | [[package]] 456 | name = "darling_core" 457 | version = "0.14.4" 458 | source = "registry+https://github.com/rust-lang/crates.io-index" 459 | checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" 460 | dependencies = [ 461 | "fnv", 462 | "ident_case", 463 | "proc-macro2", 464 | "quote", 465 | "strsim", 466 | "syn 1.0.107", 467 | ] 468 | 469 | [[package]] 470 | name = "darling_macro" 471 | version = "0.14.4" 472 | source = "registry+https://github.com/rust-lang/crates.io-index" 473 | checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" 474 | dependencies = [ 475 | "darling_core", 476 | "quote", 477 | "syn 1.0.107", 478 | ] 479 | 480 | [[package]] 481 | name = "deranged" 482 | version = "0.3.11" 483 | source = "registry+https://github.com/rust-lang/crates.io-index" 484 | checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" 485 | dependencies = [ 486 | "powerfmt", 487 | "serde", 488 | ] 489 | 490 | [[package]] 491 | name = "derive_builder" 492 | version = "0.12.0" 493 | source = "registry+https://github.com/rust-lang/crates.io-index" 494 | checksum = 
"8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8" 495 | dependencies = [ 496 | "derive_builder_macro", 497 | ] 498 | 499 | [[package]] 500 | name = "derive_builder_core" 501 | version = "0.12.0" 502 | source = "registry+https://github.com/rust-lang/crates.io-index" 503 | checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f" 504 | dependencies = [ 505 | "darling", 506 | "proc-macro2", 507 | "quote", 508 | "syn 1.0.107", 509 | ] 510 | 511 | [[package]] 512 | name = "derive_builder_macro" 513 | version = "0.12.0" 514 | source = "registry+https://github.com/rust-lang/crates.io-index" 515 | checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e" 516 | dependencies = [ 517 | "derive_builder_core", 518 | "syn 1.0.107", 519 | ] 520 | 521 | [[package]] 522 | name = "deserr" 523 | version = "0.6.1" 524 | source = "registry+https://github.com/rust-lang/crates.io-index" 525 | checksum = "3418e814bb893a61b25a60db71dccab7cb70035d94ffe58c9b4a70387ac42083" 526 | dependencies = [ 527 | "deserr-internal", 528 | "serde-cs", 529 | "serde_json", 530 | "serde_urlencoded", 531 | "strsim", 532 | ] 533 | 534 | [[package]] 535 | name = "deserr-internal" 536 | version = "0.7.0" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "9cadd9624c995ecbd6d6c81ec0804fbbb335e3acba5326a06067ccd33295a37e" 539 | dependencies = [ 540 | "convert_case", 541 | "proc-macro2", 542 | "quote", 543 | "syn 2.0.66", 544 | ] 545 | 546 | [[package]] 547 | name = "deunicode" 548 | version = "1.6.0" 549 | source = "registry+https://github.com/rust-lang/crates.io-index" 550 | checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00" 551 | 552 | [[package]] 553 | name = "digest" 554 | version = "0.10.7" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" 557 | dependencies = [ 558 | "block-buffer", 559 
| "crypto-common", 560 | ] 561 | 562 | [[package]] 563 | name = "dirs" 564 | version = "5.0.1" 565 | source = "registry+https://github.com/rust-lang/crates.io-index" 566 | checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" 567 | dependencies = [ 568 | "dirs-sys", 569 | ] 570 | 571 | [[package]] 572 | name = "dirs-sys" 573 | version = "0.4.1" 574 | source = "registry+https://github.com/rust-lang/crates.io-index" 575 | checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" 576 | dependencies = [ 577 | "libc", 578 | "option-ext", 579 | "redox_users", 580 | "windows-sys 0.48.0", 581 | ] 582 | 583 | [[package]] 584 | name = "doc-comment" 585 | version = "0.3.3" 586 | source = "registry+https://github.com/rust-lang/crates.io-index" 587 | checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" 588 | 589 | [[package]] 590 | name = "doxygen-rs" 591 | version = "0.2.2" 592 | source = "registry+https://github.com/rust-lang/crates.io-index" 593 | checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505" 594 | dependencies = [ 595 | "phf", 596 | ] 597 | 598 | [[package]] 599 | name = "dyn-stack" 600 | version = "0.10.0" 601 | source = "registry+https://github.com/rust-lang/crates.io-index" 602 | checksum = "56e53799688f5632f364f8fb387488dd05db9fe45db7011be066fc20e7027f8b" 603 | dependencies = [ 604 | "bytemuck", 605 | "reborrow", 606 | ] 607 | 608 | [[package]] 609 | name = "either" 610 | version = "1.12.0" 611 | source = "registry+https://github.com/rust-lang/crates.io-index" 612 | checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" 613 | dependencies = [ 614 | "serde", 615 | ] 616 | 617 | [[package]] 618 | name = "encode_unicode" 619 | version = "0.3.6" 620 | source = "registry+https://github.com/rust-lang/crates.io-index" 621 | checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" 622 | 623 | [[package]] 624 | name = "enum-as-inner" 
625 | version = "0.6.0" 626 | source = "registry+https://github.com/rust-lang/crates.io-index" 627 | checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" 628 | dependencies = [ 629 | "heck", 630 | "proc-macro2", 631 | "quote", 632 | "syn 2.0.66", 633 | ] 634 | 635 | [[package]] 636 | name = "equivalent" 637 | version = "1.0.1" 638 | source = "registry+https://github.com/rust-lang/crates.io-index" 639 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" 640 | 641 | [[package]] 642 | name = "errno" 643 | version = "0.3.9" 644 | source = "registry+https://github.com/rust-lang/crates.io-index" 645 | checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" 646 | dependencies = [ 647 | "libc", 648 | "windows-sys 0.52.0", 649 | ] 650 | 651 | [[package]] 652 | name = "esaxx-rs" 653 | version = "0.1.10" 654 | source = "registry+https://github.com/rust-lang/crates.io-index" 655 | checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" 656 | 657 | [[package]] 658 | name = "fancy-regex" 659 | version = "0.12.0" 660 | source = "registry+https://github.com/rust-lang/crates.io-index" 661 | checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05" 662 | dependencies = [ 663 | "bit-set", 664 | "regex", 665 | ] 666 | 667 | [[package]] 668 | name = "fastrand" 669 | version = "2.1.0" 670 | source = "registry+https://github.com/rust-lang/crates.io-index" 671 | checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" 672 | 673 | [[package]] 674 | name = "filter-parser" 675 | version = "1.8.1" 676 | dependencies = [ 677 | "nom", 678 | "nom_locate", 679 | "unescaper", 680 | ] 681 | 682 | [[package]] 683 | name = "flate2" 684 | version = "1.0.30" 685 | source = "registry+https://github.com/rust-lang/crates.io-index" 686 | checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" 687 | dependencies = [ 688 | "crc32fast", 689 | 
"miniz_oxide", 690 | ] 691 | 692 | [[package]] 693 | name = "flatten-serde-json" 694 | version = "1.8.1" 695 | dependencies = [ 696 | "serde_json", 697 | ] 698 | 699 | [[package]] 700 | name = "fnv" 701 | version = "1.0.7" 702 | source = "registry+https://github.com/rust-lang/crates.io-index" 703 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 704 | 705 | [[package]] 706 | name = "form_urlencoded" 707 | version = "1.2.1" 708 | source = "registry+https://github.com/rust-lang/crates.io-index" 709 | checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" 710 | dependencies = [ 711 | "percent-encoding", 712 | ] 713 | 714 | [[package]] 715 | name = "fst" 716 | version = "0.4.7" 717 | source = "registry+https://github.com/rust-lang/crates.io-index" 718 | checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" 719 | 720 | [[package]] 721 | name = "fxhash" 722 | version = "0.2.1" 723 | source = "registry+https://github.com/rust-lang/crates.io-index" 724 | checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" 725 | dependencies = [ 726 | "byteorder", 727 | ] 728 | 729 | [[package]] 730 | name = "gemm" 731 | version = "0.17.1" 732 | source = "registry+https://github.com/rust-lang/crates.io-index" 733 | checksum = "6ab24cc62135b40090e31a76a9b2766a501979f3070fa27f689c27ec04377d32" 734 | dependencies = [ 735 | "dyn-stack", 736 | "gemm-c32", 737 | "gemm-c64", 738 | "gemm-common", 739 | "gemm-f16", 740 | "gemm-f32", 741 | "gemm-f64", 742 | "num-complex", 743 | "num-traits", 744 | "paste", 745 | "raw-cpuid", 746 | "seq-macro", 747 | ] 748 | 749 | [[package]] 750 | name = "gemm-c32" 751 | version = "0.17.1" 752 | source = "registry+https://github.com/rust-lang/crates.io-index" 753 | checksum = "b9c030d0b983d1e34a546b86e08f600c11696fde16199f971cd46c12e67512c0" 754 | dependencies = [ 755 | "dyn-stack", 756 | "gemm-common", 757 | "num-complex", 758 | "num-traits", 759 | "paste", 760 | 
"raw-cpuid", 761 | "seq-macro", 762 | ] 763 | 764 | [[package]] 765 | name = "gemm-c64" 766 | version = "0.17.1" 767 | source = "registry+https://github.com/rust-lang/crates.io-index" 768 | checksum = "fbb5f2e79fefb9693d18e1066a557b4546cd334b226beadc68b11a8f9431852a" 769 | dependencies = [ 770 | "dyn-stack", 771 | "gemm-common", 772 | "num-complex", 773 | "num-traits", 774 | "paste", 775 | "raw-cpuid", 776 | "seq-macro", 777 | ] 778 | 779 | [[package]] 780 | name = "gemm-common" 781 | version = "0.17.1" 782 | source = "registry+https://github.com/rust-lang/crates.io-index" 783 | checksum = "a2e7ea062c987abcd8db95db917b4ffb4ecdfd0668471d8dc54734fdff2354e8" 784 | dependencies = [ 785 | "bytemuck", 786 | "dyn-stack", 787 | "half", 788 | "num-complex", 789 | "num-traits", 790 | "once_cell", 791 | "paste", 792 | "pulp", 793 | "raw-cpuid", 794 | "rayon", 795 | "seq-macro", 796 | "sysctl", 797 | ] 798 | 799 | [[package]] 800 | name = "gemm-f16" 801 | version = "0.17.1" 802 | source = "registry+https://github.com/rust-lang/crates.io-index" 803 | checksum = "7ca4c06b9b11952071d317604acb332e924e817bd891bec8dfb494168c7cedd4" 804 | dependencies = [ 805 | "dyn-stack", 806 | "gemm-common", 807 | "gemm-f32", 808 | "half", 809 | "num-complex", 810 | "num-traits", 811 | "paste", 812 | "raw-cpuid", 813 | "rayon", 814 | "seq-macro", 815 | ] 816 | 817 | [[package]] 818 | name = "gemm-f32" 819 | version = "0.17.1" 820 | source = "registry+https://github.com/rust-lang/crates.io-index" 821 | checksum = "e9a69f51aaefbd9cf12d18faf273d3e982d9d711f60775645ed5c8047b4ae113" 822 | dependencies = [ 823 | "dyn-stack", 824 | "gemm-common", 825 | "num-complex", 826 | "num-traits", 827 | "paste", 828 | "raw-cpuid", 829 | "seq-macro", 830 | ] 831 | 832 | [[package]] 833 | name = "gemm-f64" 834 | version = "0.17.1" 835 | source = "registry+https://github.com/rust-lang/crates.io-index" 836 | checksum = "aa397a48544fadf0b81ec8741e5c0fba0043008113f71f2034def1935645d2b0" 837 | dependencies = [ 838 | 
"dyn-stack", 839 | "gemm-common", 840 | "num-complex", 841 | "num-traits", 842 | "paste", 843 | "raw-cpuid", 844 | "seq-macro", 845 | ] 846 | 847 | [[package]] 848 | name = "generic-array" 849 | version = "0.14.7" 850 | source = "registry+https://github.com/rust-lang/crates.io-index" 851 | checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" 852 | dependencies = [ 853 | "typenum", 854 | "version_check", 855 | ] 856 | 857 | [[package]] 858 | name = "geoutils" 859 | version = "0.5.1" 860 | source = "registry+https://github.com/rust-lang/crates.io-index" 861 | checksum = "36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad" 862 | 863 | [[package]] 864 | name = "getrandom" 865 | version = "0.2.15" 866 | source = "registry+https://github.com/rust-lang/crates.io-index" 867 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 868 | dependencies = [ 869 | "cfg-if", 870 | "libc", 871 | "wasi", 872 | ] 873 | 874 | [[package]] 875 | name = "grenad" 876 | version = "0.4.6" 877 | source = "registry+https://github.com/rust-lang/crates.io-index" 878 | checksum = "c297f45167e6d543eb728e12ff284283e4ba2182a25c6cdcec883fda3316c7e7" 879 | dependencies = [ 880 | "bytemuck", 881 | "byteorder", 882 | "rayon", 883 | "tempfile", 884 | ] 885 | 886 | [[package]] 887 | name = "half" 888 | version = "2.4.1" 889 | source = "registry+https://github.com/rust-lang/crates.io-index" 890 | checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" 891 | dependencies = [ 892 | "bytemuck", 893 | "cfg-if", 894 | "crunchy", 895 | "num-traits", 896 | "rand", 897 | "rand_distr", 898 | ] 899 | 900 | [[package]] 901 | name = "hash32" 902 | version = "0.2.1" 903 | source = "registry+https://github.com/rust-lang/crates.io-index" 904 | checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" 905 | dependencies = [ 906 | "byteorder", 907 | ] 908 | 909 | [[package]] 910 | name = "hashbrown" 911 | version = 
"0.14.5" 912 | source = "registry+https://github.com/rust-lang/crates.io-index" 913 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 914 | dependencies = [ 915 | "ahash", 916 | "allocator-api2", 917 | ] 918 | 919 | [[package]] 920 | name = "heapless" 921 | version = "0.7.16" 922 | source = "registry+https://github.com/rust-lang/crates.io-index" 923 | checksum = "db04bc24a18b9ea980628ecf00e6c0264f3c1426dac36c00cb49b6fbad8b0743" 924 | dependencies = [ 925 | "atomic-polyfill", 926 | "hash32", 927 | "rustc_version", 928 | "spin", 929 | "stable_deref_trait", 930 | ] 931 | 932 | [[package]] 933 | name = "heck" 934 | version = "0.4.1" 935 | source = "registry+https://github.com/rust-lang/crates.io-index" 936 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 937 | 938 | [[package]] 939 | name = "heed" 940 | version = "0.20.0-alpha.9" 941 | source = "registry+https://github.com/rust-lang/crates.io-index" 942 | checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934" 943 | dependencies = [ 944 | "bitflags 2.5.0", 945 | "bytemuck", 946 | "byteorder", 947 | "heed-traits", 948 | "heed-types", 949 | "libc", 950 | "lmdb-master-sys", 951 | "once_cell", 952 | "page_size", 953 | "serde", 954 | "synchronoise", 955 | "url", 956 | ] 957 | 958 | [[package]] 959 | name = "heed-traits" 960 | version = "0.20.0" 961 | source = "registry+https://github.com/rust-lang/crates.io-index" 962 | checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff" 963 | 964 | [[package]] 965 | name = "heed-types" 966 | version = "0.20.0" 967 | source = "registry+https://github.com/rust-lang/crates.io-index" 968 | checksum = "3cb0d6ba3700c9a57e83c013693e3eddb68a6d9b6781cacafc62a0d992e8ddb3" 969 | dependencies = [ 970 | "bincode", 971 | "byteorder", 972 | "heed-traits", 973 | "serde", 974 | "serde_json", 975 | ] 976 | 977 | [[package]] 978 | name = "hermit-abi" 979 | version = "0.2.6" 980 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 981 | checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" 982 | dependencies = [ 983 | "libc", 984 | ] 985 | 986 | [[package]] 987 | name = "hf-hub" 988 | version = "0.3.2" 989 | source = "git+https://github.com/dureuill/hf-hub.git?branch=rust_tls#88d4f11cb9fa079f2912bacb96f5080b16825ce8" 990 | dependencies = [ 991 | "dirs", 992 | "http", 993 | "indicatif", 994 | "log", 995 | "rand", 996 | "serde", 997 | "serde_json", 998 | "thiserror", 999 | "ureq", 1000 | ] 1001 | 1002 | [[package]] 1003 | name = "http" 1004 | version = "1.1.0" 1005 | source = "registry+https://github.com/rust-lang/crates.io-index" 1006 | checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" 1007 | dependencies = [ 1008 | "bytes", 1009 | "fnv", 1010 | "itoa", 1011 | ] 1012 | 1013 | [[package]] 1014 | name = "ident_case" 1015 | version = "1.0.1" 1016 | source = "registry+https://github.com/rust-lang/crates.io-index" 1017 | checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" 1018 | 1019 | [[package]] 1020 | name = "idna" 1021 | version = "0.5.0" 1022 | source = "registry+https://github.com/rust-lang/crates.io-index" 1023 | checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" 1024 | dependencies = [ 1025 | "unicode-bidi", 1026 | "unicode-normalization", 1027 | ] 1028 | 1029 | [[package]] 1030 | name = "indexmap" 1031 | version = "2.2.6" 1032 | source = "registry+https://github.com/rust-lang/crates.io-index" 1033 | checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" 1034 | dependencies = [ 1035 | "equivalent", 1036 | "hashbrown", 1037 | "serde", 1038 | ] 1039 | 1040 | [[package]] 1041 | name = "indicatif" 1042 | version = "0.17.8" 1043 | source = "registry+https://github.com/rust-lang/crates.io-index" 1044 | checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" 1045 | dependencies = [ 1046 | 
"console", 1047 | "instant", 1048 | "number_prefix", 1049 | "portable-atomic", 1050 | "unicode-width", 1051 | ] 1052 | 1053 | [[package]] 1054 | name = "indoc" 1055 | version = "1.0.9" 1056 | source = "registry+https://github.com/rust-lang/crates.io-index" 1057 | checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" 1058 | 1059 | [[package]] 1060 | name = "instant" 1061 | version = "0.1.12" 1062 | source = "registry+https://github.com/rust-lang/crates.io-index" 1063 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" 1064 | dependencies = [ 1065 | "cfg-if", 1066 | ] 1067 | 1068 | [[package]] 1069 | name = "irg-kvariants" 1070 | version = "0.1.0" 1071 | source = "registry+https://github.com/rust-lang/crates.io-index" 1072 | checksum = "c73214298363629cf9dbfc93b426808865ee3c121029778cb31b1284104fdf78" 1073 | dependencies = [ 1074 | "csv", 1075 | "once_cell", 1076 | "serde", 1077 | ] 1078 | 1079 | [[package]] 1080 | name = "itertools" 1081 | version = "0.11.0" 1082 | source = "registry+https://github.com/rust-lang/crates.io-index" 1083 | checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" 1084 | dependencies = [ 1085 | "either", 1086 | ] 1087 | 1088 | [[package]] 1089 | name = "itertools" 1090 | version = "0.12.1" 1091 | source = "registry+https://github.com/rust-lang/crates.io-index" 1092 | checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" 1093 | dependencies = [ 1094 | "either", 1095 | ] 1096 | 1097 | [[package]] 1098 | name = "itoa" 1099 | version = "1.0.5" 1100 | source = "registry+https://github.com/rust-lang/crates.io-index" 1101 | checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" 1102 | 1103 | [[package]] 1104 | name = "json-depth-checker" 1105 | version = "1.8.1" 1106 | dependencies = [ 1107 | "serde_json", 1108 | ] 1109 | 1110 | [[package]] 1111 | name = "kstring" 1112 | version = "2.0.0" 1113 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1114 | checksum = "ec3066350882a1cd6d950d055997f379ac37fd39f81cd4d8ed186032eb3c5747" 1115 | dependencies = [ 1116 | "serde", 1117 | "static_assertions", 1118 | ] 1119 | 1120 | [[package]] 1121 | name = "lazy_static" 1122 | version = "1.4.0" 1123 | source = "registry+https://github.com/rust-lang/crates.io-index" 1124 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 1125 | 1126 | [[package]] 1127 | name = "levenshtein_automata" 1128 | version = "0.2.1" 1129 | source = "registry+https://github.com/rust-lang/crates.io-index" 1130 | checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" 1131 | dependencies = [ 1132 | "fst", 1133 | ] 1134 | 1135 | [[package]] 1136 | name = "libc" 1137 | version = "0.2.155" 1138 | source = "registry+https://github.com/rust-lang/crates.io-index" 1139 | checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" 1140 | 1141 | [[package]] 1142 | name = "libm" 1143 | version = "0.2.6" 1144 | source = "registry+https://github.com/rust-lang/crates.io-index" 1145 | checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" 1146 | 1147 | [[package]] 1148 | name = "libredox" 1149 | version = "0.1.3" 1150 | source = "registry+https://github.com/rust-lang/crates.io-index" 1151 | checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" 1152 | dependencies = [ 1153 | "bitflags 2.5.0", 1154 | "libc", 1155 | ] 1156 | 1157 | [[package]] 1158 | name = "linux-raw-sys" 1159 | version = "0.4.14" 1160 | source = "registry+https://github.com/rust-lang/crates.io-index" 1161 | checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" 1162 | 1163 | [[package]] 1164 | name = "liquid" 1165 | version = "0.26.6" 1166 | source = "registry+https://github.com/rust-lang/crates.io-index" 1167 | checksum = "10929f201279ba14da3297b957dcda1e0bf7a6f3bb5115688be684aa8864e9cc" 1168 | 
dependencies = [ 1169 | "doc-comment", 1170 | "liquid-core", 1171 | "liquid-derive", 1172 | "liquid-lib", 1173 | "serde", 1174 | ] 1175 | 1176 | [[package]] 1177 | name = "liquid-core" 1178 | version = "0.26.6" 1179 | source = "registry+https://github.com/rust-lang/crates.io-index" 1180 | checksum = "3aef4b2160791f456eb880c990a97746f693746f92302ef5f1d06111cf14b768" 1181 | dependencies = [ 1182 | "anymap2", 1183 | "itertools 0.12.1", 1184 | "kstring", 1185 | "liquid-derive", 1186 | "num-traits", 1187 | "pest", 1188 | "pest_derive", 1189 | "regex", 1190 | "serde", 1191 | "time", 1192 | ] 1193 | 1194 | [[package]] 1195 | name = "liquid-derive" 1196 | version = "0.26.5" 1197 | source = "registry+https://github.com/rust-lang/crates.io-index" 1198 | checksum = "915f6d0a2963a27cd5205c1902f32ddfe3bc035816afd268cf88c0fc0f8d287e" 1199 | dependencies = [ 1200 | "proc-macro2", 1201 | "quote", 1202 | "syn 2.0.66", 1203 | ] 1204 | 1205 | [[package]] 1206 | name = "liquid-lib" 1207 | version = "0.26.6" 1208 | source = "registry+https://github.com/rust-lang/crates.io-index" 1209 | checksum = "73f48fc446873f74d869582f5c4b8cbf3248c93395e410a67af5809b3731e44a" 1210 | dependencies = [ 1211 | "itertools 0.12.1", 1212 | "liquid-core", 1213 | "once_cell", 1214 | "percent-encoding", 1215 | "regex", 1216 | "time", 1217 | "unicode-segmentation", 1218 | ] 1219 | 1220 | [[package]] 1221 | name = "litemap" 1222 | version = "0.7.3" 1223 | source = "registry+https://github.com/rust-lang/crates.io-index" 1224 | checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" 1225 | 1226 | [[package]] 1227 | name = "lmdb-master-sys" 1228 | version = "0.1.0" 1229 | source = "registry+https://github.com/rust-lang/crates.io-index" 1230 | checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd" 1231 | dependencies = [ 1232 | "cc", 1233 | "doxygen-rs", 1234 | "libc", 1235 | "pkg-config", 1236 | ] 1237 | 1238 | [[package]] 1239 | name = "lock_api" 1240 | version = 
"0.4.9" 1241 | source = "registry+https://github.com/rust-lang/crates.io-index" 1242 | checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" 1243 | dependencies = [ 1244 | "autocfg", 1245 | "scopeguard", 1246 | ] 1247 | 1248 | [[package]] 1249 | name = "log" 1250 | version = "0.4.21" 1251 | source = "registry+https://github.com/rust-lang/crates.io-index" 1252 | checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" 1253 | 1254 | [[package]] 1255 | name = "macro_rules_attribute" 1256 | version = "0.2.0" 1257 | source = "registry+https://github.com/rust-lang/crates.io-index" 1258 | checksum = "8a82271f7bc033d84bbca59a3ce3e4159938cb08a9c3aebbe54d215131518a13" 1259 | dependencies = [ 1260 | "macro_rules_attribute-proc_macro", 1261 | "paste", 1262 | ] 1263 | 1264 | [[package]] 1265 | name = "macro_rules_attribute-proc_macro" 1266 | version = "0.2.0" 1267 | source = "registry+https://github.com/rust-lang/crates.io-index" 1268 | checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568" 1269 | 1270 | [[package]] 1271 | name = "memchr" 1272 | version = "2.7.2" 1273 | source = "registry+https://github.com/rust-lang/crates.io-index" 1274 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" 1275 | 1276 | [[package]] 1277 | name = "memmap2" 1278 | version = "0.7.1" 1279 | source = "registry+https://github.com/rust-lang/crates.io-index" 1280 | checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" 1281 | dependencies = [ 1282 | "libc", 1283 | ] 1284 | 1285 | [[package]] 1286 | name = "memmap2" 1287 | version = "0.9.4" 1288 | source = "registry+https://github.com/rust-lang/crates.io-index" 1289 | checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" 1290 | dependencies = [ 1291 | "libc", 1292 | "stable_deref_trait", 1293 | ] 1294 | 1295 | [[package]] 1296 | name = "memoffset" 1297 | version = "0.7.1" 1298 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1299 | checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" 1300 | dependencies = [ 1301 | "autocfg", 1302 | ] 1303 | 1304 | [[package]] 1305 | name = "memoffset" 1306 | version = "0.8.0" 1307 | source = "registry+https://github.com/rust-lang/crates.io-index" 1308 | checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" 1309 | dependencies = [ 1310 | "autocfg", 1311 | ] 1312 | 1313 | [[package]] 1314 | name = "milli" 1315 | version = "1.8.1" 1316 | dependencies = [ 1317 | "arroy", 1318 | "bimap", 1319 | "bincode", 1320 | "bstr", 1321 | "bytemuck", 1322 | "byteorder", 1323 | "candle-core", 1324 | "candle-nn", 1325 | "candle-transformers", 1326 | "charabia", 1327 | "concat-arrays", 1328 | "crossbeam-channel", 1329 | "csv", 1330 | "deserr", 1331 | "either", 1332 | "filter-parser", 1333 | "flatten-serde-json", 1334 | "fst", 1335 | "fxhash", 1336 | "geoutils", 1337 | "grenad", 1338 | "heed", 1339 | "hf-hub", 1340 | "indexmap", 1341 | "itertools 0.11.0", 1342 | "json-depth-checker", 1343 | "levenshtein_automata", 1344 | "liquid", 1345 | "memmap2 0.7.1", 1346 | "obkv", 1347 | "once_cell", 1348 | "ordered-float", 1349 | "puffin", 1350 | "rand", 1351 | "rand_pcg", 1352 | "rayon", 1353 | "roaring", 1354 | "rstar", 1355 | "serde", 1356 | "serde_json", 1357 | "slice-group-by", 1358 | "smallstr", 1359 | "smallvec", 1360 | "smartstring", 1361 | "tempfile", 1362 | "thiserror", 1363 | "tiktoken-rs", 1364 | "time", 1365 | "tokenizers", 1366 | "tracing", 1367 | "ureq", 1368 | "url", 1369 | "uuid", 1370 | ] 1371 | 1372 | [[package]] 1373 | name = "milli_py" 1374 | version = "1.5.1" 1375 | dependencies = [ 1376 | "heed", 1377 | "milli", 1378 | "obkv", 1379 | "pyo3", 1380 | "serde", 1381 | "serde_json", 1382 | ] 1383 | 1384 | [[package]] 1385 | name = "minimal-lexical" 1386 | version = "0.2.1" 1387 | source = "registry+https://github.com/rust-lang/crates.io-index" 1388 | checksum = 
"68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 1389 | 1390 | [[package]] 1391 | name = "miniz_oxide" 1392 | version = "0.7.3" 1393 | source = "registry+https://github.com/rust-lang/crates.io-index" 1394 | checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" 1395 | dependencies = [ 1396 | "adler", 1397 | ] 1398 | 1399 | [[package]] 1400 | name = "monostate" 1401 | version = "0.1.13" 1402 | source = "registry+https://github.com/rust-lang/crates.io-index" 1403 | checksum = "0d208407d7552cd041d8cdb69a1bc3303e029c598738177a3d87082004dc0e1e" 1404 | dependencies = [ 1405 | "monostate-impl", 1406 | "serde", 1407 | ] 1408 | 1409 | [[package]] 1410 | name = "monostate-impl" 1411 | version = "0.1.13" 1412 | source = "registry+https://github.com/rust-lang/crates.io-index" 1413 | checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0" 1414 | dependencies = [ 1415 | "proc-macro2", 1416 | "quote", 1417 | "syn 2.0.66", 1418 | ] 1419 | 1420 | [[package]] 1421 | name = "nom" 1422 | version = "7.1.3" 1423 | source = "registry+https://github.com/rust-lang/crates.io-index" 1424 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 1425 | dependencies = [ 1426 | "memchr", 1427 | "minimal-lexical", 1428 | ] 1429 | 1430 | [[package]] 1431 | name = "nom_locate" 1432 | version = "4.2.0" 1433 | source = "registry+https://github.com/rust-lang/crates.io-index" 1434 | checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3" 1435 | dependencies = [ 1436 | "bytecount", 1437 | "memchr", 1438 | "nom", 1439 | ] 1440 | 1441 | [[package]] 1442 | name = "num-complex" 1443 | version = "0.4.5" 1444 | source = "registry+https://github.com/rust-lang/crates.io-index" 1445 | checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" 1446 | dependencies = [ 1447 | "bytemuck", 1448 | "num-traits", 1449 | ] 1450 | 1451 | [[package]] 1452 | name = "num-conv" 1453 | version = 
"0.1.0" 1454 | source = "registry+https://github.com/rust-lang/crates.io-index" 1455 | checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" 1456 | 1457 | [[package]] 1458 | name = "num-traits" 1459 | version = "0.2.15" 1460 | source = "registry+https://github.com/rust-lang/crates.io-index" 1461 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 1462 | dependencies = [ 1463 | "autocfg", 1464 | "libm", 1465 | ] 1466 | 1467 | [[package]] 1468 | name = "num_cpus" 1469 | version = "1.15.0" 1470 | source = "registry+https://github.com/rust-lang/crates.io-index" 1471 | checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" 1472 | dependencies = [ 1473 | "hermit-abi", 1474 | "libc", 1475 | ] 1476 | 1477 | [[package]] 1478 | name = "number_prefix" 1479 | version = "0.4.0" 1480 | source = "registry+https://github.com/rust-lang/crates.io-index" 1481 | checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" 1482 | 1483 | [[package]] 1484 | name = "obkv" 1485 | version = "0.2.1" 1486 | source = "registry+https://github.com/rust-lang/crates.io-index" 1487 | checksum = "6c459142426056c639ff88d053ebaaaeca0ee1411c94362892398ef4ccd81080" 1488 | 1489 | [[package]] 1490 | name = "once_cell" 1491 | version = "1.19.0" 1492 | source = "registry+https://github.com/rust-lang/crates.io-index" 1493 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 1494 | 1495 | [[package]] 1496 | name = "onig" 1497 | version = "6.4.0" 1498 | source = "registry+https://github.com/rust-lang/crates.io-index" 1499 | checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" 1500 | dependencies = [ 1501 | "bitflags 1.3.2", 1502 | "libc", 1503 | "once_cell", 1504 | "onig_sys", 1505 | ] 1506 | 1507 | [[package]] 1508 | name = "onig_sys" 1509 | version = "69.8.1" 1510 | source = "registry+https://github.com/rust-lang/crates.io-index" 1511 | checksum = 
"7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" 1512 | dependencies = [ 1513 | "cc", 1514 | "pkg-config", 1515 | ] 1516 | 1517 | [[package]] 1518 | name = "option-ext" 1519 | version = "0.2.0" 1520 | source = "registry+https://github.com/rust-lang/crates.io-index" 1521 | checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" 1522 | 1523 | [[package]] 1524 | name = "ordered-float" 1525 | version = "4.2.0" 1526 | source = "registry+https://github.com/rust-lang/crates.io-index" 1527 | checksum = "a76df7075c7d4d01fdcb46c912dd17fba5b60c78ea480b475f2b6ab6f666584e" 1528 | dependencies = [ 1529 | "num-traits", 1530 | ] 1531 | 1532 | [[package]] 1533 | name = "page_size" 1534 | version = "0.6.0" 1535 | source = "registry+https://github.com/rust-lang/crates.io-index" 1536 | checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" 1537 | dependencies = [ 1538 | "libc", 1539 | "winapi", 1540 | ] 1541 | 1542 | [[package]] 1543 | name = "parking_lot" 1544 | version = "0.12.1" 1545 | source = "registry+https://github.com/rust-lang/crates.io-index" 1546 | checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" 1547 | dependencies = [ 1548 | "lock_api", 1549 | "parking_lot_core", 1550 | ] 1551 | 1552 | [[package]] 1553 | name = "parking_lot_core" 1554 | version = "0.9.7" 1555 | source = "registry+https://github.com/rust-lang/crates.io-index" 1556 | checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" 1557 | dependencies = [ 1558 | "cfg-if", 1559 | "libc", 1560 | "redox_syscall", 1561 | "smallvec", 1562 | "windows-sys 0.45.0", 1563 | ] 1564 | 1565 | [[package]] 1566 | name = "paste" 1567 | version = "1.0.15" 1568 | source = "registry+https://github.com/rust-lang/crates.io-index" 1569 | checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" 1570 | 1571 | [[package]] 1572 | name = "percent-encoding" 1573 | version = "2.3.1" 1574 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1575 | checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" 1576 | 1577 | [[package]] 1578 | name = "pest" 1579 | version = "2.7.10" 1580 | source = "registry+https://github.com/rust-lang/crates.io-index" 1581 | checksum = "560131c633294438da9f7c4b08189194b20946c8274c6b9e38881a7874dc8ee8" 1582 | dependencies = [ 1583 | "memchr", 1584 | "thiserror", 1585 | "ucd-trie", 1586 | ] 1587 | 1588 | [[package]] 1589 | name = "pest_derive" 1590 | version = "2.7.10" 1591 | source = "registry+https://github.com/rust-lang/crates.io-index" 1592 | checksum = "26293c9193fbca7b1a3bf9b79dc1e388e927e6cacaa78b4a3ab705a1d3d41459" 1593 | dependencies = [ 1594 | "pest", 1595 | "pest_generator", 1596 | ] 1597 | 1598 | [[package]] 1599 | name = "pest_generator" 1600 | version = "2.7.10" 1601 | source = "registry+https://github.com/rust-lang/crates.io-index" 1602 | checksum = "3ec22af7d3fb470a85dd2ca96b7c577a1eb4ef6f1683a9fe9a8c16e136c04687" 1603 | dependencies = [ 1604 | "pest", 1605 | "pest_meta", 1606 | "proc-macro2", 1607 | "quote", 1608 | "syn 2.0.66", 1609 | ] 1610 | 1611 | [[package]] 1612 | name = "pest_meta" 1613 | version = "2.7.10" 1614 | source = "registry+https://github.com/rust-lang/crates.io-index" 1615 | checksum = "d7a240022f37c361ec1878d646fc5b7d7c4d28d5946e1a80ad5a7a4f4ca0bdcd" 1616 | dependencies = [ 1617 | "once_cell", 1618 | "pest", 1619 | "sha2", 1620 | ] 1621 | 1622 | [[package]] 1623 | name = "phf" 1624 | version = "0.11.2" 1625 | source = "registry+https://github.com/rust-lang/crates.io-index" 1626 | checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" 1627 | dependencies = [ 1628 | "phf_macros", 1629 | "phf_shared", 1630 | ] 1631 | 1632 | [[package]] 1633 | name = "phf_generator" 1634 | version = "0.11.2" 1635 | source = "registry+https://github.com/rust-lang/crates.io-index" 1636 | checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" 
1637 | dependencies = [ 1638 | "phf_shared", 1639 | "rand", 1640 | ] 1641 | 1642 | [[package]] 1643 | name = "phf_macros" 1644 | version = "0.11.2" 1645 | source = "registry+https://github.com/rust-lang/crates.io-index" 1646 | checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" 1647 | dependencies = [ 1648 | "phf_generator", 1649 | "phf_shared", 1650 | "proc-macro2", 1651 | "quote", 1652 | "syn 2.0.66", 1653 | ] 1654 | 1655 | [[package]] 1656 | name = "phf_shared" 1657 | version = "0.11.2" 1658 | source = "registry+https://github.com/rust-lang/crates.io-index" 1659 | checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" 1660 | dependencies = [ 1661 | "siphasher", 1662 | ] 1663 | 1664 | [[package]] 1665 | name = "pin-project-lite" 1666 | version = "0.2.9" 1667 | source = "registry+https://github.com/rust-lang/crates.io-index" 1668 | checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" 1669 | 1670 | [[package]] 1671 | name = "pkg-config" 1672 | version = "0.3.26" 1673 | source = "registry+https://github.com/rust-lang/crates.io-index" 1674 | checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" 1675 | 1676 | [[package]] 1677 | name = "portable-atomic" 1678 | version = "1.6.0" 1679 | source = "registry+https://github.com/rust-lang/crates.io-index" 1680 | checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" 1681 | 1682 | [[package]] 1683 | name = "powerfmt" 1684 | version = "0.2.0" 1685 | source = "registry+https://github.com/rust-lang/crates.io-index" 1686 | checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" 1687 | 1688 | [[package]] 1689 | name = "ppv-lite86" 1690 | version = "0.2.17" 1691 | source = "registry+https://github.com/rust-lang/crates.io-index" 1692 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 1693 | 1694 | [[package]] 1695 | name = "proc-macro2" 1696 | version = "1.0.85" 
1697 | source = "registry+https://github.com/rust-lang/crates.io-index" 1698 | checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" 1699 | dependencies = [ 1700 | "unicode-ident", 1701 | ] 1702 | 1703 | [[package]] 1704 | name = "puffin" 1705 | version = "0.16.0" 1706 | source = "registry+https://github.com/rust-lang/crates.io-index" 1707 | checksum = "76425abd4e1a0ad4bd6995dd974b52f414fca9974171df8e3708b3e660d05a21" 1708 | dependencies = [ 1709 | "anyhow", 1710 | "byteorder", 1711 | "cfg-if", 1712 | "instant", 1713 | "once_cell", 1714 | ] 1715 | 1716 | [[package]] 1717 | name = "pulp" 1718 | version = "0.18.21" 1719 | source = "registry+https://github.com/rust-lang/crates.io-index" 1720 | checksum = "0ec8d02258294f59e4e223b41ad7e81c874aa6b15bc4ced9ba3965826da0eed5" 1721 | dependencies = [ 1722 | "bytemuck", 1723 | "libm", 1724 | "num-complex", 1725 | "reborrow", 1726 | ] 1727 | 1728 | [[package]] 1729 | name = "pyo3" 1730 | version = "0.18.1" 1731 | source = "registry+https://github.com/rust-lang/crates.io-index" 1732 | checksum = "06a3d8e8a46ab2738109347433cb7b96dffda2e4a218b03ef27090238886b147" 1733 | dependencies = [ 1734 | "cfg-if", 1735 | "indoc", 1736 | "libc", 1737 | "memoffset 0.8.0", 1738 | "parking_lot", 1739 | "pyo3-build-config", 1740 | "pyo3-ffi", 1741 | "pyo3-macros", 1742 | "unindent", 1743 | ] 1744 | 1745 | [[package]] 1746 | name = "pyo3-build-config" 1747 | version = "0.18.1" 1748 | source = "registry+https://github.com/rust-lang/crates.io-index" 1749 | checksum = "75439f995d07ddfad42b192dfcf3bc66a7ecfd8b4a1f5f6f046aa5c2c5d7677d" 1750 | dependencies = [ 1751 | "once_cell", 1752 | "target-lexicon", 1753 | ] 1754 | 1755 | [[package]] 1756 | name = "pyo3-ffi" 1757 | version = "0.18.1" 1758 | source = "registry+https://github.com/rust-lang/crates.io-index" 1759 | checksum = "839526a5c07a17ff44823679b68add4a58004de00512a95b6c1c98a6dcac0ee5" 1760 | dependencies = [ 1761 | "libc", 1762 | "pyo3-build-config", 1763 | ] 1764 | 1765 
| [[package]] 1766 | name = "pyo3-macros" 1767 | version = "0.18.1" 1768 | source = "registry+https://github.com/rust-lang/crates.io-index" 1769 | checksum = "bd44cf207476c6a9760c4653559be4f206efafb924d3e4cbf2721475fc0d6cc5" 1770 | dependencies = [ 1771 | "proc-macro2", 1772 | "pyo3-macros-backend", 1773 | "quote", 1774 | "syn 1.0.107", 1775 | ] 1776 | 1777 | [[package]] 1778 | name = "pyo3-macros-backend" 1779 | version = "0.18.1" 1780 | source = "registry+https://github.com/rust-lang/crates.io-index" 1781 | checksum = "dc1f43d8e30460f36350d18631ccf85ded64c059829208fe680904c65bcd0a4c" 1782 | dependencies = [ 1783 | "proc-macro2", 1784 | "quote", 1785 | "syn 1.0.107", 1786 | ] 1787 | 1788 | [[package]] 1789 | name = "quote" 1790 | version = "1.0.36" 1791 | source = "registry+https://github.com/rust-lang/crates.io-index" 1792 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 1793 | dependencies = [ 1794 | "proc-macro2", 1795 | ] 1796 | 1797 | [[package]] 1798 | name = "rand" 1799 | version = "0.8.5" 1800 | source = "registry+https://github.com/rust-lang/crates.io-index" 1801 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 1802 | dependencies = [ 1803 | "libc", 1804 | "rand_chacha", 1805 | "rand_core", 1806 | ] 1807 | 1808 | [[package]] 1809 | name = "rand_chacha" 1810 | version = "0.3.1" 1811 | source = "registry+https://github.com/rust-lang/crates.io-index" 1812 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 1813 | dependencies = [ 1814 | "ppv-lite86", 1815 | "rand_core", 1816 | ] 1817 | 1818 | [[package]] 1819 | name = "rand_core" 1820 | version = "0.6.4" 1821 | source = "registry+https://github.com/rust-lang/crates.io-index" 1822 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 1823 | dependencies = [ 1824 | "getrandom", 1825 | ] 1826 | 1827 | [[package]] 1828 | name = "rand_distr" 1829 | version = "0.4.3" 1830 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1831 | checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" 1832 | dependencies = [ 1833 | "num-traits", 1834 | "rand", 1835 | ] 1836 | 1837 | [[package]] 1838 | name = "rand_pcg" 1839 | version = "0.3.1" 1840 | source = "registry+https://github.com/rust-lang/crates.io-index" 1841 | checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e" 1842 | dependencies = [ 1843 | "rand_core", 1844 | "serde", 1845 | ] 1846 | 1847 | [[package]] 1848 | name = "raw-cpuid" 1849 | version = "10.7.0" 1850 | source = "registry+https://github.com/rust-lang/crates.io-index" 1851 | checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" 1852 | dependencies = [ 1853 | "bitflags 1.3.2", 1854 | ] 1855 | 1856 | [[package]] 1857 | name = "rayon" 1858 | version = "1.10.0" 1859 | source = "registry+https://github.com/rust-lang/crates.io-index" 1860 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 1861 | dependencies = [ 1862 | "either", 1863 | "rayon-core", 1864 | ] 1865 | 1866 | [[package]] 1867 | name = "rayon-cond" 1868 | version = "0.3.0" 1869 | source = "registry+https://github.com/rust-lang/crates.io-index" 1870 | checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9" 1871 | dependencies = [ 1872 | "either", 1873 | "itertools 0.11.0", 1874 | "rayon", 1875 | ] 1876 | 1877 | [[package]] 1878 | name = "rayon-core" 1879 | version = "1.12.1" 1880 | source = "registry+https://github.com/rust-lang/crates.io-index" 1881 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 1882 | dependencies = [ 1883 | "crossbeam-deque", 1884 | "crossbeam-utils", 1885 | ] 1886 | 1887 | [[package]] 1888 | name = "reborrow" 1889 | version = "0.5.5" 1890 | source = "registry+https://github.com/rust-lang/crates.io-index" 1891 | checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" 1892 | 
1893 | [[package]] 1894 | name = "redox_syscall" 1895 | version = "0.2.16" 1896 | source = "registry+https://github.com/rust-lang/crates.io-index" 1897 | checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" 1898 | dependencies = [ 1899 | "bitflags 1.3.2", 1900 | ] 1901 | 1902 | [[package]] 1903 | name = "redox_users" 1904 | version = "0.4.5" 1905 | source = "registry+https://github.com/rust-lang/crates.io-index" 1906 | checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" 1907 | dependencies = [ 1908 | "getrandom", 1909 | "libredox", 1910 | "thiserror", 1911 | ] 1912 | 1913 | [[package]] 1914 | name = "regex" 1915 | version = "1.10.5" 1916 | source = "registry+https://github.com/rust-lang/crates.io-index" 1917 | checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" 1918 | dependencies = [ 1919 | "aho-corasick", 1920 | "memchr", 1921 | "regex-automata", 1922 | "regex-syntax", 1923 | ] 1924 | 1925 | [[package]] 1926 | name = "regex-automata" 1927 | version = "0.4.7" 1928 | source = "registry+https://github.com/rust-lang/crates.io-index" 1929 | checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" 1930 | dependencies = [ 1931 | "aho-corasick", 1932 | "memchr", 1933 | "regex-syntax", 1934 | ] 1935 | 1936 | [[package]] 1937 | name = "regex-syntax" 1938 | version = "0.8.4" 1939 | source = "registry+https://github.com/rust-lang/crates.io-index" 1940 | checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" 1941 | 1942 | [[package]] 1943 | name = "ring" 1944 | version = "0.17.3" 1945 | source = "registry+https://github.com/rust-lang/crates.io-index" 1946 | checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" 1947 | dependencies = [ 1948 | "cc", 1949 | "getrandom", 1950 | "libc", 1951 | "spin", 1952 | "untrusted", 1953 | "windows-sys 0.48.0", 1954 | ] 1955 | 1956 | [[package]] 1957 | name = "roaring" 1958 | version = "0.10.5" 1959 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 1960 | checksum = "7699249cc2c7d71939f30868f47e9d7add0bdc030d90ee10bfd16887ff8bb1c8" 1961 | dependencies = [ 1962 | "bytemuck", 1963 | "byteorder", 1964 | ] 1965 | 1966 | [[package]] 1967 | name = "rstar" 1968 | version = "0.11.0" 1969 | source = "registry+https://github.com/rust-lang/crates.io-index" 1970 | checksum = "73111312eb7a2287d229f06c00ff35b51ddee180f017ab6dec1f69d62ac098d6" 1971 | dependencies = [ 1972 | "heapless", 1973 | "num-traits", 1974 | "serde", 1975 | "smallvec", 1976 | ] 1977 | 1978 | [[package]] 1979 | name = "rustc-hash" 1980 | version = "1.1.0" 1981 | source = "registry+https://github.com/rust-lang/crates.io-index" 1982 | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 1983 | 1984 | [[package]] 1985 | name = "rustc_version" 1986 | version = "0.4.0" 1987 | source = "registry+https://github.com/rust-lang/crates.io-index" 1988 | checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" 1989 | dependencies = [ 1990 | "semver", 1991 | ] 1992 | 1993 | [[package]] 1994 | name = "rustix" 1995 | version = "0.38.34" 1996 | source = "registry+https://github.com/rust-lang/crates.io-index" 1997 | checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" 1998 | dependencies = [ 1999 | "bitflags 2.5.0", 2000 | "errno", 2001 | "libc", 2002 | "linux-raw-sys", 2003 | "windows-sys 0.52.0", 2004 | ] 2005 | 2006 | [[package]] 2007 | name = "rustls" 2008 | version = "0.22.4" 2009 | source = "registry+https://github.com/rust-lang/crates.io-index" 2010 | checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" 2011 | dependencies = [ 2012 | "log", 2013 | "ring", 2014 | "rustls-pki-types", 2015 | "rustls-webpki", 2016 | "subtle", 2017 | "zeroize", 2018 | ] 2019 | 2020 | [[package]] 2021 | name = "rustls-pki-types" 2022 | version = "1.7.0" 2023 | source = "registry+https://github.com/rust-lang/crates.io-index" 2024 
| checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" 2025 | 2026 | [[package]] 2027 | name = "rustls-webpki" 2028 | version = "0.102.4" 2029 | source = "registry+https://github.com/rust-lang/crates.io-index" 2030 | checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" 2031 | dependencies = [ 2032 | "ring", 2033 | "rustls-pki-types", 2034 | "untrusted", 2035 | ] 2036 | 2037 | [[package]] 2038 | name = "ryu" 2039 | version = "1.0.12" 2040 | source = "registry+https://github.com/rust-lang/crates.io-index" 2041 | checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" 2042 | 2043 | [[package]] 2044 | name = "safetensors" 2045 | version = "0.4.3" 2046 | source = "registry+https://github.com/rust-lang/crates.io-index" 2047 | checksum = "8ced76b22c7fba1162f11a5a75d9d8405264b467a07ae0c9c29be119b9297db9" 2048 | dependencies = [ 2049 | "serde", 2050 | "serde_json", 2051 | ] 2052 | 2053 | [[package]] 2054 | name = "same-file" 2055 | version = "1.0.6" 2056 | source = "registry+https://github.com/rust-lang/crates.io-index" 2057 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 2058 | dependencies = [ 2059 | "winapi-util", 2060 | ] 2061 | 2062 | [[package]] 2063 | name = "scopeguard" 2064 | version = "1.1.0" 2065 | source = "registry+https://github.com/rust-lang/crates.io-index" 2066 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 2067 | 2068 | [[package]] 2069 | name = "semver" 2070 | version = "1.0.16" 2071 | source = "registry+https://github.com/rust-lang/crates.io-index" 2072 | checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" 2073 | 2074 | [[package]] 2075 | name = "seq-macro" 2076 | version = "0.3.5" 2077 | source = "registry+https://github.com/rust-lang/crates.io-index" 2078 | checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" 2079 | 2080 | [[package]] 2081 | name = "serde" 2082 | 
version = "1.0.203" 2083 | source = "registry+https://github.com/rust-lang/crates.io-index" 2084 | checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" 2085 | dependencies = [ 2086 | "serde_derive", 2087 | ] 2088 | 2089 | [[package]] 2090 | name = "serde-cs" 2091 | version = "0.2.4" 2092 | source = "registry+https://github.com/rust-lang/crates.io-index" 2093 | checksum = "07d1716265edfe2ff32c1f3168aedc01964b75432aadae3407c9f1fea40557c9" 2094 | dependencies = [ 2095 | "serde", 2096 | ] 2097 | 2098 | [[package]] 2099 | name = "serde_derive" 2100 | version = "1.0.203" 2101 | source = "registry+https://github.com/rust-lang/crates.io-index" 2102 | checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" 2103 | dependencies = [ 2104 | "proc-macro2", 2105 | "quote", 2106 | "syn 2.0.66", 2107 | ] 2108 | 2109 | [[package]] 2110 | name = "serde_json" 2111 | version = "1.0.117" 2112 | source = "registry+https://github.com/rust-lang/crates.io-index" 2113 | checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" 2114 | dependencies = [ 2115 | "indexmap", 2116 | "itoa", 2117 | "ryu", 2118 | "serde", 2119 | ] 2120 | 2121 | [[package]] 2122 | name = "serde_plain" 2123 | version = "1.0.2" 2124 | source = "registry+https://github.com/rust-lang/crates.io-index" 2125 | checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50" 2126 | dependencies = [ 2127 | "serde", 2128 | ] 2129 | 2130 | [[package]] 2131 | name = "serde_urlencoded" 2132 | version = "0.7.1" 2133 | source = "registry+https://github.com/rust-lang/crates.io-index" 2134 | checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" 2135 | dependencies = [ 2136 | "form_urlencoded", 2137 | "itoa", 2138 | "ryu", 2139 | "serde", 2140 | ] 2141 | 2142 | [[package]] 2143 | name = "sha2" 2144 | version = "0.10.8" 2145 | source = "registry+https://github.com/rust-lang/crates.io-index" 2146 | checksum = 
"793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" 2147 | dependencies = [ 2148 | "cfg-if", 2149 | "cpufeatures", 2150 | "digest", 2151 | ] 2152 | 2153 | [[package]] 2154 | name = "siphasher" 2155 | version = "0.3.11" 2156 | source = "registry+https://github.com/rust-lang/crates.io-index" 2157 | checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" 2158 | 2159 | [[package]] 2160 | name = "slice-group-by" 2161 | version = "0.3.1" 2162 | source = "registry+https://github.com/rust-lang/crates.io-index" 2163 | checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" 2164 | 2165 | [[package]] 2166 | name = "smallstr" 2167 | version = "0.3.0" 2168 | source = "registry+https://github.com/rust-lang/crates.io-index" 2169 | checksum = "63b1aefdf380735ff8ded0b15f31aab05daf1f70216c01c02a12926badd1df9d" 2170 | dependencies = [ 2171 | "serde", 2172 | "smallvec", 2173 | ] 2174 | 2175 | [[package]] 2176 | name = "smallvec" 2177 | version = "1.13.2" 2178 | source = "registry+https://github.com/rust-lang/crates.io-index" 2179 | checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" 2180 | 2181 | [[package]] 2182 | name = "smartstring" 2183 | version = "1.0.1" 2184 | source = "registry+https://github.com/rust-lang/crates.io-index" 2185 | checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" 2186 | dependencies = [ 2187 | "autocfg", 2188 | "static_assertions", 2189 | "version_check", 2190 | ] 2191 | 2192 | [[package]] 2193 | name = "socks" 2194 | version = "0.3.4" 2195 | source = "registry+https://github.com/rust-lang/crates.io-index" 2196 | checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" 2197 | dependencies = [ 2198 | "byteorder", 2199 | "libc", 2200 | "winapi", 2201 | ] 2202 | 2203 | [[package]] 2204 | name = "spin" 2205 | version = "0.9.5" 2206 | source = "registry+https://github.com/rust-lang/crates.io-index" 2207 | checksum = 
"7dccf47db1b41fa1573ed27ccf5e08e3ca771cb994f776668c5ebda893b248fc" 2208 | dependencies = [ 2209 | "lock_api", 2210 | ] 2211 | 2212 | [[package]] 2213 | name = "spm_precompiled" 2214 | version = "0.1.4" 2215 | source = "registry+https://github.com/rust-lang/crates.io-index" 2216 | checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" 2217 | dependencies = [ 2218 | "base64 0.13.1", 2219 | "nom", 2220 | "serde", 2221 | "unicode-segmentation", 2222 | ] 2223 | 2224 | [[package]] 2225 | name = "stable_deref_trait" 2226 | version = "1.2.0" 2227 | source = "registry+https://github.com/rust-lang/crates.io-index" 2228 | checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" 2229 | 2230 | [[package]] 2231 | name = "static_assertions" 2232 | version = "1.1.0" 2233 | source = "registry+https://github.com/rust-lang/crates.io-index" 2234 | checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" 2235 | 2236 | [[package]] 2237 | name = "strsim" 2238 | version = "0.10.0" 2239 | source = "registry+https://github.com/rust-lang/crates.io-index" 2240 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 2241 | 2242 | [[package]] 2243 | name = "subtle" 2244 | version = "2.5.0" 2245 | source = "registry+https://github.com/rust-lang/crates.io-index" 2246 | checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" 2247 | 2248 | [[package]] 2249 | name = "syn" 2250 | version = "1.0.107" 2251 | source = "registry+https://github.com/rust-lang/crates.io-index" 2252 | checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" 2253 | dependencies = [ 2254 | "proc-macro2", 2255 | "quote", 2256 | "unicode-ident", 2257 | ] 2258 | 2259 | [[package]] 2260 | name = "syn" 2261 | version = "2.0.66" 2262 | source = "registry+https://github.com/rust-lang/crates.io-index" 2263 | checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" 2264 | dependencies 
= [ 2265 | "proc-macro2", 2266 | "quote", 2267 | "unicode-ident", 2268 | ] 2269 | 2270 | [[package]] 2271 | name = "synchronoise" 2272 | version = "1.0.1" 2273 | source = "registry+https://github.com/rust-lang/crates.io-index" 2274 | checksum = "3dbc01390fc626ce8d1cffe3376ded2b72a11bb70e1c75f404a210e4daa4def2" 2275 | dependencies = [ 2276 | "crossbeam-queue", 2277 | ] 2278 | 2279 | [[package]] 2280 | name = "synstructure" 2281 | version = "0.13.1" 2282 | source = "registry+https://github.com/rust-lang/crates.io-index" 2283 | checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" 2284 | dependencies = [ 2285 | "proc-macro2", 2286 | "quote", 2287 | "syn 2.0.66", 2288 | ] 2289 | 2290 | [[package]] 2291 | name = "sysctl" 2292 | version = "0.5.5" 2293 | source = "registry+https://github.com/rust-lang/crates.io-index" 2294 | checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" 2295 | dependencies = [ 2296 | "bitflags 2.5.0", 2297 | "byteorder", 2298 | "enum-as-inner", 2299 | "libc", 2300 | "thiserror", 2301 | "walkdir", 2302 | ] 2303 | 2304 | [[package]] 2305 | name = "target-lexicon" 2306 | version = "0.12.5" 2307 | source = "registry+https://github.com/rust-lang/crates.io-index" 2308 | checksum = "9410d0f6853b1d94f0e519fb95df60f29d2c1eff2d921ffdf01a4c8a3b54f12d" 2309 | 2310 | [[package]] 2311 | name = "tempfile" 2312 | version = "3.10.1" 2313 | source = "registry+https://github.com/rust-lang/crates.io-index" 2314 | checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" 2315 | dependencies = [ 2316 | "cfg-if", 2317 | "fastrand", 2318 | "rustix", 2319 | "windows-sys 0.52.0", 2320 | ] 2321 | 2322 | [[package]] 2323 | name = "thiserror" 2324 | version = "1.0.61" 2325 | source = "registry+https://github.com/rust-lang/crates.io-index" 2326 | checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" 2327 | dependencies = [ 2328 | "thiserror-impl", 2329 | ] 2330 | 2331 | [[package]] 2332 
| name = "thiserror-impl" 2333 | version = "1.0.61" 2334 | source = "registry+https://github.com/rust-lang/crates.io-index" 2335 | checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" 2336 | dependencies = [ 2337 | "proc-macro2", 2338 | "quote", 2339 | "syn 2.0.66", 2340 | ] 2341 | 2342 | [[package]] 2343 | name = "tiktoken-rs" 2344 | version = "0.5.9" 2345 | source = "registry+https://github.com/rust-lang/crates.io-index" 2346 | checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234" 2347 | dependencies = [ 2348 | "anyhow", 2349 | "base64 0.21.7", 2350 | "bstr", 2351 | "fancy-regex", 2352 | "lazy_static", 2353 | "parking_lot", 2354 | "rustc-hash", 2355 | ] 2356 | 2357 | [[package]] 2358 | name = "time" 2359 | version = "0.3.36" 2360 | source = "registry+https://github.com/rust-lang/crates.io-index" 2361 | checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" 2362 | dependencies = [ 2363 | "deranged", 2364 | "itoa", 2365 | "num-conv", 2366 | "powerfmt", 2367 | "serde", 2368 | "time-core", 2369 | "time-macros", 2370 | ] 2371 | 2372 | [[package]] 2373 | name = "time-core" 2374 | version = "0.1.2" 2375 | source = "registry+https://github.com/rust-lang/crates.io-index" 2376 | checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" 2377 | 2378 | [[package]] 2379 | name = "time-macros" 2380 | version = "0.2.18" 2381 | source = "registry+https://github.com/rust-lang/crates.io-index" 2382 | checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" 2383 | dependencies = [ 2384 | "num-conv", 2385 | "time-core", 2386 | ] 2387 | 2388 | [[package]] 2389 | name = "tinyvec" 2390 | version = "1.6.0" 2391 | source = "registry+https://github.com/rust-lang/crates.io-index" 2392 | checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" 2393 | dependencies = [ 2394 | "tinyvec_macros", 2395 | ] 2396 | 2397 | [[package]] 2398 | name = "tinyvec_macros" 
2399 | version = "0.1.1" 2400 | source = "registry+https://github.com/rust-lang/crates.io-index" 2401 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 2402 | 2403 | [[package]] 2404 | name = "tokenizers" 2405 | version = "0.15.2" 2406 | source = "git+https://github.com/huggingface/tokenizers.git?tag=v0.15.2#701a73b869602b5639589d197e805349cdba3223" 2407 | dependencies = [ 2408 | "aho-corasick", 2409 | "derive_builder", 2410 | "esaxx-rs", 2411 | "getrandom", 2412 | "itertools 0.12.1", 2413 | "lazy_static", 2414 | "log", 2415 | "macro_rules_attribute", 2416 | "monostate", 2417 | "onig", 2418 | "paste", 2419 | "rand", 2420 | "rayon", 2421 | "rayon-cond", 2422 | "regex", 2423 | "regex-syntax", 2424 | "serde", 2425 | "serde_json", 2426 | "spm_precompiled", 2427 | "thiserror", 2428 | "unicode-normalization-alignments", 2429 | "unicode-segmentation", 2430 | "unicode_categories", 2431 | ] 2432 | 2433 | [[package]] 2434 | name = "tracing" 2435 | version = "0.1.40" 2436 | source = "registry+https://github.com/rust-lang/crates.io-index" 2437 | checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" 2438 | dependencies = [ 2439 | "pin-project-lite", 2440 | "tracing-attributes", 2441 | "tracing-core", 2442 | ] 2443 | 2444 | [[package]] 2445 | name = "tracing-attributes" 2446 | version = "0.1.27" 2447 | source = "registry+https://github.com/rust-lang/crates.io-index" 2448 | checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" 2449 | dependencies = [ 2450 | "proc-macro2", 2451 | "quote", 2452 | "syn 2.0.66", 2453 | ] 2454 | 2455 | [[package]] 2456 | name = "tracing-core" 2457 | version = "0.1.32" 2458 | source = "registry+https://github.com/rust-lang/crates.io-index" 2459 | checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" 2460 | dependencies = [ 2461 | "once_cell", 2462 | ] 2463 | 2464 | [[package]] 2465 | name = "typenum" 2466 | version = "1.16.0" 2467 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 2468 | checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" 2469 | 2470 | [[package]] 2471 | name = "ucd-trie" 2472 | version = "0.1.6" 2473 | source = "registry+https://github.com/rust-lang/crates.io-index" 2474 | checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" 2475 | 2476 | [[package]] 2477 | name = "unescaper" 2478 | version = "0.1.4" 2479 | source = "registry+https://github.com/rust-lang/crates.io-index" 2480 | checksum = "0adf6ad32eb5b3cadff915f7b770faaac8f7ff0476633aa29eb0d9584d889d34" 2481 | dependencies = [ 2482 | "thiserror", 2483 | ] 2484 | 2485 | [[package]] 2486 | name = "unicode-bidi" 2487 | version = "0.3.10" 2488 | source = "registry+https://github.com/rust-lang/crates.io-index" 2489 | checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" 2490 | 2491 | [[package]] 2492 | name = "unicode-ident" 2493 | version = "1.0.6" 2494 | source = "registry+https://github.com/rust-lang/crates.io-index" 2495 | checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" 2496 | 2497 | [[package]] 2498 | name = "unicode-normalization" 2499 | version = "0.1.22" 2500 | source = "registry+https://github.com/rust-lang/crates.io-index" 2501 | checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" 2502 | dependencies = [ 2503 | "tinyvec", 2504 | ] 2505 | 2506 | [[package]] 2507 | name = "unicode-normalization-alignments" 2508 | version = "0.1.12" 2509 | source = "registry+https://github.com/rust-lang/crates.io-index" 2510 | checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" 2511 | dependencies = [ 2512 | "smallvec", 2513 | ] 2514 | 2515 | [[package]] 2516 | name = "unicode-segmentation" 2517 | version = "1.10.1" 2518 | source = "registry+https://github.com/rust-lang/crates.io-index" 2519 | checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" 2520 | 
2521 | [[package]] 2522 | name = "unicode-width" 2523 | version = "0.1.13" 2524 | source = "registry+https://github.com/rust-lang/crates.io-index" 2525 | checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" 2526 | 2527 | [[package]] 2528 | name = "unicode_categories" 2529 | version = "0.1.1" 2530 | source = "registry+https://github.com/rust-lang/crates.io-index" 2531 | checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" 2532 | 2533 | [[package]] 2534 | name = "unindent" 2535 | version = "0.1.11" 2536 | source = "registry+https://github.com/rust-lang/crates.io-index" 2537 | checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" 2538 | 2539 | [[package]] 2540 | name = "untrusted" 2541 | version = "0.9.0" 2542 | source = "registry+https://github.com/rust-lang/crates.io-index" 2543 | checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" 2544 | 2545 | [[package]] 2546 | name = "ureq" 2547 | version = "2.9.7" 2548 | source = "registry+https://github.com/rust-lang/crates.io-index" 2549 | checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd" 2550 | dependencies = [ 2551 | "base64 0.22.1", 2552 | "flate2", 2553 | "log", 2554 | "once_cell", 2555 | "rustls", 2556 | "rustls-pki-types", 2557 | "rustls-webpki", 2558 | "serde", 2559 | "serde_json", 2560 | "socks", 2561 | "url", 2562 | "webpki-roots", 2563 | ] 2564 | 2565 | [[package]] 2566 | name = "url" 2567 | version = "2.5.0" 2568 | source = "registry+https://github.com/rust-lang/crates.io-index" 2569 | checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" 2570 | dependencies = [ 2571 | "form_urlencoded", 2572 | "idna", 2573 | "percent-encoding", 2574 | ] 2575 | 2576 | [[package]] 2577 | name = "uuid" 2578 | version = "1.8.0" 2579 | source = "registry+https://github.com/rust-lang/crates.io-index" 2580 | checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" 
2581 | dependencies = [ 2582 | "getrandom", 2583 | ] 2584 | 2585 | [[package]] 2586 | name = "version_check" 2587 | version = "0.9.4" 2588 | source = "registry+https://github.com/rust-lang/crates.io-index" 2589 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 2590 | 2591 | [[package]] 2592 | name = "walkdir" 2593 | version = "2.5.0" 2594 | source = "registry+https://github.com/rust-lang/crates.io-index" 2595 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 2596 | dependencies = [ 2597 | "same-file", 2598 | "winapi-util", 2599 | ] 2600 | 2601 | [[package]] 2602 | name = "wasi" 2603 | version = "0.11.0+wasi-snapshot-preview1" 2604 | source = "registry+https://github.com/rust-lang/crates.io-index" 2605 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 2606 | 2607 | [[package]] 2608 | name = "webpki-roots" 2609 | version = "0.26.2" 2610 | source = "registry+https://github.com/rust-lang/crates.io-index" 2611 | checksum = "3c452ad30530b54a4d8e71952716a212b08efd0f3562baa66c29a618b07da7c3" 2612 | dependencies = [ 2613 | "rustls-pki-types", 2614 | ] 2615 | 2616 | [[package]] 2617 | name = "whatlang" 2618 | version = "0.16.4" 2619 | source = "registry+https://github.com/rust-lang/crates.io-index" 2620 | checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0" 2621 | dependencies = [ 2622 | "hashbrown", 2623 | "once_cell", 2624 | ] 2625 | 2626 | [[package]] 2627 | name = "winapi" 2628 | version = "0.3.9" 2629 | source = "registry+https://github.com/rust-lang/crates.io-index" 2630 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 2631 | dependencies = [ 2632 | "winapi-i686-pc-windows-gnu", 2633 | "winapi-x86_64-pc-windows-gnu", 2634 | ] 2635 | 2636 | [[package]] 2637 | name = "winapi-i686-pc-windows-gnu" 2638 | version = "0.4.0" 2639 | source = "registry+https://github.com/rust-lang/crates.io-index" 2640 | checksum = 
"ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 2641 | 2642 | [[package]] 2643 | name = "winapi-util" 2644 | version = "0.1.8" 2645 | source = "registry+https://github.com/rust-lang/crates.io-index" 2646 | checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" 2647 | dependencies = [ 2648 | "windows-sys 0.52.0", 2649 | ] 2650 | 2651 | [[package]] 2652 | name = "winapi-x86_64-pc-windows-gnu" 2653 | version = "0.4.0" 2654 | source = "registry+https://github.com/rust-lang/crates.io-index" 2655 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 2656 | 2657 | [[package]] 2658 | name = "windows-sys" 2659 | version = "0.45.0" 2660 | source = "registry+https://github.com/rust-lang/crates.io-index" 2661 | checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" 2662 | dependencies = [ 2663 | "windows-targets 0.42.1", 2664 | ] 2665 | 2666 | [[package]] 2667 | name = "windows-sys" 2668 | version = "0.48.0" 2669 | source = "registry+https://github.com/rust-lang/crates.io-index" 2670 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" 2671 | dependencies = [ 2672 | "windows-targets 0.48.0", 2673 | ] 2674 | 2675 | [[package]] 2676 | name = "windows-sys" 2677 | version = "0.52.0" 2678 | source = "registry+https://github.com/rust-lang/crates.io-index" 2679 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 2680 | dependencies = [ 2681 | "windows-targets 0.52.5", 2682 | ] 2683 | 2684 | [[package]] 2685 | name = "windows-targets" 2686 | version = "0.42.1" 2687 | source = "registry+https://github.com/rust-lang/crates.io-index" 2688 | checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" 2689 | dependencies = [ 2690 | "windows_aarch64_gnullvm 0.42.1", 2691 | "windows_aarch64_msvc 0.42.1", 2692 | "windows_i686_gnu 0.42.1", 2693 | "windows_i686_msvc 0.42.1", 2694 | "windows_x86_64_gnu 0.42.1", 2695 | 
"windows_x86_64_gnullvm 0.42.1", 2696 | "windows_x86_64_msvc 0.42.1", 2697 | ] 2698 | 2699 | [[package]] 2700 | name = "windows-targets" 2701 | version = "0.48.0" 2702 | source = "registry+https://github.com/rust-lang/crates.io-index" 2703 | checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" 2704 | dependencies = [ 2705 | "windows_aarch64_gnullvm 0.48.0", 2706 | "windows_aarch64_msvc 0.48.0", 2707 | "windows_i686_gnu 0.48.0", 2708 | "windows_i686_msvc 0.48.0", 2709 | "windows_x86_64_gnu 0.48.0", 2710 | "windows_x86_64_gnullvm 0.48.0", 2711 | "windows_x86_64_msvc 0.48.0", 2712 | ] 2713 | 2714 | [[package]] 2715 | name = "windows-targets" 2716 | version = "0.52.5" 2717 | source = "registry+https://github.com/rust-lang/crates.io-index" 2718 | checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" 2719 | dependencies = [ 2720 | "windows_aarch64_gnullvm 0.52.5", 2721 | "windows_aarch64_msvc 0.52.5", 2722 | "windows_i686_gnu 0.52.5", 2723 | "windows_i686_gnullvm", 2724 | "windows_i686_msvc 0.52.5", 2725 | "windows_x86_64_gnu 0.52.5", 2726 | "windows_x86_64_gnullvm 0.52.5", 2727 | "windows_x86_64_msvc 0.52.5", 2728 | ] 2729 | 2730 | [[package]] 2731 | name = "windows_aarch64_gnullvm" 2732 | version = "0.42.1" 2733 | source = "registry+https://github.com/rust-lang/crates.io-index" 2734 | checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" 2735 | 2736 | [[package]] 2737 | name = "windows_aarch64_gnullvm" 2738 | version = "0.48.0" 2739 | source = "registry+https://github.com/rust-lang/crates.io-index" 2740 | checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" 2741 | 2742 | [[package]] 2743 | name = "windows_aarch64_gnullvm" 2744 | version = "0.52.5" 2745 | source = "registry+https://github.com/rust-lang/crates.io-index" 2746 | checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" 2747 | 2748 | [[package]] 2749 | name = "windows_aarch64_msvc" 2750 | 
version = "0.42.1" 2751 | source = "registry+https://github.com/rust-lang/crates.io-index" 2752 | checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" 2753 | 2754 | [[package]] 2755 | name = "windows_aarch64_msvc" 2756 | version = "0.48.0" 2757 | source = "registry+https://github.com/rust-lang/crates.io-index" 2758 | checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" 2759 | 2760 | [[package]] 2761 | name = "windows_aarch64_msvc" 2762 | version = "0.52.5" 2763 | source = "registry+https://github.com/rust-lang/crates.io-index" 2764 | checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" 2765 | 2766 | [[package]] 2767 | name = "windows_i686_gnu" 2768 | version = "0.42.1" 2769 | source = "registry+https://github.com/rust-lang/crates.io-index" 2770 | checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" 2771 | 2772 | [[package]] 2773 | name = "windows_i686_gnu" 2774 | version = "0.48.0" 2775 | source = "registry+https://github.com/rust-lang/crates.io-index" 2776 | checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" 2777 | 2778 | [[package]] 2779 | name = "windows_i686_gnu" 2780 | version = "0.52.5" 2781 | source = "registry+https://github.com/rust-lang/crates.io-index" 2782 | checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" 2783 | 2784 | [[package]] 2785 | name = "windows_i686_gnullvm" 2786 | version = "0.52.5" 2787 | source = "registry+https://github.com/rust-lang/crates.io-index" 2788 | checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" 2789 | 2790 | [[package]] 2791 | name = "windows_i686_msvc" 2792 | version = "0.42.1" 2793 | source = "registry+https://github.com/rust-lang/crates.io-index" 2794 | checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" 2795 | 2796 | [[package]] 2797 | name = "windows_i686_msvc" 2798 | version = "0.48.0" 2799 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 2800 | checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" 2801 | 2802 | [[package]] 2803 | name = "windows_i686_msvc" 2804 | version = "0.52.5" 2805 | source = "registry+https://github.com/rust-lang/crates.io-index" 2806 | checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" 2807 | 2808 | [[package]] 2809 | name = "windows_x86_64_gnu" 2810 | version = "0.42.1" 2811 | source = "registry+https://github.com/rust-lang/crates.io-index" 2812 | checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" 2813 | 2814 | [[package]] 2815 | name = "windows_x86_64_gnu" 2816 | version = "0.48.0" 2817 | source = "registry+https://github.com/rust-lang/crates.io-index" 2818 | checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" 2819 | 2820 | [[package]] 2821 | name = "windows_x86_64_gnu" 2822 | version = "0.52.5" 2823 | source = "registry+https://github.com/rust-lang/crates.io-index" 2824 | checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" 2825 | 2826 | [[package]] 2827 | name = "windows_x86_64_gnullvm" 2828 | version = "0.42.1" 2829 | source = "registry+https://github.com/rust-lang/crates.io-index" 2830 | checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" 2831 | 2832 | [[package]] 2833 | name = "windows_x86_64_gnullvm" 2834 | version = "0.48.0" 2835 | source = "registry+https://github.com/rust-lang/crates.io-index" 2836 | checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" 2837 | 2838 | [[package]] 2839 | name = "windows_x86_64_gnullvm" 2840 | version = "0.52.5" 2841 | source = "registry+https://github.com/rust-lang/crates.io-index" 2842 | checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" 2843 | 2844 | [[package]] 2845 | name = "windows_x86_64_msvc" 2846 | version = "0.42.1" 2847 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 2848 | checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" 2849 | 2850 | [[package]] 2851 | name = "windows_x86_64_msvc" 2852 | version = "0.48.0" 2853 | source = "registry+https://github.com/rust-lang/crates.io-index" 2854 | checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" 2855 | 2856 | [[package]] 2857 | name = "windows_x86_64_msvc" 2858 | version = "0.52.5" 2859 | source = "registry+https://github.com/rust-lang/crates.io-index" 2860 | checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" 2861 | 2862 | [[package]] 2863 | name = "yoke" 2864 | version = "0.7.4" 2865 | source = "registry+https://github.com/rust-lang/crates.io-index" 2866 | checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" 2867 | dependencies = [ 2868 | "serde", 2869 | "stable_deref_trait", 2870 | "yoke-derive", 2871 | "zerofrom", 2872 | ] 2873 | 2874 | [[package]] 2875 | name = "yoke-derive" 2876 | version = "0.7.4" 2877 | source = "registry+https://github.com/rust-lang/crates.io-index" 2878 | checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" 2879 | dependencies = [ 2880 | "proc-macro2", 2881 | "quote", 2882 | "syn 2.0.66", 2883 | "synstructure", 2884 | ] 2885 | 2886 | [[package]] 2887 | name = "zerocopy" 2888 | version = "0.7.34" 2889 | source = "registry+https://github.com/rust-lang/crates.io-index" 2890 | checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" 2891 | dependencies = [ 2892 | "zerocopy-derive", 2893 | ] 2894 | 2895 | [[package]] 2896 | name = "zerocopy-derive" 2897 | version = "0.7.34" 2898 | source = "registry+https://github.com/rust-lang/crates.io-index" 2899 | checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" 2900 | dependencies = [ 2901 | "proc-macro2", 2902 | "quote", 2903 | "syn 2.0.66", 2904 | ] 2905 | 2906 | [[package]] 2907 | name = 
"zerofrom" 2908 | version = "0.1.4" 2909 | source = "registry+https://github.com/rust-lang/crates.io-index" 2910 | checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" 2911 | dependencies = [ 2912 | "zerofrom-derive", 2913 | ] 2914 | 2915 | [[package]] 2916 | name = "zerofrom-derive" 2917 | version = "0.1.4" 2918 | source = "registry+https://github.com/rust-lang/crates.io-index" 2919 | checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" 2920 | dependencies = [ 2921 | "proc-macro2", 2922 | "quote", 2923 | "syn 2.0.66", 2924 | "synstructure", 2925 | ] 2926 | 2927 | [[package]] 2928 | name = "zeroize" 2929 | version = "1.8.1" 2930 | source = "registry+https://github.com/rust-lang/crates.io-index" 2931 | checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" 2932 | 2933 | [[package]] 2934 | name = "zerovec" 2935 | version = "0.10.2" 2936 | source = "registry+https://github.com/rust-lang/crates.io-index" 2937 | checksum = "bb2cc8827d6c0994478a15c53f374f46fbd41bea663d809b14744bc42e6b109c" 2938 | dependencies = [ 2939 | "zerofrom", 2940 | ] 2941 | 2942 | [[package]] 2943 | name = "zip" 2944 | version = "0.6.6" 2945 | source = "registry+https://github.com/rust-lang/crates.io-index" 2946 | checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" 2947 | dependencies = [ 2948 | "byteorder", 2949 | "crc32fast", 2950 | "crossbeam-utils", 2951 | ] 2952 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "milli_py" 3 | version = "1.8.1" 4 | edition = "2021" 5 | authors = ["Alexandro Sanchez Bach "] 6 | 7 | [lib] 8 | name = "milli" 9 | 10 | # Needed to produce a shared library for Python. 
11 | crate-type = ["cdylib"] 12 | 13 | [dependencies] 14 | milli = { path = "meilisearch/milli" } 15 | pyo3 = { version = "0.18.1", features = ["extension-module"] } 16 | serde_json = { version = "1.0.93" } 17 | serde = { version = "1.0.152" } 18 | heed = "=0.20.0-alpha.9" 19 | obkv = "0.2.1" 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Alexandro Sanchez 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # milli-py 2 | 3 | [![ci-badge](https://github.com/AlexAltea/milli-py/actions/workflows/ci.yml/badge.svg)](https://github.com/AlexAltea/milli-py/actions/workflows/ci.yml) 4 | [![](https://img.shields.io/pypi/v/milli.svg)](https://pypi.python.org/pypi/milli) 5 | 6 | Python bindings for [Milli](https://github.com/meilisearch/meilisearch/tree/main/milli), the embeddable Rust-based search engine powering [Meilisearch](https://www.meilisearch.com/). 7 | 8 | Due to limitations around Rust lifetimes, methods available via `IndexDocuments` and `Search` have been integrated directly into the `Index` class. This sacrifices/simplifies functionality available in the original *milli* package. 9 | 10 | Install the package via: 11 | 12 | ```sh 13 | pip install milli 14 | ``` 15 | 16 | ## Usage 17 | 18 | Basic usage of *milli-py*: 19 | 20 | ```py 21 | import milli 22 | 23 | index = milli.Index("path/to/index") 24 | index.add_documents([ 25 | { "id": 0, "title": "Hello world", "content": "This is a sample" }, 26 | { "id": 1, "title": "Hello moon", "content": "This is another sample" }, 27 | { "id": 2, "title": "Hello sun", "content": "This is yet another sample" }, 28 | ]) 29 | results = index.search("wrold") 30 | document = index.get_document(results[0]) 31 | assert(document['title'] == "Hello world") 32 | ``` 33 | 34 | ## Documentation 35 | 36 | Full documentation for *milli-py* is available at [`docs/index.md`](./docs/index.md). 37 | 38 | View it online at https://github.com/AlexAltea/milli-py/blob/master/docs/index.md.
39 | 40 | ## Examples 41 | 42 | Real-world examples of *milli-py* in action: 43 | 44 | - [Curator](https://github.com/AlexAltea/curator/blob/master/curator/databases/omdb.py#L9): Searching 140k movies (~10 MB csv, ~140 MB index) in around 36ms to obtain metadata from OMDB and auto-rename/tag movie collections. 45 | 46 | Do you have other interesting examples? Let me know! 47 | -------------------------------------------------------------------------------- /benchmarks/bench.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import random 4 | import string 5 | import tempfile 6 | import timeit 7 | 8 | # Configuration 9 | BENCH_TIMES = 1 10 | DOC_COUNT = 100000 11 | DOC_SIZE = 100 12 | 13 | def gen_documents(): 14 | documents = [] 15 | characters = ' ' + string.ascii_lowercase 16 | random.seed(0) 17 | for i in range(DOC_COUNT): 18 | data = ''.join(random.choices(characters, k=DOC_SIZE)) 19 | documents.append({ "id": 0, "data": data }) 20 | return documents 21 | 22 | def bench_milli(docs, target_query): 23 | import milli 24 | with tempfile.TemporaryDirectory() as tmp: 25 | ix = milli.Index(tmp) 26 | ix.add_documents(docs) 27 | t = timeit.timeit(lambda: ix.search(target_query), number=BENCH_TIMES) / BENCH_TIMES 28 | del(ix) 29 | return t 30 | 31 | def bench_whoosh(docs, target_query): 32 | from whoosh.util.testing import TempIndex 33 | from whoosh.fields import Schema, TEXT 34 | from whoosh.index import create_in 35 | from whoosh.qparser import QueryParser 36 | schema = Schema(data=TEXT(stored=True)) 37 | with TempIndex(schema) as ix: 38 | writer = ix.writer() 39 | for doc in docs: 40 | writer.add_document(data=doc['data']) 41 | writer.commit() 42 | with ix.searcher() as searcher: 43 | query = QueryParser("data", ix.schema).parse(target_query) 44 | t = timeit.timeit(lambda: searcher.search(query), number=BENCH_TIMES) / BENCH_TIMES 45 | return t 46 | 47 | def main(): 48 | print("Generating 
documents...") 49 | docs = gen_documents() 50 | target_docid = random.randint(0, DOC_COUNT) 51 | target_start = random.randint(1 * DOC_SIZE // 10, 52 | 9 * DOC_SIZE // 10) 53 | target_query = docs[target_docid]['data'][target_start:target_start+(DOC_SIZE // 10)] 54 | 55 | print("Bencharking Milli...") 56 | t_milli = bench_milli(docs, target_query) 57 | print(f'Time per query: {t_milli}') 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /benchmarks/requirements.txt: -------------------------------------------------------------------------------- 1 | Whoosh==2.7.4 -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | ## `Index` 4 | 5 | Main class used to interface with an index/database in the local filesystem. An *index* is a directory containing a pair of `data.mdb` and `lock.mdb` files. 6 | 7 | ### Constructor 8 | 9 | > *Index(path, map_size=...)* 10 | 11 | Opens or creates an index at the specified directory, limiting the maximum size of the underlying databse. 12 | 13 | | Parameter | Required | Type | Description | 14 | |-----------|----------|------|-------------| 15 | | `path` | Yes | [Path-like object](https://docs.python.org/3/glossary.html#term-path-like-object) | Index directory. Directory must exist. | 16 | | `map_size` | No | [`int`](https://docs.python.org/3/library/functions.html#int) | Maximum size in bytes of `data.mdb`. Defaults set by Milli. | 17 | 18 | Example: 19 | 20 | ```py 21 | >>> index = Index('path/to/index', map_size=2**30) # Open/create index of up-to 1 GiB 22 | ``` 23 | 24 | ### Methods 25 | 26 | #### `Index.add_documents` 27 | 28 | > *Index.add_documents(documents)* 29 | 30 | Adds documents to the index. 
31 | 32 | | Parameter | Required | Type | Description | 33 | |-----------|----------|------|-------------| 34 | | `documents` | Yes | [`List[Dict[str,Any]]`](https://docs.python.org/3/library/typing.html#typing.List) | List of JSON-convertible dictionaries, i.e. dictionaries with string keys mapping to integers, floats, booleans, strings, arrays, and other dictionaries with string keys (potentially nested). | 35 | 36 | Returns: `DocumentAdditionResult`. Object exposing the `indexed_documents` and `number_of_documents` counters. 37 | 38 | Example: 39 | 40 | ```py 41 | >>> index.add_documents([ 42 | { 'id': 0, 'title': 'Hello earth', 'tags': ['greeting', 'planet'], 'orbit': 3 }, 43 | { 'id': 1, 'title': 'Hello mars', 'tags': ['greeting', 'planet'], 'orbit': 4 }, 44 | { 'id': 2, 'title': 'Hello sun', 'tags': ['greeting', 'star'] }, 45 | ]) 46 | ``` 47 | 48 | #### `Index.all_documents` 49 | 50 | > *Index.all_documents()* 51 | 52 | Iterator of all documents in the index alongside their internal IDs. 53 | 54 | Returns: `Iterator[Tuple[int,Dict]]`. 55 | 56 | #### `Index.clear_documents` 57 | 58 | > *Index.clear_documents()* 59 | 60 | Removes all documents from the index. 61 | 62 | Returns: Number of documents removed. 63 | 64 | #### `Index.delete_documents` 65 | 66 | > *Index.delete_documents(ids)* 67 | 68 | Removes documents from the index given their external IDs. 69 | 70 | | Parameter | Required | Type | Description | 71 | |-----------|----------|------|-------------| 72 | | `ids` | Yes | [`List[str]`](https://docs.python.org/3/library/typing.html#typing.List) | List of strings, each corresponding to an external ID. | 73 | 74 | Returns: Number of documents removed. 75 | 76 | #### `Index.get_document` 77 | 78 | > *Index.get_document(id)* 79 | 80 | Obtain a document from the index given its internal ID. 81 | 82 | | Parameter | Required | Type | Description | 83 | |-----------|----------|------|-------------| 84 | | `id` | Yes | [`int`](https://docs.python.org/3/library/functions.html#int) | Internal document ID. 
| 85 | 86 | Returns: [`Dict[str,Any]`](https://docs.python.org/3/library/typing.html#typing.Dict). Document contents. 87 | 88 | Example: 89 | 90 | ```py 91 | >>> index.get_document(0) 92 | { 'id': 0, 'title': 'Hello earth', 'tags': ['greeting', 'planet'], 'orbit': 3 } 93 | ``` 94 | 95 | #### `Index.get_documents` 96 | 97 | > *Index.get_documents(ids)* 98 | 99 | Obtain a list of documents from the index given their internal IDs. 100 | 101 | | Parameter | Required | Type | Description | 102 | |-----------|----------|------|-------------| 103 | | `ids` | Yes | [`List[int]`](https://docs.python.org/3/library/typing.html#typing.List) | List of internal document IDs. | 104 | 105 | Returns: [`List[Dict[str,Any]]`](https://docs.python.org/3/library/typing.html#typing.List). List of document contents. 106 | 107 | Example (formatted): 108 | ```py 109 | >>> index.get_documents([1,2]) 110 | [ 111 | { 'id': 1, 'title': 'Hello mars', 'tags': ['greeting', 'planet'], 'orbit': 4 }, 112 | { 'id': 2, 'title': 'Hello sun', 'tags': ['greeting', 'star'] } 113 | ] 114 | ``` 115 | 116 | #### `Index.search` 117 | 118 | > *Index.search(query)* 119 | 120 | Searches the index for the given input string. 121 | 122 | | Parameter | Required | Type | Description | 123 | |-----------|----------|------|-------------| 124 | | `query` | Yes | [`str`](https://docs.python.org/3/library/stdtypes.html#str) | Text to query the index with. | 125 | 126 | Returns: [`List[int]`](https://docs.python.org/3/library/typing.html#typing.List). List of internal IDs of matching documents, sorted by decreasing match score. You can retrieve the full documents by applying [`Index.get_documents`](#indexget_documents) on this list. 
127 | 128 | Example: 129 | 130 | ```py 131 | >>> index.search('earht') 132 | [0] 133 | ``` 134 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "milli" 3 | requires-python = ">=3.8" 4 | repository = "https://github.com/AlexAltea/milli-py" 5 | classifiers = [ 6 | "Development Status :: 5 - Production/Stable", 7 | "Intended Audience :: Developers", 8 | "License :: OSI Approved :: MIT License", 9 | "Programming Language :: Python", 10 | "Programming Language :: Python :: 3.8", 11 | "Programming Language :: Python :: 3.9", 12 | "Programming Language :: Python :: 3.10", 13 | "Programming Language :: Python :: 3.11", 14 | "Programming Language :: Python :: 3.12", 15 | "Programming Language :: Python :: Implementation :: CPython", 16 | "Programming Language :: Rust", 17 | "Typing :: Typed", 18 | ] 19 | 20 | [build-system] 21 | requires = ["maturin>=0.14,<0.15"] 22 | build-backend = "maturin" 23 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | maturin==0.14.13 2 | -------------------------------------------------------------------------------- /src/conv.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | use std::fmt; 3 | use std::marker::PhantomData; 4 | 5 | use pyo3::exceptions::PyTypeError; 6 | use pyo3::prelude::*; 7 | use pyo3::types::{PyDict, PyFloat, PyList, PyTuple}; 8 | use serde::de::{DeserializeSeed, Deserializer, MapAccess, SeqAccess, Visitor}; 9 | 10 | // From https://github.com/mozilla-services/python-canonicaljson-rs/blob/62599b246055a1c8a78e5777acdfe0fd594be3d8/src/lib.rs#L87-L167 11 | #[derive(Debug)] 12 | pub enum PyCanonicalJSONError { 13 | InvalidConversion { error: String }, 14 | PyErr { 
error: String }, 15 | DictKeyNotSerializable { typename: String }, 16 | InvalidFloat { value: PyObject }, 17 | InvalidCast { typename: String }, 18 | } 19 | 20 | impl From for PyCanonicalJSONError { 21 | fn from(error: pyo3::PyErr) -> PyCanonicalJSONError { 22 | PyCanonicalJSONError::PyErr { 23 | error: format!("{:?}", error), 24 | } 25 | } 26 | } 27 | 28 | impl From for pyo3::PyErr { 29 | fn from(e: PyCanonicalJSONError) -> pyo3::PyErr { 30 | match e { 31 | PyCanonicalJSONError::InvalidConversion { error } => { 32 | PyErr::new::(format!("Conversion error: {:?}", error)) 33 | } 34 | PyCanonicalJSONError::PyErr { error } => { 35 | PyErr::new::(format!("Python Runtime exception: {}", error)) 36 | } 37 | PyCanonicalJSONError::DictKeyNotSerializable { typename } => { 38 | PyErr::new::(format!( 39 | "Dictionary key is not serializable: {}", 40 | typename 41 | )) 42 | } 43 | PyCanonicalJSONError::InvalidFloat { value } => { 44 | PyErr::new::(format!("Invalid float: {:?}", value)) 45 | } 46 | PyCanonicalJSONError::InvalidCast { typename } => { 47 | PyErr::new::(format!("Invalid type: {}", typename)) 48 | } 49 | } 50 | } 51 | } 52 | 53 | 54 | pub fn to_json(py: Python, obj: &PyObject) -> Result { 55 | macro_rules! return_cast { 56 | ($t:ty, $f:expr) => { 57 | if let Ok(val) = obj.downcast::<$t>(py) { 58 | return $f(val); 59 | } 60 | }; 61 | } 62 | 63 | macro_rules! return_to_value { 64 | ($t:ty) => { 65 | if let Ok(val) = obj.extract::<$t>(py) { 66 | return serde_json::value::to_value(val).map_err(|error| { 67 | PyCanonicalJSONError::InvalidConversion { 68 | error: format!("{}", error), 69 | } 70 | }); 71 | } 72 | }; 73 | } 74 | 75 | if obj.as_ref(py).eq(&py.None())? 
{ 76 | return Ok(serde_json::Value::Null); 77 | } 78 | 79 | return_to_value!(String); 80 | return_to_value!(bool); 81 | return_to_value!(u64); 82 | return_to_value!(i64); 83 | 84 | return_cast!(PyDict, |x: &PyDict| { 85 | let mut map = serde_json::Map::new(); 86 | for (key_obj, value) in x.iter() { 87 | let key = if key_obj.eq(py.None().as_ref(py))? { 88 | Ok("null".to_string()) 89 | } else if let Ok(val) = key_obj.extract::() { 90 | Ok(if val { 91 | "true".to_string() 92 | } else { 93 | "false".to_string() 94 | }) 95 | } else if let Ok(val) = key_obj.str() { 96 | Ok(val.to_string()) 97 | } else { 98 | Err(PyCanonicalJSONError::DictKeyNotSerializable { 99 | typename: key_obj 100 | .to_object(py) 101 | .as_ref(py) 102 | .get_type() 103 | .name()? 104 | .to_string(), 105 | }) 106 | }; 107 | map.insert(key?, to_json(py, &value.to_object(py))?); 108 | } 109 | Ok(serde_json::Value::Object(map)) 110 | }); 111 | 112 | return_cast!(PyList, |x: &PyList| Ok(serde_json::Value::Array( 113 | x.iter().map(|x| to_json(py, &x.to_object(py)).unwrap()).collect() 114 | ))); 115 | 116 | return_cast!(PyTuple, |x: &PyTuple| Ok(serde_json::Value::Array( 117 | x.iter().map(|x| to_json(py, &x.to_object(py)).unwrap()).collect() 118 | ))); 119 | 120 | return_cast!(PyFloat, |x: &PyFloat| { 121 | match serde_json::Number::from_f64(x.value()) { 122 | Some(n) => Ok(serde_json::Value::Number(n)), 123 | None => Err(PyCanonicalJSONError::InvalidFloat { 124 | value: x.to_object(py), 125 | }), 126 | } 127 | }); 128 | 129 | // At this point we can't cast it, set up the error object 130 | Err(PyCanonicalJSONError::InvalidCast { 131 | typename: obj.as_ref(py).get_type().name()?.to_string(), 132 | }) 133 | } 134 | 135 | 136 | // From https://github.com/mre/hyperjson/blob/87335d442869832b46e7e9f10800a27360dd8169/src/lib.rs#L397 137 | #[derive(Copy, Clone)] 138 | pub struct ObkvValue<'a> { 139 | py: Python<'a>, 140 | } 141 | 142 | impl<'a> ObkvValue<'a> { 143 | pub fn new(py: Python<'a>) -> ObkvValue<'a> { 
144 | ObkvValue { py } 145 | } 146 | } 147 | 148 | impl<'de, 'a> DeserializeSeed<'de> for ObkvValue<'a> { 149 | type Value = PyObject; 150 | 151 | fn deserialize(self, deserializer: D) -> Result 152 | where 153 | D: Deserializer<'de>, 154 | { 155 | deserializer.deserialize_any(self) 156 | } 157 | } 158 | 159 | impl<'de, 'a> Visitor<'de> for ObkvValue<'a> { 160 | type Value = PyObject; 161 | 162 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 163 | formatter.write_str("any valid JSON value") 164 | } 165 | 166 | fn visit_bool(self, value: bool) -> Result 167 | where E: serde::de::Error { 168 | Ok(value.to_object(self.py)) 169 | } 170 | 171 | fn visit_i64(self, value: i64) -> Result 172 | where E: serde::de::Error { 173 | Ok(value.to_object(self.py)) 174 | } 175 | 176 | fn visit_u64(self, value: u64) -> Result 177 | where E: serde::de::Error { 178 | Ok(value.to_object(self.py)) 179 | } 180 | 181 | fn visit_f64(self, value: f64) -> Result 182 | where E: serde::de::Error { 183 | Ok(value.to_object(self.py)) 184 | } 185 | 186 | fn visit_str(self, value: &str) -> Result 187 | where E: serde::de::Error { 188 | Ok(value.to_object(self.py)) 189 | } 190 | 191 | fn visit_unit(self) -> Result { 192 | Ok(self.py.None()) 193 | } 194 | 195 | fn visit_seq(self, mut seq: A) -> Result 196 | where A: SeqAccess<'de> { 197 | let mut elements = Vec::new(); 198 | while let Some(elem) = seq.next_element_seed(self)? { 199 | elements.push(elem); 200 | } 201 | Ok(elements.to_object(self.py)) 202 | } 203 | 204 | fn visit_map(self, mut map: A) -> Result 205 | where A: MapAccess<'de> { 206 | let mut entries = BTreeMap::new(); 207 | while let Some((key, value)) = map.next_entry_seed(PhantomData::, self)? 
{ 208 | entries.insert(key, value); 209 | } 210 | Ok(entries.to_object(self.py)) 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate milli as mi; 2 | 3 | use std::ops::Deref; 4 | 5 | use pyo3::prelude::*; 6 | use pyo3::types::*; 7 | 8 | use mi::{DocumentId, Index, Search}; 9 | use mi::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; 10 | use mi::update::{ClearDocuments, DocumentAdditionResult, 11 | IndexerConfig, IndexDocumentsConfig, IndexDocumentsMethod, IndexDocuments}; 12 | use serde::Deserializer; 13 | 14 | mod conv; 15 | 16 | // Helpers 17 | macro_rules! obkv_to_pydict { 18 | ($self:ident, $py:ident, $rtxn:ident, $obkv:ident) => {{ 19 | let fields = $self.index.fields_ids_map(&$rtxn).unwrap(); 20 | let dict = PyDict::new($py); 21 | for (id, bytes) in $obkv.iter() { 22 | let key = fields.name(id); 23 | let mut deserializer = serde_json::Deserializer::from_slice(&bytes); 24 | let value = conv::ObkvValue::new($py); 25 | let value = deserializer.deserialize_any(value).unwrap(); 26 | dict.set_item(key, value).unwrap(); 27 | } 28 | dict 29 | }}; 30 | } 31 | 32 | #[pyclass(name="Index")] 33 | struct PyIndex { 34 | index: Index, 35 | } 36 | 37 | #[pymethods] 38 | impl PyIndex { 39 | #[new] 40 | fn new(path: String, map_size: Option) -> Self { 41 | let mut options = mi::heed::EnvOpenOptions::new(); 42 | if map_size.is_some() { 43 | options.map_size(map_size.unwrap()); 44 | } 45 | let index = Index::new(options, &path).unwrap(); 46 | return PyIndex{ index }; 47 | } 48 | 49 | fn add_documents(&self, py: Python<'_>, list: &PyList, update_method: Option) -> PyResult { 50 | let mut config = IndexDocumentsConfig::default(); 51 | if update_method.is_some() { 52 | config.update_method = update_method.unwrap().into(); 53 | } 54 | 55 | let mut wtxn = self.write_txn().unwrap(); 56 | let indexer_config = 
IndexerConfig::default(); 57 | let builder = IndexDocuments::new( 58 | &mut wtxn, &self, &indexer_config, config.clone(), |_| (), || false).unwrap(); 59 | 60 | // Convert Python array into Vec 61 | let list = list.to_object(py); 62 | let list = conv::to_json(py, &list)?; 63 | let mut docbuilder = DocumentsBatchBuilder::new(Vec::new()); 64 | for item in list.as_array().unwrap() { 65 | let object = item.as_object().unwrap(); 66 | docbuilder.append_json_object(object).unwrap(); 67 | } 68 | let vector = docbuilder.into_inner().unwrap(); 69 | let reader = DocumentsBatchReader::from_reader(std::io::Cursor::new(vector)).unwrap(); 70 | 71 | let (builder, _user_error) = builder.add_documents(reader).unwrap(); 72 | let result = builder.execute().unwrap(); 73 | wtxn.commit().unwrap(); 74 | Ok(result.into()) 75 | } 76 | 77 | fn all_documents(&self, py: Python<'_>) -> PyResult> { 78 | let rtxn = self.read_txn().unwrap(); 79 | let docs = self.index.all_documents(&rtxn).unwrap(); 80 | 81 | // TODO: Wrap as a Python iterator without converting to list 82 | let list = PyList::empty(py); 83 | for document in docs { 84 | let (docid, obkv) = document.unwrap(); 85 | let doc = obkv_to_pydict!(self, py, rtxn, obkv); 86 | let tuple = PyTuple::new(py, &[docid.into_py(py), doc.into()]); 87 | list.append(tuple).unwrap(); 88 | } 89 | let iter = PyIterator::from_object(py, list).unwrap(); 90 | Ok(iter.into()) 91 | } 92 | 93 | fn clear_documents(&self) -> PyResult { 94 | let mut wtxn = self.write_txn().unwrap(); 95 | let builder = ClearDocuments::new(&mut wtxn, self); 96 | let result = builder.execute().unwrap(); 97 | wtxn.commit().unwrap(); 98 | Ok(result.into()) 99 | } 100 | 101 | fn delete_documents(&self, ids: Vec) -> PyResult { 102 | let config = IndexDocumentsConfig::default(); 103 | let indexer_config = IndexerConfig::default(); 104 | let mut wtxn = self.write_txn().unwrap(); 105 | let builder = IndexDocuments::new( 106 | &mut wtxn, 107 | &self, 108 | &indexer_config, 109 | 
config.clone(), |_| (), || false).unwrap(); 110 | 111 | let (builder, removed) = builder.remove_documents(ids).unwrap(); 112 | let _result = builder.execute().unwrap(); 113 | wtxn.commit().unwrap(); 114 | Ok(removed.unwrap().into()) 115 | } 116 | 117 | fn get_document(&self, py: Python<'_>, id: DocumentId) -> PyResult> { 118 | let rtxn = self.read_txn().unwrap(); 119 | let (_docid, obkv) = self.index.documents(&rtxn, [id]).unwrap()[0]; 120 | let dict = obkv_to_pydict!(self, py, rtxn, obkv); 121 | Ok(dict.into()) 122 | } 123 | 124 | fn get_documents(&self, py: Python<'_>, ids: Vec) -> PyResult> { 125 | let rtxn = self.read_txn().unwrap(); 126 | let docs = self.documents(&rtxn, ids).unwrap(); 127 | let list = PyList::empty(py); 128 | for (_docid, obkv) in docs { 129 | list.append(obkv_to_pydict!(self, py, rtxn, obkv)).unwrap(); 130 | } 131 | Ok(list.into()) 132 | } 133 | 134 | fn primary_key(&self) -> PyResult> { 135 | let rtxn = self.read_txn().unwrap(); 136 | let result = self.index.primary_key(&rtxn).unwrap(); 137 | let converted_result = result.map(|s| s.to_string()); 138 | Ok(converted_result) 139 | } 140 | 141 | fn search(&self, query: String) -> Vec { 142 | let rtxn = self.read_txn().unwrap(); 143 | let mut search = Search::new(&rtxn, &self); 144 | search.query(query); 145 | let results = search.execute().unwrap(); 146 | return results.documents_ids; 147 | } 148 | } 149 | 150 | impl Deref for PyIndex { 151 | type Target = Index; 152 | fn deref(&self) -> &Self::Target { 153 | &self.index 154 | } 155 | } 156 | 157 | impl Drop for PyIndex { 158 | fn drop(&mut self) { 159 | self.index.clone().prepare_for_closing(); 160 | } 161 | } 162 | 163 | #[derive(Clone)] 164 | #[pyclass(name="IndexDocumentsMethod")] 165 | enum PyIndexDocumentsMethod { 166 | ReplaceDocuments, 167 | UpdateDocuments, 168 | } 169 | impl From for IndexDocumentsMethod { 170 | fn from(value: PyIndexDocumentsMethod) -> Self { 171 | match value { 172 | PyIndexDocumentsMethod::ReplaceDocuments => 
Self::ReplaceDocuments, 173 | PyIndexDocumentsMethod::UpdateDocuments => Self::UpdateDocuments, 174 | } 175 | } 176 | } 177 | 178 | #[pyclass(name="DocumentAdditionResult")] 179 | struct PyDocumentAdditionResult { 180 | #[pyo3(get, set)] 181 | indexed_documents: u64, 182 | #[pyo3(get, set)] 183 | number_of_documents: u64, 184 | } 185 | impl From for PyDocumentAdditionResult { 186 | fn from(value: DocumentAdditionResult) -> Self { 187 | PyDocumentAdditionResult{ 188 | indexed_documents: value.indexed_documents, 189 | number_of_documents: value.number_of_documents, 190 | } 191 | } 192 | } 193 | 194 | #[pymodule] 195 | fn milli(_py: Python<'_>, m: &PyModule) -> PyResult<()> { 196 | m.add_class::()?; 197 | m.add_class::()?; 198 | m.add_class::()?; 199 | Ok(()) 200 | } 201 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from tests import * 4 | 5 | def test(): 6 | test_milli_index() 7 | 8 | if __name__ == '__main__': 9 | test() 10 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Imports 2 | from .test_milli_index import * 3 | 4 | -------------------------------------------------------------------------------- /tests/test_milli_index.py: -------------------------------------------------------------------------------- 1 | import milli 2 | import tempfile 3 | 4 | def test_milli_index(): 5 | # Document addition and primary keys 6 | with tempfile.TemporaryDirectory() as tmp: 7 | index = milli.Index(tmp) 8 | assert(index.primary_key() == None) 9 | 10 | # Without an explicit external ID nothing gets indexed 11 | result = index.add_documents([ 12 | { "title": "Hello world" }, 13 | ]) 14 | assert(result.indexed_documents == 0) 15 | assert(result.number_of_documents == 0) 16 | 
assert(index.primary_key() == None) 17 | 18 | # With an explicit external ID 19 | result = index.add_documents([ 20 | { "id": 1, "title": "Hello world" }, 21 | ]) 22 | assert(result.indexed_documents == 1) 23 | assert(result.number_of_documents == 1) 24 | assert(index.primary_key() == "id") 25 | 26 | # Note the document has an external ID (== 1) and internal ID (== 0) 27 | assert(index.get_documents([0]) == [{ "id": 1, "title": "Hello world" }]) 28 | del(index) 29 | 30 | # Document listing 31 | with tempfile.TemporaryDirectory() as tmp: 32 | index = milli.Index(tmp) 33 | index.add_documents([ 34 | { "id": 11, "title": "Hello moon", "content": "This is another sample" }, 35 | { "id": 12, "title": "Hello sun", "content": "This is yet another sample" }, 36 | ]) 37 | docs = index.all_documents() 38 | assert(next(docs) == (0, { "id": 11, "title": "Hello moon", "content": "This is another sample" })) 39 | assert(next(docs) == (1, { "id": 12, "title": "Hello sun", "content": "This is yet another sample" })) 40 | del(index) 41 | 42 | # Document search 43 | with tempfile.TemporaryDirectory() as tmp: 44 | index = milli.Index(tmp) 45 | index.add_documents([ 46 | { "id": 0, "title": "Hello world", "content": "This is a sample" }, 47 | { "id": 1, "title": "Hello moon", "content": "This is another sample" }, 48 | { "id": 2, "title": "Hello sun", "content": "This is yet another sample" }, 49 | ]) 50 | results = index.search("wrold") 51 | document = index.get_document(results[0]) 52 | assert(document['title'] == "Hello world") 53 | del(index) 54 | 55 | # Document update 56 | with tempfile.TemporaryDirectory() as tmp: 57 | index = milli.Index(tmp) 58 | index.add_documents([ 59 | { "id": 0, "title": "Hello world", "content": "This is a sample" }, 60 | { "id": 1, "title": "Hello moon", "content": "This is another sample" }, 61 | { "id": 2, "title": "Hello sun", "content": "This is yet another sample" }, 62 | ]) 63 | index.add_documents([ 64 | { "id": 1, "title": "Hello mars" }, 65 | 
], milli.IndexDocumentsMethod.ReplaceDocuments) 66 | index.add_documents([ 67 | { "id": 2, "title": "Hello helios" }, 68 | { "id": 0, "people": True }, 69 | ], milli.IndexDocumentsMethod.UpdateDocuments) 70 | assert(index.get_documents([1, 2, 0]) == [ 71 | {'id': 1, 'title': 'Hello mars'}, 72 | {'id': 2, 'title': 'Hello helios', 'content': 'This is yet another sample'}, 73 | {'id': 0, 'title': 'Hello world', 'content': 'This is a sample', 'people': True}, 74 | ]) 75 | del(index) 76 | 77 | # Document removal 78 | with tempfile.TemporaryDirectory() as tmp: 79 | index = milli.Index(tmp) 80 | index.add_documents([ 81 | { "id": 0, "title": "Hello world", "content": "This is a sample" }, 82 | { "id": 1, "title": "Hello moon", "content": "This is another sample" }, 83 | { "id": 2, "title": "Hello sun", "content": "This is yet another sample" }, 84 | ]) 85 | result = index.delete_documents(["2", "0", "7"]) 86 | assert(result == 2) 87 | assert(index.get_document(1)['id'] == 1) 88 | del(index) 89 | 90 | # Document clearing 91 | with tempfile.TemporaryDirectory() as tmp: 92 | index = milli.Index(tmp) 93 | index.add_documents([ 94 | { "id": 0, "title": "Hello world", "content": "This is a sample" }, 95 | { "id": 1, "title": "Hello moon", "content": "This is another sample" }, 96 | { "id": 2, "title": "Hello sun", "content": "This is yet another sample" }, 97 | ]) 98 | result = index.clear_documents() 99 | assert(result == 3) 100 | del(index) 101 | --------------------------------------------------------------------------------