├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md └── src ├── bin ├── ce-dataset.rs └── ce-query.rs └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "aes" 13 | version = "0.8.2" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "433cfd6710c9986c576a25ca913c39d66a6474107b406f34f91d4a8923395241" 16 | dependencies = [ 17 | "cfg-if", 18 | "cipher", 19 | "cpufeatures", 20 | ] 21 | 22 | [[package]] 23 | name = "ahash" 24 | version = "0.8.3" 25 | source = "registry+https://github.com/rust-lang/crates.io-index" 26 | checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" 27 | dependencies = [ 28 | "cfg-if", 29 | "once_cell", 30 | "version_check", 31 | ] 32 | 33 | [[package]] 34 | name = "aho-corasick" 35 | version = "1.0.2" 36 | source = "registry+https://github.com/rust-lang/crates.io-index" 37 | checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" 38 | dependencies = [ 39 | "memchr", 40 | ] 41 | 42 | [[package]] 43 | name = "anstream" 44 | version = "0.3.2" 45 | source = "registry+https://github.com/rust-lang/crates.io-index" 46 | checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" 47 | dependencies = [ 48 | "anstyle", 49 | "anstyle-parse", 50 | "anstyle-query", 51 | "anstyle-wincon", 52 | "colorchoice", 53 | "is-terminal", 54 | "utf8parse", 55 | 
] 56 | 57 | [[package]] 58 | name = "anstyle" 59 | version = "1.0.0" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" 62 | 63 | [[package]] 64 | name = "anstyle-parse" 65 | version = "0.2.0" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" 68 | dependencies = [ 69 | "utf8parse", 70 | ] 71 | 72 | [[package]] 73 | name = "anstyle-query" 74 | version = "1.0.0" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" 77 | dependencies = [ 78 | "windows-sys 0.48.0", 79 | ] 80 | 81 | [[package]] 82 | name = "anstyle-wincon" 83 | version = "1.0.1" 84 | source = "registry+https://github.com/rust-lang/crates.io-index" 85 | checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" 86 | dependencies = [ 87 | "anstyle", 88 | "windows-sys 0.48.0", 89 | ] 90 | 91 | [[package]] 92 | name = "anyhow" 93 | version = "1.0.71" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" 96 | 97 | [[package]] 98 | name = "autocfg" 99 | version = "1.1.0" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 102 | 103 | [[package]] 104 | name = "base64" 105 | version = "0.13.1" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" 108 | 109 | [[package]] 110 | name = "base64" 111 | version = "0.21.2" 112 | source = "registry+https://github.com/rust-lang/crates.io-index" 113 | checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" 114 | 115 | [[package]] 116 | 
name = "base64ct" 117 | version = "1.6.0" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" 120 | 121 | [[package]] 122 | name = "bitflags" 123 | version = "1.3.2" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 126 | 127 | [[package]] 128 | name = "block-buffer" 129 | version = "0.10.4" 130 | source = "registry+https://github.com/rust-lang/crates.io-index" 131 | checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" 132 | dependencies = [ 133 | "generic-array", 134 | ] 135 | 136 | [[package]] 137 | name = "bumpalo" 138 | version = "3.13.0" 139 | source = "registry+https://github.com/rust-lang/crates.io-index" 140 | checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" 141 | 142 | [[package]] 143 | name = "byteorder" 144 | version = "1.4.3" 145 | source = "registry+https://github.com/rust-lang/crates.io-index" 146 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" 147 | 148 | [[package]] 149 | name = "bytes" 150 | version = "1.4.0" 151 | source = "registry+https://github.com/rust-lang/crates.io-index" 152 | checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" 153 | 154 | [[package]] 155 | name = "bzip2" 156 | version = "0.4.4" 157 | source = "registry+https://github.com/rust-lang/crates.io-index" 158 | checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" 159 | dependencies = [ 160 | "bzip2-sys", 161 | "libc", 162 | ] 163 | 164 | [[package]] 165 | name = "bzip2-sys" 166 | version = "0.1.11+1.0.8" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" 169 | dependencies = [ 170 | "cc", 171 | "libc", 172 | "pkg-config", 173 | ] 174 | 175 
| [[package]] 176 | name = "cached-path" 177 | version = "0.6.1" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "097968e38f1319207f057d0f4d76452e4f4f847a5de61c5215379f297fa034f3" 180 | dependencies = [ 181 | "flate2", 182 | "fs2", 183 | "glob", 184 | "indicatif 0.16.2", 185 | "log", 186 | "rand", 187 | "reqwest", 188 | "serde", 189 | "serde_json", 190 | "sha2", 191 | "tar", 192 | "tempfile", 193 | "thiserror", 194 | "zip", 195 | ] 196 | 197 | [[package]] 198 | name = "cc" 199 | version = "1.0.79" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" 202 | dependencies = [ 203 | "jobserver", 204 | ] 205 | 206 | [[package]] 207 | name = "cfg-if" 208 | version = "1.0.0" 209 | source = "registry+https://github.com/rust-lang/crates.io-index" 210 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 211 | 212 | [[package]] 213 | name = "cipher" 214 | version = "0.4.4" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" 217 | dependencies = [ 218 | "crypto-common", 219 | "inout", 220 | ] 221 | 222 | [[package]] 223 | name = "clap" 224 | version = "4.3.2" 225 | source = "registry+https://github.com/rust-lang/crates.io-index" 226 | checksum = "401a4694d2bf92537b6867d94de48c4842089645fdcdf6c71865b175d836e9c2" 227 | dependencies = [ 228 | "clap_builder", 229 | "clap_derive", 230 | "once_cell", 231 | ] 232 | 233 | [[package]] 234 | name = "clap_builder" 235 | version = "4.3.1" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | checksum = "72394f3339a76daf211e57d4bcb374410f3965dcc606dd0e03738c7888766980" 238 | dependencies = [ 239 | "anstream", 240 | "anstyle", 241 | "bitflags", 242 | "clap_lex", 243 | "strsim", 244 | ] 245 | 246 | [[package]] 247 | name = "clap_derive" 248 | version = 
"4.3.2" 249 | source = "registry+https://github.com/rust-lang/crates.io-index" 250 | checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" 251 | dependencies = [ 252 | "heck", 253 | "proc-macro2", 254 | "quote", 255 | "syn", 256 | ] 257 | 258 | [[package]] 259 | name = "clap_lex" 260 | version = "0.5.0" 261 | source = "registry+https://github.com/rust-lang/crates.io-index" 262 | checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" 263 | 264 | [[package]] 265 | name = "colorchoice" 266 | version = "1.0.0" 267 | source = "registry+https://github.com/rust-lang/crates.io-index" 268 | checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" 269 | 270 | [[package]] 271 | name = "compute-embeddings" 272 | version = "0.1.0" 273 | dependencies = [ 274 | "anyhow", 275 | "clap", 276 | "indicatif 0.17.5", 277 | "itertools", 278 | "rust-bert", 279 | "serde", 280 | "serde_json", 281 | "ureq", 282 | ] 283 | 284 | [[package]] 285 | name = "console" 286 | version = "0.15.7" 287 | source = "registry+https://github.com/rust-lang/crates.io-index" 288 | checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" 289 | dependencies = [ 290 | "encode_unicode", 291 | "lazy_static", 292 | "libc", 293 | "unicode-width", 294 | "windows-sys 0.45.0", 295 | ] 296 | 297 | [[package]] 298 | name = "constant_time_eq" 299 | version = "0.1.5" 300 | source = "registry+https://github.com/rust-lang/crates.io-index" 301 | checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" 302 | 303 | [[package]] 304 | name = "core-foundation" 305 | version = "0.9.3" 306 | source = "registry+https://github.com/rust-lang/crates.io-index" 307 | checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" 308 | dependencies = [ 309 | "core-foundation-sys", 310 | "libc", 311 | ] 312 | 313 | [[package]] 314 | name = "core-foundation-sys" 315 | version = "0.8.4" 316 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 317 | checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" 318 | 319 | [[package]] 320 | name = "cpufeatures" 321 | version = "0.2.7" 322 | source = "registry+https://github.com/rust-lang/crates.io-index" 323 | checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" 324 | dependencies = [ 325 | "libc", 326 | ] 327 | 328 | [[package]] 329 | name = "crc32fast" 330 | version = "1.3.2" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" 333 | dependencies = [ 334 | "cfg-if", 335 | ] 336 | 337 | [[package]] 338 | name = "crossbeam-channel" 339 | version = "0.5.8" 340 | source = "registry+https://github.com/rust-lang/crates.io-index" 341 | checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" 342 | dependencies = [ 343 | "cfg-if", 344 | "crossbeam-utils", 345 | ] 346 | 347 | [[package]] 348 | name = "crossbeam-deque" 349 | version = "0.8.3" 350 | source = "registry+https://github.com/rust-lang/crates.io-index" 351 | checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" 352 | dependencies = [ 353 | "cfg-if", 354 | "crossbeam-epoch", 355 | "crossbeam-utils", 356 | ] 357 | 358 | [[package]] 359 | name = "crossbeam-epoch" 360 | version = "0.9.14" 361 | source = "registry+https://github.com/rust-lang/crates.io-index" 362 | checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" 363 | dependencies = [ 364 | "autocfg", 365 | "cfg-if", 366 | "crossbeam-utils", 367 | "memoffset", 368 | "scopeguard", 369 | ] 370 | 371 | [[package]] 372 | name = "crossbeam-utils" 373 | version = "0.8.15" 374 | source = "registry+https://github.com/rust-lang/crates.io-index" 375 | checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" 376 | dependencies = [ 377 | "cfg-if", 378 | ] 379 | 380 | [[package]] 
381 | name = "crunchy" 382 | version = "0.2.2" 383 | source = "registry+https://github.com/rust-lang/crates.io-index" 384 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" 385 | 386 | [[package]] 387 | name = "crypto-common" 388 | version = "0.1.6" 389 | source = "registry+https://github.com/rust-lang/crates.io-index" 390 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 391 | dependencies = [ 392 | "generic-array", 393 | "typenum", 394 | ] 395 | 396 | [[package]] 397 | name = "csv" 398 | version = "1.2.2" 399 | source = "registry+https://github.com/rust-lang/crates.io-index" 400 | checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" 401 | dependencies = [ 402 | "csv-core", 403 | "itoa", 404 | "ryu", 405 | "serde", 406 | ] 407 | 408 | [[package]] 409 | name = "csv-core" 410 | version = "0.1.10" 411 | source = "registry+https://github.com/rust-lang/crates.io-index" 412 | checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" 413 | dependencies = [ 414 | "memchr", 415 | ] 416 | 417 | [[package]] 418 | name = "digest" 419 | version = "0.10.7" 420 | source = "registry+https://github.com/rust-lang/crates.io-index" 421 | checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" 422 | dependencies = [ 423 | "block-buffer", 424 | "crypto-common", 425 | "subtle", 426 | ] 427 | 428 | [[package]] 429 | name = "dirs" 430 | version = "4.0.0" 431 | source = "registry+https://github.com/rust-lang/crates.io-index" 432 | checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" 433 | dependencies = [ 434 | "dirs-sys", 435 | ] 436 | 437 | [[package]] 438 | name = "dirs-sys" 439 | version = "0.3.7" 440 | source = "registry+https://github.com/rust-lang/crates.io-index" 441 | checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" 442 | dependencies = [ 443 | "libc", 444 | "redox_users", 445 | "winapi", 446 | ] 447 | 
448 | [[package]] 449 | name = "either" 450 | version = "1.8.1" 451 | source = "registry+https://github.com/rust-lang/crates.io-index" 452 | checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" 453 | 454 | [[package]] 455 | name = "encode_unicode" 456 | version = "0.3.6" 457 | source = "registry+https://github.com/rust-lang/crates.io-index" 458 | checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" 459 | 460 | [[package]] 461 | name = "encoding_rs" 462 | version = "0.8.32" 463 | source = "registry+https://github.com/rust-lang/crates.io-index" 464 | checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" 465 | dependencies = [ 466 | "cfg-if", 467 | ] 468 | 469 | [[package]] 470 | name = "errno" 471 | version = "0.3.1" 472 | source = "registry+https://github.com/rust-lang/crates.io-index" 473 | checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" 474 | dependencies = [ 475 | "errno-dragonfly", 476 | "libc", 477 | "windows-sys 0.48.0", 478 | ] 479 | 480 | [[package]] 481 | name = "errno-dragonfly" 482 | version = "0.1.2" 483 | source = "registry+https://github.com/rust-lang/crates.io-index" 484 | checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" 485 | dependencies = [ 486 | "cc", 487 | "libc", 488 | ] 489 | 490 | [[package]] 491 | name = "fastrand" 492 | version = "1.9.0" 493 | source = "registry+https://github.com/rust-lang/crates.io-index" 494 | checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" 495 | dependencies = [ 496 | "instant", 497 | ] 498 | 499 | [[package]] 500 | name = "filetime" 501 | version = "0.2.21" 502 | source = "registry+https://github.com/rust-lang/crates.io-index" 503 | checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" 504 | dependencies = [ 505 | "cfg-if", 506 | "libc", 507 | "redox_syscall 0.2.16", 508 | "windows-sys 0.48.0", 509 | ] 510 | 511 | [[package]] 512 | name 
= "flate2" 513 | version = "1.0.26" 514 | source = "registry+https://github.com/rust-lang/crates.io-index" 515 | checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" 516 | dependencies = [ 517 | "crc32fast", 518 | "miniz_oxide", 519 | ] 520 | 521 | [[package]] 522 | name = "fnv" 523 | version = "1.0.7" 524 | source = "registry+https://github.com/rust-lang/crates.io-index" 525 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 526 | 527 | [[package]] 528 | name = "foreign-types" 529 | version = "0.3.2" 530 | source = "registry+https://github.com/rust-lang/crates.io-index" 531 | checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" 532 | dependencies = [ 533 | "foreign-types-shared", 534 | ] 535 | 536 | [[package]] 537 | name = "foreign-types-shared" 538 | version = "0.1.1" 539 | source = "registry+https://github.com/rust-lang/crates.io-index" 540 | checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" 541 | 542 | [[package]] 543 | name = "form_urlencoded" 544 | version = "1.2.0" 545 | source = "registry+https://github.com/rust-lang/crates.io-index" 546 | checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" 547 | dependencies = [ 548 | "percent-encoding", 549 | ] 550 | 551 | [[package]] 552 | name = "fs2" 553 | version = "0.4.3" 554 | source = "registry+https://github.com/rust-lang/crates.io-index" 555 | checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" 556 | dependencies = [ 557 | "libc", 558 | "winapi", 559 | ] 560 | 561 | [[package]] 562 | name = "futures-channel" 563 | version = "0.3.28" 564 | source = "registry+https://github.com/rust-lang/crates.io-index" 565 | checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" 566 | dependencies = [ 567 | "futures-core", 568 | ] 569 | 570 | [[package]] 571 | name = "futures-core" 572 | version = "0.3.28" 573 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 574 | checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" 575 | 576 | [[package]] 577 | name = "futures-io" 578 | version = "0.3.28" 579 | source = "registry+https://github.com/rust-lang/crates.io-index" 580 | checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" 581 | 582 | [[package]] 583 | name = "futures-sink" 584 | version = "0.3.28" 585 | source = "registry+https://github.com/rust-lang/crates.io-index" 586 | checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" 587 | 588 | [[package]] 589 | name = "futures-task" 590 | version = "0.3.28" 591 | source = "registry+https://github.com/rust-lang/crates.io-index" 592 | checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" 593 | 594 | [[package]] 595 | name = "futures-util" 596 | version = "0.3.28" 597 | source = "registry+https://github.com/rust-lang/crates.io-index" 598 | checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" 599 | dependencies = [ 600 | "futures-core", 601 | "futures-io", 602 | "futures-task", 603 | "memchr", 604 | "pin-project-lite", 605 | "pin-utils", 606 | "slab", 607 | ] 608 | 609 | [[package]] 610 | name = "generic-array" 611 | version = "0.14.7" 612 | source = "registry+https://github.com/rust-lang/crates.io-index" 613 | checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" 614 | dependencies = [ 615 | "typenum", 616 | "version_check", 617 | ] 618 | 619 | [[package]] 620 | name = "getrandom" 621 | version = "0.2.9" 622 | source = "registry+https://github.com/rust-lang/crates.io-index" 623 | checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" 624 | dependencies = [ 625 | "cfg-if", 626 | "libc", 627 | "wasi", 628 | ] 629 | 630 | [[package]] 631 | name = "glob" 632 | version = "0.3.1" 633 | source = "registry+https://github.com/rust-lang/crates.io-index" 634 | checksum = 
"d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" 635 | 636 | [[package]] 637 | name = "h2" 638 | version = "0.3.19" 639 | source = "registry+https://github.com/rust-lang/crates.io-index" 640 | checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782" 641 | dependencies = [ 642 | "bytes", 643 | "fnv", 644 | "futures-core", 645 | "futures-sink", 646 | "futures-util", 647 | "http", 648 | "indexmap", 649 | "slab", 650 | "tokio", 651 | "tokio-util", 652 | "tracing", 653 | ] 654 | 655 | [[package]] 656 | name = "half" 657 | version = "2.2.1" 658 | source = "registry+https://github.com/rust-lang/crates.io-index" 659 | checksum = "02b4af3693f1b705df946e9fe5631932443781d0aabb423b62fcd4d73f6d2fd0" 660 | dependencies = [ 661 | "crunchy", 662 | ] 663 | 664 | [[package]] 665 | name = "hashbrown" 666 | version = "0.12.3" 667 | source = "registry+https://github.com/rust-lang/crates.io-index" 668 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 669 | 670 | [[package]] 671 | name = "hashbrown" 672 | version = "0.13.2" 673 | source = "registry+https://github.com/rust-lang/crates.io-index" 674 | checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" 675 | dependencies = [ 676 | "ahash", 677 | ] 678 | 679 | [[package]] 680 | name = "heck" 681 | version = "0.4.1" 682 | source = "registry+https://github.com/rust-lang/crates.io-index" 683 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 684 | 685 | [[package]] 686 | name = "hermit-abi" 687 | version = "0.2.6" 688 | source = "registry+https://github.com/rust-lang/crates.io-index" 689 | checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" 690 | dependencies = [ 691 | "libc", 692 | ] 693 | 694 | [[package]] 695 | name = "hermit-abi" 696 | version = "0.3.1" 697 | source = "registry+https://github.com/rust-lang/crates.io-index" 698 | checksum = 
"fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" 699 | 700 | [[package]] 701 | name = "hmac" 702 | version = "0.12.1" 703 | source = "registry+https://github.com/rust-lang/crates.io-index" 704 | checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" 705 | dependencies = [ 706 | "digest", 707 | ] 708 | 709 | [[package]] 710 | name = "http" 711 | version = "0.2.9" 712 | source = "registry+https://github.com/rust-lang/crates.io-index" 713 | checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" 714 | dependencies = [ 715 | "bytes", 716 | "fnv", 717 | "itoa", 718 | ] 719 | 720 | [[package]] 721 | name = "http-body" 722 | version = "0.4.5" 723 | source = "registry+https://github.com/rust-lang/crates.io-index" 724 | checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" 725 | dependencies = [ 726 | "bytes", 727 | "http", 728 | "pin-project-lite", 729 | ] 730 | 731 | [[package]] 732 | name = "httparse" 733 | version = "1.8.0" 734 | source = "registry+https://github.com/rust-lang/crates.io-index" 735 | checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" 736 | 737 | [[package]] 738 | name = "httpdate" 739 | version = "1.0.2" 740 | source = "registry+https://github.com/rust-lang/crates.io-index" 741 | checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" 742 | 743 | [[package]] 744 | name = "hyper" 745 | version = "0.14.26" 746 | source = "registry+https://github.com/rust-lang/crates.io-index" 747 | checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4" 748 | dependencies = [ 749 | "bytes", 750 | "futures-channel", 751 | "futures-core", 752 | "futures-util", 753 | "h2", 754 | "http", 755 | "http-body", 756 | "httparse", 757 | "httpdate", 758 | "itoa", 759 | "pin-project-lite", 760 | "socket2", 761 | "tokio", 762 | "tower-service", 763 | "tracing", 764 | "want", 765 | ] 766 | 767 | [[package]] 768 | name = "hyper-tls" 
769 | version = "0.5.0" 770 | source = "registry+https://github.com/rust-lang/crates.io-index" 771 | checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" 772 | dependencies = [ 773 | "bytes", 774 | "hyper", 775 | "native-tls", 776 | "tokio", 777 | "tokio-native-tls", 778 | ] 779 | 780 | [[package]] 781 | name = "idna" 782 | version = "0.4.0" 783 | source = "registry+https://github.com/rust-lang/crates.io-index" 784 | checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" 785 | dependencies = [ 786 | "unicode-bidi", 787 | "unicode-normalization", 788 | ] 789 | 790 | [[package]] 791 | name = "indexmap" 792 | version = "1.9.3" 793 | source = "registry+https://github.com/rust-lang/crates.io-index" 794 | checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" 795 | dependencies = [ 796 | "autocfg", 797 | "hashbrown 0.12.3", 798 | ] 799 | 800 | [[package]] 801 | name = "indicatif" 802 | version = "0.16.2" 803 | source = "registry+https://github.com/rust-lang/crates.io-index" 804 | checksum = "2d207dc617c7a380ab07ff572a6e52fa202a2a8f355860ac9c38e23f8196be1b" 805 | dependencies = [ 806 | "console", 807 | "lazy_static", 808 | "number_prefix", 809 | "regex", 810 | ] 811 | 812 | [[package]] 813 | name = "indicatif" 814 | version = "0.17.5" 815 | source = "registry+https://github.com/rust-lang/crates.io-index" 816 | checksum = "8ff8cc23a7393a397ed1d7f56e6365cba772aba9f9912ab968b03043c395d057" 817 | dependencies = [ 818 | "console", 819 | "instant", 820 | "number_prefix", 821 | "portable-atomic", 822 | "unicode-width", 823 | ] 824 | 825 | [[package]] 826 | name = "inout" 827 | version = "0.1.3" 828 | source = "registry+https://github.com/rust-lang/crates.io-index" 829 | checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" 830 | dependencies = [ 831 | "generic-array", 832 | ] 833 | 834 | [[package]] 835 | name = "instant" 836 | version = "0.1.12" 837 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 838 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" 839 | dependencies = [ 840 | "cfg-if", 841 | ] 842 | 843 | [[package]] 844 | name = "io-lifetimes" 845 | version = "1.0.11" 846 | source = "registry+https://github.com/rust-lang/crates.io-index" 847 | checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" 848 | dependencies = [ 849 | "hermit-abi 0.3.1", 850 | "libc", 851 | "windows-sys 0.48.0", 852 | ] 853 | 854 | [[package]] 855 | name = "ipnet" 856 | version = "2.7.2" 857 | source = "registry+https://github.com/rust-lang/crates.io-index" 858 | checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" 859 | 860 | [[package]] 861 | name = "is-terminal" 862 | version = "0.4.7" 863 | source = "registry+https://github.com/rust-lang/crates.io-index" 864 | checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" 865 | dependencies = [ 866 | "hermit-abi 0.3.1", 867 | "io-lifetimes", 868 | "rustix", 869 | "windows-sys 0.48.0", 870 | ] 871 | 872 | [[package]] 873 | name = "itertools" 874 | version = "0.10.5" 875 | source = "registry+https://github.com/rust-lang/crates.io-index" 876 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 877 | dependencies = [ 878 | "either", 879 | ] 880 | 881 | [[package]] 882 | name = "itoa" 883 | version = "1.0.6" 884 | source = "registry+https://github.com/rust-lang/crates.io-index" 885 | checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" 886 | 887 | [[package]] 888 | name = "jobserver" 889 | version = "0.1.26" 890 | source = "registry+https://github.com/rust-lang/crates.io-index" 891 | checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" 892 | dependencies = [ 893 | "libc", 894 | ] 895 | 896 | [[package]] 897 | name = "js-sys" 898 | version = "0.3.63" 899 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 900 | checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790" 901 | dependencies = [ 902 | "wasm-bindgen", 903 | ] 904 | 905 | [[package]] 906 | name = "lazy_static" 907 | version = "1.4.0" 908 | source = "registry+https://github.com/rust-lang/crates.io-index" 909 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 910 | 911 | [[package]] 912 | name = "libc" 913 | version = "0.2.145" 914 | source = "registry+https://github.com/rust-lang/crates.io-index" 915 | checksum = "fc86cde3ff845662b8f4ef6cb50ea0e20c524eb3d29ae048287e06a1b3fa6a81" 916 | 917 | [[package]] 918 | name = "linux-raw-sys" 919 | version = "0.3.8" 920 | source = "registry+https://github.com/rust-lang/crates.io-index" 921 | checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" 922 | 923 | [[package]] 924 | name = "log" 925 | version = "0.4.18" 926 | source = "registry+https://github.com/rust-lang/crates.io-index" 927 | checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de" 928 | 929 | [[package]] 930 | name = "matrixmultiply" 931 | version = "0.3.7" 932 | source = "registry+https://github.com/rust-lang/crates.io-index" 933 | checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" 934 | dependencies = [ 935 | "autocfg", 936 | "rawpointer", 937 | ] 938 | 939 | [[package]] 940 | name = "memchr" 941 | version = "2.5.0" 942 | source = "registry+https://github.com/rust-lang/crates.io-index" 943 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 944 | 945 | [[package]] 946 | name = "memoffset" 947 | version = "0.8.0" 948 | source = "registry+https://github.com/rust-lang/crates.io-index" 949 | checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" 950 | dependencies = [ 951 | "autocfg", 952 | ] 953 | 954 | [[package]] 955 | name = "mime" 956 | version = "0.3.17" 957 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 958 | checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" 959 | 960 | [[package]] 961 | name = "miniz_oxide" 962 | version = "0.7.1" 963 | source = "registry+https://github.com/rust-lang/crates.io-index" 964 | checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" 965 | dependencies = [ 966 | "adler", 967 | ] 968 | 969 | [[package]] 970 | name = "mio" 971 | version = "0.8.8" 972 | source = "registry+https://github.com/rust-lang/crates.io-index" 973 | checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" 974 | dependencies = [ 975 | "libc", 976 | "wasi", 977 | "windows-sys 0.48.0", 978 | ] 979 | 980 | [[package]] 981 | name = "native-tls" 982 | version = "0.2.11" 983 | source = "registry+https://github.com/rust-lang/crates.io-index" 984 | checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" 985 | dependencies = [ 986 | "lazy_static", 987 | "libc", 988 | "log", 989 | "openssl", 990 | "openssl-probe", 991 | "openssl-sys", 992 | "schannel", 993 | "security-framework", 994 | "security-framework-sys", 995 | "tempfile", 996 | ] 997 | 998 | [[package]] 999 | name = "ndarray" 1000 | version = "0.15.6" 1001 | source = "registry+https://github.com/rust-lang/crates.io-index" 1002 | checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" 1003 | dependencies = [ 1004 | "matrixmultiply", 1005 | "num-complex", 1006 | "num-integer", 1007 | "num-traits", 1008 | "rawpointer", 1009 | ] 1010 | 1011 | [[package]] 1012 | name = "num-complex" 1013 | version = "0.4.3" 1014 | source = "registry+https://github.com/rust-lang/crates.io-index" 1015 | checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" 1016 | dependencies = [ 1017 | "num-traits", 1018 | ] 1019 | 1020 | [[package]] 1021 | name = "num-integer" 1022 | version = "0.1.45" 1023 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1024 | checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" 1025 | dependencies = [ 1026 | "autocfg", 1027 | "num-traits", 1028 | ] 1029 | 1030 | [[package]] 1031 | name = "num-traits" 1032 | version = "0.2.15" 1033 | source = "registry+https://github.com/rust-lang/crates.io-index" 1034 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 1035 | dependencies = [ 1036 | "autocfg", 1037 | ] 1038 | 1039 | [[package]] 1040 | name = "num_cpus" 1041 | version = "1.15.0" 1042 | source = "registry+https://github.com/rust-lang/crates.io-index" 1043 | checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" 1044 | dependencies = [ 1045 | "hermit-abi 0.2.6", 1046 | "libc", 1047 | ] 1048 | 1049 | [[package]] 1050 | name = "number_prefix" 1051 | version = "0.4.0" 1052 | source = "registry+https://github.com/rust-lang/crates.io-index" 1053 | checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" 1054 | 1055 | [[package]] 1056 | name = "once_cell" 1057 | version = "1.18.0" 1058 | source = "registry+https://github.com/rust-lang/crates.io-index" 1059 | checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" 1060 | 1061 | [[package]] 1062 | name = "openssl" 1063 | version = "0.10.54" 1064 | source = "registry+https://github.com/rust-lang/crates.io-index" 1065 | checksum = "69b3f656a17a6cbc115b5c7a40c616947d213ba182135b014d6051b73ab6f019" 1066 | dependencies = [ 1067 | "bitflags", 1068 | "cfg-if", 1069 | "foreign-types", 1070 | "libc", 1071 | "once_cell", 1072 | "openssl-macros", 1073 | "openssl-sys", 1074 | ] 1075 | 1076 | [[package]] 1077 | name = "openssl-macros" 1078 | version = "0.1.1" 1079 | source = "registry+https://github.com/rust-lang/crates.io-index" 1080 | checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" 1081 | dependencies = [ 1082 | "proc-macro2", 1083 | "quote", 1084 | 
"syn", 1085 | ] 1086 | 1087 | [[package]] 1088 | name = "openssl-probe" 1089 | version = "0.1.5" 1090 | source = "registry+https://github.com/rust-lang/crates.io-index" 1091 | checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" 1092 | 1093 | [[package]] 1094 | name = "openssl-sys" 1095 | version = "0.9.88" 1096 | source = "registry+https://github.com/rust-lang/crates.io-index" 1097 | checksum = "c2ce0f250f34a308dcfdbb351f511359857d4ed2134ba715a4eadd46e1ffd617" 1098 | dependencies = [ 1099 | "cc", 1100 | "libc", 1101 | "pkg-config", 1102 | "vcpkg", 1103 | ] 1104 | 1105 | [[package]] 1106 | name = "ordered-float" 1107 | version = "3.7.0" 1108 | source = "registry+https://github.com/rust-lang/crates.io-index" 1109 | checksum = "2fc2dbde8f8a79f2102cc474ceb0ad68e3b80b85289ea62389b60e66777e4213" 1110 | dependencies = [ 1111 | "num-traits", 1112 | ] 1113 | 1114 | [[package]] 1115 | name = "password-hash" 1116 | version = "0.4.2" 1117 | source = "registry+https://github.com/rust-lang/crates.io-index" 1118 | checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" 1119 | dependencies = [ 1120 | "base64ct", 1121 | "rand_core", 1122 | "subtle", 1123 | ] 1124 | 1125 | [[package]] 1126 | name = "pbkdf2" 1127 | version = "0.11.0" 1128 | source = "registry+https://github.com/rust-lang/crates.io-index" 1129 | checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" 1130 | dependencies = [ 1131 | "digest", 1132 | "hmac", 1133 | "password-hash", 1134 | "sha2", 1135 | ] 1136 | 1137 | [[package]] 1138 | name = "percent-encoding" 1139 | version = "2.3.0" 1140 | source = "registry+https://github.com/rust-lang/crates.io-index" 1141 | checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" 1142 | 1143 | [[package]] 1144 | name = "pin-project-lite" 1145 | version = "0.2.9" 1146 | source = "registry+https://github.com/rust-lang/crates.io-index" 1147 | checksum = 
"e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" 1148 | 1149 | [[package]] 1150 | name = "pin-utils" 1151 | version = "0.1.0" 1152 | source = "registry+https://github.com/rust-lang/crates.io-index" 1153 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 1154 | 1155 | [[package]] 1156 | name = "pkg-config" 1157 | version = "0.3.27" 1158 | source = "registry+https://github.com/rust-lang/crates.io-index" 1159 | checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" 1160 | 1161 | [[package]] 1162 | name = "portable-atomic" 1163 | version = "1.3.3" 1164 | source = "registry+https://github.com/rust-lang/crates.io-index" 1165 | checksum = "767eb9f07d4a5ebcb39bbf2d452058a93c011373abf6832e24194a1c3f004794" 1166 | 1167 | [[package]] 1168 | name = "ppv-lite86" 1169 | version = "0.2.17" 1170 | source = "registry+https://github.com/rust-lang/crates.io-index" 1171 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 1172 | 1173 | [[package]] 1174 | name = "proc-macro2" 1175 | version = "1.0.59" 1176 | source = "registry+https://github.com/rust-lang/crates.io-index" 1177 | checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b" 1178 | dependencies = [ 1179 | "unicode-ident", 1180 | ] 1181 | 1182 | [[package]] 1183 | name = "protobuf" 1184 | version = "2.27.1" 1185 | source = "registry+https://github.com/rust-lang/crates.io-index" 1186 | checksum = "cf7e6d18738ecd0902d30d1ad232c9125985a3422929b16c65517b38adc14f96" 1187 | 1188 | [[package]] 1189 | name = "quote" 1190 | version = "1.0.28" 1191 | source = "registry+https://github.com/rust-lang/crates.io-index" 1192 | checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" 1193 | dependencies = [ 1194 | "proc-macro2", 1195 | ] 1196 | 1197 | [[package]] 1198 | name = "rand" 1199 | version = "0.8.5" 1200 | source = "registry+https://github.com/rust-lang/crates.io-index" 1201 | checksum = 
"34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 1202 | dependencies = [ 1203 | "libc", 1204 | "rand_chacha", 1205 | "rand_core", 1206 | ] 1207 | 1208 | [[package]] 1209 | name = "rand_chacha" 1210 | version = "0.3.1" 1211 | source = "registry+https://github.com/rust-lang/crates.io-index" 1212 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 1213 | dependencies = [ 1214 | "ppv-lite86", 1215 | "rand_core", 1216 | ] 1217 | 1218 | [[package]] 1219 | name = "rand_core" 1220 | version = "0.6.4" 1221 | source = "registry+https://github.com/rust-lang/crates.io-index" 1222 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 1223 | dependencies = [ 1224 | "getrandom", 1225 | ] 1226 | 1227 | [[package]] 1228 | name = "rawpointer" 1229 | version = "0.2.1" 1230 | source = "registry+https://github.com/rust-lang/crates.io-index" 1231 | checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" 1232 | 1233 | [[package]] 1234 | name = "rayon" 1235 | version = "1.7.0" 1236 | source = "registry+https://github.com/rust-lang/crates.io-index" 1237 | checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" 1238 | dependencies = [ 1239 | "either", 1240 | "rayon-core", 1241 | ] 1242 | 1243 | [[package]] 1244 | name = "rayon-core" 1245 | version = "1.11.0" 1246 | source = "registry+https://github.com/rust-lang/crates.io-index" 1247 | checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" 1248 | dependencies = [ 1249 | "crossbeam-channel", 1250 | "crossbeam-deque", 1251 | "crossbeam-utils", 1252 | "num_cpus", 1253 | ] 1254 | 1255 | [[package]] 1256 | name = "redox_syscall" 1257 | version = "0.2.16" 1258 | source = "registry+https://github.com/rust-lang/crates.io-index" 1259 | checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" 1260 | dependencies = [ 1261 | "bitflags", 1262 | ] 1263 | 1264 | [[package]] 1265 | name = 
"redox_syscall" 1266 | version = "0.3.5" 1267 | source = "registry+https://github.com/rust-lang/crates.io-index" 1268 | checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" 1269 | dependencies = [ 1270 | "bitflags", 1271 | ] 1272 | 1273 | [[package]] 1274 | name = "redox_users" 1275 | version = "0.4.3" 1276 | source = "registry+https://github.com/rust-lang/crates.io-index" 1277 | checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" 1278 | dependencies = [ 1279 | "getrandom", 1280 | "redox_syscall 0.2.16", 1281 | "thiserror", 1282 | ] 1283 | 1284 | [[package]] 1285 | name = "regex" 1286 | version = "1.8.4" 1287 | source = "registry+https://github.com/rust-lang/crates.io-index" 1288 | checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" 1289 | dependencies = [ 1290 | "aho-corasick", 1291 | "memchr", 1292 | "regex-syntax", 1293 | ] 1294 | 1295 | [[package]] 1296 | name = "regex-syntax" 1297 | version = "0.7.2" 1298 | source = "registry+https://github.com/rust-lang/crates.io-index" 1299 | checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" 1300 | 1301 | [[package]] 1302 | name = "reqwest" 1303 | version = "0.11.18" 1304 | source = "registry+https://github.com/rust-lang/crates.io-index" 1305 | checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" 1306 | dependencies = [ 1307 | "base64 0.21.2", 1308 | "bytes", 1309 | "encoding_rs", 1310 | "futures-core", 1311 | "futures-util", 1312 | "h2", 1313 | "http", 1314 | "http-body", 1315 | "hyper", 1316 | "hyper-tls", 1317 | "ipnet", 1318 | "js-sys", 1319 | "log", 1320 | "mime", 1321 | "native-tls", 1322 | "once_cell", 1323 | "percent-encoding", 1324 | "pin-project-lite", 1325 | "serde", 1326 | "serde_json", 1327 | "serde_urlencoded", 1328 | "tokio", 1329 | "tokio-native-tls", 1330 | "tower-service", 1331 | "url", 1332 | "wasm-bindgen", 1333 | "wasm-bindgen-futures", 1334 | "web-sys", 1335 | "winreg", 1336 
| ] 1337 | 1338 | [[package]] 1339 | name = "ring" 1340 | version = "0.16.20" 1341 | source = "registry+https://github.com/rust-lang/crates.io-index" 1342 | checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" 1343 | dependencies = [ 1344 | "cc", 1345 | "libc", 1346 | "once_cell", 1347 | "spin", 1348 | "untrusted", 1349 | "web-sys", 1350 | "winapi", 1351 | ] 1352 | 1353 | [[package]] 1354 | name = "rust-bert" 1355 | version = "0.21.0" 1356 | source = "registry+https://github.com/rust-lang/crates.io-index" 1357 | checksum = "8792cccdf842159ef04a35f247df68ccc42192e9a67e389e6cd0f1a83970a6a0" 1358 | dependencies = [ 1359 | "cached-path", 1360 | "dirs", 1361 | "half", 1362 | "lazy_static", 1363 | "ordered-float", 1364 | "regex", 1365 | "rust_tokenizers", 1366 | "serde", 1367 | "serde_json", 1368 | "tch", 1369 | "thiserror", 1370 | "uuid", 1371 | ] 1372 | 1373 | [[package]] 1374 | name = "rust_tokenizers" 1375 | version = "8.1.0" 1376 | source = "registry+https://github.com/rust-lang/crates.io-index" 1377 | checksum = "1f367f6b13bc686e822237b97caeb4b2e366dd1936ec204f11d266ede402c31b" 1378 | dependencies = [ 1379 | "csv", 1380 | "hashbrown 0.13.2", 1381 | "itertools", 1382 | "lazy_static", 1383 | "protobuf", 1384 | "rayon", 1385 | "regex", 1386 | "serde", 1387 | "serde_json", 1388 | "thiserror", 1389 | "unicode-normalization", 1390 | "unicode-normalization-alignments", 1391 | ] 1392 | 1393 | [[package]] 1394 | name = "rustix" 1395 | version = "0.37.19" 1396 | source = "registry+https://github.com/rust-lang/crates.io-index" 1397 | checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" 1398 | dependencies = [ 1399 | "bitflags", 1400 | "errno", 1401 | "io-lifetimes", 1402 | "libc", 1403 | "linux-raw-sys", 1404 | "windows-sys 0.48.0", 1405 | ] 1406 | 1407 | [[package]] 1408 | name = "rustls" 1409 | version = "0.20.8" 1410 | source = "registry+https://github.com/rust-lang/crates.io-index" 1411 | checksum = 
"fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" 1412 | dependencies = [ 1413 | "log", 1414 | "ring", 1415 | "sct", 1416 | "webpki", 1417 | ] 1418 | 1419 | [[package]] 1420 | name = "ryu" 1421 | version = "1.0.13" 1422 | source = "registry+https://github.com/rust-lang/crates.io-index" 1423 | checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" 1424 | 1425 | [[package]] 1426 | name = "safetensors" 1427 | version = "0.3.1" 1428 | source = "registry+https://github.com/rust-lang/crates.io-index" 1429 | checksum = "a1d818a2cb3f564a1844be835011acf5c7ec8ad1986a47f73abc7b5fea91cc3a" 1430 | dependencies = [ 1431 | "serde", 1432 | "serde_json", 1433 | ] 1434 | 1435 | [[package]] 1436 | name = "schannel" 1437 | version = "0.1.21" 1438 | source = "registry+https://github.com/rust-lang/crates.io-index" 1439 | checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" 1440 | dependencies = [ 1441 | "windows-sys 0.42.0", 1442 | ] 1443 | 1444 | [[package]] 1445 | name = "scopeguard" 1446 | version = "1.1.0" 1447 | source = "registry+https://github.com/rust-lang/crates.io-index" 1448 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 1449 | 1450 | [[package]] 1451 | name = "sct" 1452 | version = "0.7.0" 1453 | source = "registry+https://github.com/rust-lang/crates.io-index" 1454 | checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" 1455 | dependencies = [ 1456 | "ring", 1457 | "untrusted", 1458 | ] 1459 | 1460 | [[package]] 1461 | name = "security-framework" 1462 | version = "2.9.1" 1463 | source = "registry+https://github.com/rust-lang/crates.io-index" 1464 | checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8" 1465 | dependencies = [ 1466 | "bitflags", 1467 | "core-foundation", 1468 | "core-foundation-sys", 1469 | "libc", 1470 | "security-framework-sys", 1471 | ] 1472 | 1473 | [[package]] 1474 | name = "security-framework-sys" 1475 | 
version = "2.9.0" 1476 | source = "registry+https://github.com/rust-lang/crates.io-index" 1477 | checksum = "f51d0c0d83bec45f16480d0ce0058397a69e48fcdc52d1dc8855fb68acbd31a7" 1478 | dependencies = [ 1479 | "core-foundation-sys", 1480 | "libc", 1481 | ] 1482 | 1483 | [[package]] 1484 | name = "serde" 1485 | version = "1.0.163" 1486 | source = "registry+https://github.com/rust-lang/crates.io-index" 1487 | checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" 1488 | dependencies = [ 1489 | "serde_derive", 1490 | ] 1491 | 1492 | [[package]] 1493 | name = "serde_derive" 1494 | version = "1.0.163" 1495 | source = "registry+https://github.com/rust-lang/crates.io-index" 1496 | checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" 1497 | dependencies = [ 1498 | "proc-macro2", 1499 | "quote", 1500 | "syn", 1501 | ] 1502 | 1503 | [[package]] 1504 | name = "serde_json" 1505 | version = "1.0.96" 1506 | source = "registry+https://github.com/rust-lang/crates.io-index" 1507 | checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" 1508 | dependencies = [ 1509 | "itoa", 1510 | "ryu", 1511 | "serde", 1512 | ] 1513 | 1514 | [[package]] 1515 | name = "serde_urlencoded" 1516 | version = "0.7.1" 1517 | source = "registry+https://github.com/rust-lang/crates.io-index" 1518 | checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" 1519 | dependencies = [ 1520 | "form_urlencoded", 1521 | "itoa", 1522 | "ryu", 1523 | "serde", 1524 | ] 1525 | 1526 | [[package]] 1527 | name = "sha1" 1528 | version = "0.10.5" 1529 | source = "registry+https://github.com/rust-lang/crates.io-index" 1530 | checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" 1531 | dependencies = [ 1532 | "cfg-if", 1533 | "cpufeatures", 1534 | "digest", 1535 | ] 1536 | 1537 | [[package]] 1538 | name = "sha2" 1539 | version = "0.10.6" 1540 | source = "registry+https://github.com/rust-lang/crates.io-index" 1541 | 
checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" 1542 | dependencies = [ 1543 | "cfg-if", 1544 | "cpufeatures", 1545 | "digest", 1546 | ] 1547 | 1548 | [[package]] 1549 | name = "slab" 1550 | version = "0.4.8" 1551 | source = "registry+https://github.com/rust-lang/crates.io-index" 1552 | checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" 1553 | dependencies = [ 1554 | "autocfg", 1555 | ] 1556 | 1557 | [[package]] 1558 | name = "smallvec" 1559 | version = "1.10.0" 1560 | source = "registry+https://github.com/rust-lang/crates.io-index" 1561 | checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" 1562 | 1563 | [[package]] 1564 | name = "socket2" 1565 | version = "0.4.9" 1566 | source = "registry+https://github.com/rust-lang/crates.io-index" 1567 | checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" 1568 | dependencies = [ 1569 | "libc", 1570 | "winapi", 1571 | ] 1572 | 1573 | [[package]] 1574 | name = "spin" 1575 | version = "0.5.2" 1576 | source = "registry+https://github.com/rust-lang/crates.io-index" 1577 | checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" 1578 | 1579 | [[package]] 1580 | name = "strsim" 1581 | version = "0.10.0" 1582 | source = "registry+https://github.com/rust-lang/crates.io-index" 1583 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 1584 | 1585 | [[package]] 1586 | name = "subtle" 1587 | version = "2.5.0" 1588 | source = "registry+https://github.com/rust-lang/crates.io-index" 1589 | checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" 1590 | 1591 | [[package]] 1592 | name = "syn" 1593 | version = "2.0.18" 1594 | source = "registry+https://github.com/rust-lang/crates.io-index" 1595 | checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" 1596 | dependencies = [ 1597 | "proc-macro2", 1598 | "quote", 1599 | "unicode-ident", 1600 | ] 1601 
| 1602 | [[package]] 1603 | name = "tar" 1604 | version = "0.4.38" 1605 | source = "registry+https://github.com/rust-lang/crates.io-index" 1606 | checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6" 1607 | dependencies = [ 1608 | "filetime", 1609 | "libc", 1610 | "xattr", 1611 | ] 1612 | 1613 | [[package]] 1614 | name = "tch" 1615 | version = "0.13.0" 1616 | source = "registry+https://github.com/rust-lang/crates.io-index" 1617 | checksum = "9cbd9ce6fb581a1b918db880b649d1364b50f7f6717eda8497bcdc929cddd4b9" 1618 | dependencies = [ 1619 | "half", 1620 | "lazy_static", 1621 | "libc", 1622 | "ndarray", 1623 | "rand", 1624 | "safetensors", 1625 | "thiserror", 1626 | "torch-sys", 1627 | "zip", 1628 | ] 1629 | 1630 | [[package]] 1631 | name = "tempfile" 1632 | version = "3.5.0" 1633 | source = "registry+https://github.com/rust-lang/crates.io-index" 1634 | checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" 1635 | dependencies = [ 1636 | "cfg-if", 1637 | "fastrand", 1638 | "redox_syscall 0.3.5", 1639 | "rustix", 1640 | "windows-sys 0.45.0", 1641 | ] 1642 | 1643 | [[package]] 1644 | name = "thiserror" 1645 | version = "1.0.40" 1646 | source = "registry+https://github.com/rust-lang/crates.io-index" 1647 | checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" 1648 | dependencies = [ 1649 | "thiserror-impl", 1650 | ] 1651 | 1652 | [[package]] 1653 | name = "thiserror-impl" 1654 | version = "1.0.40" 1655 | source = "registry+https://github.com/rust-lang/crates.io-index" 1656 | checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" 1657 | dependencies = [ 1658 | "proc-macro2", 1659 | "quote", 1660 | "syn", 1661 | ] 1662 | 1663 | [[package]] 1664 | name = "time" 1665 | version = "0.3.21" 1666 | source = "registry+https://github.com/rust-lang/crates.io-index" 1667 | checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc" 1668 | dependencies = [ 1669 | "serde", 
1670 | "time-core", 1671 | ] 1672 | 1673 | [[package]] 1674 | name = "time-core" 1675 | version = "0.1.1" 1676 | source = "registry+https://github.com/rust-lang/crates.io-index" 1677 | checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" 1678 | 1679 | [[package]] 1680 | name = "tinyvec" 1681 | version = "1.6.0" 1682 | source = "registry+https://github.com/rust-lang/crates.io-index" 1683 | checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" 1684 | dependencies = [ 1685 | "tinyvec_macros", 1686 | ] 1687 | 1688 | [[package]] 1689 | name = "tinyvec_macros" 1690 | version = "0.1.1" 1691 | source = "registry+https://github.com/rust-lang/crates.io-index" 1692 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 1693 | 1694 | [[package]] 1695 | name = "tokio" 1696 | version = "1.28.2" 1697 | source = "registry+https://github.com/rust-lang/crates.io-index" 1698 | checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2" 1699 | dependencies = [ 1700 | "autocfg", 1701 | "bytes", 1702 | "libc", 1703 | "mio", 1704 | "num_cpus", 1705 | "pin-project-lite", 1706 | "socket2", 1707 | "windows-sys 0.48.0", 1708 | ] 1709 | 1710 | [[package]] 1711 | name = "tokio-native-tls" 1712 | version = "0.3.1" 1713 | source = "registry+https://github.com/rust-lang/crates.io-index" 1714 | checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" 1715 | dependencies = [ 1716 | "native-tls", 1717 | "tokio", 1718 | ] 1719 | 1720 | [[package]] 1721 | name = "tokio-util" 1722 | version = "0.7.8" 1723 | source = "registry+https://github.com/rust-lang/crates.io-index" 1724 | checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" 1725 | dependencies = [ 1726 | "bytes", 1727 | "futures-core", 1728 | "futures-sink", 1729 | "pin-project-lite", 1730 | "tokio", 1731 | "tracing", 1732 | ] 1733 | 1734 | [[package]] 1735 | name = "torch-sys" 1736 | version = "0.13.0" 1737 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 1738 | checksum = "42b2b81a479510717464df1d07c02cb4aebb26539a39b5db6637dda114a476cb" 1739 | dependencies = [ 1740 | "anyhow", 1741 | "cc", 1742 | "libc", 1743 | "serde", 1744 | "serde_json", 1745 | "ureq", 1746 | "zip", 1747 | ] 1748 | 1749 | [[package]] 1750 | name = "tower-service" 1751 | version = "0.3.2" 1752 | source = "registry+https://github.com/rust-lang/crates.io-index" 1753 | checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" 1754 | 1755 | [[package]] 1756 | name = "tracing" 1757 | version = "0.1.37" 1758 | source = "registry+https://github.com/rust-lang/crates.io-index" 1759 | checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" 1760 | dependencies = [ 1761 | "cfg-if", 1762 | "pin-project-lite", 1763 | "tracing-core", 1764 | ] 1765 | 1766 | [[package]] 1767 | name = "tracing-core" 1768 | version = "0.1.31" 1769 | source = "registry+https://github.com/rust-lang/crates.io-index" 1770 | checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" 1771 | dependencies = [ 1772 | "once_cell", 1773 | ] 1774 | 1775 | [[package]] 1776 | name = "try-lock" 1777 | version = "0.2.4" 1778 | source = "registry+https://github.com/rust-lang/crates.io-index" 1779 | checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" 1780 | 1781 | [[package]] 1782 | name = "typenum" 1783 | version = "1.16.0" 1784 | source = "registry+https://github.com/rust-lang/crates.io-index" 1785 | checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" 1786 | 1787 | [[package]] 1788 | name = "unicode-bidi" 1789 | version = "0.3.13" 1790 | source = "registry+https://github.com/rust-lang/crates.io-index" 1791 | checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" 1792 | 1793 | [[package]] 1794 | name = "unicode-ident" 1795 | version = "1.0.9" 1796 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1797 | checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" 1798 | 1799 | [[package]] 1800 | name = "unicode-normalization" 1801 | version = "0.1.22" 1802 | source = "registry+https://github.com/rust-lang/crates.io-index" 1803 | checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" 1804 | dependencies = [ 1805 | "tinyvec", 1806 | ] 1807 | 1808 | [[package]] 1809 | name = "unicode-normalization-alignments" 1810 | version = "0.1.12" 1811 | source = "registry+https://github.com/rust-lang/crates.io-index" 1812 | checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" 1813 | dependencies = [ 1814 | "smallvec", 1815 | ] 1816 | 1817 | [[package]] 1818 | name = "unicode-width" 1819 | version = "0.1.10" 1820 | source = "registry+https://github.com/rust-lang/crates.io-index" 1821 | checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" 1822 | 1823 | [[package]] 1824 | name = "untrusted" 1825 | version = "0.7.1" 1826 | source = "registry+https://github.com/rust-lang/crates.io-index" 1827 | checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" 1828 | 1829 | [[package]] 1830 | name = "ureq" 1831 | version = "2.6.2" 1832 | source = "registry+https://github.com/rust-lang/crates.io-index" 1833 | checksum = "338b31dd1314f68f3aabf3ed57ab922df95ffcd902476ca7ba3c4ce7b908c46d" 1834 | dependencies = [ 1835 | "base64 0.13.1", 1836 | "encoding_rs", 1837 | "flate2", 1838 | "log", 1839 | "once_cell", 1840 | "rustls", 1841 | "serde", 1842 | "serde_json", 1843 | "url", 1844 | "webpki", 1845 | "webpki-roots", 1846 | ] 1847 | 1848 | [[package]] 1849 | name = "url" 1850 | version = "2.4.0" 1851 | source = "registry+https://github.com/rust-lang/crates.io-index" 1852 | checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" 1853 | dependencies = [ 1854 | "form_urlencoded", 1855 | "idna", 1856 | 
"percent-encoding", 1857 | ] 1858 | 1859 | [[package]] 1860 | name = "utf8parse" 1861 | version = "0.2.1" 1862 | source = "registry+https://github.com/rust-lang/crates.io-index" 1863 | checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" 1864 | 1865 | [[package]] 1866 | name = "uuid" 1867 | version = "1.3.3" 1868 | source = "registry+https://github.com/rust-lang/crates.io-index" 1869 | checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2" 1870 | dependencies = [ 1871 | "getrandom", 1872 | ] 1873 | 1874 | [[package]] 1875 | name = "vcpkg" 1876 | version = "0.2.15" 1877 | source = "registry+https://github.com/rust-lang/crates.io-index" 1878 | checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 1879 | 1880 | [[package]] 1881 | name = "version_check" 1882 | version = "0.9.4" 1883 | source = "registry+https://github.com/rust-lang/crates.io-index" 1884 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 1885 | 1886 | [[package]] 1887 | name = "want" 1888 | version = "0.3.0" 1889 | source = "registry+https://github.com/rust-lang/crates.io-index" 1890 | checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" 1891 | dependencies = [ 1892 | "log", 1893 | "try-lock", 1894 | ] 1895 | 1896 | [[package]] 1897 | name = "wasi" 1898 | version = "0.11.0+wasi-snapshot-preview1" 1899 | source = "registry+https://github.com/rust-lang/crates.io-index" 1900 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1901 | 1902 | [[package]] 1903 | name = "wasm-bindgen" 1904 | version = "0.2.86" 1905 | source = "registry+https://github.com/rust-lang/crates.io-index" 1906 | checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73" 1907 | dependencies = [ 1908 | "cfg-if", 1909 | "wasm-bindgen-macro", 1910 | ] 1911 | 1912 | [[package]] 1913 | name = "wasm-bindgen-backend" 1914 | version = "0.2.86" 1915 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1916 | checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb" 1917 | dependencies = [ 1918 | "bumpalo", 1919 | "log", 1920 | "once_cell", 1921 | "proc-macro2", 1922 | "quote", 1923 | "syn", 1924 | "wasm-bindgen-shared", 1925 | ] 1926 | 1927 | [[package]] 1928 | name = "wasm-bindgen-futures" 1929 | version = "0.4.36" 1930 | source = "registry+https://github.com/rust-lang/crates.io-index" 1931 | checksum = "2d1985d03709c53167ce907ff394f5316aa22cb4e12761295c5dc57dacb6297e" 1932 | dependencies = [ 1933 | "cfg-if", 1934 | "js-sys", 1935 | "wasm-bindgen", 1936 | "web-sys", 1937 | ] 1938 | 1939 | [[package]] 1940 | name = "wasm-bindgen-macro" 1941 | version = "0.2.86" 1942 | source = "registry+https://github.com/rust-lang/crates.io-index" 1943 | checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258" 1944 | dependencies = [ 1945 | "quote", 1946 | "wasm-bindgen-macro-support", 1947 | ] 1948 | 1949 | [[package]] 1950 | name = "wasm-bindgen-macro-support" 1951 | version = "0.2.86" 1952 | source = "registry+https://github.com/rust-lang/crates.io-index" 1953 | checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8" 1954 | dependencies = [ 1955 | "proc-macro2", 1956 | "quote", 1957 | "syn", 1958 | "wasm-bindgen-backend", 1959 | "wasm-bindgen-shared", 1960 | ] 1961 | 1962 | [[package]] 1963 | name = "wasm-bindgen-shared" 1964 | version = "0.2.86" 1965 | source = "registry+https://github.com/rust-lang/crates.io-index" 1966 | checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93" 1967 | 1968 | [[package]] 1969 | name = "web-sys" 1970 | version = "0.3.63" 1971 | source = "registry+https://github.com/rust-lang/crates.io-index" 1972 | checksum = "3bdd9ef4e984da1187bf8110c5cf5b845fbc87a23602cdf912386a76fcd3a7c2" 1973 | dependencies = [ 1974 | "js-sys", 1975 | "wasm-bindgen", 1976 | ] 1977 | 1978 | [[package]] 1979 | name = "webpki" 1980 | 
version = "0.22.0" 1981 | source = "registry+https://github.com/rust-lang/crates.io-index" 1982 | checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" 1983 | dependencies = [ 1984 | "ring", 1985 | "untrusted", 1986 | ] 1987 | 1988 | [[package]] 1989 | name = "webpki-roots" 1990 | version = "0.22.6" 1991 | source = "registry+https://github.com/rust-lang/crates.io-index" 1992 | checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" 1993 | dependencies = [ 1994 | "webpki", 1995 | ] 1996 | 1997 | [[package]] 1998 | name = "winapi" 1999 | version = "0.3.9" 2000 | source = "registry+https://github.com/rust-lang/crates.io-index" 2001 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 2002 | dependencies = [ 2003 | "winapi-i686-pc-windows-gnu", 2004 | "winapi-x86_64-pc-windows-gnu", 2005 | ] 2006 | 2007 | [[package]] 2008 | name = "winapi-i686-pc-windows-gnu" 2009 | version = "0.4.0" 2010 | source = "registry+https://github.com/rust-lang/crates.io-index" 2011 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 2012 | 2013 | [[package]] 2014 | name = "winapi-x86_64-pc-windows-gnu" 2015 | version = "0.4.0" 2016 | source = "registry+https://github.com/rust-lang/crates.io-index" 2017 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 2018 | 2019 | [[package]] 2020 | name = "windows-sys" 2021 | version = "0.42.0" 2022 | source = "registry+https://github.com/rust-lang/crates.io-index" 2023 | checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" 2024 | dependencies = [ 2025 | "windows_aarch64_gnullvm 0.42.2", 2026 | "windows_aarch64_msvc 0.42.2", 2027 | "windows_i686_gnu 0.42.2", 2028 | "windows_i686_msvc 0.42.2", 2029 | "windows_x86_64_gnu 0.42.2", 2030 | "windows_x86_64_gnullvm 0.42.2", 2031 | "windows_x86_64_msvc 0.42.2", 2032 | ] 2033 | 2034 | [[package]] 2035 | name = "windows-sys" 2036 | version = "0.45.0" 2037 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 2038 | checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" 2039 | dependencies = [ 2040 | "windows-targets 0.42.2", 2041 | ] 2042 | 2043 | [[package]] 2044 | name = "windows-sys" 2045 | version = "0.48.0" 2046 | source = "registry+https://github.com/rust-lang/crates.io-index" 2047 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" 2048 | dependencies = [ 2049 | "windows-targets 0.48.0", 2050 | ] 2051 | 2052 | [[package]] 2053 | name = "windows-targets" 2054 | version = "0.42.2" 2055 | source = "registry+https://github.com/rust-lang/crates.io-index" 2056 | checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" 2057 | dependencies = [ 2058 | "windows_aarch64_gnullvm 0.42.2", 2059 | "windows_aarch64_msvc 0.42.2", 2060 | "windows_i686_gnu 0.42.2", 2061 | "windows_i686_msvc 0.42.2", 2062 | "windows_x86_64_gnu 0.42.2", 2063 | "windows_x86_64_gnullvm 0.42.2", 2064 | "windows_x86_64_msvc 0.42.2", 2065 | ] 2066 | 2067 | [[package]] 2068 | name = "windows-targets" 2069 | version = "0.48.0" 2070 | source = "registry+https://github.com/rust-lang/crates.io-index" 2071 | checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" 2072 | dependencies = [ 2073 | "windows_aarch64_gnullvm 0.48.0", 2074 | "windows_aarch64_msvc 0.48.0", 2075 | "windows_i686_gnu 0.48.0", 2076 | "windows_i686_msvc 0.48.0", 2077 | "windows_x86_64_gnu 0.48.0", 2078 | "windows_x86_64_gnullvm 0.48.0", 2079 | "windows_x86_64_msvc 0.48.0", 2080 | ] 2081 | 2082 | [[package]] 2083 | name = "windows_aarch64_gnullvm" 2084 | version = "0.42.2" 2085 | source = "registry+https://github.com/rust-lang/crates.io-index" 2086 | checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" 2087 | 2088 | [[package]] 2089 | name = "windows_aarch64_gnullvm" 2090 | version = "0.48.0" 2091 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 2092 | checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" 2093 | 2094 | [[package]] 2095 | name = "windows_aarch64_msvc" 2096 | version = "0.42.2" 2097 | source = "registry+https://github.com/rust-lang/crates.io-index" 2098 | checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" 2099 | 2100 | [[package]] 2101 | name = "windows_aarch64_msvc" 2102 | version = "0.48.0" 2103 | source = "registry+https://github.com/rust-lang/crates.io-index" 2104 | checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" 2105 | 2106 | [[package]] 2107 | name = "windows_i686_gnu" 2108 | version = "0.42.2" 2109 | source = "registry+https://github.com/rust-lang/crates.io-index" 2110 | checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" 2111 | 2112 | [[package]] 2113 | name = "windows_i686_gnu" 2114 | version = "0.48.0" 2115 | source = "registry+https://github.com/rust-lang/crates.io-index" 2116 | checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" 2117 | 2118 | [[package]] 2119 | name = "windows_i686_msvc" 2120 | version = "0.42.2" 2121 | source = "registry+https://github.com/rust-lang/crates.io-index" 2122 | checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" 2123 | 2124 | [[package]] 2125 | name = "windows_i686_msvc" 2126 | version = "0.48.0" 2127 | source = "registry+https://github.com/rust-lang/crates.io-index" 2128 | checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" 2129 | 2130 | [[package]] 2131 | name = "windows_x86_64_gnu" 2132 | version = "0.42.2" 2133 | source = "registry+https://github.com/rust-lang/crates.io-index" 2134 | checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" 2135 | 2136 | [[package]] 2137 | name = "windows_x86_64_gnu" 2138 | version = "0.48.0" 2139 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 2140 | checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" 2141 | 2142 | [[package]] 2143 | name = "windows_x86_64_gnullvm" 2144 | version = "0.42.2" 2145 | source = "registry+https://github.com/rust-lang/crates.io-index" 2146 | checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" 2147 | 2148 | [[package]] 2149 | name = "windows_x86_64_gnullvm" 2150 | version = "0.48.0" 2151 | source = "registry+https://github.com/rust-lang/crates.io-index" 2152 | checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" 2153 | 2154 | [[package]] 2155 | name = "windows_x86_64_msvc" 2156 | version = "0.42.2" 2157 | source = "registry+https://github.com/rust-lang/crates.io-index" 2158 | checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" 2159 | 2160 | [[package]] 2161 | name = "windows_x86_64_msvc" 2162 | version = "0.48.0" 2163 | source = "registry+https://github.com/rust-lang/crates.io-index" 2164 | checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" 2165 | 2166 | [[package]] 2167 | name = "winreg" 2168 | version = "0.10.1" 2169 | source = "registry+https://github.com/rust-lang/crates.io-index" 2170 | checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" 2171 | dependencies = [ 2172 | "winapi", 2173 | ] 2174 | 2175 | [[package]] 2176 | name = "xattr" 2177 | version = "0.2.3" 2178 | source = "registry+https://github.com/rust-lang/crates.io-index" 2179 | checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" 2180 | dependencies = [ 2181 | "libc", 2182 | ] 2183 | 2184 | [[package]] 2185 | name = "zip" 2186 | version = "0.6.6" 2187 | source = "registry+https://github.com/rust-lang/crates.io-index" 2188 | checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" 2189 | dependencies = [ 2190 | "aes", 2191 | "byteorder", 2192 | "bzip2", 2193 | 
"constant_time_eq", 2194 | "crc32fast", 2195 | "crossbeam-utils", 2196 | "flate2", 2197 | "hmac", 2198 | "pbkdf2", 2199 | "sha1", 2200 | "time", 2201 | "zstd", 2202 | ] 2203 | 2204 | [[package]] 2205 | name = "zstd" 2206 | version = "0.11.2+zstd.1.5.2" 2207 | source = "registry+https://github.com/rust-lang/crates.io-index" 2208 | checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" 2209 | dependencies = [ 2210 | "zstd-safe", 2211 | ] 2212 | 2213 | [[package]] 2214 | name = "zstd-safe" 2215 | version = "5.0.2+zstd.1.5.2" 2216 | source = "registry+https://github.com/rust-lang/crates.io-index" 2217 | checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" 2218 | dependencies = [ 2219 | "libc", 2220 | "zstd-sys", 2221 | ] 2222 | 2223 | [[package]] 2224 | name = "zstd-sys" 2225 | version = "2.0.8+zstd.1.5.5" 2226 | source = "registry+https://github.com/rust-lang/crates.io-index" 2227 | checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" 2228 | dependencies = [ 2229 | "cc", 2230 | "libc", 2231 | "pkg-config", 2232 | ] 2233 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "compute-embeddings" 3 | description = "A small tool to compute the embeddings of a list of JSON documents" 4 | version = "0.1.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | anyhow = "1.0.71" 9 | clap = { version = "4.3.2", features = ["derive"] } 10 | indicatif = "0.17.5" 11 | itertools = "0.10.5" 12 | rust-bert = { version = "0.21.0", features = ["download-libtorch"] } 13 | serde = "1.0.163" 14 | serde_json = "1.0.96" 15 | ureq = { version = "2.6.2", features = ["json", "charset"] } 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 
3 | Copyright (c) 2023 Meilisearch 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # compute-embeddings 2 | A small tool to compute the embeddings of a list of JSON documents 3 | 4 | ## Installation 5 | 6 | You must have the Rust toolchain installed, it is pretty easy to install [by following the official tutorial](https://www.rust-lang.org/tools/install). 
7 | 8 | ## Example Input Documents 9 | 10 | ```json 11 | [ 12 | { 13 | "name": "3-Year Unlimited Cloud Storage Service Activation Card - Other", 14 | "description": "Enjoy 3 years of unlimited Cloud storage service with this activation card, which allows you to remotely access your favorite music, movies and other media via a compatible device and enables private file sharing with loved ones.", 15 | "brand": "Pogoplug", 16 | "categories": [ 17 | "Best Buy Gift Cards", 18 | "Entertainment Gift Cards" 19 | ], 20 | "hierarchicalCategories": { 21 | "lvl0": "Best Buy Gift Cards", 22 | "lvl1": "Best Buy Gift Cards > Entertainment Gift Cards" 23 | }, 24 | "type": "Online data backup", 25 | "price": 69, 26 | "price_range": "50 - 100", 27 | "image": "https://cdn-demo.algolia.com/bestbuy/1696302_sc.jpg", 28 | "url": "http://www.bestbuy.com/site/3-year-unlimited-cloud-storage-service-activation-card-other/1696302.p?id=1219066776306&skuId=1696302&cmp=RMX&ky=1uWSHMdQqBeVJB9cXgEke60s5EjfS6M1W", 29 | "free_shipping": true, 30 | "popularity": 10000, 31 | "rating": 2, 32 | "objectID": "1696302" 33 | } 34 | ] 35 | ``` 36 | 37 | ## Usage for Meilisearch 38 | 39 | ```bash 40 | cat file.json | cargo run --release --bin ce-dataset -- --batched-documents 8 --semantic-api all-mini-lm-l6v2 --documents-style meilisearch name description brand categories type objectID > file-with-embeddings.json 41 | ``` 42 | 43 | ### Example Output File 44 | 45 | ```json5 46 | [ 47 | { 48 | "name": "3-Year Unlimited Cloud Storage Service Activation Card - Other", 49 | "description": "Enjoy 3 years of unlimited Cloud storage service with this activation card, which allows you to remotely access your favorite music, movies and other media via a compatible device and enables private file sharing with loved ones.", 50 | "brand": "Pogoplug", 51 | "categories": [ 52 | "Best Buy Gift Cards", 53 | "Entertainment Gift Cards" 54 | ], 55 | "hierarchicalCategories": { 56 | "lvl0": "Best Buy Gift Cards", 57 | "lvl1": 
"Best Buy Gift Cards > Entertainment Gift Cards" 58 | }, 59 | "type": "Online data backup", 60 | "price": 69.0, 61 | "price_range": "50 - 100", 62 | "image": "https://cdn-demo.algolia.com/bestbuy/1696302_sc.jpg", 63 | "url": "http://www.bestbuy.com/site/3-year-unlimited-cloud-storage-service-activation-card-other/1696302.p?id=1219066776306&skuId=1696302&cmp=RMX&ky=1uWSHMdQqBeVJB9cXgEke60s5EjfS6M1W", 64 | "free_shipping": true, 65 | "popularity": 10000, 66 | "rating": 2, 67 | "objectID": "1696302", 68 | "_vectors": [ 69 | -0.10141887, 70 | 0.009569897, 71 | 0.04121973 72 | // [...] 73 | ] 74 | } 75 | ] 76 | ``` 77 | 78 | ### Query Meilisearch 79 | 80 | Once you have sent your documents into your Meilisearch index you can query it with vectors too! 81 | 82 | ```bash 83 | echo '{ 84 | "vector": '$(cargo run --release --bin ce-query -- --semantic-api all-mini-lm-l6v2 'toys r us')', 85 | "attributesToRetrieve": ["name", "description"] 86 | }' \ 87 | | curl -X POST -H 'content-type: application/json' 'localhost:7700/indexes/movies/search' --data-binary @- | jq 88 | ``` 89 | -------------------------------------------------------------------------------- /src/bin/ce-dataset.rs: -------------------------------------------------------------------------------- 1 | use clap::{Parser, ValueEnum}; 2 | use std::env::var; 3 | use std::io::{self, BufReader, BufWriter}; 4 | use std::time::Instant; 5 | 6 | use compute_embeddings::{openai_vectors, SemanticApi}; 7 | use indicatif::{ProgressBar, ProgressIterator, ProgressStyle}; 8 | use itertools::Itertools; 9 | use rust_bert::pipelines::sentence_embeddings::{ 10 | SentenceEmbeddingsBuilder, SentenceEmbeddingsModelType, 11 | }; 12 | use serde::{Deserialize, Serialize}; 13 | use serde_json::{Map, Value}; 14 | 15 | #[derive(Parser, Debug)] 16 | #[command(author, version, about, long_about = None)] 17 | struct Args { 18 | /// The style of the output documents 19 | #[arg(long)] 20 | documents_style: DocumentStyle, 21 | 22 | /// Number of 
documents processed at the same time. 23 | #[arg(long, default_value_t = 4)] 24 | batched_documents: usize, 25 | 26 | /// The API you want to use to compute the embeddings. 27 | #[arg(long)] 28 | semantic_api: SemanticApi, 29 | 30 | /// The fields to concatenate in this specific order to generate the embeddings. 31 | documents_fields: Vec<String>, 32 | } 33 | /// Reads a JSON array of documents from stdin, embeds them in batches of `batched_documents`, and writes them with their vectors to stdout in the requested style. 34 | fn main() -> anyhow::Result<()> { 35 | let Args { 36 | documents_style, 37 | batched_documents, 38 | semantic_api, 39 | documents_fields, 40 | } = Args::parse(); 41 | 42 | let reader = BufReader::new(io::stdin()); 43 | let documents: Vec<Input> = serde_json::from_reader(reader)?; 44 | 45 | let progress_style = ProgressStyle::with_template("{wide_bar} {pos}/{len} {eta}").unwrap(); 46 | let progress_bar = ProgressBar::new(documents.len() as u64).with_style(progress_style); 47 | 48 | // Set-up sentence embeddings model 49 | let now = Instant::now(); 50 | let mut model = None; 51 | 52 | eprintln!("It took {:.02?} to initialize the model.", now.elapsed()); 53 | 54 | let mut output = Vec::new(); 55 | for chunk in documents 56 | .into_iter() 57 | .enumerate() 58 | .progress_with(progress_bar) 59 | .chunks(batched_documents) 60 | .into_iter() 61 | { 62 | let chunk: Vec<_> = chunk.collect(); 63 | let sentences: Vec<_> = chunk 64 | .iter() 65 | .map(|(_, payload)| payload.text(&documents_fields)) 66 | .collect(); 67 | 68 | let vectors = match semantic_api { 69 | SemanticApi::OpenAi => { 70 | let api_key = var("OPENAI_API_KEY").expect("missing OPENAI_API_KEY env variable"); 71 | openai_vectors(sentences, &api_key)? 72 | } 73 | SemanticApi::AllMiniLmL6V2 => { 74 | let model = match model.as_ref() { 75 | Some(model) => model, 76 | None => { 77 | let m = SentenceEmbeddingsBuilder::remote( 78 | SentenceEmbeddingsModelType::AllMiniLmL6V2, 79 | ) 80 | .create_model()?; 81 | model.get_or_insert(m) 82 | } 83 | }; 84 | 85 | model.encode(&sentences)? 
86 | } 87 | }; 88 | 89 | for entry in chunk.into_iter().zip(vectors) { 90 | output.push(entry); 91 | } 92 | } 93 | 94 | match documents_style { 95 | DocumentStyle::Meilisearch => { 96 | let output: Vec<_> = output 97 | .into_iter() 98 | .map(|((_, mut payload), vector)| { 99 | payload._vectors = Some(vector); 100 | payload 101 | }) 102 | .collect(); 103 | let writer = BufWriter::new(io::stdout()); 104 | serde_json::to_writer_pretty(writer, &output)?; 105 | } 106 | DocumentStyle::Qdrant => { 107 | let points = output 108 | .into_iter() 109 | .map(|((id, payload), vector)| Point { 110 | id, 111 | vector, 112 | payload, 113 | }) 114 | .collect(); 115 | let output = Output { points }; 116 | let writer = BufWriter::new(io::stdout()); 117 | serde_json::to_writer_pretty(writer, &output)?; 118 | } 119 | } 120 | 121 | Ok(()) 122 | } 123 | /// A document: every JSON field as-is, plus the embedding, which is never read from the input and only serialized once set. 124 | #[derive(Debug, Serialize, Deserialize)] 125 | struct Input { 126 | #[serde(flatten)] 127 | fields: Map<String, Value>, 128 | #[serde(skip_deserializing, skip_serializing_if = "Option::is_none")] 129 | _vectors: Option<Vec<f32>>, 130 | } 131 | 132 | impl Input { 133 | fn text<I>(&self, fields: I) -> String 134 | where 135 | I: IntoIterator, 136 | I::Item: AsRef<str>, 137 | { 138 | let mut internal_buffer = String::new(); 139 | let mut text = String::new(); 140 | for field_name in fields { 141 | if let Some(value) = self.fields.get(field_name.as_ref()) { 142 | internal_buffer.clear(); 143 | if let Some(t) = json_to_string(value, &mut internal_buffer) { 144 | text.push_str(t); 145 | text.push(' '); 146 | } 147 | } 148 | } 149 | 150 | text 151 | } 152 | } 153 | 154 | /// Transform a JSON value into a string that can be indexed. 
155 | fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a str> { 156 | fn inner(value: &Value, output: &mut String) -> bool { 157 | use std::fmt::Write; 158 | match value { 159 | Value::Null | Value::Object(_) => false, 160 | Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(), 161 | Value::Number(number) => write!(output, "{}", number).is_ok(), 162 | Value::String(string) => write!(output, "{}", string).is_ok(), 163 | Value::Array(array) => { 164 | let mut count = 0; 165 | for value in array { 166 | if inner(value, output) { 167 | output.push_str(" "); 168 | count += 1; 169 | } 170 | } 171 | // check that at least one value was written 172 | count != 0 173 | } 174 | } 175 | } 176 | 177 | if let Value::String(string) = value { 178 | Some(string) 179 | } else if inner(value, buffer) { 180 | Some(buffer) 181 | } else { 182 | None 183 | } 184 | } 185 | 186 | // { 187 | // "points": [ 188 | // {"id": 1, "vector": [0.05, 0.61, 0.76, 0.74], "payload": {"city": "Berlin" }} 189 | // ] 190 | // } 191 | #[derive(Debug, Serialize)] 192 | struct Output { 193 | points: Vec<Point>, 194 | } 195 | /// One output point: `id` is the document's position in the input array. 196 | #[derive(Debug, Serialize)] 197 | struct Point { 198 | id: usize, 199 | vector: Vec<f32>, 200 | payload: Input, 201 | } 202 | 203 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] 204 | enum DocumentStyle { 205 | Meilisearch, 206 | Qdrant, 207 | } 208 | -------------------------------------------------------------------------------- /src/bin/ce-query.rs: -------------------------------------------------------------------------------- 1 | use std::env::var; 2 | use std::time::Instant; 3 | use std::{io}; 4 | 5 | use clap::Parser; 6 | use compute_embeddings::{openai_vectors, SemanticApi}; 7 | use rust_bert::pipelines::sentence_embeddings::{ 8 | SentenceEmbeddingsBuilder, SentenceEmbeddingsModelType, 9 | }; 10 | 11 | #[derive(Parser, Debug)] 12 | #[command(author, version, about, long_about = None)] 13 | struct Args { 14 | /// The API you want 
to use to compute the embeddings. 15 | #[arg(long)] 16 | semantic_api: SemanticApi, 17 | 18 | /// Generate the embeddings of this query. 19 | query: String, 20 | } 21 | 22 | fn main() -> anyhow::Result<()> { 23 | let Args { 24 | semantic_api, 25 | query, 26 | } = Args::parse(); 27 | 28 | let vector = match semantic_api { 29 | SemanticApi::OpenAi => { 30 | let now = Instant::now(); 31 | let api_key = var("OPENAI_API_KEY").expect("missing OPENAI_API_KEY env variable"); 32 | let vector = openai_vectors(vec![query], &api_key)?.remove(0); 33 | eprintln!("It took {:.02?} to encode the query.", now.elapsed()); 34 | vector 35 | } 36 | SemanticApi::AllMiniLmL6V2 => { 37 | let now = Instant::now(); 38 | // Set-up sentence embeddings model 39 | let model = 40 | SentenceEmbeddingsBuilder::remote(SentenceEmbeddingsModelType::AllMiniLmL6V2) 41 | .create_model()?; 42 | eprintln!("It took {:.02?} to initialize the model.", now.elapsed()); 43 | 44 | let now = Instant::now(); 45 | let vector = model.encode(&[&query])?.remove(0); 46 | eprintln!("It took {:.02?} to encode the query.", now.elapsed()); 47 | vector 48 | } 49 | }; 50 | 51 | let writer = io::stdout(); 52 | serde_json::to_writer(writer, &vector)?; 53 | 54 | Ok(()) 55 | } 56 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::thread; 2 | use std::time::Duration; 3 | 4 | use anyhow::bail; 5 | use clap::ValueEnum; 6 | use serde::Deserialize; 7 | 8 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] 9 | pub enum SemanticApi { 10 | OpenAi, 11 | AllMiniLmL6V2, 12 | } 13 | 14 | #[derive(Debug, Deserialize)] 15 | struct OpenAiResponse { 16 | data: Vec, 17 | } 18 | 19 | #[derive(Debug, Deserialize)] 20 | struct Embedding { 21 | embedding: Vec, 22 | // object: String, 23 | // index: usize, 24 | } 25 | 26 | pub fn openai_vectors( 27 | sentences: Vec, 28 | openai_api_key: &str, 29 | 
) -> anyhow::Result>> { 30 | use ureq::Error; 31 | 32 | let mut sentences = sentences; 33 | let mut wait_for = Duration::from_secs(2); 34 | for _ in 0..100 { 35 | let result = ureq::post("https://api.openai.com/v1/embeddings") 36 | .set("Authorization", &format!("Bearer {openai_api_key}")) 37 | .send_json(ureq::json!({ 38 | "model": "text-embedding-ada-002".to_string(), 39 | "input": sentences, 40 | })); 41 | match result { 42 | Err(Error::Status(status, response)) if matches!(status, 503 | 429 | 500) => { 43 | let response = response.into_string()?; 44 | eprintln!( 45 | "Retrying after {:.02?}: status {} {}", 46 | wait_for, status, response 47 | ); 48 | thread::sleep(wait_for); 49 | wait_for *= 2; 50 | } 51 | Err(Error::Status(400, response)) => { 52 | // Most of the time it is due to the OpenAI 8191 max tokens 53 | let max_length = sentences.iter().map(|s| s.len()).max().unwrap(); 54 | let cut_at = max_length * 80 / 100; 55 | eprintln!( 56 | "Seeing error cutting sentences from max {max_length} to {cut_at}: {}", 57 | response.into_string()?, 58 | ); 59 | sentences = sentences 60 | .into_iter() 61 | .map(|mut s| { 62 | s.truncate(cut_at); 63 | s 64 | }) 65 | .collect(); 66 | } 67 | Err(Error::Status(_, resp)) => { 68 | bail!( 69 | "Cannot query OpenAI due to a {} status code. {}", 70 | resp.status(), 71 | resp.into_string()?, 72 | ) 73 | } 74 | Err(transport) => bail!("Cannot query OpenAI due to {}", transport), 75 | Ok(response) => { 76 | let response: OpenAiResponse = response.into_json()?; 77 | return Ok(response.data.into_iter().map(|d| d.embedding).collect()); 78 | } 79 | } 80 | } 81 | bail!("Cannot query OpenAI, too many retry") 82 | } 83 | --------------------------------------------------------------------------------