├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── car_data ├── __init__.py ├── constants.py ├── dataset.py ├── graphics.py ├── lin_features.py ├── losses.py ├── model.py ├── scripts │ ├── baseline.py │ ├── classify.py │ ├── classify_viz.py │ ├── classify_viz_animate.py │ ├── clip_features.py │ ├── data_bench.py │ ├── data_viz.py │ ├── filter_index.py │ ├── plot_runs.py │ ├── recalibrate.py │ ├── train.py │ ├── train_distill.py │ └── train_filter.py └── train_loop.py ├── setup.py └── src ├── chan_util.rs ├── db.rs ├── dedup_images.rs ├── export_data.rs ├── image_util.rs ├── kbb.rs ├── main.rs ├── make_models.rs ├── parse_util.rs ├── scrape_kbb.rs ├── task_queue.rs └── types.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | data 3 | *.egg-info 4 | __pycache__ 5 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "aes" 13 | version = "0.7.5" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" 16 | dependencies = [ 17 | "cfg-if", 18 | "cipher", 19 | "cpufeatures", 20 | "opaque-debug", 21 | ] 22 | 23 | [[package]] 24 | name = "ahash" 25 | version = "0.7.6" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" 28 | dependencies = [ 29 | "getrandom 0.2.8", 30 | "once_cell", 31 | "version_check", 32 | ] 33 | 34 | [[package]] 35 | name = "anyhow" 36 | version = "1.0.66" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" 39 | 40 | [[package]] 41 | name = "async-channel" 42 | version = "1.7.1" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | checksum = "e14485364214912d3b19cc3435dde4df66065127f05fa0d75c712f36f12c2f28" 45 | dependencies = [ 46 | "concurrent-queue", 47 | "event-listener", 48 | "futures-core", 49 | ] 50 | 51 | [[package]] 52 | name = "atty" 53 | version = "0.2.14" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 56 | dependencies = [ 57 | "hermit-abi", 58 | "libc", 59 | "winapi", 60 | ] 61 | 62 | [[package]] 63 | name = "autocfg" 64 | version = "1.1.0" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 67 | 68 | [[package]] 69 | name = "base64" 70 | version = "0.13.1" 71 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" 73 | 74 | [[package]] 75 | name = "base64ct" 76 | version = "1.5.3" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf" 79 | 80 | [[package]] 81 | name = "bit_field" 82 | version = "0.10.1" 83 | source = "registry+https://github.com/rust-lang/crates.io-index" 84 | checksum = "dcb6dd1c2376d2e096796e234a70e17e94cc2d5d54ff8ce42b28cef1d0d359a4" 85 | 86 | [[package]] 87 | name = "bitflags" 88 | version = "1.3.2" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 91 | 92 | [[package]] 93 | name = "block-buffer" 94 | version = "0.10.3" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" 97 | dependencies = [ 98 | "generic-array", 99 | ] 100 | 101 | [[package]] 102 | name = "bumpalo" 103 | version = "3.11.1" 104 | source = "registry+https://github.com/rust-lang/crates.io-index" 105 | checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" 106 | 107 | [[package]] 108 | name = "bytemuck" 109 | version = "1.12.3" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "aaa3a8d9a1ca92e282c96a32d6511b695d7d994d1d102ba85d279f9b2756947f" 112 | 113 | [[package]] 114 | name = "byteorder" 115 | version = "1.4.3" 116 | source = "registry+https://github.com/rust-lang/crates.io-index" 117 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" 118 | 119 | [[package]] 120 | name = "bytes" 121 | version = "1.3.0" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c" 124 | 125 | 
[[package]] 126 | name = "bzip2" 127 | version = "0.4.3" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0" 130 | dependencies = [ 131 | "bzip2-sys", 132 | "libc", 133 | ] 134 | 135 | [[package]] 136 | name = "bzip2-sys" 137 | version = "0.1.11+1.0.8" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" 140 | dependencies = [ 141 | "cc", 142 | "libc", 143 | "pkg-config", 144 | ] 145 | 146 | [[package]] 147 | name = "cache-padded" 148 | version = "1.2.0" 149 | source = "registry+https://github.com/rust-lang/crates.io-index" 150 | checksum = "c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c" 151 | 152 | [[package]] 153 | name = "car-data" 154 | version = "0.1.0" 155 | dependencies = [ 156 | "anyhow", 157 | "async-channel", 158 | "clap", 159 | "image", 160 | "npy-writer", 161 | "rand 0.8.5", 162 | "reqwest", 163 | "rusqlite", 164 | "scraper", 165 | "serde_json", 166 | "sha2", 167 | "tokio", 168 | ] 169 | 170 | [[package]] 171 | name = "cc" 172 | version = "1.0.77" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4" 175 | dependencies = [ 176 | "jobserver", 177 | ] 178 | 179 | [[package]] 180 | name = "cfg-if" 181 | version = "1.0.0" 182 | source = "registry+https://github.com/rust-lang/crates.io-index" 183 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 184 | 185 | [[package]] 186 | name = "cipher" 187 | version = "0.3.0" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" 190 | dependencies = [ 191 | "generic-array", 192 | ] 193 | 194 | [[package]] 195 | name = "clap" 196 | version = "3.2.23" 197 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 198 | checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" 199 | dependencies = [ 200 | "atty", 201 | "bitflags", 202 | "clap_derive", 203 | "clap_lex", 204 | "indexmap", 205 | "once_cell", 206 | "strsim", 207 | "termcolor", 208 | "textwrap", 209 | ] 210 | 211 | [[package]] 212 | name = "clap_derive" 213 | version = "3.2.18" 214 | source = "registry+https://github.com/rust-lang/crates.io-index" 215 | checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" 216 | dependencies = [ 217 | "heck", 218 | "proc-macro-error", 219 | "proc-macro2", 220 | "quote", 221 | "syn", 222 | ] 223 | 224 | [[package]] 225 | name = "clap_lex" 226 | version = "0.2.4" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" 229 | dependencies = [ 230 | "os_str_bytes", 231 | ] 232 | 233 | [[package]] 234 | name = "color_quant" 235 | version = "1.1.0" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" 238 | 239 | [[package]] 240 | name = "concurrent-queue" 241 | version = "1.2.4" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "af4780a44ab5696ea9e28294517f1fffb421a83a25af521333c838635509db9c" 244 | dependencies = [ 245 | "cache-padded", 246 | ] 247 | 248 | [[package]] 249 | name = "constant_time_eq" 250 | version = "0.1.5" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" 253 | 254 | [[package]] 255 | name = "convert_case" 256 | version = "0.4.0" 257 | source = "registry+https://github.com/rust-lang/crates.io-index" 258 | checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" 259 | 260 | [[package]] 261 | name = 
"core-foundation" 262 | version = "0.9.3" 263 | source = "registry+https://github.com/rust-lang/crates.io-index" 264 | checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" 265 | dependencies = [ 266 | "core-foundation-sys", 267 | "libc", 268 | ] 269 | 270 | [[package]] 271 | name = "core-foundation-sys" 272 | version = "0.8.3" 273 | source = "registry+https://github.com/rust-lang/crates.io-index" 274 | checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" 275 | 276 | [[package]] 277 | name = "cpufeatures" 278 | version = "0.2.5" 279 | source = "registry+https://github.com/rust-lang/crates.io-index" 280 | checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" 281 | dependencies = [ 282 | "libc", 283 | ] 284 | 285 | [[package]] 286 | name = "crc32fast" 287 | version = "1.3.2" 288 | source = "registry+https://github.com/rust-lang/crates.io-index" 289 | checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" 290 | dependencies = [ 291 | "cfg-if", 292 | ] 293 | 294 | [[package]] 295 | name = "crossbeam-channel" 296 | version = "0.5.6" 297 | source = "registry+https://github.com/rust-lang/crates.io-index" 298 | checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" 299 | dependencies = [ 300 | "cfg-if", 301 | "crossbeam-utils", 302 | ] 303 | 304 | [[package]] 305 | name = "crossbeam-deque" 306 | version = "0.8.2" 307 | source = "registry+https://github.com/rust-lang/crates.io-index" 308 | checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" 309 | dependencies = [ 310 | "cfg-if", 311 | "crossbeam-epoch", 312 | "crossbeam-utils", 313 | ] 314 | 315 | [[package]] 316 | name = "crossbeam-epoch" 317 | version = "0.9.13" 318 | source = "registry+https://github.com/rust-lang/crates.io-index" 319 | checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" 320 | dependencies = [ 321 | "autocfg", 322 | "cfg-if", 323 | 
"crossbeam-utils", 324 | "memoffset", 325 | "scopeguard", 326 | ] 327 | 328 | [[package]] 329 | name = "crossbeam-utils" 330 | version = "0.8.14" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" 333 | dependencies = [ 334 | "cfg-if", 335 | ] 336 | 337 | [[package]] 338 | name = "crunchy" 339 | version = "0.2.2" 340 | source = "registry+https://github.com/rust-lang/crates.io-index" 341 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" 342 | 343 | [[package]] 344 | name = "crypto-common" 345 | version = "0.1.6" 346 | source = "registry+https://github.com/rust-lang/crates.io-index" 347 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 348 | dependencies = [ 349 | "generic-array", 350 | "typenum", 351 | ] 352 | 353 | [[package]] 354 | name = "cssparser" 355 | version = "0.27.2" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" 358 | dependencies = [ 359 | "cssparser-macros", 360 | "dtoa-short", 361 | "itoa 0.4.8", 362 | "matches", 363 | "phf 0.8.0", 364 | "proc-macro2", 365 | "quote", 366 | "smallvec", 367 | "syn", 368 | ] 369 | 370 | [[package]] 371 | name = "cssparser-macros" 372 | version = "0.6.0" 373 | source = "registry+https://github.com/rust-lang/crates.io-index" 374 | checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" 375 | dependencies = [ 376 | "quote", 377 | "syn", 378 | ] 379 | 380 | [[package]] 381 | name = "derive_more" 382 | version = "0.99.17" 383 | source = "registry+https://github.com/rust-lang/crates.io-index" 384 | checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" 385 | dependencies = [ 386 | "convert_case", 387 | "proc-macro2", 388 | "quote", 389 | "rustc_version", 390 | "syn", 391 | ] 392 | 393 | [[package]] 394 | name = 
"digest" 395 | version = "0.10.6" 396 | source = "registry+https://github.com/rust-lang/crates.io-index" 397 | checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" 398 | dependencies = [ 399 | "block-buffer", 400 | "crypto-common", 401 | "subtle", 402 | ] 403 | 404 | [[package]] 405 | name = "dtoa" 406 | version = "0.4.8" 407 | source = "registry+https://github.com/rust-lang/crates.io-index" 408 | checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0" 409 | 410 | [[package]] 411 | name = "dtoa-short" 412 | version = "0.3.3" 413 | source = "registry+https://github.com/rust-lang/crates.io-index" 414 | checksum = "bde03329ae10e79ede66c9ce4dc930aa8599043b0743008548680f25b91502d6" 415 | dependencies = [ 416 | "dtoa", 417 | ] 418 | 419 | [[package]] 420 | name = "ego-tree" 421 | version = "0.6.2" 422 | source = "registry+https://github.com/rust-lang/crates.io-index" 423 | checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" 424 | 425 | [[package]] 426 | name = "either" 427 | version = "1.8.0" 428 | source = "registry+https://github.com/rust-lang/crates.io-index" 429 | checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" 430 | 431 | [[package]] 432 | name = "encoding_rs" 433 | version = "0.8.31" 434 | source = "registry+https://github.com/rust-lang/crates.io-index" 435 | checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" 436 | dependencies = [ 437 | "cfg-if", 438 | ] 439 | 440 | [[package]] 441 | name = "event-listener" 442 | version = "2.5.3" 443 | source = "registry+https://github.com/rust-lang/crates.io-index" 444 | checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" 445 | 446 | [[package]] 447 | name = "exr" 448 | version = "1.5.2" 449 | source = "registry+https://github.com/rust-lang/crates.io-index" 450 | checksum = "8eb5f255b5980bb0c8cf676b675d1a99be40f316881444f44e0462eaf5df5ded" 451 | dependencies = [ 452 | 
"bit_field", 453 | "flume", 454 | "half", 455 | "lebe", 456 | "miniz_oxide", 457 | "smallvec", 458 | "threadpool", 459 | ] 460 | 461 | [[package]] 462 | name = "fallible-iterator" 463 | version = "0.2.0" 464 | source = "registry+https://github.com/rust-lang/crates.io-index" 465 | checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" 466 | 467 | [[package]] 468 | name = "fallible-streaming-iterator" 469 | version = "0.1.9" 470 | source = "registry+https://github.com/rust-lang/crates.io-index" 471 | checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" 472 | 473 | [[package]] 474 | name = "flate2" 475 | version = "1.0.25" 476 | source = "registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" 478 | dependencies = [ 479 | "crc32fast", 480 | "miniz_oxide", 481 | ] 482 | 483 | [[package]] 484 | name = "flume" 485 | version = "0.10.14" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" 488 | dependencies = [ 489 | "futures-core", 490 | "futures-sink", 491 | "nanorand", 492 | "pin-project", 493 | "spin 0.9.4", 494 | ] 495 | 496 | [[package]] 497 | name = "fnv" 498 | version = "1.0.7" 499 | source = "registry+https://github.com/rust-lang/crates.io-index" 500 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 501 | 502 | [[package]] 503 | name = "form_urlencoded" 504 | version = "1.1.0" 505 | source = "registry+https://github.com/rust-lang/crates.io-index" 506 | checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" 507 | dependencies = [ 508 | "percent-encoding", 509 | ] 510 | 511 | [[package]] 512 | name = "futf" 513 | version = "0.1.5" 514 | source = "registry+https://github.com/rust-lang/crates.io-index" 515 | checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" 
516 | dependencies = [ 517 | "mac", 518 | "new_debug_unreachable", 519 | ] 520 | 521 | [[package]] 522 | name = "futures-channel" 523 | version = "0.3.25" 524 | source = "registry+https://github.com/rust-lang/crates.io-index" 525 | checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed" 526 | dependencies = [ 527 | "futures-core", 528 | ] 529 | 530 | [[package]] 531 | name = "futures-core" 532 | version = "0.3.25" 533 | source = "registry+https://github.com/rust-lang/crates.io-index" 534 | checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac" 535 | 536 | [[package]] 537 | name = "futures-sink" 538 | version = "0.3.25" 539 | source = "registry+https://github.com/rust-lang/crates.io-index" 540 | checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9" 541 | 542 | [[package]] 543 | name = "futures-task" 544 | version = "0.3.25" 545 | source = "registry+https://github.com/rust-lang/crates.io-index" 546 | checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea" 547 | 548 | [[package]] 549 | name = "futures-util" 550 | version = "0.3.25" 551 | source = "registry+https://github.com/rust-lang/crates.io-index" 552 | checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6" 553 | dependencies = [ 554 | "futures-core", 555 | "futures-task", 556 | "pin-project-lite", 557 | "pin-utils", 558 | ] 559 | 560 | [[package]] 561 | name = "fxhash" 562 | version = "0.2.1" 563 | source = "registry+https://github.com/rust-lang/crates.io-index" 564 | checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" 565 | dependencies = [ 566 | "byteorder", 567 | ] 568 | 569 | [[package]] 570 | name = "generic-array" 571 | version = "0.14.6" 572 | source = "registry+https://github.com/rust-lang/crates.io-index" 573 | checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" 574 | dependencies = [ 575 | "typenum", 576 | "version_check", 577 | ] 578 | 579 
| [[package]] 580 | name = "getopts" 581 | version = "0.2.21" 582 | source = "registry+https://github.com/rust-lang/crates.io-index" 583 | checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" 584 | dependencies = [ 585 | "unicode-width", 586 | ] 587 | 588 | [[package]] 589 | name = "getrandom" 590 | version = "0.1.16" 591 | source = "registry+https://github.com/rust-lang/crates.io-index" 592 | checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" 593 | dependencies = [ 594 | "cfg-if", 595 | "libc", 596 | "wasi 0.9.0+wasi-snapshot-preview1", 597 | ] 598 | 599 | [[package]] 600 | name = "getrandom" 601 | version = "0.2.8" 602 | source = "registry+https://github.com/rust-lang/crates.io-index" 603 | checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" 604 | dependencies = [ 605 | "cfg-if", 606 | "js-sys", 607 | "libc", 608 | "wasi 0.11.0+wasi-snapshot-preview1", 609 | "wasm-bindgen", 610 | ] 611 | 612 | [[package]] 613 | name = "gif" 614 | version = "0.11.4" 615 | source = "registry+https://github.com/rust-lang/crates.io-index" 616 | checksum = "3edd93c6756b4dfaf2709eafcc345ba2636565295c198a9cfbf75fa5e3e00b06" 617 | dependencies = [ 618 | "color_quant", 619 | "weezl", 620 | ] 621 | 622 | [[package]] 623 | name = "h2" 624 | version = "0.3.15" 625 | source = "registry+https://github.com/rust-lang/crates.io-index" 626 | checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" 627 | dependencies = [ 628 | "bytes", 629 | "fnv", 630 | "futures-core", 631 | "futures-sink", 632 | "futures-util", 633 | "http", 634 | "indexmap", 635 | "slab", 636 | "tokio", 637 | "tokio-util", 638 | "tracing", 639 | ] 640 | 641 | [[package]] 642 | name = "half" 643 | version = "2.1.0" 644 | source = "registry+https://github.com/rust-lang/crates.io-index" 645 | checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554" 646 | dependencies = [ 647 | "crunchy", 648 | ] 649 | 650 | 
[[package]] 651 | name = "hashbrown" 652 | version = "0.12.3" 653 | source = "registry+https://github.com/rust-lang/crates.io-index" 654 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 655 | dependencies = [ 656 | "ahash", 657 | ] 658 | 659 | [[package]] 660 | name = "hashlink" 661 | version = "0.8.1" 662 | source = "registry+https://github.com/rust-lang/crates.io-index" 663 | checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa" 664 | dependencies = [ 665 | "hashbrown", 666 | ] 667 | 668 | [[package]] 669 | name = "heck" 670 | version = "0.4.0" 671 | source = "registry+https://github.com/rust-lang/crates.io-index" 672 | checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" 673 | 674 | [[package]] 675 | name = "hermit-abi" 676 | version = "0.1.19" 677 | source = "registry+https://github.com/rust-lang/crates.io-index" 678 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 679 | dependencies = [ 680 | "libc", 681 | ] 682 | 683 | [[package]] 684 | name = "hmac" 685 | version = "0.12.1" 686 | source = "registry+https://github.com/rust-lang/crates.io-index" 687 | checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" 688 | dependencies = [ 689 | "digest", 690 | ] 691 | 692 | [[package]] 693 | name = "html5ever" 694 | version = "0.26.0" 695 | source = "registry+https://github.com/rust-lang/crates.io-index" 696 | checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" 697 | dependencies = [ 698 | "log", 699 | "mac", 700 | "markup5ever", 701 | "proc-macro2", 702 | "quote", 703 | "syn", 704 | ] 705 | 706 | [[package]] 707 | name = "http" 708 | version = "0.2.8" 709 | source = "registry+https://github.com/rust-lang/crates.io-index" 710 | checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" 711 | dependencies = [ 712 | "bytes", 713 | "fnv", 714 | "itoa 1.0.4", 715 | ] 716 | 717 | [[package]] 718 | 
name = "http-body" 719 | version = "0.4.5" 720 | source = "registry+https://github.com/rust-lang/crates.io-index" 721 | checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" 722 | dependencies = [ 723 | "bytes", 724 | "http", 725 | "pin-project-lite", 726 | ] 727 | 728 | [[package]] 729 | name = "httparse" 730 | version = "1.8.0" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" 733 | 734 | [[package]] 735 | name = "httpdate" 736 | version = "1.0.2" 737 | source = "registry+https://github.com/rust-lang/crates.io-index" 738 | checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" 739 | 740 | [[package]] 741 | name = "hyper" 742 | version = "0.14.23" 743 | source = "registry+https://github.com/rust-lang/crates.io-index" 744 | checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c" 745 | dependencies = [ 746 | "bytes", 747 | "futures-channel", 748 | "futures-core", 749 | "futures-util", 750 | "h2", 751 | "http", 752 | "http-body", 753 | "httparse", 754 | "httpdate", 755 | "itoa 1.0.4", 756 | "pin-project-lite", 757 | "socket2", 758 | "tokio", 759 | "tower-service", 760 | "tracing", 761 | "want", 762 | ] 763 | 764 | [[package]] 765 | name = "hyper-rustls" 766 | version = "0.23.1" 767 | source = "registry+https://github.com/rust-lang/crates.io-index" 768 | checksum = "59df7c4e19c950e6e0e868dcc0a300b09a9b88e9ec55bd879ca819087a77355d" 769 | dependencies = [ 770 | "http", 771 | "hyper", 772 | "rustls", 773 | "tokio", 774 | "tokio-rustls", 775 | ] 776 | 777 | [[package]] 778 | name = "idna" 779 | version = "0.3.0" 780 | source = "registry+https://github.com/rust-lang/crates.io-index" 781 | checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" 782 | dependencies = [ 783 | "unicode-bidi", 784 | "unicode-normalization", 785 | ] 786 | 787 | [[package]] 788 | name = "image" 789 | 
version = "0.24.5" 790 | source = "registry+https://github.com/rust-lang/crates.io-index" 791 | checksum = "69b7ea949b537b0fd0af141fff8c77690f2ce96f4f41f042ccb6c69c6c965945" 792 | dependencies = [ 793 | "bytemuck", 794 | "byteorder", 795 | "color_quant", 796 | "exr", 797 | "gif", 798 | "jpeg-decoder", 799 | "num-rational", 800 | "num-traits", 801 | "png", 802 | "scoped_threadpool", 803 | "tiff", 804 | ] 805 | 806 | [[package]] 807 | name = "indexmap" 808 | version = "1.9.2" 809 | source = "registry+https://github.com/rust-lang/crates.io-index" 810 | checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" 811 | dependencies = [ 812 | "autocfg", 813 | "hashbrown", 814 | ] 815 | 816 | [[package]] 817 | name = "ipnet" 818 | version = "2.5.1" 819 | source = "registry+https://github.com/rust-lang/crates.io-index" 820 | checksum = "f88c5561171189e69df9d98bcf18fd5f9558300f7ea7b801eb8a0fd748bd8745" 821 | 822 | [[package]] 823 | name = "itoa" 824 | version = "0.4.8" 825 | source = "registry+https://github.com/rust-lang/crates.io-index" 826 | checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" 827 | 828 | [[package]] 829 | name = "itoa" 830 | version = "1.0.4" 831 | source = "registry+https://github.com/rust-lang/crates.io-index" 832 | checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" 833 | 834 | [[package]] 835 | name = "jobserver" 836 | version = "0.1.25" 837 | source = "registry+https://github.com/rust-lang/crates.io-index" 838 | checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b" 839 | dependencies = [ 840 | "libc", 841 | ] 842 | 843 | [[package]] 844 | name = "jpeg-decoder" 845 | version = "0.3.0" 846 | source = "registry+https://github.com/rust-lang/crates.io-index" 847 | checksum = "bc0000e42512c92e31c2252315bda326620a4e034105e900c98ec492fa077b3e" 848 | dependencies = [ 849 | "rayon", 850 | ] 851 | 852 | [[package]] 853 | name = "js-sys" 854 | version = "0.3.60" 855 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 856 | checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" 857 | dependencies = [ 858 | "wasm-bindgen", 859 | ] 860 | 861 | [[package]] 862 | name = "lazy_static" 863 | version = "1.4.0" 864 | source = "registry+https://github.com/rust-lang/crates.io-index" 865 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 866 | 867 | [[package]] 868 | name = "lebe" 869 | version = "0.5.2" 870 | source = "registry+https://github.com/rust-lang/crates.io-index" 871 | checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" 872 | 873 | [[package]] 874 | name = "libc" 875 | version = "0.2.137" 876 | source = "registry+https://github.com/rust-lang/crates.io-index" 877 | checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" 878 | 879 | [[package]] 880 | name = "libsqlite3-sys" 881 | version = "0.25.2" 882 | source = "registry+https://github.com/rust-lang/crates.io-index" 883 | checksum = "29f835d03d717946d28b1d1ed632eb6f0e24a299388ee623d0c23118d3e8a7fa" 884 | dependencies = [ 885 | "cc", 886 | "pkg-config", 887 | "vcpkg", 888 | ] 889 | 890 | [[package]] 891 | name = "lock_api" 892 | version = "0.4.9" 893 | source = "registry+https://github.com/rust-lang/crates.io-index" 894 | checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" 895 | dependencies = [ 896 | "autocfg", 897 | "scopeguard", 898 | ] 899 | 900 | [[package]] 901 | name = "log" 902 | version = "0.4.17" 903 | source = "registry+https://github.com/rust-lang/crates.io-index" 904 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 905 | dependencies = [ 906 | "cfg-if", 907 | ] 908 | 909 | [[package]] 910 | name = "mac" 911 | version = "0.1.1" 912 | source = "registry+https://github.com/rust-lang/crates.io-index" 913 | checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" 914 | 915 | 
[[package]] 916 | name = "markup5ever" 917 | version = "0.11.0" 918 | source = "registry+https://github.com/rust-lang/crates.io-index" 919 | checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" 920 | dependencies = [ 921 | "log", 922 | "phf 0.10.1", 923 | "phf_codegen 0.10.0", 924 | "string_cache", 925 | "string_cache_codegen", 926 | "tendril", 927 | ] 928 | 929 | [[package]] 930 | name = "matches" 931 | version = "0.1.9" 932 | source = "registry+https://github.com/rust-lang/crates.io-index" 933 | checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" 934 | 935 | [[package]] 936 | name = "memchr" 937 | version = "2.5.0" 938 | source = "registry+https://github.com/rust-lang/crates.io-index" 939 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 940 | 941 | [[package]] 942 | name = "memoffset" 943 | version = "0.7.1" 944 | source = "registry+https://github.com/rust-lang/crates.io-index" 945 | checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" 946 | dependencies = [ 947 | "autocfg", 948 | ] 949 | 950 | [[package]] 951 | name = "mime" 952 | version = "0.3.16" 953 | source = "registry+https://github.com/rust-lang/crates.io-index" 954 | checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" 955 | 956 | [[package]] 957 | name = "miniz_oxide" 958 | version = "0.6.2" 959 | source = "registry+https://github.com/rust-lang/crates.io-index" 960 | checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" 961 | dependencies = [ 962 | "adler", 963 | ] 964 | 965 | [[package]] 966 | name = "mio" 967 | version = "0.8.5" 968 | source = "registry+https://github.com/rust-lang/crates.io-index" 969 | checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" 970 | dependencies = [ 971 | "libc", 972 | "log", 973 | "wasi 0.11.0+wasi-snapshot-preview1", 974 | "windows-sys 0.42.0", 975 | ] 976 | 977 | [[package]] 978 | name = 
"nanorand" 979 | version = "0.7.0" 980 | source = "registry+https://github.com/rust-lang/crates.io-index" 981 | checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" 982 | dependencies = [ 983 | "getrandom 0.2.8", 984 | ] 985 | 986 | [[package]] 987 | name = "new_debug_unreachable" 988 | version = "1.0.4" 989 | source = "registry+https://github.com/rust-lang/crates.io-index" 990 | checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" 991 | 992 | [[package]] 993 | name = "nodrop" 994 | version = "0.1.14" 995 | source = "registry+https://github.com/rust-lang/crates.io-index" 996 | checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" 997 | 998 | [[package]] 999 | name = "npy-writer" 1000 | version = "0.1.0" 1001 | source = "registry+https://github.com/rust-lang/crates.io-index" 1002 | checksum = "a08beb23d1e6dfaf8c1e306d6eb24e4f5ad6c9507be26aecadf1e9305b883358" 1003 | dependencies = [ 1004 | "zip", 1005 | ] 1006 | 1007 | [[package]] 1008 | name = "num-integer" 1009 | version = "0.1.45" 1010 | source = "registry+https://github.com/rust-lang/crates.io-index" 1011 | checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" 1012 | dependencies = [ 1013 | "autocfg", 1014 | "num-traits", 1015 | ] 1016 | 1017 | [[package]] 1018 | name = "num-rational" 1019 | version = "0.4.1" 1020 | source = "registry+https://github.com/rust-lang/crates.io-index" 1021 | checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" 1022 | dependencies = [ 1023 | "autocfg", 1024 | "num-integer", 1025 | "num-traits", 1026 | ] 1027 | 1028 | [[package]] 1029 | name = "num-traits" 1030 | version = "0.2.15" 1031 | source = "registry+https://github.com/rust-lang/crates.io-index" 1032 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 1033 | dependencies = [ 1034 | "autocfg", 1035 | ] 1036 | 1037 | [[package]] 1038 | name = "num_cpus" 1039 | version = "1.14.0" 1040 
| source = "registry+https://github.com/rust-lang/crates.io-index" 1041 | checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5" 1042 | dependencies = [ 1043 | "hermit-abi", 1044 | "libc", 1045 | ] 1046 | 1047 | [[package]] 1048 | name = "once_cell" 1049 | version = "1.16.0" 1050 | source = "registry+https://github.com/rust-lang/crates.io-index" 1051 | checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" 1052 | 1053 | [[package]] 1054 | name = "opaque-debug" 1055 | version = "0.3.0" 1056 | source = "registry+https://github.com/rust-lang/crates.io-index" 1057 | checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" 1058 | 1059 | [[package]] 1060 | name = "openssl-probe" 1061 | version = "0.1.5" 1062 | source = "registry+https://github.com/rust-lang/crates.io-index" 1063 | checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" 1064 | 1065 | [[package]] 1066 | name = "os_str_bytes" 1067 | version = "6.4.1" 1068 | source = "registry+https://github.com/rust-lang/crates.io-index" 1069 | checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" 1070 | 1071 | [[package]] 1072 | name = "parking_lot" 1073 | version = "0.12.1" 1074 | source = "registry+https://github.com/rust-lang/crates.io-index" 1075 | checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" 1076 | dependencies = [ 1077 | "lock_api", 1078 | "parking_lot_core", 1079 | ] 1080 | 1081 | [[package]] 1082 | name = "parking_lot_core" 1083 | version = "0.9.4" 1084 | source = "registry+https://github.com/rust-lang/crates.io-index" 1085 | checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0" 1086 | dependencies = [ 1087 | "cfg-if", 1088 | "libc", 1089 | "redox_syscall", 1090 | "smallvec", 1091 | "windows-sys 0.42.0", 1092 | ] 1093 | 1094 | [[package]] 1095 | name = "password-hash" 1096 | version = "0.4.2" 1097 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1098 | checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" 1099 | dependencies = [ 1100 | "base64ct", 1101 | "rand_core 0.6.4", 1102 | "subtle", 1103 | ] 1104 | 1105 | [[package]] 1106 | name = "pbkdf2" 1107 | version = "0.11.0" 1108 | source = "registry+https://github.com/rust-lang/crates.io-index" 1109 | checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" 1110 | dependencies = [ 1111 | "digest", 1112 | "hmac", 1113 | "password-hash", 1114 | "sha2", 1115 | ] 1116 | 1117 | [[package]] 1118 | name = "percent-encoding" 1119 | version = "2.2.0" 1120 | source = "registry+https://github.com/rust-lang/crates.io-index" 1121 | checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" 1122 | 1123 | [[package]] 1124 | name = "phf" 1125 | version = "0.8.0" 1126 | source = "registry+https://github.com/rust-lang/crates.io-index" 1127 | checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" 1128 | dependencies = [ 1129 | "phf_macros", 1130 | "phf_shared 0.8.0", 1131 | "proc-macro-hack", 1132 | ] 1133 | 1134 | [[package]] 1135 | name = "phf" 1136 | version = "0.10.1" 1137 | source = "registry+https://github.com/rust-lang/crates.io-index" 1138 | checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" 1139 | dependencies = [ 1140 | "phf_shared 0.10.0", 1141 | ] 1142 | 1143 | [[package]] 1144 | name = "phf_codegen" 1145 | version = "0.8.0" 1146 | source = "registry+https://github.com/rust-lang/crates.io-index" 1147 | checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" 1148 | dependencies = [ 1149 | "phf_generator 0.8.0", 1150 | "phf_shared 0.8.0", 1151 | ] 1152 | 1153 | [[package]] 1154 | name = "phf_codegen" 1155 | version = "0.10.0" 1156 | source = "registry+https://github.com/rust-lang/crates.io-index" 1157 | checksum = 
"4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" 1158 | dependencies = [ 1159 | "phf_generator 0.10.0", 1160 | "phf_shared 0.10.0", 1161 | ] 1162 | 1163 | [[package]] 1164 | name = "phf_generator" 1165 | version = "0.8.0" 1166 | source = "registry+https://github.com/rust-lang/crates.io-index" 1167 | checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" 1168 | dependencies = [ 1169 | "phf_shared 0.8.0", 1170 | "rand 0.7.3", 1171 | ] 1172 | 1173 | [[package]] 1174 | name = "phf_generator" 1175 | version = "0.10.0" 1176 | source = "registry+https://github.com/rust-lang/crates.io-index" 1177 | checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" 1178 | dependencies = [ 1179 | "phf_shared 0.10.0", 1180 | "rand 0.8.5", 1181 | ] 1182 | 1183 | [[package]] 1184 | name = "phf_macros" 1185 | version = "0.8.0" 1186 | source = "registry+https://github.com/rust-lang/crates.io-index" 1187 | checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" 1188 | dependencies = [ 1189 | "phf_generator 0.8.0", 1190 | "phf_shared 0.8.0", 1191 | "proc-macro-hack", 1192 | "proc-macro2", 1193 | "quote", 1194 | "syn", 1195 | ] 1196 | 1197 | [[package]] 1198 | name = "phf_shared" 1199 | version = "0.8.0" 1200 | source = "registry+https://github.com/rust-lang/crates.io-index" 1201 | checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" 1202 | dependencies = [ 1203 | "siphasher", 1204 | ] 1205 | 1206 | [[package]] 1207 | name = "phf_shared" 1208 | version = "0.10.0" 1209 | source = "registry+https://github.com/rust-lang/crates.io-index" 1210 | checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" 1211 | dependencies = [ 1212 | "siphasher", 1213 | ] 1214 | 1215 | [[package]] 1216 | name = "pin-project" 1217 | version = "1.0.12" 1218 | source = "registry+https://github.com/rust-lang/crates.io-index" 1219 | checksum = 
"ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc" 1220 | dependencies = [ 1221 | "pin-project-internal", 1222 | ] 1223 | 1224 | [[package]] 1225 | name = "pin-project-internal" 1226 | version = "1.0.12" 1227 | source = "registry+https://github.com/rust-lang/crates.io-index" 1228 | checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" 1229 | dependencies = [ 1230 | "proc-macro2", 1231 | "quote", 1232 | "syn", 1233 | ] 1234 | 1235 | [[package]] 1236 | name = "pin-project-lite" 1237 | version = "0.2.9" 1238 | source = "registry+https://github.com/rust-lang/crates.io-index" 1239 | checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" 1240 | 1241 | [[package]] 1242 | name = "pin-utils" 1243 | version = "0.1.0" 1244 | source = "registry+https://github.com/rust-lang/crates.io-index" 1245 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 1246 | 1247 | [[package]] 1248 | name = "pkg-config" 1249 | version = "0.3.26" 1250 | source = "registry+https://github.com/rust-lang/crates.io-index" 1251 | checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" 1252 | 1253 | [[package]] 1254 | name = "png" 1255 | version = "0.17.7" 1256 | source = "registry+https://github.com/rust-lang/crates.io-index" 1257 | checksum = "5d708eaf860a19b19ce538740d2b4bdeeb8337fa53f7738455e706623ad5c638" 1258 | dependencies = [ 1259 | "bitflags", 1260 | "crc32fast", 1261 | "flate2", 1262 | "miniz_oxide", 1263 | ] 1264 | 1265 | [[package]] 1266 | name = "ppv-lite86" 1267 | version = "0.2.17" 1268 | source = "registry+https://github.com/rust-lang/crates.io-index" 1269 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 1270 | 1271 | [[package]] 1272 | name = "precomputed-hash" 1273 | version = "0.1.1" 1274 | source = "registry+https://github.com/rust-lang/crates.io-index" 1275 | checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" 1276 | 
1277 | [[package]] 1278 | name = "proc-macro-error" 1279 | version = "1.0.4" 1280 | source = "registry+https://github.com/rust-lang/crates.io-index" 1281 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 1282 | dependencies = [ 1283 | "proc-macro-error-attr", 1284 | "proc-macro2", 1285 | "quote", 1286 | "syn", 1287 | "version_check", 1288 | ] 1289 | 1290 | [[package]] 1291 | name = "proc-macro-error-attr" 1292 | version = "1.0.4" 1293 | source = "registry+https://github.com/rust-lang/crates.io-index" 1294 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 1295 | dependencies = [ 1296 | "proc-macro2", 1297 | "quote", 1298 | "version_check", 1299 | ] 1300 | 1301 | [[package]] 1302 | name = "proc-macro-hack" 1303 | version = "0.5.19" 1304 | source = "registry+https://github.com/rust-lang/crates.io-index" 1305 | checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" 1306 | 1307 | [[package]] 1308 | name = "proc-macro2" 1309 | version = "1.0.47" 1310 | source = "registry+https://github.com/rust-lang/crates.io-index" 1311 | checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" 1312 | dependencies = [ 1313 | "unicode-ident", 1314 | ] 1315 | 1316 | [[package]] 1317 | name = "quote" 1318 | version = "1.0.21" 1319 | source = "registry+https://github.com/rust-lang/crates.io-index" 1320 | checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" 1321 | dependencies = [ 1322 | "proc-macro2", 1323 | ] 1324 | 1325 | [[package]] 1326 | name = "rand" 1327 | version = "0.7.3" 1328 | source = "registry+https://github.com/rust-lang/crates.io-index" 1329 | checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" 1330 | dependencies = [ 1331 | "getrandom 0.1.16", 1332 | "libc", 1333 | "rand_chacha 0.2.2", 1334 | "rand_core 0.5.1", 1335 | "rand_hc", 1336 | "rand_pcg", 1337 | ] 1338 | 1339 | [[package]] 1340 | name = "rand" 1341 | version = 
"0.8.5" 1342 | source = "registry+https://github.com/rust-lang/crates.io-index" 1343 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 1344 | dependencies = [ 1345 | "libc", 1346 | "rand_chacha 0.3.1", 1347 | "rand_core 0.6.4", 1348 | ] 1349 | 1350 | [[package]] 1351 | name = "rand_chacha" 1352 | version = "0.2.2" 1353 | source = "registry+https://github.com/rust-lang/crates.io-index" 1354 | checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" 1355 | dependencies = [ 1356 | "ppv-lite86", 1357 | "rand_core 0.5.1", 1358 | ] 1359 | 1360 | [[package]] 1361 | name = "rand_chacha" 1362 | version = "0.3.1" 1363 | source = "registry+https://github.com/rust-lang/crates.io-index" 1364 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 1365 | dependencies = [ 1366 | "ppv-lite86", 1367 | "rand_core 0.6.4", 1368 | ] 1369 | 1370 | [[package]] 1371 | name = "rand_core" 1372 | version = "0.5.1" 1373 | source = "registry+https://github.com/rust-lang/crates.io-index" 1374 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 1375 | dependencies = [ 1376 | "getrandom 0.1.16", 1377 | ] 1378 | 1379 | [[package]] 1380 | name = "rand_core" 1381 | version = "0.6.4" 1382 | source = "registry+https://github.com/rust-lang/crates.io-index" 1383 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 1384 | dependencies = [ 1385 | "getrandom 0.2.8", 1386 | ] 1387 | 1388 | [[package]] 1389 | name = "rand_hc" 1390 | version = "0.2.0" 1391 | source = "registry+https://github.com/rust-lang/crates.io-index" 1392 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 1393 | dependencies = [ 1394 | "rand_core 0.5.1", 1395 | ] 1396 | 1397 | [[package]] 1398 | name = "rand_pcg" 1399 | version = "0.2.1" 1400 | source = "registry+https://github.com/rust-lang/crates.io-index" 1401 | checksum = 
"16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" 1402 | dependencies = [ 1403 | "rand_core 0.5.1", 1404 | ] 1405 | 1406 | [[package]] 1407 | name = "rayon" 1408 | version = "1.6.0" 1409 | source = "registry+https://github.com/rust-lang/crates.io-index" 1410 | checksum = "1e060280438193c554f654141c9ea9417886713b7acd75974c85b18a69a88e0b" 1411 | dependencies = [ 1412 | "crossbeam-deque", 1413 | "either", 1414 | "rayon-core", 1415 | ] 1416 | 1417 | [[package]] 1418 | name = "rayon-core" 1419 | version = "1.10.1" 1420 | source = "registry+https://github.com/rust-lang/crates.io-index" 1421 | checksum = "cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3" 1422 | dependencies = [ 1423 | "crossbeam-channel", 1424 | "crossbeam-deque", 1425 | "crossbeam-utils", 1426 | "num_cpus", 1427 | ] 1428 | 1429 | [[package]] 1430 | name = "redox_syscall" 1431 | version = "0.2.16" 1432 | source = "registry+https://github.com/rust-lang/crates.io-index" 1433 | checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" 1434 | dependencies = [ 1435 | "bitflags", 1436 | ] 1437 | 1438 | [[package]] 1439 | name = "reqwest" 1440 | version = "0.11.13" 1441 | source = "registry+https://github.com/rust-lang/crates.io-index" 1442 | checksum = "68cc60575865c7831548863cc02356512e3f1dc2f3f82cb837d7fc4cc8f3c97c" 1443 | dependencies = [ 1444 | "base64", 1445 | "bytes", 1446 | "encoding_rs", 1447 | "futures-core", 1448 | "futures-util", 1449 | "h2", 1450 | "http", 1451 | "http-body", 1452 | "hyper", 1453 | "hyper-rustls", 1454 | "ipnet", 1455 | "js-sys", 1456 | "log", 1457 | "mime", 1458 | "once_cell", 1459 | "percent-encoding", 1460 | "pin-project-lite", 1461 | "rustls", 1462 | "rustls-native-certs", 1463 | "rustls-pemfile", 1464 | "serde", 1465 | "serde_json", 1466 | "serde_urlencoded", 1467 | "tokio", 1468 | "tokio-rustls", 1469 | "tower-service", 1470 | "url", 1471 | "wasm-bindgen", 1472 | "wasm-bindgen-futures", 1473 | "web-sys", 1474 | "winreg", 
1475 | ] 1476 | 1477 | [[package]] 1478 | name = "ring" 1479 | version = "0.16.20" 1480 | source = "registry+https://github.com/rust-lang/crates.io-index" 1481 | checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" 1482 | dependencies = [ 1483 | "cc", 1484 | "libc", 1485 | "once_cell", 1486 | "spin 0.5.2", 1487 | "untrusted", 1488 | "web-sys", 1489 | "winapi", 1490 | ] 1491 | 1492 | [[package]] 1493 | name = "rusqlite" 1494 | version = "0.28.0" 1495 | source = "registry+https://github.com/rust-lang/crates.io-index" 1496 | checksum = "01e213bc3ecb39ac32e81e51ebe31fd888a940515173e3a18a35f8c6e896422a" 1497 | dependencies = [ 1498 | "bitflags", 1499 | "fallible-iterator", 1500 | "fallible-streaming-iterator", 1501 | "hashlink", 1502 | "libsqlite3-sys", 1503 | "smallvec", 1504 | ] 1505 | 1506 | [[package]] 1507 | name = "rustc_version" 1508 | version = "0.4.0" 1509 | source = "registry+https://github.com/rust-lang/crates.io-index" 1510 | checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" 1511 | dependencies = [ 1512 | "semver", 1513 | ] 1514 | 1515 | [[package]] 1516 | name = "rustls" 1517 | version = "0.20.7" 1518 | source = "registry+https://github.com/rust-lang/crates.io-index" 1519 | checksum = "539a2bfe908f471bfa933876bd1eb6a19cf2176d375f82ef7f99530a40e48c2c" 1520 | dependencies = [ 1521 | "log", 1522 | "ring", 1523 | "sct", 1524 | "webpki", 1525 | ] 1526 | 1527 | [[package]] 1528 | name = "rustls-native-certs" 1529 | version = "0.6.2" 1530 | source = "registry+https://github.com/rust-lang/crates.io-index" 1531 | checksum = "0167bac7a9f490495f3c33013e7722b53cb087ecbe082fb0c6387c96f634ea50" 1532 | dependencies = [ 1533 | "openssl-probe", 1534 | "rustls-pemfile", 1535 | "schannel", 1536 | "security-framework", 1537 | ] 1538 | 1539 | [[package]] 1540 | name = "rustls-pemfile" 1541 | version = "1.0.1" 1542 | source = "registry+https://github.com/rust-lang/crates.io-index" 1543 | checksum = 
"0864aeff53f8c05aa08d86e5ef839d3dfcf07aeba2db32f12db0ef716e87bd55" 1544 | dependencies = [ 1545 | "base64", 1546 | ] 1547 | 1548 | [[package]] 1549 | name = "ryu" 1550 | version = "1.0.11" 1551 | source = "registry+https://github.com/rust-lang/crates.io-index" 1552 | checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" 1553 | 1554 | [[package]] 1555 | name = "schannel" 1556 | version = "0.1.20" 1557 | source = "registry+https://github.com/rust-lang/crates.io-index" 1558 | checksum = "88d6731146462ea25d9244b2ed5fd1d716d25c52e4d54aa4fb0f3c4e9854dbe2" 1559 | dependencies = [ 1560 | "lazy_static", 1561 | "windows-sys 0.36.1", 1562 | ] 1563 | 1564 | [[package]] 1565 | name = "scoped_threadpool" 1566 | version = "0.1.9" 1567 | source = "registry+https://github.com/rust-lang/crates.io-index" 1568 | checksum = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8" 1569 | 1570 | [[package]] 1571 | name = "scopeguard" 1572 | version = "1.1.0" 1573 | source = "registry+https://github.com/rust-lang/crates.io-index" 1574 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 1575 | 1576 | [[package]] 1577 | name = "scraper" 1578 | version = "0.13.0" 1579 | source = "registry+https://github.com/rust-lang/crates.io-index" 1580 | checksum = "5684396b456f3eb69ceeb34d1b5cb1a2f6acf7ca4452131efa3ba0ee2c2d0a70" 1581 | dependencies = [ 1582 | "cssparser", 1583 | "ego-tree", 1584 | "getopts", 1585 | "html5ever", 1586 | "matches", 1587 | "selectors", 1588 | "smallvec", 1589 | "tendril", 1590 | ] 1591 | 1592 | [[package]] 1593 | name = "sct" 1594 | version = "0.7.0" 1595 | source = "registry+https://github.com/rust-lang/crates.io-index" 1596 | checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" 1597 | dependencies = [ 1598 | "ring", 1599 | "untrusted", 1600 | ] 1601 | 1602 | [[package]] 1603 | name = "security-framework" 1604 | version = "2.7.0" 1605 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1606 | checksum = "2bc1bb97804af6631813c55739f771071e0f2ed33ee20b68c86ec505d906356c" 1607 | dependencies = [ 1608 | "bitflags", 1609 | "core-foundation", 1610 | "core-foundation-sys", 1611 | "libc", 1612 | "security-framework-sys", 1613 | ] 1614 | 1615 | [[package]] 1616 | name = "security-framework-sys" 1617 | version = "2.6.1" 1618 | source = "registry+https://github.com/rust-lang/crates.io-index" 1619 | checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556" 1620 | dependencies = [ 1621 | "core-foundation-sys", 1622 | "libc", 1623 | ] 1624 | 1625 | [[package]] 1626 | name = "selectors" 1627 | version = "0.22.0" 1628 | source = "registry+https://github.com/rust-lang/crates.io-index" 1629 | checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" 1630 | dependencies = [ 1631 | "bitflags", 1632 | "cssparser", 1633 | "derive_more", 1634 | "fxhash", 1635 | "log", 1636 | "matches", 1637 | "phf 0.8.0", 1638 | "phf_codegen 0.8.0", 1639 | "precomputed-hash", 1640 | "servo_arc", 1641 | "smallvec", 1642 | "thin-slice", 1643 | ] 1644 | 1645 | [[package]] 1646 | name = "semver" 1647 | version = "1.0.14" 1648 | source = "registry+https://github.com/rust-lang/crates.io-index" 1649 | checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" 1650 | 1651 | [[package]] 1652 | name = "serde" 1653 | version = "1.0.147" 1654 | source = "registry+https://github.com/rust-lang/crates.io-index" 1655 | checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" 1656 | 1657 | [[package]] 1658 | name = "serde_json" 1659 | version = "1.0.89" 1660 | source = "registry+https://github.com/rust-lang/crates.io-index" 1661 | checksum = "020ff22c755c2ed3f8cf162dbb41a7268d934702f3ed3631656ea597e08fc3db" 1662 | dependencies = [ 1663 | "itoa 1.0.4", 1664 | "ryu", 1665 | "serde", 1666 | ] 1667 | 1668 | [[package]] 1669 | name = "serde_urlencoded" 1670 | version = 
"0.7.1" 1671 | source = "registry+https://github.com/rust-lang/crates.io-index" 1672 | checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" 1673 | dependencies = [ 1674 | "form_urlencoded", 1675 | "itoa 1.0.4", 1676 | "ryu", 1677 | "serde", 1678 | ] 1679 | 1680 | [[package]] 1681 | name = "servo_arc" 1682 | version = "0.1.1" 1683 | source = "registry+https://github.com/rust-lang/crates.io-index" 1684 | checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" 1685 | dependencies = [ 1686 | "nodrop", 1687 | "stable_deref_trait", 1688 | ] 1689 | 1690 | [[package]] 1691 | name = "sha1" 1692 | version = "0.10.5" 1693 | source = "registry+https://github.com/rust-lang/crates.io-index" 1694 | checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" 1695 | dependencies = [ 1696 | "cfg-if", 1697 | "cpufeatures", 1698 | "digest", 1699 | ] 1700 | 1701 | [[package]] 1702 | name = "sha2" 1703 | version = "0.10.6" 1704 | source = "registry+https://github.com/rust-lang/crates.io-index" 1705 | checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" 1706 | dependencies = [ 1707 | "cfg-if", 1708 | "cpufeatures", 1709 | "digest", 1710 | ] 1711 | 1712 | [[package]] 1713 | name = "signal-hook-registry" 1714 | version = "1.4.0" 1715 | source = "registry+https://github.com/rust-lang/crates.io-index" 1716 | checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" 1717 | dependencies = [ 1718 | "libc", 1719 | ] 1720 | 1721 | [[package]] 1722 | name = "siphasher" 1723 | version = "0.3.10" 1724 | source = "registry+https://github.com/rust-lang/crates.io-index" 1725 | checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" 1726 | 1727 | [[package]] 1728 | name = "slab" 1729 | version = "0.4.7" 1730 | source = "registry+https://github.com/rust-lang/crates.io-index" 1731 | checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" 1732 | 
dependencies = [ 1733 | "autocfg", 1734 | ] 1735 | 1736 | [[package]] 1737 | name = "smallvec" 1738 | version = "1.10.0" 1739 | source = "registry+https://github.com/rust-lang/crates.io-index" 1740 | checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" 1741 | 1742 | [[package]] 1743 | name = "socket2" 1744 | version = "0.4.7" 1745 | source = "registry+https://github.com/rust-lang/crates.io-index" 1746 | checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" 1747 | dependencies = [ 1748 | "libc", 1749 | "winapi", 1750 | ] 1751 | 1752 | [[package]] 1753 | name = "spin" 1754 | version = "0.5.2" 1755 | source = "registry+https://github.com/rust-lang/crates.io-index" 1756 | checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" 1757 | 1758 | [[package]] 1759 | name = "spin" 1760 | version = "0.9.4" 1761 | source = "registry+https://github.com/rust-lang/crates.io-index" 1762 | checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" 1763 | dependencies = [ 1764 | "lock_api", 1765 | ] 1766 | 1767 | [[package]] 1768 | name = "stable_deref_trait" 1769 | version = "1.2.0" 1770 | source = "registry+https://github.com/rust-lang/crates.io-index" 1771 | checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" 1772 | 1773 | [[package]] 1774 | name = "string_cache" 1775 | version = "0.8.4" 1776 | source = "registry+https://github.com/rust-lang/crates.io-index" 1777 | checksum = "213494b7a2b503146286049378ce02b482200519accc31872ee8be91fa820a08" 1778 | dependencies = [ 1779 | "new_debug_unreachable", 1780 | "once_cell", 1781 | "parking_lot", 1782 | "phf_shared 0.10.0", 1783 | "precomputed-hash", 1784 | "serde", 1785 | ] 1786 | 1787 | [[package]] 1788 | name = "string_cache_codegen" 1789 | version = "0.5.2" 1790 | source = "registry+https://github.com/rust-lang/crates.io-index" 1791 | checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" 1792 | 
dependencies = [ 1793 | "phf_generator 0.10.0", 1794 | "phf_shared 0.10.0", 1795 | "proc-macro2", 1796 | "quote", 1797 | ] 1798 | 1799 | [[package]] 1800 | name = "strsim" 1801 | version = "0.10.0" 1802 | source = "registry+https://github.com/rust-lang/crates.io-index" 1803 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 1804 | 1805 | [[package]] 1806 | name = "subtle" 1807 | version = "2.4.1" 1808 | source = "registry+https://github.com/rust-lang/crates.io-index" 1809 | checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" 1810 | 1811 | [[package]] 1812 | name = "syn" 1813 | version = "1.0.103" 1814 | source = "registry+https://github.com/rust-lang/crates.io-index" 1815 | checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" 1816 | dependencies = [ 1817 | "proc-macro2", 1818 | "quote", 1819 | "unicode-ident", 1820 | ] 1821 | 1822 | [[package]] 1823 | name = "tendril" 1824 | version = "0.4.3" 1825 | source = "registry+https://github.com/rust-lang/crates.io-index" 1826 | checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" 1827 | dependencies = [ 1828 | "futf", 1829 | "mac", 1830 | "utf-8", 1831 | ] 1832 | 1833 | [[package]] 1834 | name = "termcolor" 1835 | version = "1.1.3" 1836 | source = "registry+https://github.com/rust-lang/crates.io-index" 1837 | checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" 1838 | dependencies = [ 1839 | "winapi-util", 1840 | ] 1841 | 1842 | [[package]] 1843 | name = "textwrap" 1844 | version = "0.16.0" 1845 | source = "registry+https://github.com/rust-lang/crates.io-index" 1846 | checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" 1847 | 1848 | [[package]] 1849 | name = "thin-slice" 1850 | version = "0.1.1" 1851 | source = "registry+https://github.com/rust-lang/crates.io-index" 1852 | checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" 1853 | 1854 | 
[[package]] 1855 | name = "threadpool" 1856 | version = "1.8.1" 1857 | source = "registry+https://github.com/rust-lang/crates.io-index" 1858 | checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" 1859 | dependencies = [ 1860 | "num_cpus", 1861 | ] 1862 | 1863 | [[package]] 1864 | name = "tiff" 1865 | version = "0.8.0" 1866 | source = "registry+https://github.com/rust-lang/crates.io-index" 1867 | checksum = "f17def29300a156c19ae30814710d9c63cd50288a49c6fd3a10ccfbe4cf886fd" 1868 | dependencies = [ 1869 | "flate2", 1870 | "jpeg-decoder", 1871 | "weezl", 1872 | ] 1873 | 1874 | [[package]] 1875 | name = "time" 1876 | version = "0.3.17" 1877 | source = "registry+https://github.com/rust-lang/crates.io-index" 1878 | checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" 1879 | dependencies = [ 1880 | "itoa 1.0.4", 1881 | "serde", 1882 | "time-core", 1883 | "time-macros", 1884 | ] 1885 | 1886 | [[package]] 1887 | name = "time-core" 1888 | version = "0.1.0" 1889 | source = "registry+https://github.com/rust-lang/crates.io-index" 1890 | checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" 1891 | 1892 | [[package]] 1893 | name = "time-macros" 1894 | version = "0.2.6" 1895 | source = "registry+https://github.com/rust-lang/crates.io-index" 1896 | checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2" 1897 | dependencies = [ 1898 | "time-core", 1899 | ] 1900 | 1901 | [[package]] 1902 | name = "tinyvec" 1903 | version = "1.6.0" 1904 | source = "registry+https://github.com/rust-lang/crates.io-index" 1905 | checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" 1906 | dependencies = [ 1907 | "tinyvec_macros", 1908 | ] 1909 | 1910 | [[package]] 1911 | name = "tinyvec_macros" 1912 | version = "0.1.0" 1913 | source = "registry+https://github.com/rust-lang/crates.io-index" 1914 | checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" 1915 | 1916 | 
[[package]] 1917 | name = "tokio" 1918 | version = "1.22.0" 1919 | source = "registry+https://github.com/rust-lang/crates.io-index" 1920 | checksum = "d76ce4a75fb488c605c54bf610f221cea8b0dafb53333c1a67e8ee199dcd2ae3" 1921 | dependencies = [ 1922 | "autocfg", 1923 | "bytes", 1924 | "libc", 1925 | "memchr", 1926 | "mio", 1927 | "num_cpus", 1928 | "parking_lot", 1929 | "pin-project-lite", 1930 | "signal-hook-registry", 1931 | "socket2", 1932 | "tokio-macros", 1933 | "winapi", 1934 | ] 1935 | 1936 | [[package]] 1937 | name = "tokio-macros" 1938 | version = "1.8.0" 1939 | source = "registry+https://github.com/rust-lang/crates.io-index" 1940 | checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" 1941 | dependencies = [ 1942 | "proc-macro2", 1943 | "quote", 1944 | "syn", 1945 | ] 1946 | 1947 | [[package]] 1948 | name = "tokio-rustls" 1949 | version = "0.23.4" 1950 | source = "registry+https://github.com/rust-lang/crates.io-index" 1951 | checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" 1952 | dependencies = [ 1953 | "rustls", 1954 | "tokio", 1955 | "webpki", 1956 | ] 1957 | 1958 | [[package]] 1959 | name = "tokio-util" 1960 | version = "0.7.4" 1961 | source = "registry+https://github.com/rust-lang/crates.io-index" 1962 | checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740" 1963 | dependencies = [ 1964 | "bytes", 1965 | "futures-core", 1966 | "futures-sink", 1967 | "pin-project-lite", 1968 | "tokio", 1969 | "tracing", 1970 | ] 1971 | 1972 | [[package]] 1973 | name = "tower-service" 1974 | version = "0.3.2" 1975 | source = "registry+https://github.com/rust-lang/crates.io-index" 1976 | checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" 1977 | 1978 | [[package]] 1979 | name = "tracing" 1980 | version = "0.1.37" 1981 | source = "registry+https://github.com/rust-lang/crates.io-index" 1982 | checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" 1983 | 
dependencies = [ 1984 | "cfg-if", 1985 | "pin-project-lite", 1986 | "tracing-core", 1987 | ] 1988 | 1989 | [[package]] 1990 | name = "tracing-core" 1991 | version = "0.1.30" 1992 | source = "registry+https://github.com/rust-lang/crates.io-index" 1993 | checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" 1994 | dependencies = [ 1995 | "once_cell", 1996 | ] 1997 | 1998 | [[package]] 1999 | name = "try-lock" 2000 | version = "0.2.3" 2001 | source = "registry+https://github.com/rust-lang/crates.io-index" 2002 | checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" 2003 | 2004 | [[package]] 2005 | name = "typenum" 2006 | version = "1.15.0" 2007 | source = "registry+https://github.com/rust-lang/crates.io-index" 2008 | checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" 2009 | 2010 | [[package]] 2011 | name = "unicode-bidi" 2012 | version = "0.3.8" 2013 | source = "registry+https://github.com/rust-lang/crates.io-index" 2014 | checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" 2015 | 2016 | [[package]] 2017 | name = "unicode-ident" 2018 | version = "1.0.5" 2019 | source = "registry+https://github.com/rust-lang/crates.io-index" 2020 | checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" 2021 | 2022 | [[package]] 2023 | name = "unicode-normalization" 2024 | version = "0.1.22" 2025 | source = "registry+https://github.com/rust-lang/crates.io-index" 2026 | checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" 2027 | dependencies = [ 2028 | "tinyvec", 2029 | ] 2030 | 2031 | [[package]] 2032 | name = "unicode-width" 2033 | version = "0.1.10" 2034 | source = "registry+https://github.com/rust-lang/crates.io-index" 2035 | checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" 2036 | 2037 | [[package]] 2038 | name = "untrusted" 2039 | version = "0.7.1" 2040 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 2041 | checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" 2042 | 2043 | [[package]] 2044 | name = "url" 2045 | version = "2.3.1" 2046 | source = "registry+https://github.com/rust-lang/crates.io-index" 2047 | checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" 2048 | dependencies = [ 2049 | "form_urlencoded", 2050 | "idna", 2051 | "percent-encoding", 2052 | ] 2053 | 2054 | [[package]] 2055 | name = "utf-8" 2056 | version = "0.7.6" 2057 | source = "registry+https://github.com/rust-lang/crates.io-index" 2058 | checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" 2059 | 2060 | [[package]] 2061 | name = "vcpkg" 2062 | version = "0.2.15" 2063 | source = "registry+https://github.com/rust-lang/crates.io-index" 2064 | checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 2065 | 2066 | [[package]] 2067 | name = "version_check" 2068 | version = "0.9.4" 2069 | source = "registry+https://github.com/rust-lang/crates.io-index" 2070 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 2071 | 2072 | [[package]] 2073 | name = "want" 2074 | version = "0.3.0" 2075 | source = "registry+https://github.com/rust-lang/crates.io-index" 2076 | checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" 2077 | dependencies = [ 2078 | "log", 2079 | "try-lock", 2080 | ] 2081 | 2082 | [[package]] 2083 | name = "wasi" 2084 | version = "0.9.0+wasi-snapshot-preview1" 2085 | source = "registry+https://github.com/rust-lang/crates.io-index" 2086 | checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" 2087 | 2088 | [[package]] 2089 | name = "wasi" 2090 | version = "0.11.0+wasi-snapshot-preview1" 2091 | source = "registry+https://github.com/rust-lang/crates.io-index" 2092 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 2093 | 2094 | [[package]] 2095 
| name = "wasm-bindgen" 2096 | version = "0.2.83" 2097 | source = "registry+https://github.com/rust-lang/crates.io-index" 2098 | checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" 2099 | dependencies = [ 2100 | "cfg-if", 2101 | "wasm-bindgen-macro", 2102 | ] 2103 | 2104 | [[package]] 2105 | name = "wasm-bindgen-backend" 2106 | version = "0.2.83" 2107 | source = "registry+https://github.com/rust-lang/crates.io-index" 2108 | checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" 2109 | dependencies = [ 2110 | "bumpalo", 2111 | "log", 2112 | "once_cell", 2113 | "proc-macro2", 2114 | "quote", 2115 | "syn", 2116 | "wasm-bindgen-shared", 2117 | ] 2118 | 2119 | [[package]] 2120 | name = "wasm-bindgen-futures" 2121 | version = "0.4.33" 2122 | source = "registry+https://github.com/rust-lang/crates.io-index" 2123 | checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d" 2124 | dependencies = [ 2125 | "cfg-if", 2126 | "js-sys", 2127 | "wasm-bindgen", 2128 | "web-sys", 2129 | ] 2130 | 2131 | [[package]] 2132 | name = "wasm-bindgen-macro" 2133 | version = "0.2.83" 2134 | source = "registry+https://github.com/rust-lang/crates.io-index" 2135 | checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" 2136 | dependencies = [ 2137 | "quote", 2138 | "wasm-bindgen-macro-support", 2139 | ] 2140 | 2141 | [[package]] 2142 | name = "wasm-bindgen-macro-support" 2143 | version = "0.2.83" 2144 | source = "registry+https://github.com/rust-lang/crates.io-index" 2145 | checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" 2146 | dependencies = [ 2147 | "proc-macro2", 2148 | "quote", 2149 | "syn", 2150 | "wasm-bindgen-backend", 2151 | "wasm-bindgen-shared", 2152 | ] 2153 | 2154 | [[package]] 2155 | name = "wasm-bindgen-shared" 2156 | version = "0.2.83" 2157 | source = "registry+https://github.com/rust-lang/crates.io-index" 2158 | checksum = 
"1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" 2159 | 2160 | [[package]] 2161 | name = "web-sys" 2162 | version = "0.3.60" 2163 | source = "registry+https://github.com/rust-lang/crates.io-index" 2164 | checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" 2165 | dependencies = [ 2166 | "js-sys", 2167 | "wasm-bindgen", 2168 | ] 2169 | 2170 | [[package]] 2171 | name = "webpki" 2172 | version = "0.22.0" 2173 | source = "registry+https://github.com/rust-lang/crates.io-index" 2174 | checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" 2175 | dependencies = [ 2176 | "ring", 2177 | "untrusted", 2178 | ] 2179 | 2180 | [[package]] 2181 | name = "weezl" 2182 | version = "0.1.7" 2183 | source = "registry+https://github.com/rust-lang/crates.io-index" 2184 | checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb" 2185 | 2186 | [[package]] 2187 | name = "winapi" 2188 | version = "0.3.9" 2189 | source = "registry+https://github.com/rust-lang/crates.io-index" 2190 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 2191 | dependencies = [ 2192 | "winapi-i686-pc-windows-gnu", 2193 | "winapi-x86_64-pc-windows-gnu", 2194 | ] 2195 | 2196 | [[package]] 2197 | name = "winapi-i686-pc-windows-gnu" 2198 | version = "0.4.0" 2199 | source = "registry+https://github.com/rust-lang/crates.io-index" 2200 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 2201 | 2202 | [[package]] 2203 | name = "winapi-util" 2204 | version = "0.1.5" 2205 | source = "registry+https://github.com/rust-lang/crates.io-index" 2206 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 2207 | dependencies = [ 2208 | "winapi", 2209 | ] 2210 | 2211 | [[package]] 2212 | name = "winapi-x86_64-pc-windows-gnu" 2213 | version = "0.4.0" 2214 | source = "registry+https://github.com/rust-lang/crates.io-index" 2215 | checksum = 
"712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 2216 | 2217 | [[package]] 2218 | name = "windows-sys" 2219 | version = "0.36.1" 2220 | source = "registry+https://github.com/rust-lang/crates.io-index" 2221 | checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" 2222 | dependencies = [ 2223 | "windows_aarch64_msvc 0.36.1", 2224 | "windows_i686_gnu 0.36.1", 2225 | "windows_i686_msvc 0.36.1", 2226 | "windows_x86_64_gnu 0.36.1", 2227 | "windows_x86_64_msvc 0.36.1", 2228 | ] 2229 | 2230 | [[package]] 2231 | name = "windows-sys" 2232 | version = "0.42.0" 2233 | source = "registry+https://github.com/rust-lang/crates.io-index" 2234 | checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" 2235 | dependencies = [ 2236 | "windows_aarch64_gnullvm", 2237 | "windows_aarch64_msvc 0.42.0", 2238 | "windows_i686_gnu 0.42.0", 2239 | "windows_i686_msvc 0.42.0", 2240 | "windows_x86_64_gnu 0.42.0", 2241 | "windows_x86_64_gnullvm", 2242 | "windows_x86_64_msvc 0.42.0", 2243 | ] 2244 | 2245 | [[package]] 2246 | name = "windows_aarch64_gnullvm" 2247 | version = "0.42.0" 2248 | source = "registry+https://github.com/rust-lang/crates.io-index" 2249 | checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" 2250 | 2251 | [[package]] 2252 | name = "windows_aarch64_msvc" 2253 | version = "0.36.1" 2254 | source = "registry+https://github.com/rust-lang/crates.io-index" 2255 | checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" 2256 | 2257 | [[package]] 2258 | name = "windows_aarch64_msvc" 2259 | version = "0.42.0" 2260 | source = "registry+https://github.com/rust-lang/crates.io-index" 2261 | checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" 2262 | 2263 | [[package]] 2264 | name = "windows_i686_gnu" 2265 | version = "0.36.1" 2266 | source = "registry+https://github.com/rust-lang/crates.io-index" 2267 | checksum = 
"180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" 2268 | 2269 | [[package]] 2270 | name = "windows_i686_gnu" 2271 | version = "0.42.0" 2272 | source = "registry+https://github.com/rust-lang/crates.io-index" 2273 | checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" 2274 | 2275 | [[package]] 2276 | name = "windows_i686_msvc" 2277 | version = "0.36.1" 2278 | source = "registry+https://github.com/rust-lang/crates.io-index" 2279 | checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" 2280 | 2281 | [[package]] 2282 | name = "windows_i686_msvc" 2283 | version = "0.42.0" 2284 | source = "registry+https://github.com/rust-lang/crates.io-index" 2285 | checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" 2286 | 2287 | [[package]] 2288 | name = "windows_x86_64_gnu" 2289 | version = "0.36.1" 2290 | source = "registry+https://github.com/rust-lang/crates.io-index" 2291 | checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" 2292 | 2293 | [[package]] 2294 | name = "windows_x86_64_gnu" 2295 | version = "0.42.0" 2296 | source = "registry+https://github.com/rust-lang/crates.io-index" 2297 | checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" 2298 | 2299 | [[package]] 2300 | name = "windows_x86_64_gnullvm" 2301 | version = "0.42.0" 2302 | source = "registry+https://github.com/rust-lang/crates.io-index" 2303 | checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" 2304 | 2305 | [[package]] 2306 | name = "windows_x86_64_msvc" 2307 | version = "0.36.1" 2308 | source = "registry+https://github.com/rust-lang/crates.io-index" 2309 | checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" 2310 | 2311 | [[package]] 2312 | name = "windows_x86_64_msvc" 2313 | version = "0.42.0" 2314 | source = "registry+https://github.com/rust-lang/crates.io-index" 2315 | checksum = 
"f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" 2316 | 2317 | [[package]] 2318 | name = "winreg" 2319 | version = "0.10.1" 2320 | source = "registry+https://github.com/rust-lang/crates.io-index" 2321 | checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" 2322 | dependencies = [ 2323 | "winapi", 2324 | ] 2325 | 2326 | [[package]] 2327 | name = "zip" 2328 | version = "0.6.3" 2329 | source = "registry+https://github.com/rust-lang/crates.io-index" 2330 | checksum = "537ce7411d25e54e8ae21a7ce0b15840e7bfcff15b51d697ec3266cc76bdf080" 2331 | dependencies = [ 2332 | "aes", 2333 | "byteorder", 2334 | "bzip2", 2335 | "constant_time_eq", 2336 | "crc32fast", 2337 | "crossbeam-utils", 2338 | "flate2", 2339 | "hmac", 2340 | "pbkdf2", 2341 | "sha1", 2342 | "time", 2343 | "zstd", 2344 | ] 2345 | 2346 | [[package]] 2347 | name = "zstd" 2348 | version = "0.11.2+zstd.1.5.2" 2349 | source = "registry+https://github.com/rust-lang/crates.io-index" 2350 | checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" 2351 | dependencies = [ 2352 | "zstd-safe", 2353 | ] 2354 | 2355 | [[package]] 2356 | name = "zstd-safe" 2357 | version = "5.0.2+zstd.1.5.2" 2358 | source = "registry+https://github.com/rust-lang/crates.io-index" 2359 | checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" 2360 | dependencies = [ 2361 | "libc", 2362 | "zstd-sys", 2363 | ] 2364 | 2365 | [[package]] 2366 | name = "zstd-sys" 2367 | version = "2.0.4+zstd.1.5.2" 2368 | source = "registry+https://github.com/rust-lang/crates.io-index" 2369 | checksum = "4fa202f2ef00074143e219d15b62ffc317d17cc33909feac471c044087cad7b0" 2370 | dependencies = [ 2371 | "cc", 2372 | "libc", 2373 | ] 2374 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "car-data" 3 | version = "0.1.0" 4 | edition = "2021" 
5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | anyhow = { version="1.0" } 10 | async-channel = { version="1.7.1" } 11 | clap = { version="3.2.20", features=["derive"] } 12 | image = { version="0.24.5" } 13 | npy-writer = { version="0.1.0", features=["zip"] } 14 | rand = { version="0.8.5", features=["std_rng"] } 15 | reqwest = { version="0.11.11", default-features = false, features = ["rustls-tls-native-roots"] } 16 | rusqlite = { version="0.28.0", features = ["bundled"] } 17 | scraper = { version="0.13.0" } 18 | serde_json = { version="1.0" } 19 | sha2 = { version="0.10.6" } 20 | tokio = { version="1.20.1", features=["full"] } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022 Alexander Quinn Nichol 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # car-data 2 | 3 | This is a hobby project to predict various attributes of cars from photos. 4 | 5 | Demo and blog post: 6 | 7 | * [Blog post](https://blog.aqnichol.com/2022/12/31/large-scale-vehicle-classification/). 8 | * [Gradio demo](https://huggingface.co/spaces/unixpickle/car-data) 9 | 10 | # Usage 11 | 12 | First, you should: 13 | 14 | * Compile the scraper with `cargo build --release`. 15 | * Install the Python package with `pip install -e .`. 16 | 17 | ## Scraping data 18 | 19 | To run the scraper, run: 20 | 21 | ``` 22 | ./target/release/car-data scrape-kbb /path/to/db.db /path/to/images 23 | ``` 24 | 25 | In the above command, `/path/to/db.db` is the path where the metadata will be saved. It is stored as a sqlite3 database. The `/path/to/images` directory will be used to dump raw images. 26 | 27 | To deduplicate and downsample the downloaded images, run: 28 | 29 | ``` 30 | ./target/release/car-data dedup-images \ 31 | /path/to/db.db \ 32 | /path/to/images \ 33 | /path/to/dedup 34 | ``` 35 | 36 | From here on out, we will use the `/path/to/dedup` directory instead of `/path/to/images`, since the former directory contains all of the images we will actually use for training. 37 | 38 | To export the resulting dataset as a `.npz` file to load in Python, run: 39 | 40 | ``` 41 | ./target/release/car-data export-data \ 42 | /path/to/db.db \ 43 | /path/to/index.npz 44 | ``` 45 | 46 | ## Filtering the dataset 47 | 48 | To filter the dataset, you will first want to compute feature vectors for the entire dataset. These will be exported as a directory full of npz files with shards of features. 
You can do this with the following command: 49 | 50 | ``` 51 | python3 -m car_data.scripts.clip_features \ 52 | /path/to/dedup \ 53 | /path/to/features 54 | ``` 55 | 56 | Once you have labeled some images for the filter, you can train it quickly like so: 57 | 58 | ``` 59 | python3 -m car_data.scripts.train_filter \ 60 | --positive_dirs /path/to/positive_dir \ 61 | --negative_dirs /path/to/negative_dir \ 62 | --model_out /path/to/filter.pt 63 | ``` 64 | 65 | To filter the dataset `.npz` file using the filter, you can use this command: 66 | 67 | ``` 68 | python3 -m car_data.scripts.filter_index \ 69 | --index /path/to/index.npz \ 70 | --feature_dir /path/to/features \ 71 | --classifier_path /path/to/filter.pt \ 72 | --output_path /path/to/index_filtered.npz 73 | ``` 74 | 75 | ## Training a model 76 | 77 | To train a MobileNetV2 with auxiliary losses: 78 | 79 | ``` 80 | python3 -m car_data.scripts.train \ 81 | --index_path /path/to/index_filtered.npz \ 82 | --image_dir /path/to/dedup \ 83 | --save_dir /path/to/mobilenetv2_save_dir \ 84 | --lr 1e-4 \ 85 | --batch_size 64 \ 86 | --eval_interval 1 \ 87 | --use_data_aug \ 88 | --model mobilenetv2 89 | ``` 90 | 91 | To finetune CLIP with auxiliary losses: 92 | 93 | ``` 94 | python3 -m car_data.scripts.train \ 95 | --index_path /path/to/index_filtered.npz \ 96 | --image_dir /path/to/dedup \ 97 | --save_dir /path/to/clip_save_dir \ 98 | --lr 1e-5 \ 99 | --batch_size 64 \ 100 | --microbatch 16 \ 101 | --eval_interval 1 \ 102 | --use_data_aug \ 103 | --model clip 104 | ``` 105 | -------------------------------------------------------------------------------- /car_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unixpickle/car-data/5e496b4190767fa24a88d135613eb620d7929499/car_data/__init__.py -------------------------------------------------------------------------------- /car_data/constants.py: 
-------------------------------------------------------------------------------- 1 | PRICE_CUTOFFS = [ 2 | 10_000.0, 3 | 15_000.0, 4 | 20_000.0, 5 | 25_000.0, 6 | 30_000.0, 7 | 35_000.0, 8 | 40_000.0, 9 | 50_000.0, 10 | 60_000.0, 11 | ] 12 | 13 | NUM_PRICE_BINS = len(PRICE_CUTOFFS) + 1 14 | 15 | PRICE_BIN_LABELS = [ 16 | "$0-$10,000", 17 | "$10,000-$15,000", 18 | "$15,000-$20,000", 19 | "$20,000-$25,000", 20 | "$25,000-$30,000", 21 | "$30,000-$35,000", 22 | "$35,000-$40,000", 23 | "$40,000-$50,000", 24 | "$50,000-$60,000", 25 | "$60,000+", 26 | ] 27 | 28 | MEDIAN_PRICE_SCALE = 30000.0 29 | 30 | YEARS = list(range(1983, 2024)) 31 | NUM_YEARS = len(YEARS) + 1 32 | 33 | MAKES_MODELS = ( 34 | ("Ford", "F150"), 35 | ("Chevrolet", "Silverado 1500"), 36 | ("RAM", "1500"), 37 | ("Jeep", "Wrangler"), 38 | ("Ford", "Explorer"), 39 | ("Nissan", "Rogue"), 40 | ("Jeep", "Grand Cherokee"), 41 | ("Chevrolet", "Equinox"), 42 | ("GMC", "Sierra 1500"), 43 | ("Ford", "Escape"), 44 | ("Honda", "Accord"), 45 | ("Toyota", "Camry"), 46 | ("Toyota", "RAV4"), 47 | ("Honda", "Civic"), 48 | ("Honda", "CR-V"), 49 | ("MAZDA", "CX-5"), 50 | ("Toyota", "Tacoma"), 51 | ("Ford", "F250"), 52 | ("Toyota", "Corolla"), 53 | ("Toyota", "Highlander"), 54 | ("Jeep", "Cherokee"), 55 | ("Nissan", "Altima"), 56 | ("Subaru", "Outback"), 57 | ("RAM", "2500"), 58 | ("Honda", "Pilot"), 59 | ("Chevrolet", "Malibu"), 60 | ("Hyundai", "Tucson"), 61 | ("Ford", "Mustang"), 62 | ("Chevrolet", "Traverse"), 63 | ("Hyundai", "Santa Fe"), 64 | ("Hyundai", "Elantra"), 65 | ("Jeep", "Compass"), 66 | ("Chevrolet", "Silverado 2500"), 67 | ("Ford", "Edge"), 68 | ("Nissan", "Frontier"), 69 | ("Chevrolet", "Tahoe"), 70 | ("GMC", "Terrain"), 71 | ("Toyota", "Tundra"), 72 | ("GMC", "Acadia"), 73 | ("Volkswagen", "Tiguan"), 74 | ("Hyundai", "Sonata"), 75 | ("Subaru", "Forester"), 76 | ("Jeep", "Gladiator"), 77 | ("Chevrolet", "Colorado"), 78 | ("Nissan", "Pathfinder"), 79 | ("Toyota", "4Runner"), 80 | ("Ford", "Fusion"), 81 | 
("Nissan", "Sentra"), 82 | ("Kia", "Sorento"), 83 | ("GMC", "Sierra 2500"), 84 | ("Ford", "F350"), 85 | ("Subaru", "Crosstrek"), 86 | ("Kia", "Sportage"), 87 | ("Honda", "HR-V"), 88 | ("Kia", "Forte"), 89 | ("Honda", "Odyssey"), 90 | ("Ford", "Bronco Sport"), 91 | ("Dodge", "Challenger"), 92 | ("Dodge", "Charger"), 93 | ("Buick", "Enclave"), 94 | ("Chevrolet", "Blazer"), 95 | ("Acura", "MDX"), 96 | ("Audi", "Q5"), 97 | ("Volkswagen", "Atlas"), 98 | ("Buick", "Envision"), 99 | ("Kia", "Soul"), 100 | ("Chrysler", "Pacifica"), 101 | ("Hyundai", "Kona"), 102 | ("Chevrolet", "Camaro"), 103 | ("Jeep", "Grand Cherokee L"), 104 | ("MAZDA", "CX-9"), 105 | ("Dodge", "Durango"), 106 | ("Nissan", "Murano"), 107 | ("Chevrolet", "Trax"), 108 | ("GMC", "Yukon"), 109 | ("Volkswagen", "Jetta"), 110 | ("BMW", "X5"), 111 | ("Chevrolet", "Suburban"), 112 | ("Ford", "Expedition"), 113 | ("Nissan", "Rogue Sport"), 114 | ("RAM", "3500"), 115 | ("Ford", "Bronco"), 116 | ("Honda", "Ridgeline"), 117 | ("Chevrolet", "Corvette"), 118 | ("Cadillac", "XT5"), 119 | ("Toyota", "Sienna"), 120 | ("Mitsubishi", "Outlander"), 121 | ("Kia", "Telluride"), 122 | ("Buick", "Encore"), 123 | ("Mercedes-Benz", "C 300"), 124 | ("BMW", "X3"), 125 | ("Subaru", "Ascent"), 126 | ("Honda", "Passport"), 127 | ("MAZDA", "MAZDA3"), 128 | ("Buick", "Encore GX"), 129 | ("Volvo", "XC90"), 130 | ("Mercedes-Benz", "GLC 300"), 131 | ("Ford", "Ranger"), 132 | ("Jeep", "Renegade"), 133 | ("Lexus", "RX 350"), 134 | ("Volvo", "XC60"), 135 | ("Kia", "Optima"), 136 | ("Chevrolet", "Silverado 3500"), 137 | ("Dodge", "Grand Caravan"), 138 | ("INFINITI", "QX60"), 139 | ("Nissan", "Titan"), 140 | ("Subaru", "WRX"), 141 | ("GMC", "Canyon"), 142 | ("Tesla", "Model 3"), 143 | ("Chevrolet", "Cruze"), 144 | ("Lexus", "ES 350"), 145 | ("Nissan", "Armada"), 146 | ("GMC", "Yukon XL"), 147 | ("GMC", "Sierra 3500"), 148 | ("Hyundai", "Palisade"), 149 | ("Ford", "Focus"), 150 | ("Kia", "Niro"), 151 | ("Toyota", "Prius"), 152 | ("INFINITI", 
"QX80"), 153 | ("Porsche", "Macan"), 154 | ("Chevrolet", "TrailBlazer"), 155 | ("Cadillac", "XT4"), 156 | ("MAZDA", "CX-50"), 157 | ("Lincoln", "Corsair"), 158 | ("Audi", "Q7"), 159 | ("Ford", "Expedition Max"), 160 | ("Cadillac", "Escalade"), 161 | ("MINI", "Cooper"), 162 | ("Acura", "RDX"), 163 | ("Subaru", "Impreza"), 164 | ("Audi", "A4"), 165 | ("Nissan", "Kicks"), 166 | ("Nissan", "Maxima"), 167 | ("Porsche", "Cayenne"), 168 | ("Dodge", "Journey"), 169 | ("Porsche", "911"), 170 | ("RAM", "ProMaster"), 171 | ("Mercedes-Benz", "GLE 350"), 172 | ("Ford", "EcoSport"), 173 | ("Volkswagen", "Taos"), 174 | ("MAZDA", "CX-30"), 175 | ("Lincoln", "Nautilus"), 176 | ("Land Rover", "Range Rover"), 177 | ("Mitsubishi", "Outlander Sport"), 178 | ("Lexus", "GX 460"), 179 | ("Volkswagen", "Passat"), 180 | ("Land Rover", "Range Rover Sport"), 181 | ("Nissan", "Versa"), 182 | ("Volvo", "XC40"), 183 | ("Mercedes-Benz", "E 350"), 184 | ("Chrysler", "300"), 185 | ("Chevrolet", "Impala"), 186 | ("Subaru", "Legacy"), 187 | ("Acura", "TLX"), 188 | ("Mercedes-Benz", "Sprinter"), 189 | ("Cadillac", "CT5"), 190 | ("Mercedes-Benz", "GLA 250"), 191 | ("Hyundai", "Santa Cruz"), 192 | ("Tesla", "Model S"), 193 | ("Mercedes-Benz", "GLB 250"), 194 | ("INFINITI", "Q50"), 195 | ("Kia", "K5"), 196 | ("Cadillac", "XT6"), 197 | ("Audi", "Q3"), 198 | ("INFINITI", "QX50"), 199 | ("Ford", "Transit 250"), 200 | ("Ford", "Mustang Mach-E"), 201 | ("Kia", "Seltos"), 202 | ("MAZDA", "MX-5 Miata"), 203 | ("Audi", "A5"), 204 | ("Lincoln", "Aviator"), 205 | ("BMW", "X1"), 206 | ("Kia", "Rio"), 207 | ("Chevrolet", "Express 2500"), 208 | ("Ford", "Transit 350"), 209 | ("Toyota", "Venza"), 210 | ("Mercedes-Benz", "S 500"), 211 | ("Cadillac", "Escalade ESV"), 212 | ("Jeep", "Wagoneer"), 213 | ("Chevrolet", "Bolt"), 214 | ("MINI", "Cooper Countryman"), 215 | ("Toyota", "Sequoia"), 216 | ("Mercedes-Benz", "CLA 250"), 217 | ("BMW", "X7"), 218 | ("Cadillac", "CTS"), 219 | ("Hyundai", "Venue"), 220 | ("Volkswagen", 
"ID.4"), 221 | ("Toyota", "Avalon"), 222 | ("Jeep", "Patriot"), 223 | ("Tesla", "Model Y"), 224 | ("Nissan", "Leaf"), 225 | ("Audi", "A3"), 226 | ("Acura", "Integra"), 227 | ("Ford", "Transit Connect"), 228 | ("Lexus", "NX 300"), 229 | ("Audi", "A6"), 230 | ("Mercedes-Benz", "EQS 450+"), 231 | ("Chevrolet", "Spark"), 232 | ("Jaguar", "F-PACE"), 233 | ("Mercedes-Benz", "S 580"), 234 | ("Chevrolet", "Sonic"), 235 | ("Lincoln", "Navigator"), 236 | ("Toyota", "C-HR"), 237 | ("Ford", "Fiesta"), 238 | ("RAM", "ProMaster City"), 239 | ("Volvo", "S60"), 240 | ("BMW", "330i xDrive"), 241 | ("Ford", "Flex"), 242 | ("MAZDA", "MAZDA6"), 243 | ("Toyota", "Corolla Cross"), 244 | ("Lincoln", "MKZ"), 245 | ("Chevrolet", "Express 3500"), 246 | ("Hyundai", "Accent"), 247 | ("Land Rover", "Discovery Sport"), 248 | ("Tesla", "Model X"), 249 | ("Honda", "Fit"), 250 | ("Alfa Romeo", "Stelvio"), 251 | ("Chrysler", "200"), 252 | ("Volkswagen", "Beetle"), 253 | ("Cadillac", "CT4"), 254 | ("Ford", "Maverick"), 255 | ("Volkswagen", "GTI"), 256 | ("Lincoln", "MKC"), 257 | ("Porsche", "Panamera"), 258 | ("Ford", "F450"), 259 | ("Lexus", "NX 350"), 260 | ("Chrysler", "Town & Country"), 261 | ("Kia", "Stinger"), 262 | ("Land Rover", "Range Rover Velar"), 263 | ("Audi", "S5"), 264 | ("BMW", "330i"), 265 | ("Volkswagen", "Golf"), 266 | ("Mercedes-Benz", "GLS 450"), 267 | ("Lexus", "IS 350"), 268 | ("Land Rover", "Range Rover Evoque"), 269 | ("Toyota", "Prius Prime"), 270 | ("Acura", "ILX"), 271 | ("Genesis", "G70"), 272 | ("Ford", "Taurus"), 273 | ("Hyundai", "Veloster"), 274 | ("Lexus", "IS 300"), 275 | ("Land Rover", "Defender"), 276 | ("Genesis", "GV80"), 277 | ("Alfa Romeo", "Giulia"), 278 | ("BMW", "X6"), 279 | ("Hyundai", "Ioniq 5"), 280 | ("Audi", "SQ5"), 281 | ("BMW", "328i"), 282 | ("BMW", "i3"), 283 | ("Cadillac", "ATS"), 284 | ("Mercedes-Benz", "S 550"), 285 | ("Lincoln", "Navigator L"), 286 | ("Mercedes-Benz", "E 450"), 287 | ("Buick", "LaCrosse"), 288 | ("Ford", "E-350 and Econoline 
350"), 289 | ("BMW", "M3"), 290 | ("Mercedes-Benz", "GLE 53 AMG"), 291 | ("Lexus", "IS 250"), 292 | ("Mercedes-Benz", "E 300"), 293 | ("Cadillac", "SRX"), 294 | ("GMC", "Savana 2500"), 295 | ("INFINITI", "QX55"), 296 | ("Mitsubishi", "Eclipse Cross"), 297 | ("Audi", "Q8"), 298 | ("INFINITI", "Q60"), 299 | ("Kia", "Sedona"), 300 | ("Lincoln", "MKX"), 301 | ("Audi", "e-tron"), 302 | ("Chevrolet", "Volt"), 303 | ("BMW", "X4"), 304 | ("Chevrolet", "Bolt EUV"), 305 | ("Volvo", "C40"), 306 | ("Maserati", "Ghibli"), 307 | ("Lexus", "ES 300h"), 308 | ("Jaguar", "F-TYPE"), 309 | ("Cadillac", "XTS"), 310 | ("Genesis", "GV70"), 311 | ("BMW", "430i xDrive"), 312 | ("BMW", "430i"), 313 | ("BMW", "Z4"), 314 | ("BMW", "M4"), 315 | ("Land Rover", "Discovery"), 316 | ("Lexus", "GS 350"), 317 | ("Mercedes-Benz", "A 220"), 318 | ("Dodge", "Ram 1500 Truck"), 319 | ("Ford", "F550"), 320 | ("Hyundai", "Ioniq"), 321 | ("Mercedes-Benz", "ML 350"), 322 | ("Genesis", "G80"), 323 | ("MINI", "Cooper Clubman"), 324 | ("Maserati", "Levante"), 325 | ("Mercedes-Benz", "AMG GT"), 326 | ("BMW", "530i xDrive"), 327 | ("Lincoln", "Continental"), 328 | ("Chrysler", "Voyager"), 329 | ("Lexus", "LS 460"), 330 | ("MAZDA", "MX-5 Miata RF"), 331 | ("FIAT", "500"), 332 | ("Cadillac", "CT6"), 333 | ("MAZDA", "CX-3"), 334 | ("BMW", "M5"), 335 | ("BMW", "328i xDrive"), 336 | ("Hyundai", "Genesis"), 337 | ("Kia", "EV6"), 338 | ("INFINITI", "G37"), 339 | ("Audi", "A8"), 340 | ("Audi", "S4"), 341 | ("BMW", "X2"), 342 | ("BMW", "530i"), 343 | ("Lexus", "UX 250h"), 344 | ("Lexus", "RX 350L"), 345 | ("Mercedes-Benz", "G 63 AMG"), 346 | ("Nissan", "Juke"), 347 | ("Volkswagen", "Arteon"), 348 | ("Honda", "Insight"), 349 | ("Lexus", "RC 350"), 350 | ("RAM", "5500"), 351 | ("Audi", "A7"), 352 | ("Lexus", "NX 200t"), 353 | ("Nissan", "370Z"), 354 | ("Porsche", "Boxster"), 355 | ("BMW", "540i"), 356 | ("Buick", "Regal"), 357 | ("Dodge", "Dart"), 358 | ("BMW", "540i xDrive"), 359 | ("Mercedes-Benz", "GLE 450"), 360 | 
("Ford", "Expedition EL"), 361 | ("Jeep", "Grand Wagoneer"), 362 | ("Bentley", "Continental"), 363 | ("Dodge", "Ram 2500 Truck"), 364 | ("Jeep", "Liberty"), 365 | ("Kia", "Carnival"), 366 | ("Mitsubishi", "Mirage G4"), 367 | ("Mercedes-Benz", "GL 450"), 368 | ("Mitsubishi", "Mirage"), 369 | ("Lexus", "RX 450h"), 370 | ("Porsche", "Taycan"), 371 | ("Acura", "TL"), 372 | ("Lexus", "CT 200h"), 373 | ("Nissan", "NV"), 374 | ("BMW", "440i xDrive"), 375 | ("Mercedes-Benz", "C 43 AMG"), 376 | ("Mercedes-Benz", "EQS 580"), 377 | ("Toyota", "Supra"), 378 | ("Mercedes-Benz", "GLK 350"), 379 | ("Lexus", "LS 500"), 380 | ("Toyota", "Prius C"), 381 | ("Toyota", "Yaris"), 382 | ("Jaguar", "XF"), 383 | ("Nissan", "Versa Note"), 384 | ("BMW", "335i"), 385 | ("Nissan", "Xterra"), 386 | ("Lexus", "NX 250"), 387 | ("Toyota", "FJ Cruiser"), 388 | ("Audi", "RS 5"), 389 | ("Volvo", "V60"), 390 | ("Audi", "S3"), 391 | ("BMW", "740i"), 392 | ("BMW", "128i"), 393 | ("Buick", "Verano"), 394 | ("Subaru", "BRZ"), 395 | ("Audi", "Q4 e-tron"), 396 | ("Chevrolet", "Avalanche"), 397 | ("Mercedes-Benz", "SL 550"), 398 | ("Ford", "C-MAX"), 399 | ("Toyota", "GR86"), 400 | ("BMW", "750i xDrive"), 401 | ("Ford", "Transit 150"), 402 | ("Mercedes-Benz", "Metris"), 403 | ("Mercedes-Benz", "S 560"), 404 | ("Nissan", "NV200"), 405 | ("Volkswagen", "Golf R"), 406 | ("Mercedes-Benz", "SL 63 AMG"), 407 | ("BMW", "M850i xDrive"), 408 | ("Lexus", "LX 570"), 409 | ("Mercedes-Benz", "G 550"), 410 | ("Ford", "E-450 and Econoline 450"), 411 | ("Ford", "E-Transit"), 412 | ("Mercedes-Benz", "C 250"), 413 | ("Mercedes-Benz", "CLS 450"), 414 | ("Mercedes-Benz", "S 63 AMG"), 415 | ("BMW", "530e"), 416 | ("BMW", "428i"), 417 | ("Mercedes-Benz", "GLC 43 AMG"), 418 | ("Volvo", "S90"), 419 | ("Dodge", "Avenger"), 420 | ("Lexus", "NX 300h"), 421 | ("Mercedes-Benz", "GLE 43 AMG"), 422 | ("Mercedes-Benz", "E 400"), 423 | ("Toyota", "Prius V"), 424 | ("BMW", "X5 M"), 425 | ("GMC", "Savana 3500"), 426 | ("Scion", "tC"), 427 | 
("Volkswagen", "CC"), 428 | ("Acura", "TSX"), 429 | ("BMW", "228i xDrive"), 430 | ("BMW", "535i xDrive"), 431 | ("Porsche", "Cayman"), 432 | ("Subaru", "Impreza WRX"), 433 | ("BMW", "535i"), 434 | ("BMW", "M8"), 435 | ("Bentley", "Bentayga"), 436 | ("Maserati", "Quattroporte"), 437 | ("BMW", "M550i xDrive"), 438 | ("Jaguar", "XE"), 439 | ("Hyundai", "Kona N"), 440 | ("Porsche", "718 Cayman"), 441 | ("BMW", "M2"), 442 | ("Mercedes-Benz", "C 63 AMG"), 443 | ("BMW", "M340i"), 444 | ("Hyundai", "Elantra N"), 445 | ("BMW", "528i"), 446 | ("Ford", "E-250 and Econoline 250"), 447 | ("BMW", "i4"), 448 | ("FIAT", "500X"), 449 | ("BMW", "iX"), 450 | ("Audi", "TT"), 451 | ("Lexus", "IS 200t"), 452 | ("Maserati", "GranTurismo"), 453 | ("Dodge", "Ram 3500 Truck"), 454 | ("BMW", "650i"), 455 | ("Lexus", "UX 200"), 456 | ("Dodge", "Dakota"), 457 | ("INFINITI", "QX30"), 458 | ("Mercedes-Benz", "GLE 63 AMG"), 459 | ("Volkswagen", "Touareg"), 460 | ("Volkswagen", "e-Golf"), 461 | ("Lamborghini", "Huracan"), 462 | ("Lexus", "LC 500"), 463 | ("Land Rover", "LR4"), 464 | ("Lexus", "NX 350h"), 465 | ("BMW", "428i xDrive"), 466 | ("Jaguar", "XJ"), 467 | ("Lexus", "RC 300"), 468 | ("Toyota", "Mirai"), 469 | ("BMW", "330e"), 470 | ("Genesis", "G90"), 471 | ("Jaguar", "E-PACE"), 472 | ("Lamborghini", "Urus"), 473 | ("BMW", "M340i xDrive"), 474 | ("Audi", "RS 7"), 475 | ("Lexus", "ES 250"), 476 | ("Mercedes-Benz", "SL 55 AMG"), 477 | ("BMW", "320i"), 478 | ("Toyota", "Land Cruiser"), 479 | ("Ford", "Thunderbird"), 480 | ("Honda", "Element"), 481 | ("Scion", "xB"), 482 | ("BMW", "530e xDrive"), 483 | ("Porsche", "718 Boxster"), 484 | ("Buick", "Lucerne"), 485 | ("Mercedes-Benz", "E 53 AMG"), 486 | ("Mitsubishi", "Lancer"), 487 | ("Polestar", "Polestar 2"), 488 | ("RAM", "4500"), 489 | ("Scion", "FR-S"), 490 | ("Mercedes-Benz", "E 550"), 491 | ("Nissan", "GT-R"), 492 | ("BMW", "X6 M"), 493 | ("INFINITI", "Q70"), 494 | ("Audi", "R8"), 495 | ("Honda", "Clarity"), 496 | ("Mercedes-Benz", "E 63 
AMG"), 497 | ("BMW", "320i xDrive"), 498 | ("Ford", "E-150 and Econoline 150"), 499 | ("Lexus", "GX 470"), 500 | ("Lincoln", "MKS"), 501 | ("BMW", "135i"), 502 | ("Mercedes-Benz", "GL 550"), 503 | ("Toyota", "86"), 504 | ("smart", "fortwo"), 505 | ("Chevrolet", "Express 1500"), 506 | ("BMW", "528i xDrive"), 507 | ("BMW", "M440i"), 508 | ("BMW", "230i"), 509 | ("INFINITI", "G35"), 510 | ("Mercedes-Benz", "S 450"), 511 | ("Mercedes-Benz", "SL 500"), 512 | ("BMW", "435i xDrive"), 513 | ("FIAT", "124 Spider"), 514 | ("Mercedes-Benz", "CLS 550"), 515 | ("Mercedes-Benz", "EQE 350+"), 516 | ("Mercury", "Grand Marquis"), 517 | ("Volkswagen", "Eos"), 518 | ("Chrysler", "PT Cruiser"), 519 | ("Lexus", "SC 430"), 520 | ("Lincoln", "Town Car"), 521 | ("Nissan", "Quest"), 522 | ("Audi", "S8"), 523 | ("BMW", "435i"), 524 | ("HUMMER", "H2"), 525 | ("Kia", "Cadenza"), 526 | ("BMW", "228i"), 527 | ("Chrysler", "Sebring"), 528 | ("Volvo", "XC70"), 529 | ("BMW", "335i xDrive"), 530 | ("Chevrolet", "Captiva Sport"), 531 | ("Ferrari", "California"), 532 | ("Ford", "Excursion"), 533 | ("BMW", "440i"), 534 | ("Chevrolet", "HHR"), 535 | ("INFINITI", "QX56"), 536 | ("INFINITI", "QX70"), 537 | ("MAZDA", "MAZDA5"), 538 | ("Pontiac", "G6"), 539 | ("Chevrolet", "Cobalt"), 540 | ("Rivian", "R1T"), 541 | ("Audi", "S6"), 542 | ("BMW", "750i"), 543 | ("BMW", "M240i xDrive"), 544 | ("BMW", "i8"), 545 | ) 546 | 547 | MAKE_MODEL_TO_INDEX = {x: i for i, x in enumerate(MAKES_MODELS)} 548 | 549 | NUM_MAKE_MODELS = len(MAKE_MODEL_TO_INDEX) + 1 550 | -------------------------------------------------------------------------------- /car_data/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | from dataclasses import dataclass 4 | from typing import Iterator, List, Optional 5 | 6 | import numpy as np 7 | import torch 8 | import torchvision.transforms as transforms 9 | from PIL import Image 10 | from torch.utils.data import 
def _identity_collate(batch: List["CarImage"]) -> List["CarImage"]:
    """Return the batch unchanged so the loader yields plain lists of CarImage.

    This is a module-level function (rather than a lambda) so that it can be
    pickled when DataLoader worker processes are started with "spawn".
    """
    return batch


def looping_loader(
    index_path: str,
    image_dir: str,
    batch_size: int,
    train: bool = True,
    use_data_aug: bool = False,
    last_seen_phash: Optional[str] = None,
) -> Iterator[List["CarImage"]]:
    """Yield batches of CarImage objects forever.

    Args:
        index_path: path to the ``.npz`` index read by CarImageDataset.
        image_dir: directory containing the images referenced by the index.
        batch_size: number of items per yielded list.
        train: select the train split if True, otherwise the test split.
        use_data_aug: enable random augmentations in the image transform.
        last_seen_phash: if given, iteration starts at the position of this
            hash in the (sorted) split instead of at index 0.

    Returns:
        An iterator that never terminates; once the loader is exhausted it is
        simply iterated again. Every pass starts from the same sampler offset.
    """
    dataset = CarImageDataset(
        index_path, image_dir, train=train, use_data_aug=use_data_aug
    )
    sampler = CarImageDatasetSampler(dataset, last_seen_phash=last_seen_phash)
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=4,
        # No tensor stacking: each batch stays a list of CarImage objects.
        collate_fn=_identity_collate,
    )
    while True:
        yield from loader


def image_transform(use_data_aug: bool) -> "transforms.Compose":
    """Build the PIL-image -> normalized tensor preprocessing pipeline.

    With ``use_data_aug`` the image is randomly cropped (square, 80-100% of
    the area) to 224x224, randomly flipped and color-jittered; otherwise it
    is resized and center-cropped to 224x224. Both variants end with
    ``ToTensor`` and a fixed per-channel normalization.
    """
    if use_data_aug:
        image_ops = [
            transforms.RandomResizedCrop(224, scale=(0.8, 1.0), ratio=(1.0, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(0.4, 0.4, 0.4),
        ]
    else:
        image_ops = [
            transforms.Resize(224),
            transforms.CenterCrop(224),
        ]
    return transforms.Compose(
        [
            *image_ops,
            transforms.ToTensor(),
            # NOTE(review): these look like the CLIP mean/std statistics --
            # confirm before changing them.
            transforms.Normalize(
                (0.48145466, 0.4578275, 0.40821073),
                (0.26862954, 0.26130258, 0.27577711),
            ),
        ]
    )


@dataclass
class CarImage:
    """One dataset example: a preprocessed image plus its index metadata."""

    # Output of the transform built by image_transform().
    image: torch.Tensor
    # Hash string identifying the image; also its file name on disk.
    phash: str
    # Price exactly as stored in the index.
    price: float
    # make/model/year are None when the index entry is falsy ("" or 0).
    make: Optional[str]
    model: Optional[str]
    year: Optional[int]


class CarImageDataset(Dataset):
    """Map-style dataset over an exported ``.npz`` index and an image directory.

    The index must contain the arrays ``phashes``, ``prices``, ``makes``,
    ``models`` and ``years``. Entries are ordered by sorted phash; the first
    tenth of that ordering is the test split and the remainder is the train
    split. Images are read from ``image_dir/<first two hash chars>/<hash>``.
    """

    def __init__(
        self,
        index_path: str,
        image_dir: str,
        train: bool = True,
        use_data_aug: bool = False,
    ):
        super().__init__()
        self.index_path = index_path
        self.image_dir = image_dir
        with open(index_path, "rb") as f:
            # The archive is read lazily, so every array is pulled out while
            # the file is still open.
            obj = np.load(f)
            phashes = obj["phashes"]
            ordering = np.argsort(phashes)  # sorting hashes => random order

            test_count = len(ordering) // 10
            if train:
                ordering = ordering[test_count:]
            else:
                ordering = ordering[:test_count]

            self.phashes = phashes[ordering]
            self.prices = obj["prices"][ordering]
            self.makes = obj["makes"][ordering].tolist()
            self.models = obj["models"][ordering].tolist()
            self.years = obj["years"][ordering]
        self.transform = image_transform(use_data_aug)

    def __len__(self) -> int:
        """Return the number of examples in the selected split."""
        return len(self.phashes)

    def __getitem__(self, idx: int) -> CarImage:
        """Load, preprocess and return example ``idx``.

        If the image cannot be read, the error is printed and a black
        256x256 placeholder image is used instead of raising.
        """
        phash = self.phashes[idx]
        img_path = os.path.join(self.image_dir, phash[:2], phash)
        try:
            # The context manager closes the underlying file handle promptly;
            # convert() returns an independent, fully loaded image.
            with Image.open(img_path) as raw:
                img = raw.convert("RGB")
        except Exception:
            # Don't kill the job due to a single missing or corrupted image.
            # Only Exception is caught so Ctrl-C / SystemExit still propagate.
            print(f"error loading: {img_path}")
            traceback.print_exc()
            img = Image.new("RGB", (256, 256))
        return CarImage(
            image=self.transform(img),
            phash=self.phashes[idx].tolist(),
            price=self.prices[idx],
            make=self.makes[idx] or None,
            model=self.models[idx] or None,
            year=self.years[idx] or None,
        )


class CarImageDatasetSampler(Sampler):
    """Sequential sampler that can start part-way through the dataset.

    Indices are yielded in order starting at an offset and wrapping around,
    so one pass always covers every index exactly once.
    """

    def __init__(
        self, data_source: CarImageDataset, last_seen_phash: Optional[str] = None
    ):
        self.data_source = data_source
        self._start_idx = 0
        if last_seen_phash is not None:
            # data_source.phashes is sorted, so a binary search finds the
            # first entry >= last_seen_phash (that entry is yielded again if
            # it is the last-seen hash itself). Stored as a plain int so the
            # yielded indices are Python ints, as annotated.
            self._start_idx = int(
                np.searchsorted(data_source.phashes, last_seen_phash)
            )

    def __len__(self) -> int:
        """Return the number of indices produced by one pass."""
        return len(self.data_source)

    def __iter__(self) -> Iterator[int]:
        """Yield every dataset index once, starting at the resume offset."""
        size = len(self.data_source)
        for i in range(size):
            yield (i + self._start_idx) % size
3 | """ 4 | 5 | import io 6 | import math 7 | import os 8 | from abc import ABC, abstractmethod 9 | from contextlib import contextmanager 10 | from typing import Dict, Iterator, List, Tuple 11 | 12 | import cairo 13 | import numpy as np 14 | import torch 15 | import torch.nn.functional as F 16 | from PIL import Image 17 | 18 | from car_data.constants import MAKES_MODELS, PRICE_BIN_LABELS, YEARS 19 | 20 | PANEL_WIDTH = 550 21 | IMAGE_SIZE = 224 22 | 23 | 24 | @contextmanager 25 | def open_context(path: str, width: int, height: int) -> Iterator[cairo.Context]: 26 | _, ext = os.path.splitext(path) 27 | if ext.lower() == ".svg": 28 | with cairo.SVGSurface(path, width, height) as surface: 29 | ctx = cairo.Context(surface) 30 | yield ctx 31 | else: 32 | with cairo.ImageSurface( 33 | cairo.Format.RGB24, math.ceil(width), math.ceil(height) 34 | ) as surface: 35 | ctx = cairo.Context(surface) 36 | yield ctx 37 | surface.write_to_png(path) 38 | 39 | 40 | def prediction_element_size() -> Tuple[int, int]: 41 | with cairo.SVGSurface(io.BytesIO(), PANEL_WIDTH, 10000) as surface: 42 | ctx = cairo.Context(surface) 43 | element = prediction_element( 44 | ctx=ctx, 45 | idx=0, 46 | img=Image.fromarray(np.zeros((IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.uint8)), 47 | outputs=dict( 48 | price_median=torch.tensor(0.0), 49 | price_bin=torch.zeros(1, len(PRICE_BIN_LABELS)), 50 | make_model=torch.zeros(1, len(MAKES_MODELS) + 1), 51 | year=torch.zeros(1, len(YEARS) + 1), 52 | ), 53 | ) 54 | return math.ceil(element.width), math.ceil(element.height) 55 | 56 | 57 | def prediction_element( 58 | ctx: cairo.Context, idx: int, img: Image.Image, outputs: Dict[str, torch.Tensor] 59 | ) -> "Element": 60 | content = VStack( 61 | Empty(width=0.0, height=16.0), 62 | pad_to_width(ImageElement(crop_image(img)), PANEL_WIDTH), 63 | Padded(Separator(PANEL_WIDTH - 40.0), horiz=20, vert=16), 64 | pad_to_width( 65 | HStack( 66 | Text(ctx, "Price prediction:", font_size=30.0), 67 | Empty(width=10, height=1), 68 | 
Text( 69 | ctx, 70 | f"${int(round(outputs['price_median'].item()))}", 71 | font_size=30.0, 72 | bold=True, 73 | ), 74 | ), 75 | PANEL_WIDTH, 76 | ), 77 | Padded(Separator(PANEL_WIDTH - 40.0), horiz=20, vert=16), 78 | Empty(width=PANEL_WIDTH, height=16), 79 | HStack( 80 | TopN( 81 | ctx, 82 | PANEL_WIDTH / 2, 83 | "Price", 84 | PRICE_BIN_LABELS, 85 | F.softmax(outputs["price_bin"], dim=-1)[0].tolist(), 86 | 4, 87 | ), 88 | TopN( 89 | ctx, 90 | PANEL_WIDTH / 2, 91 | "Year", 92 | [str(year) for year in YEARS] + ["Unknown"], 93 | F.softmax(outputs["year"], dim=-1)[0].tolist(), 94 | 4, 95 | ), 96 | ), 97 | Empty(width=PANEL_WIDTH, height=16), 98 | pad_to_width( 99 | TopN( 100 | ctx, 101 | PANEL_WIDTH * 0.8, 102 | "Make/Model", 103 | [f"{make} {model}" for make, model in MAKES_MODELS] + ["Unknown"], 104 | F.softmax(outputs["make_model"], dim=-1)[0].tolist(), 105 | 5, 106 | ), 107 | PANEL_WIDTH, 108 | ), 109 | Empty(width=PANEL_WIDTH, height=16), 110 | ) 111 | return Overlay(Background(idx, PANEL_WIDTH, content.height), content) 112 | 113 | 114 | def crop_image(img: Image.Image) -> Image.Image: 115 | width, height = img.size 116 | size = min(width, height) 117 | left = (width - size) // 2 118 | top = (height - size) // 2 119 | img = img.crop((left, top, left + size, top + size)) 120 | return img.resize((IMAGE_SIZE, IMAGE_SIZE)) 121 | 122 | 123 | class Element(ABC): 124 | def __init__(self, width: float, height: float): 125 | self.width = width 126 | self.height = height 127 | 128 | @abstractmethod 129 | def draw_at(self, ctx: cairo.Context, x: float, y: float): 130 | """Draw the UI element at the coordinates.""" 131 | _, _ = x, y 132 | 133 | 134 | class Combination(Element): 135 | def __init__(self, *children: Element, horiz: bool = False, vert: bool = False): 136 | super().__init__( 137 | width=(sum if horiz else max)(x.width for x in children), 138 | height=(sum if vert else max)(x.height for x in children), 139 | ) 140 | self.horiz = horiz 141 | self.vert = vert 142 | 
self.children = children 143 | 144 | def draw_at(self, ctx: cairo.Context, x: float, y: float): 145 | for child in self.children: 146 | child.draw_at(ctx, x, y) 147 | if self.horiz: 148 | x += child.width 149 | if self.vert: 150 | y += child.height 151 | 152 | 153 | class VStack(Combination): 154 | def __init__(self, *children: Element): 155 | super().__init__( 156 | *children, 157 | vert=True, 158 | ) 159 | 160 | 161 | class HStack(Combination): 162 | def __init__(self, *children: Element): 163 | super().__init__( 164 | *children, 165 | horiz=True, 166 | ) 167 | 168 | 169 | class Overlay(Combination): 170 | def __init__(self, *children: Element): 171 | super().__init__( 172 | *children, 173 | ) 174 | 175 | 176 | class Padded(Element): 177 | def __init__(self, contained: Element, horiz: float = 0.0, vert: float = 0.0): 178 | super().__init__( 179 | width=contained.width + horiz * 2, 180 | height=contained.height + vert * 2, 181 | ) 182 | self.contained = contained 183 | self.horiz = horiz 184 | self.vert = vert 185 | 186 | def draw_at(self, ctx: cairo.Context, x: float, y: float): 187 | self.contained.draw_at(ctx, x + self.horiz, y + self.vert) 188 | 189 | 190 | class Empty(Element): 191 | def __init__(self, width: float, height: float): 192 | super().__init__(width, height) 193 | 194 | def draw_at(self, ctx: cairo.Context, x: float, y: float): 195 | _, _, _ = ctx, x, y 196 | 197 | 198 | class Separator(Element): 199 | def __init__(self, width: float): 200 | super().__init__(width, 1) 201 | 202 | def draw_at(self, ctx: cairo.Context, x: float, y: float): 203 | ctx.set_source_rgb(0.8, 0.8, 0.8) 204 | ctx.set_line_width(1.0) 205 | ctx.move_to(x, y + 0.5) 206 | ctx.line_to(x + self.width, y + 0.5) 207 | ctx.stroke() 208 | 209 | 210 | class Background(Element): 211 | def __init__(self, idx: int, width: float, height: float): 212 | super().__init__(width, height) 213 | self.idx = idx 214 | 215 | def draw_at(self, ctx: cairo.Context, x: float, y: float): 216 | if 
self.idx % 2: 217 | brightness = 0.97 218 | ctx.set_source_rgb(brightness, brightness, brightness) 219 | ctx.rectangle(x, y, self.width, self.height) 220 | ctx.fill() 221 | else: 222 | ctx.set_source_rgb(1, 1, 1) 223 | ctx.rectangle(x, y, self.width, self.height) 224 | ctx.fill() 225 | 226 | 227 | class ImageElement(Element): 228 | def __init__(self, img: Image.Image): 229 | width, height = img.size 230 | super().__init__(width, height) 231 | 232 | data = io.BytesIO() 233 | img.save(data, format="PNG") 234 | data.seek(0) 235 | self.source = cairo.ImageSurface.create_from_png(data) 236 | 237 | def draw_at(self, ctx: cairo.Context, x: float, y: float): 238 | ctx.set_source_surface( 239 | self.source, 240 | x, 241 | y, 242 | ) 243 | ctx.paint() 244 | 245 | 246 | class Text(Element): 247 | def __init__( 248 | self, ctx: cairo.Context, text: str, font_size: float, bold: bool = False 249 | ): 250 | ctx.set_font_size(font_size) 251 | ctx.select_font_face( 252 | "Arial", 253 | cairo.FONT_SLANT_NORMAL, 254 | cairo.FONT_WEIGHT_NORMAL if not bold else cairo.FONT_WEIGHT_BOLD, 255 | ) 256 | extents = ctx.text_extents(text) 257 | # Height should not depend on text to make consecutive labels 258 | # line up perfectly. 
259 | height = ctx.text_extents( 260 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" 261 | ).height 262 | self.text = text 263 | self.font_size = font_size 264 | self.bold = bold 265 | super().__init__(extents.width, height) 266 | 267 | def draw_at(self, ctx: cairo.Context, x: float, y: float): 268 | ctx.set_font_size(self.font_size) 269 | ctx.select_font_face( 270 | "Arial", 271 | cairo.FONT_SLANT_NORMAL, 272 | cairo.FONT_WEIGHT_NORMAL if not self.bold else cairo.FONT_WEIGHT_BOLD, 273 | ) 274 | ctx.set_source_rgb(0, 0, 0) 275 | hacky_height_centerer = 0.9 # found empirically to center vertically 276 | ctx.move_to(x, y + self.height * hacky_height_centerer) 277 | ctx.show_text(self.text) 278 | ctx.stroke() 279 | 280 | 281 | class ProbabilityBar(Element): 282 | def __init__( 283 | self, ctx: cairo.Context, width: float, title: str, probability: float 284 | ): 285 | title_element = Text(ctx, title, font_size=22.0) 286 | prob_text = Text(ctx, f"{(probability*100):2.1f}%", font_size=18.0) 287 | super().__init__(width, title_element.height + 30) 288 | self.probability = probability 289 | self.title_element = title_element 290 | self.prob_text = prob_text 291 | 292 | def draw_at(self, ctx: cairo.Context, x: float, y: float): 293 | self.title_element.draw_at(ctx, x, y) 294 | 295 | bar_y = y + self.title_element.height + 5 296 | bar_width = self.width - self.prob_text.width - 8 297 | 298 | self.prob_text.draw_at(ctx, x + self.width - self.prob_text.width, bar_y) 299 | 300 | ctx.set_source_rgb(0.9, 0.9, 0.9) 301 | ctx.rectangle(x, bar_y, bar_width, 20) 302 | ctx.fill() 303 | 304 | ctx.set_source_rgb(0x65 / 0xFF, 0xBC / 0xFF, 0xD4 / 0xFF) 305 | ctx.rectangle(x, bar_y, bar_width * self.probability, 20) 306 | ctx.fill() 307 | 308 | 309 | class TopN(VStack): 310 | def __init__( 311 | self, 312 | ctx: cairo.Context, 313 | width: float, 314 | title: str, 315 | labels: List[str], 316 | probs: List[float], 317 | n: int, 318 | ): 319 | elements = [] 320 | 
def image_crops(path: str):
    """
    Open an image and return three square crops along its longer axis.

    Each crop has side ``min(width, height)``. For images wider than tall the
    crops are left, center and right; otherwise they are top, center and
    bottom.

    :param path: path of the image file to open.
    :return: list of the three cropped images, in the order described above.
    """
    img = Image.open(path)
    width, height = img.size
    min_dim = min(width, height)
    # Offset of the centered square along each axis. The previous formula
    # (dim // 2 - min_dim) subtracted the full side length instead of half of
    # it, producing an off-center (and sometimes negative) crop origin.
    cx = (width - min_dim) // 2
    cy = (height - min_dim) // 2
    if width > height:
        boxes = [
            (0, 0, height, height),
            (cx, 0, cx + height, height),
            (width - height, 0, width, height),
        ]
    else:
        boxes = [
            (0, 0, width, width),
            (0, cy, width, cy + width),
            (0, height - width, width, height),
        ]
    return [img.crop(box=box) for box in boxes]
x in items]), 50 | price_bins=torch.cat([x.price_bins for x in items]), 51 | years=torch.cat([x.years for x in items]), 52 | make_models=torch.cat([x.make_models for x in items]), 53 | ) 54 | 55 | @classmethod 56 | def from_batch(cls, batch: List[CarImage], device: torch.device) -> "LossTargets": 57 | return cls( 58 | prices=torch.tensor( 59 | [x.price for x in batch], dtype=torch.float32, device=device 60 | ), 61 | price_bins=torch.tensor([bin_price(x.price) for x in batch], device=device), 62 | years=torch.tensor([bin_year(x.year) for x in batch], device=device), 63 | make_models=torch.tensor( 64 | [bin_make_model(x.make, x.model) for x in batch], device=device 65 | ), 66 | ) 67 | 68 | @classmethod 69 | def from_model_out(cls, outputs: Dict[str, torch.Tensor]) -> "LossTargets": 70 | return cls( 71 | prices=outputs["price_median"], 72 | price_bins=F.softmax(outputs["price_bin"], dim=-1), 73 | years=F.softmax(outputs["year"], dim=-1), 74 | make_models=F.softmax(outputs["make_model"], dim=-1), 75 | ) 76 | 77 | def metrics( 78 | self, weights: LossWeights, outputs: Dict[str, torch.Tensor] 79 | ) -> torch.Tensor: 80 | metrics = dict( 81 | price_ce=F.cross_entropy(outputs["price_bin"], self.price_bins), 82 | price_acc=( 83 | (outputs["price_bin"].argmax(-1) == self.price_bins).float().mean() 84 | ), 85 | price_mae=(outputs["price_median"] - self.prices).abs().float().mean(), 86 | year_ce=F.cross_entropy(outputs["year"], self.years), 87 | year_acc=((outputs["year"].argmax(-1) == self.years).float().mean()), 88 | make_model_ce=F.cross_entropy(outputs["make_model"], self.make_models), 89 | make_model_acc=( 90 | (outputs["make_model"].argmax(-1) == self.make_models).float().mean() 91 | ), 92 | ) 93 | metrics["loss"] = ( 94 | (weights.price_ce * metrics["price_ce"]) 95 | + (weights.price_mae * metrics["price_mae"] / MEDIAN_PRICE_SCALE) 96 | + (weights.year_ce * metrics["year_ce"]) 97 | + (weights.make_model_ce * metrics["make_model_ce"]) 98 | ) 99 | return metrics 100 | 101 
class MobileNetV2Model(nn.Module):
    """
    MobileNetV2 backbone loaded through ``torch.hub`` with its final
    classifier layer replaced by an ``OutputLayer`` over 1280 features.

    :param device: device the backbone and output layer are placed on.
    :param download_root: if given, used as the ``torch.hub`` directory while
                          the backbone is loaded; the previous hub directory
                          is restored afterwards.
    """

    def __init__(self, device: torch.device, download_root: Optional[str] = None):
        super().__init__()
        backup_dir = None
        if download_root is not None:
            backup_dir = torch.hub.get_dir()
            torch.hub.set_dir(download_root)
        try:
            self.model = torch.hub.load(
                "pytorch/vision:v0.10.0", "mobilenet_v2", pretrained=True
            ).to(device)
        finally:
            # Restore the process-wide hub directory even when loading fails,
            # so a failed download does not leave global state modified.
            if download_root is not None:
                torch.hub.set_dir(backup_dir)
        self.model.classifier[1] = OutputLayer(1280, device=device)

    def output_layer(self) -> "OutputLayer":
        """Return the ``OutputLayer`` installed as ``classifier[1]``."""
        return self.model.classifier[1]

    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
        """Run the backbone on ``x`` and return the output layer's dict."""
        return self.model(x)
def main():
    """
    Load a checkpoint, classify a single image, and print the predictions.

    Prints the predicted median price, the probability of every price bin,
    and the top make/model and year predictions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="clip")
    parser.add_argument("checkpoint", type=str)
    parser.add_argument("image", type=str)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = create_model(args.model_name, device)
    model.load_state_dict(torch.load(args.checkpoint, map_location=device))
    model.eval()

    # Resize before cropping so the whole image is used rather than only its
    # central 224x224 pixels; this matches the Resize(224) + CenterCrop(224)
    # evaluation pipeline.
    transform = transforms.Compose(
        [
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(
                (0.48145466, 0.4578275, 0.40821073),
                (0.26862954, 0.26130258, 0.27577711),
            ),
        ]
    )
    # Force three channels: Normalize is configured for RGB and would fail on
    # grayscale or RGBA inputs.
    with Image.open(args.image) as img:
        image_tensor = transform(img.convert("RGB")).to(device)

    # Inference only; no autograd graph is needed.
    with torch.no_grad():
        outputs = model(image_tensor[None])

    print("---- Price ----")
    # Fixed-point with two decimals. The previous ".02" spec meant two
    # significant digits and rendered prices like "1.2e+04".
    print(f"median: {outputs['price_median'].item():.2f}")
    price_probs = F.softmax(outputs["price_bin"], dim=-1)[0].tolist()
    for label, prob in zip(pad_labels(PRICE_BIN_LABELS), price_probs):
        print(f"{label}: {(prob*100):.04}%")

    print("---- Make/model ----")
    make_model_probs = F.softmax(outputs["make_model"], dim=-1)[0].tolist()
    print_top_n(
        [f"{make} {model}" for make, model in MAKES_MODELS] + ["Unknown"],
        make_model_probs,
    )

    print("---- year ----")
    year_probs = F.softmax(outputs["year"], dim=-1)[0].tolist()
    print_top_n([str(year) for year in YEARS] + ["Unknown"], year_probs)
def main():
    """
    Render the model's predictions for one or more images side by side.

    One prediction panel is built per input image and the panels are drawn
    horizontally into the file given by ``--output``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", type=str, default="viz.svg")
    parser.add_argument("--model_name", type=str, default="clip")
    parser.add_argument("--checkpoint", type=str, required=True)
    parser.add_argument("images", type=str, nargs="+")
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = create_model(args.model_name, device)
    model.load_state_dict(torch.load(args.checkpoint, map_location=device))
    model.eval()
    transform = image_transform(False)

    width, height = prediction_element_size()
    with open_context(args.output, width * len(args.images), height) as ctx:
        panels = []
        for i, img_path in enumerate(args.images):
            img = Image.open(img_path).convert("RGB")
            # Inference only: skip building an autograd graph per image.
            with torch.no_grad():
                outputs = model(transform(img)[None].to(device))
            panels.append(prediction_element(ctx, i, img, outputs))
        HStack(*panels).draw_at(ctx, 0, 0)
4 | """ 5 | 6 | import argparse 7 | import io 8 | from typing import Iterator 9 | 10 | import cairo 11 | import torch 12 | from PIL import Image 13 | 14 | from car_data.dataset import image_transform 15 | from car_data.graphics import HStack, prediction_element, prediction_element_size 16 | from car_data.model import create_model 17 | 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("--output", type=str, default="viz.gif") 22 | parser.add_argument("--num_frames", type=int, default=10) 23 | parser.add_argument("--frame_rate", type=float, default=10.0) 24 | parser.add_argument("--model_name", type=str, default="clip") 25 | parser.add_argument("--checkpoint", type=str, required=True) 26 | parser.add_argument("images", type=str, nargs="+") 27 | args = parser.parse_args() 28 | 29 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 30 | model = create_model(args.model_name, device) 31 | model.load_state_dict(torch.load(args.checkpoint, map_location=device)) 32 | model.eval() 33 | transform = image_transform(False) 34 | 35 | imgs = [Image.open(path).convert("RGB") for path in args.images] 36 | crop_iter = zip(*(crops_of_image(img, args.num_frames) for img in imgs)) 37 | 38 | images = [] 39 | width, height = prediction_element_size() 40 | for crops in crop_iter: 41 | with cairo.ImageSurface( 42 | cairo.Format.RGB24, width * len(imgs), height 43 | ) as surface: 44 | ctx = cairo.Context(surface) 45 | panels = [] 46 | for i, crop in enumerate(crops): 47 | outputs = model(transform(crop)[None].to(device)) 48 | panels.append(prediction_element(ctx, i, crop, outputs)) 49 | HStack(*panels).draw_at(ctx, 0, 0) 50 | 51 | # Convert canvas to Pillow in the hackiest possible way. 
def crops_of_image(img: "Image.Image", n: int) -> Iterator["Image.Image"]:
    """
    Yield ``n`` square crops sliding from one end of the image to the other.

    Each crop has side ``min(width, height)``. The first crop starts at the
    origin and the last one ends at the far edge of the longer dimension,
    with the crops in between evenly spaced.

    :param img: image to crop; only ``img.size`` and ``img.crop`` are used.
    :param n: number of crops. With ``n == 1`` a single centered crop is
              yielded; with ``n <= 0`` nothing is yielded.
    """
    width, height = img.size
    min_size = min(width, height)

    if n == 1:
        # A single frame has no start/end to interpolate between, and the
        # step computation below would divide by zero; use the centered crop.
        x = round((width - min_size) / 2)
        y = round((height - min_size) / 2)
        yield img.crop((x, y, x + min_size, y + min_size))
        return

    dx = (width - min_size) / (n - 1)
    dy = (height - min_size) / (n - 1)

    for i in range(n):
        x = round(i * dx)
        y = round(i * dy)
        yield img.crop((x, y, x + min_size, y + min_size))
def combine_existing_features(
    old_features: Optional[Dict[str, np.ndarray]],
    new_filenames: List[str],
    new_features: np.ndarray,
) -> Dict[str, np.ndarray]:
    """
    Merge newly computed features with previously stored ones.

    :param old_features: previously saved dict with ``"filenames"`` and
                         ``"features"`` arrays, or None if there is none.
    :param new_filenames: filenames the new feature rows belong to.
    :param new_features: feature rows, one per entry of ``new_filenames``.
    :return: dict with ``"features"`` and ``"filenames"`` arrays, both
             ordered by sorted filename.
    """
    if old_features is None:
        # Return an ndarray here too, so both branches produce the same types
        # and ordering regardless of whether old features existed.
        all_filenames = np.array(new_filenames, dtype=str)
        all_features = new_features
    else:
        all_filenames = np.array(old_features["filenames"].tolist() + new_filenames)
        all_features = np.concatenate([old_features["features"], new_features])
    sorted_indices = np.argsort(all_filenames)
    return dict(
        features=all_features[sorted_indices], filenames=all_filenames[sorted_indices]
    )
def main():
    """
    Dump a few samples from the data loader as PNG files for visual inspection.

    Pulls `--count` single-image batches from `looping_loader`, undoes the
    per-channel normalization, and writes each image into `--output_dir` as
    `{i}_{price}.png`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--test", action="store_true", default=False)
    parser.add_argument("--use_data_aug", action="store_true", default=False)
    parser.add_argument("--count", type=int, default=10)
    parser.add_argument("--index_path", type=str, required=True)
    parser.add_argument("--image_dir", type=str, required=True)
    parser.add_argument("--output_dir", type=str, required=True)
    args = parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    loader = looping_loader(
        args.index_path,
        args.image_dir,
        1,
        train=not args.test,
        use_data_aug=args.use_data_aug,
    )

    # Per-channel statistics, built once and shaped (C, 1, 1) so they broadcast
    # over a channels-first image.
    # NOTE(review): presumably these mirror the normalization applied inside
    # car_data.dataset -- confirm they stay in sync.
    mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).view(-1, 1, 1)
    std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).view(-1, 1, 1)

    for i in range(args.count):
        # Batch size is 1, so the only element of the batch is the sample.
        obj = next(loader)[0]
        img = (
            (((obj.image * std) + mean) * 255.99)
            .permute(1, 2, 0)  # channels-first -> channels-last for PIL
            .clamp(0, 255)
            .to(torch.uint8)
            .cpu()
            .numpy()
        )
        Image.fromarray(img).save(os.path.join(args.output_dir, f"{i}_{obj.price}.png"))
def main():
    """
    Filter a dataset index down to the entries a classifier accepts.

    Every `.npz` shard in `--feature_dir` is scored with the TorchScript
    classifier at `--classifier_path`; names whose decision value exceeds
    `--threshold` are kept. The index at `--index_path` is then restricted to
    rows whose `phashes` entry is among the kept names and written to
    `--output_path`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--index_path", type=str, required=True)
    parser.add_argument("--feature_dir", type=str, required=True)
    parser.add_argument("--classifier_path", type=str, required=True)
    parser.add_argument("--threshold", type=float, default=-0.5)
    parser.add_argument("--output_path", type=str, required=True)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.jit.load(args.classifier_path).to(device)

    print("listing feature filenames...")
    feature_filenames = [
        x
        for x in os.listdir(args.feature_dir)
        if not x.startswith(".") and x.endswith(".npz")
    ]

    print("computing kept IDs...")
    positive_ids = set()
    total_ids = 0
    for feature_filename in tqdm(feature_filenames):
        # Close each shard as soon as its arrays are read, so a large feature
        # directory does not accumulate open file handles.
        with np.load(os.path.join(args.feature_dir, feature_filename)) as shard:
            features = torch.from_numpy(shard["features"]).float().to(device)
            ids = shard["filenames"].tolist()
        with torch.no_grad():
            preds = (
                (model.decision_function(features) > args.threshold)
                .cpu()
                .numpy()
                .tolist()
            )
        total_ids += len(ids)
        positive_ids.update(name for pred, name in zip(preds, ids) if pred)

    print(f"filtering index; kept {len(positive_ids)}/{total_ids}...")

    with np.load(args.index_path) as index:
        # Explicit bool dtype keeps this a valid mask even for an empty index.
        use_indices = np.array(
            [x.tolist() in positive_ids for x in index["phashes"]], dtype=bool
        )
        filtered = {k: index[k][use_indices] for k in index.keys()}
    np.savez(args.output_path, **filtered)
def main():
    """
    Plot one smoothed curve per run from training log files.

    Positional arguments are NAME PATH pairs. For each pair, the log at PATH
    is parsed, the `--key` metric is EMA-smoothed, and the curve is labeled
    NAME. The figure is written to `--output_path`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ema_rate", type=float, default=0.99)
    parser.add_argument("--max_step", type=int, default=None)
    parser.add_argument("--key", type=str, default="loss")
    parser.add_argument("--output_path", type=str, default="plot.png")
    parser.add_argument("names_and_paths", nargs="+", type=str)
    args = parser.parse_args()

    # zip() below would silently drop a trailing unpaired argument.
    if len(args.names_and_paths) % 2:
        parser.error("names_and_paths must be given as NAME PATH pairs")

    plt.figure()
    for name, path in zip(args.names_and_paths[::2], args.names_and_paths[1::2]):
        # Not every logged step carries every key (eval metrics are only
        # emitted on evaluation steps), so keep just the rows that have it.
        lines = [x for x in read_log_lines(path, args.max_step) if args.key in x]
        steps = np.array([x["step"] for x in lines])
        values = np.array([x[args.key] for x in lines])
        plt.plot(
            steps,
            smooth(values, args.ema_rate),
            label=name,
        )
    plt.legend()
    plt.xlabel("step")
    plt.ylabel(args.key)
    plt.savefig(args.output_path)


def read_log_lines(path: str, max_step: Optional[int]) -> List[Dict[str, float]]:
    """
    Parse `key=value` metrics from the log file at `path`.

    Only lines containing "step=" are considered. Whitespace-separated tokens
    of the form `key=<float>` become dict entries; tokens without "=" or with
    a non-numeric value are ignored. Rows with `step >= max_step` are dropped
    when `max_step` is given.

    :return: one dict per distinct step, sorted by step. If a step appears
             more than once, the last occurrence wins.
    """
    # map step to log dict, to allow restarts to overwrite old steps
    lines = {}

    with open(path, "r") as f:
        for line in f:
            if "step=" not in line:
                continue
            obj = {}
            for item in line.split():
                k, sep, v = item.partition("=")
                if not sep:
                    continue
                try:
                    obj[k] = float(v)
                except ValueError:
                    # Not a numeric metric (e.g. "a=b=c" or "path=/x"); one
                    # stray token should not abort the whole plot.
                    continue
            if "step" in obj and (max_step is None or obj["step"] < max_step):
                lines[obj["step"]] = obj

    return sorted(lines.values(), key=lambda x: x["step"])


def smooth(data: np.ndarray, ema_rate: float) -> np.ndarray:
    """
    Exponential moving average of `data` with start-up bias correction.

    The running sum is divided by the running sum of weights, so the first
    outputs are not pulled towards zero.
    """
    num = 0.0
    denom = 0.0
    results = []
    for x in data:
        num = ema_rate * num + (1 - ema_rate) * x
        denom = ema_rate * denom + (1 - ema_rate)
        results.append(num / denom)
    return np.array(results)
def main():
    """
    Learn one scalar scale per model output to minimize loss on held-out data.

    Model outputs and targets are collected for roughly `--num_images` images
    from the non-training split, a vector of per-output scales is optimized
    with Adam for `--iterations` steps, and the scales are then baked into the
    model's output layer before the checkpoint is written to `checkpoint_out`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--index_path", type=str, required=True)
    parser.add_argument("--image_dir", type=str, required=True)
    parser.add_argument("--num_images", type=int, default=1024)
    parser.add_argument("--batch_size", type=int, default=8)
    parser.add_argument("--iterations", type=int, default=5000)
    parser.add_argument("--model_name", type=str, default="clip")
    parser.add_argument("checkpoint", type=str)
    parser.add_argument("checkpoint_out", type=str)
    args = parser.parse_args()

    # With fewer images than one batch, no outputs would be collected and the
    # concatenation below would fail with an opaque IndexError.
    if args.num_images < args.batch_size:
        parser.error("--num_images must be at least --batch_size")

    print("creating model...")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = create_model(args.model_name, device)
    model.load_state_dict(torch.load(args.checkpoint, map_location=device))
    model.eval()

    print("loading data...")
    dataset = looping_loader(
        index_path=args.index_path,
        image_dir=args.image_dir,
        batch_size=args.batch_size,
        train=False,
    )
    outputs = []
    targets = []
    for _ in tqdm(range(args.num_images // args.batch_size)):
        batch = next(dataset)
        with torch.no_grad():
            outputs.append(
                model(torch.stack([x.image for x in batch], dim=0).to(device))
            )
        targets.append(LossTargets.from_batch(batch, device))

    # Merge the per-batch output dicts into one dict of concatenated tensors.
    outputs = {k: torch.cat([x[k] for x in outputs]) for k in outputs[0].keys()}
    targets = LossTargets.cat(targets)

    print("recalibrating...")

    def loss_fn(scales: torch.Tensor) -> torch.Tensor:
        # scales[i] multiplies the i-th output, in dict iteration order.
        scaled_outputs = {k: v * scales[i] for i, (k, v) in enumerate(outputs.items())}
        return targets.metrics(LossWeights(), scaled_outputs)["loss"]

    scales = nn.Parameter(torch.ones(len(outputs.keys()), device=device))
    loss_fn = torch.jit.trace(loss_fn, scales)

    init_loss = loss_fn(scales).item()

    opt = torch.optim.Adam([scales], lr=1e-2)
    loss = None
    for _ in tqdm(range(args.iterations)):
        loss = loss_fn(scales)
        opt.zero_grad()
        loss.backward()
        opt.step()
    # With --iterations 0 no step was taken, so the loss is unchanged.
    final_loss = init_loss if loss is None else loss.item()

    scale_dict = dict(zip(outputs.keys(), scales.detach().cpu().tolist()))

    print("scales:")
    print()
    for name, scale in scale_dict.items():
        print(f" {name}: {scale}")
    print()
    print(f"loss went from {init_loss} => {final_loss}")

    model.output_layer().scale_outputs(scale_dict)
    torch.save(model.state_dict(), args.checkpoint_out)
def main():
    """
    Build a model and a TrainLoop from command-line flags, then run training
    steps indefinitely. The loop never exits on its own.
    """
    cli = argparse.ArgumentParser()
    add_training_args(cli)
    cli.add_argument("--model_name", type=str, default="clip")
    args = cli.parse_args()
    loop_kwargs = training_args_dict(args)

    # Echo the exact invocation so it ends up in the run's log output.
    print(f"COMMAND: {shlex.join(sys.argv)}")

    print("creating model...")
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    model = create_model(args.model_name, device)

    print("creating trainer...")
    trainer = TrainLoop(model=model, device=device, **loop_kwargs)
    while True:
        trainer.run_step()
def main():
    """
    Distill a teacher model into a student model.

    Accepts the same flags as regular training plus `--teacher_model_name`
    and `--teacher_model_path`. The teacher weights are loaded from disk and
    DistillationTrainLoop steps are then run indefinitely.
    """
    cli = argparse.ArgumentParser()
    add_training_args(cli)
    cli.add_argument("--model_name", type=str, default="clip")
    cli.add_argument("--teacher_model_name", type=str, required=True)
    cli.add_argument("--teacher_model_path", type=str, required=True)
    args = cli.parse_args()
    loop_kwargs = training_args_dict(args)

    # Echo the exact invocation so it ends up in the run's log output.
    print(f"COMMAND: {shlex.join(sys.argv)}")

    print("creating model...")
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    model = create_model(args.model_name, device)

    print("creating teacher model...")
    teacher = create_model(args.teacher_model_name, device)
    teacher_state = torch.load(args.teacher_model_path, map_location=device)
    teacher.load_state_dict(teacher_state)
    # NOTE(review): the teacher is left in whatever mode create_model returns;
    # confirm whether it should be switched to eval mode.

    print("creating trainer...")
    trainer = DistillationTrainLoop(
        teacher=teacher, model=model, device=device, **loop_kwargs
    )
    while True:
        trainer.run_step()
def main():
    """
    Fit an SVM on pooled CLIP features of positive and negative images.

    Prints cross-validated accuracy, false-negative rate and filtered fraction
    over a sweep of decision thresholds, then trains on all of the data and
    saves the classifier as a TorchScript module at `--model_out`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--positive_dirs", type=str, nargs="+", required=True)
    parser.add_argument("--negative_dirs", type=str, nargs="+", required=True)
    parser.add_argument("--download_root", type=str, default=None)
    parser.add_argument("--model_out", type=str, required=True)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model, preprocess = clip.load(
        "ViT-B/16", device=device, download_root=args.download_root
    )

    print("computing positive features...")
    positive_paths = list(list_dirs(args.positive_dirs))
    positive_features = compute_pooled_features(
        device=device, model=model, preprocess=preprocess, paths=positive_paths
    )

    print("computing negative features...")
    negative_paths = list(list_dirs(args.negative_dirs))
    negative_features = compute_pooled_features(
        device=device, model=model, preprocess=preprocess, paths=negative_paths
    )

    inputs = np.concatenate([positive_features, negative_features], axis=0)
    labels = np.array(
        [True] * len(positive_features) + [False] * len(negative_features)
    )

    clf = SVC(random_state=0)

    print("validating...")
    preds = cross_val_preds(clf, inputs, labels, folds=10)
    for threshold in np.linspace(-2.0, 2.0, num=31):
        kept = preds > threshold
        correct = kept == labels
        acc = np.mean(correct)
        # Positives that were rejected, and negatives that were rejected.
        false_neg = np.mean((~correct)[labels])
        filter_frac = np.mean(correct[~labels])
        print(
            f"threshold {threshold:.02}: acc={acc:.03f} false_neg={false_neg:.03f} filter_frac={filter_frac:.03f}"
        )

    print("training...")
    clf.fit(inputs, labels)

    print("saving...")
    save_model = torch.jit.script(sk2torch.wrap(clf).float())
    torch.jit.save(save_model, args.model_out)


def cross_val_preds(
    model: Any, xs: np.ndarray, ys: np.ndarray, folds: int
) -> np.ndarray:
    """
    Compute out-of-fold decision function outputs for all of the samples.

    The samples are randomly partitioned into `folds` nearly equal parts. For
    each part, `model` is refit on the remaining samples and scored on the
    held-out ones. Note that `model` is left fitted on the last split.
    """
    total = len(xs)
    shuffled = np.random.permutation(total)
    # array_split gives the first (total % folds) parts one extra element.
    held_out_parts = np.array_split(shuffled, folds)

    scores = np.zeros(total, dtype=np.float32)
    for held_out in held_out_parts:
        train_mask = np.ones(total, dtype=bool)
        train_mask[held_out] = False
        val_mask = ~train_mask
        model.fit(xs[train_mask], ys[train_mask])
        scores[val_mask] = model.decision_function(xs[val_mask])
    return scores


def list_dirs(dirs: List[str]) -> Iterator[str]:
    """
    Yield the path of every entry directly inside each directory of `dirs`,
    skipping entries whose name starts with ".".
    """
    for parent in dirs:
        visible = (name for name in os.listdir(parent) if not name.startswith("."))
        for name in visible:
            yield os.path.join(parent, name)
str, 25 | batch_size: int, 26 | microbatch: int, 27 | eval_interval: int, 28 | save_interval: int, 29 | lr: float, 30 | weight_decay: float, 31 | model: nn.Module, 32 | device: torch.device, 33 | loss_weights: LossWeights, 34 | ): 35 | self.index_path = index_path 36 | self.image_dir = image_dir 37 | self.use_data_aug = use_data_aug 38 | self.save_dir = save_dir 39 | self.batch_size = batch_size 40 | self.microbatch = microbatch 41 | self.eval_interval = eval_interval 42 | self.save_interval = save_interval 43 | self.model = model 44 | self.device = device 45 | self.loss_weights = loss_weights 46 | 47 | os.makedirs(save_dir, exist_ok=True) 48 | 49 | self.dataset_state_path = os.path.join(save_dir, "dataset_state.json") 50 | if os.path.exists(self.dataset_state_path): 51 | print("loading dataset state:", self.dataset_state_path) 52 | with open(self.dataset_state_path, "rb") as f: 53 | self.dataset_state = json.load(f) 54 | else: 55 | self.dataset_state = dict(test=None, train=None) 56 | 57 | self.train_dataset = looping_loader( 58 | index_path, 59 | image_dir, 60 | batch_size=self.batch_size, 61 | train=True, 62 | use_data_aug=use_data_aug, 63 | last_seen_phash=self.dataset_state["train"], 64 | ) 65 | self.test_dataset = looping_loader( 66 | index_path, 67 | image_dir, 68 | batch_size=self.batch_size, 69 | train=False, 70 | last_seen_phash=self.dataset_state["test"], 71 | ) 72 | 73 | self.opt = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay) 74 | self.opt_state_path = os.path.join(save_dir, "opt.pt") 75 | if os.path.exists(self.opt_state_path): 76 | print("loading optimizer:", self.opt_state_path) 77 | self.opt.load_state_dict( 78 | torch.load(self.opt_state_path, map_location=device) 79 | ) 80 | 81 | self.model_state_path = os.path.join(save_dir, "model.pt") 82 | if os.path.exists(self.model_state_path): 83 | print("loading model:", self.model_state_path) 84 | model.load_state_dict( 85 | torch.load(self.model_state_path, map_location=device) 86 | 
    def run_step(self):
        """
        Run a single training step.

        When the current step is a multiple of `eval_interval`, one test batch
        is first evaluated without gradients and its metrics are recorded
        under an `eval_` prefix. Then one training batch is processed in
        microbatches with gradient accumulation, the optimizer is stepped, the
        averaged metrics are printed, and the step counter is incremented. A
        checkpoint is saved whenever the new step is a multiple of
        `save_interval`.
        """
        results = LossAverage()

        if self.step % self.eval_interval == 0:
            with torch.no_grad():
                batch = next(self.test_dataset)
                for microbatch in self._microbatches(batch):
                    results.add(
                        {
                            f"eval_{k}": v
                            for k, v in self.compute_losses(microbatch).items()
                        },
                        len(microbatch),
                    )
            # Remember the last item consumed; this value is stored in the
            # dataset state that save() writes out.
            self.dataset_state["test"] = batch[-1].phash

        batch = next(self.train_dataset)
        self.opt.zero_grad()
        for microbatch in self._microbatches(batch):
            losses = self.compute_losses(microbatch)
            # Weight each microbatch by its share of the batch so the
            # accumulated gradient is a size-weighted combination.
            batch_frac = len(microbatch) / len(batch)
            (batch_frac * losses["loss"]).backward()
            results.add(losses, len(microbatch))
        self.opt.step()
        self.dataset_state["train"] = batch[-1].phash

        print(results.format(self.step))

        self.step += 1

        # Checked after the increment, so step 0 never triggers a save.
        if not self.step % self.save_interval:
            print(f"saving at step {self.step}...")
            self.save()

    def _microbatches(self, batch: List[CarImage]) -> Iterator[List[CarImage]]:
        """
        Split `batch` into consecutive chunks of at most `self.microbatch`
        items. A falsy `self.microbatch` (e.g. 0) yields the whole batch as a
        single chunk.
        """
        if not self.microbatch:
            yield batch
        else:
            for i in range(0, len(batch), self.microbatch):
                yield batch[i : i + self.microbatch]
class TrainLoop(TrainLoopBase):
    """
    Training loop that optimizes the model against each batch's own targets.
    """

    def compute_losses(self, batch: List[CarImage]) -> Dict[str, torch.Tensor]:
        """
        Run the model on `batch` and return the metrics dict (including the
        "loss" entry) computed against the batch's targets.
        """
        images = torch.stack([x.image for x in batch], dim=0).to(self.device)
        targets = LossTargets.from_batch(batch, self.device)
        outputs = self.model(images)
        return targets.metrics(self.loss_weights, outputs)


class DistillationTrainLoop(TrainLoopBase):
    """
    Training loop that optimizes the model to match a teacher's outputs.

    Metrics against the batch's own targets are still reported, but the
    "loss" entry that gets back-propagated is the loss against the teacher.
    """

    def __init__(self, *, teacher: nn.Module, **kwargs):
        """
        :param teacher: model whose outputs are used as training targets.
        :param kwargs: forwarded unchanged to TrainLoopBase.
        """
        super().__init__(**kwargs)
        self.teacher = teacher

    def compute_losses(self, batch: List[CarImage]) -> Dict[str, torch.Tensor]:
        """
        Return metrics against the batch's targets plus `teacher_`-prefixed
        metrics against the teacher's outputs, with "loss" replaced by the
        teacher loss.
        """
        images = torch.stack([x.image for x in batch], dim=0).to(self.device)
        targets = LossTargets.from_batch(batch, self.device)
        with torch.no_grad():
            teacher_out = self.teacher(images)
            teacher_targets = LossTargets.from_model_out(teacher_out)
        outputs = self.model(images)
        # Metrics against the batch's own targets are for reporting only.
        with torch.no_grad():
            metrics = targets.metrics(self.loss_weights, outputs)
        # Teacher metrics are computed with gradients enabled, because the
        # teacher loss is what run_step() back-propagates. metrics() returns a
        # dict, so it must be iterated with .items().
        teacher_metrics = teacher_targets.metrics(self.loss_weights, outputs)
        metrics.update({f"teacher_{k}": v for k, v in teacher_metrics.items()})
        metrics["loss"] = metrics.pop("teacher_loss")
        return metrics
class LossAverage:
    """
    Accumulate count-weighted averages of named scalar tensors.
    """

    def __init__(self):
        self.results = defaultdict(float)
        self.counts = defaultdict(int)

    def add(self, losses: Dict[str, torch.Tensor], count: int):
        """
        Add one set of scalar metrics, each weighted by `count` samples.
        """
        for k, v in losses.items():
            self.results[k] += v.item() * count
            self.counts[k] += count

    def average(self) -> Dict[str, float]:
        """
        Return the weighted mean of every metric added so far.
        """
        return {k: v / self.counts[k] for k, v in self.results.items()}

    def format(self, step: int) -> str:
        """
        Render a log line: `step=N` followed by `key=value` pairs with keys in
        sorted order and values printed to four decimal places.
        """
        key_strs = [f"step={step}"]
        avg = self.average()
        for k in sorted(avg.keys()):
            key_strs.append(f"{k}={avg[k]:.04f}")
        return " ".join(key_strs)


def _tmp_path(orig_path: str) -> str:
    """
    Return the temporary sibling path used while writing `orig_path`.
    """
    return orig_path + ".tmp"


def _rename_from_tmp(path: str) -> None:
    """
    Move the temporary file for `path` into place, overwriting `path`.
    """
    # os.replace overwrites an existing destination on every platform, while
    # os.rename raises on Windows once a previous checkpoint exists.
    os.replace(_tmp_path(path), path)


def add_training_args(parser: argparse.ArgumentParser):
    """
    Register the command-line flags shared by the training entrypoints.
    """
    parser.add_argument("--loss_weights", type=str, default="default")
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument("--weight_decay", type=float, default=1e-3)
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--microbatch", type=int, default=0)
    parser.add_argument("--eval_interval", type=int, default=5)
    parser.add_argument("--save_interval", type=int, default=1000)
    parser.add_argument("--use_data_aug", action="store_true")
    parser.add_argument("--index_path", type=str, required=True)
    parser.add_argument("--image_dir", type=str, required=True)
    parser.add_argument("--save_dir", type=str, required=True)


def training_args_dict(args: argparse.Namespace) -> Dict[str, Any]:
    """
    Extract the flags added by add_training_args() as a keyword dict.

    Values are copied verbatim, except `loss_weights`, which is parsed from
    its string form with LossWeights.parse().
    """
    res = {}
    for k in [
        "lr",
        "weight_decay",
        "batch_size",
        "microbatch",
        "eval_interval",
        "save_interval",
        "use_data_aug",
        "index_path",
        "image_dir",
        "save_dir",
    ]:
        res[k] = getattr(args, k)
    res["loss_weights"] = LossWeights.parse(args.loss_weights)
    return res
-------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="car-data", 5 | version="0.0.1", 6 | description="Train a car price estimator.", 7 | packages=["car_data"], 8 | install_requires=[ 9 | "torch", 10 | "torchvision", 11 | "sk2torch", 12 | "clip @ git+https://github.com/openai/CLIP.git", 13 | "pycairo", 14 | ], 15 | author="Alex Nichol", 16 | author_email="unixpickle@gmail.com", 17 | url="https://github.com/unixpickle/car-data", 18 | license="MIT", 19 | classifiers=[ 20 | "License :: OSI Approved :: MIT License", 21 | "Programming Language :: Python :: 3", 22 | "Programming Language :: Python :: 3.7", 23 | ], 24 | ) 25 | -------------------------------------------------------------------------------- /src/chan_util.rs: -------------------------------------------------------------------------------- 1 | use async_channel::Receiver; 2 | 3 | pub async fn recv_at_least_one(rx: &Receiver) -> Option> { 4 | if let Ok(obj) = rx.recv().await { 5 | let mut buffer = vec![obj]; 6 | loop { 7 | match rx.try_recv() { 8 | Ok(obj) => buffer.push(obj), 9 | _ => return Some(buffer), 10 | } 11 | } 12 | } else { 13 | None 14 | } 15 | } 16 | 17 | pub fn recv_at_least_one_blocking(rx: &Receiver) -> Option> { 18 | if let Ok(obj) = rx.recv_blocking() { 19 | let mut buffer = vec![obj]; 20 | loop { 21 | match rx.try_recv() { 22 | Ok(obj) => buffer.push(obj), 23 | _ => return Some(buffer), 24 | } 25 | } 26 | } else { 27 | None 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/db.rs: -------------------------------------------------------------------------------- 1 | use crate::chan_util::recv_at_least_one_blocking; 2 | use std::fmt::Write; 3 | use std::path::Path; 4 | 5 | use async_channel::{bounded, Receiver, Sender}; 6 | use rusqlite::{Connection, Transaction}; 7 | use sha2::Digest; 8 | use tokio::{spawn, task::spawn_blocking}; 9 | 10 | use 
crate::types::{Listing, OwnerInfo}; 11 | 12 | #[derive(Clone)] 13 | pub struct Database { 14 | req_chan: Sender< 15 | Box< 16 | dyn Send 17 | + FnOnce( 18 | anyhow::Result<&mut Transaction>, 19 | ) -> Box)>, 20 | >, 21 | >, 22 | } 23 | 24 | impl Database { 25 | pub async fn open>(path: P) -> anyhow::Result { 26 | let path = path.as_ref().to_owned(); 27 | spawn_blocking(move || -> anyhow::Result { 28 | let conn = Connection::open(path)?; 29 | Database::new_with_conn(conn) 30 | }) 31 | .await? 32 | } 33 | 34 | #[allow(dead_code)] 35 | pub async fn open_in_memory() -> anyhow::Result { 36 | spawn_blocking(move || -> anyhow::Result { 37 | let conn = Connection::open_in_memory()?; 38 | Database::new_with_conn(conn) 39 | }) 40 | .await? 41 | } 42 | 43 | fn new_with_conn(conn: Connection) -> anyhow::Result { 44 | create_tables(&conn)?; 45 | let (tx, rx) = bounded(100); 46 | spawn_blocking(move || Database::transaction_worker(conn, rx)); 47 | Ok(Database { req_chan: tx }) 48 | } 49 | 50 | pub async fn check_attempt( 51 | &self, 52 | website: &str, 53 | website_id: &str, 54 | ) -> anyhow::Result> { 55 | let website = website.to_owned(); 56 | let website_id = website_id.to_owned(); 57 | self.with_conn(move |conn| { 58 | let mut stmt = 59 | conn.prepare("SELECT success FROM attempt_ids WHERE website=?1 AND website_id=?2")?; 60 | let mut result_it = stmt.query_map::((&website, &website_id), |x| { 61 | Ok(x.get::<_, i8>(0)? 
== 1) 62 | })?; 63 | Ok(match result_it.next() { 64 | None => None, 65 | Some(x) => Some(x?), 66 | }) 67 | }) 68 | .await 69 | } 70 | 71 | pub async fn add_failed_attempt(&self, website: &str, website_id: &str) -> anyhow::Result<()> { 72 | let website = website.to_owned(); 73 | let website_id = website_id.to_owned(); 74 | self.with_conn(move |conn| { 75 | conn.execute( 76 | "INSERT OR IGNORE INTO attempt_ids (website, website_id, success) VALUES (?1, ?2, ?3)", 77 | (&website, &website_id, 0), 78 | )?; 79 | Ok(()) 80 | }) 81 | .await 82 | } 83 | 84 | pub async fn add_listing(&self, listing: Listing) -> anyhow::Result> { 85 | self.with_conn(move |conn| { 86 | let tx = conn.savepoint()?; 87 | if tx.execute("INSERT OR IGNORE INTO attempt_ids (website, website_id, success) VALUES (?1, ?2, 1)", (&listing.website, &listing.website_id))? != 1 { 88 | return Ok(None); 89 | } 90 | tx.execute( 91 | "INSERT INTO listings (website, website_id, title, price, make, model, year, odometer, engine, exterior_color, interior_color, drive_type, fuel_type, fuel_economy_0, fuel_economy_1, vin, stock_number, comments) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18)", 92 | rusqlite::params![ 93 | &listing.website, 94 | &listing.website_id, 95 | &listing.title, 96 | &listing.price, 97 | &listing.make, 98 | &listing.model, 99 | &listing.year, 100 | &listing.odometer, 101 | &listing.engine_description, 102 | &listing.exterior_color, 103 | &listing.interior_color, 104 | &listing.drive_type, 105 | &listing.fuel_type, 106 | &maybe_list_entry(&listing.fuel_economy, 0), 107 | &maybe_list_entry(&listing.fuel_economy, 1), 108 | &listing.vin, 109 | &listing.stock_number, 110 | &listing.comments 111 | ], 112 | )?; 113 | let last_id = tx.last_insert_rowid(); 114 | if let Some(image_urls) = &listing.image_urls { 115 | for (i, image_url) in image_urls.iter().enumerate() { 116 | tx.execute( 117 | "INSERT INTO images (listing_id, image_index, url, hash) VALUES (?1, 
?2, ?3, ?4)", 118 | rusqlite::params![&last_id, &i, &image_url, &hash_image_url(&image_url)], 119 | )?; 120 | } 121 | } 122 | if let Some(owners) = &listing.owners { 123 | for (i, owner) in owners.iter().enumerate() { 124 | tx.execute( 125 | "INSERT INTO owners (listing_id, owner_index, website_id, name, website) VALUES (?1, ?2, ?3, ?4, ?5)", 126 | rusqlite::params![&last_id, &i, &owner.id, &owner.name, &owner.website], 127 | )?; 128 | } 129 | } 130 | tx.commit()?; 131 | Ok(Some(last_id)) 132 | }).await 133 | } 134 | 135 | #[allow(dead_code)] 136 | pub async fn listing_for_id(&self, id: i64) -> anyhow::Result> { 137 | self.with_conn(move |tx| Ok(retrieve_listing(tx, id)?)) 138 | .await 139 | } 140 | 141 | pub async fn insert_phashes( 142 | &self, 143 | hash_and_phash: Vec<(String, String)>, 144 | ) -> anyhow::Result<()> { 145 | self.with_conn(move |conn| { 146 | let tx = conn.savepoint()?; 147 | for (image_hash, phash) in hash_and_phash { 148 | tx.execute( 149 | "INSERT OR IGNORE INTO phashes (hash, phash, hash_count) VALUES 150 | (?1, ?2, (SELECT COUNT(*) from images WHERE hash=?1))", 151 | (&image_hash, &phash), 152 | )?; 153 | } 154 | tx.commit()?; 155 | Ok(()) 156 | }) 157 | .await 158 | } 159 | 160 | pub async fn counts(&self) -> anyhow::Result<(i64, i64)> { 161 | self.with_conn(move |tx| { 162 | let listing_count: i64 = 163 | tx.query_row("SELECT COUNT(*) FROM listings", (), |row| row.get(0))?; 164 | let attempt_count: i64 = 165 | tx.query_row("SELECT COUNT(*) FROM attempt_ids", (), |row| row.get(0))?; 166 | Ok((listing_count, attempt_count)) 167 | }) 168 | .await 169 | } 170 | 171 | pub async fn make_model_counts(&self) -> anyhow::Result> { 172 | self.with_conn(move |tx| { 173 | let mut stmt = tx.prepare( 174 | "SELECT 175 | make, 176 | model, 177 | COUNT(*) 178 | FROM listings 179 | GROUP BY make, model 180 | ORDER BY -COUNT(*)", 181 | )?; 182 | let results = stmt.query_map((), |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))?; 183 | 
Ok(results.into_iter().collect::>()?) 184 | }) 185 | .await 186 | } 187 | 188 | pub async fn completed_dedups>( 189 | &self, 190 | ) -> anyhow::Result { 191 | self.with_conn(move |tx| { 192 | let mut stmt = tx.prepare("SELECT hash FROM phashes")?; 193 | let results = stmt.query_map((), |row| Ok(row.get(0)?))?; 194 | Ok(results.into_iter().collect::>()?) 195 | }) 196 | .await 197 | } 198 | 199 | pub async fn get_attempt_ids>( 200 | &self, 201 | website: &str, 202 | ) -> anyhow::Result { 203 | let website = website.to_owned(); 204 | self.with_conn(move |tx| { 205 | let mut stmt = tx.prepare("SELECT website_id FROM attempt_ids WHERE website = ?1")?; 206 | let results = stmt.query_map((website,), |row| Ok(row.get(0)?))?; 207 | Ok(results.into_iter().collect::>()?) 208 | }) 209 | .await 210 | } 211 | 212 | pub fn unique_phashes(&self) -> Receiver> { 213 | let (tx, rx) = bounded(100); 214 | let db_clone = self.clone(); 215 | spawn(async move { 216 | let tx_clone = tx.clone(); 217 | let res = db_clone 218 | .with_conn(move |conn| { 219 | let mut stmt = conn.prepare( 220 | "SELECT 221 | phashes.phash, 222 | images.listing_id 223 | FROM phashes 224 | LEFT JOIN images ON images.hash = phashes.hash 225 | RIGHT JOIN listings ON listings.id == images.listing_id 226 | WHERE (SELECT SUM(hash_count) FROM phashes phashes2 WHERE phashes2.phash = phashes.phash) == 1", 227 | )?; 228 | let results = stmt.query_map((), |row| { 229 | Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)) 230 | })?; 231 | for row in results { 232 | let (phash, listing_id) = row?; 233 | if let Some(listing) = retrieve_listing(conn, listing_id)? 
{ 234 | if tx.send_blocking(Ok((phash, listing))).is_err() { 235 | return Ok(()); 236 | } 237 | } else { 238 | return Err(anyhow::Error::msg(format!( 239 | "no listing found for ID {}", 240 | listing_id 241 | ))); 242 | } 243 | } 244 | Ok(()) 245 | }) 246 | .await; 247 | if let Err(e) = res { 248 | tx_clone.send(Err(e)).await.ok(); 249 | } 250 | }); 251 | rx 252 | } 253 | 254 | async fn with_conn< 255 | T: 'static + Send, 256 | F: 'static + Send + FnOnce(&mut Transaction) -> anyhow::Result, 257 | >( 258 | &self, 259 | f: F, 260 | ) -> anyhow::Result { 261 | let (res_tx, res_rx) = bounded(1); 262 | let res = self 263 | .req_chan 264 | .send(Box::new(move |maybe_tx| match maybe_tx { 265 | Ok(tx) => { 266 | let res = f(tx); 267 | Box::new(move |commit_res| { 268 | if res.is_ok() && !commit_res.is_ok() { 269 | res_tx.send_blocking(Err(commit_res.unwrap_err())).ok(); 270 | } else { 271 | res_tx.send_blocking(res).ok(); 272 | } 273 | }) 274 | } 275 | Err(e) => Box::new(move |_| { 276 | res_tx.send_blocking(Err(e)).ok(); 277 | }), 278 | })) 279 | .await; 280 | if res.is_err() { 281 | // The true error contains the argument we tried to send, 282 | // which we cannot wrap in anyhow for some reason. 283 | Err(anyhow::Error::msg("connection worker has died")) 284 | } else { 285 | res_rx.recv().await? 
286 | } 287 | } 288 | 289 | fn transaction_worker( 290 | mut conn: Connection, 291 | rx: Receiver< 292 | Box< 293 | dyn Send 294 | + FnOnce( 295 | anyhow::Result<&mut Transaction>, 296 | ) -> Box)>, 297 | >, 298 | >, 299 | ) { 300 | while let Some(reqs) = recv_at_least_one_blocking(&rx) { 301 | match conn.transaction() { 302 | Ok(mut tx) => { 303 | let mut done_fns = Vec::new(); 304 | for req in reqs { 305 | done_fns.push(req(Ok(&mut tx))); 306 | } 307 | if let Err(e) = tx.commit() { 308 | let msg = format!("{}", e); 309 | for done_fn in done_fns { 310 | done_fn(Err(anyhow::Error::msg(msg.clone()))); 311 | } 312 | } else { 313 | for done_fn in done_fns { 314 | done_fn(Ok(())); 315 | } 316 | } 317 | } 318 | Err(e) => { 319 | let msg = format!("{}", e); 320 | for req in reqs { 321 | req(Err(anyhow::Error::msg(msg.clone())))(Ok(())) 322 | } 323 | } 324 | } 325 | } 326 | } 327 | } 328 | 329 | pub fn hash_image_url(url: &str) -> String { 330 | let mut hasher = sha2::Sha256::new(); 331 | hasher.update(url); 332 | let mut res = String::with_capacity(64); 333 | for ch in hasher.finalize() { 334 | write!(&mut res, "{:02x}", ch).unwrap(); 335 | } 336 | res 337 | } 338 | 339 | fn create_tables(conn: &Connection) -> anyhow::Result<()> { 340 | conn.execute( 341 | "CREATE TABLE if not exists attempt_ids ( 342 | website CHAR(16), 343 | website_id CHAR(64), 344 | success INT, 345 | PRIMARY KEY (website, website_id) 346 | )", 347 | (), 348 | )?; 349 | conn.execute( 350 | "CREATE TABLE if not exists listings ( 351 | id INTEGER PRIMARY KEY, 352 | website TEXT not null, 353 | website_id TEXT not null, 354 | title TEXT, 355 | price TEXT, 356 | make TEXT, 357 | model TEXT, 358 | year INT, 359 | odometer TEXT, 360 | engine TEXT, 361 | exterior_color TEXT, 362 | interior_color TEXT, 363 | drive_type TEXT, 364 | fuel_type TEXT, 365 | fuel_economy_0 TEXT, 366 | fuel_economy_1 TEXT, 367 | vin TEXT, 368 | stock_number TEXT, 369 | comments TEXT 370 | )", 371 | (), 372 | )?; 373 | conn.execute( 
/// Build a list from up to two optional values.
///
/// Returns `None` when `x` is absent (regardless of `y`); otherwise returns
/// a vector holding `x` followed by `y` if `y` is present.
fn maybe_build_list<T>(x: Option<T>, y: Option<T>) -> Option<Vec<T>> {
    let first = x?;
    // `Option` iterates over zero or one item, so chaining `y` appends it
    // only when it is present.
    Some(std::iter::once(first).chain(y).collect())
}
|row| -> rusqlite::Result { 448 | Ok(Listing{ 449 | website: row.get(0)?, 450 | website_id: row.get(1)?, 451 | title: row.get(2)?, 452 | price: row.get(3)?, 453 | make: row.get(4)?, 454 | model: row.get(5)?, 455 | year: row.get(6)?, 456 | odometer: row.get(7)?, 457 | engine_description: row.get(8)?, 458 | exterior_color: row.get(9)?, 459 | interior_color: row.get(10)?, 460 | drive_type: row.get(11)?, 461 | fuel_type: row.get(12)?, 462 | fuel_economy: maybe_build_list(row.get(13)?, row.get(14)?), 463 | owners: None, 464 | vin: row.get(15)?, 465 | stock_number: row.get(16)?, 466 | comments: row.get(17)?, 467 | image_urls: None, 468 | }) 469 | }, 470 | ); 471 | match row { 472 | Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None), 473 | Err(e) => Err(e), 474 | Ok(mut x) => { 475 | let mut images = Vec::new(); 476 | for row in tx 477 | .prepare("SELECT url FROM images WHERE listing_id=?1 ORDER BY image_index")? 478 | .query_map((&id,), |x| Ok(x.get::<_, String>(0)?))? 479 | { 480 | images.push(row?); 481 | } 482 | if images.len() > 0 { 483 | x.image_urls = Some(images); 484 | } 485 | let mut owners = Vec::new(); 486 | for row in tx 487 | .prepare("SELECT website_id, name, website FROM owners WHERE listing_id=?1 ORDER BY owner_index")? 488 | .query_map((&id,), |x| Ok(OwnerInfo{id: x.get(0)?, name: x.get(1)?, website: x.get(2)?}))? 
// Checks that failed attempts are tracked per (website, website_id) pair:
// recording a failure for one pair makes `check_attempt` return
// `Some(false)` for that pair only, while every other pair keeps
// returning `None`.
#[test]
fn attempt_ids() {
    tokio::runtime::Runtime::new().unwrap().block_on(async {
        let db = Database::open_in_memory().await.unwrap();
        // A fresh database has no recorded attempts.
        assert_eq!(db.check_attempt("kbb", "123").await.unwrap(), None);
        assert_eq!(db.check_attempt("kbb", "321").await.unwrap(), None);
        db.add_failed_attempt("kbb", "123").await.unwrap();
        // Only the exact (website, id) pair is affected.
        assert_eq!(db.check_attempt("kbb", "123").await.unwrap(), Some(false));
        assert_eq!(db.check_attempt("kbb", "321").await.unwrap(), None);
        assert_eq!(db.check_attempt("kbb_v2", "123").await.unwrap(), None);
        db.add_failed_attempt("kbb_v2", "321").await.unwrap();
        // The same ID under a different website is tracked independently.
        assert_eq!(db.check_attempt("kbb", "123").await.unwrap(), Some(false));
        assert_eq!(db.check_attempt("kbb", "321").await.unwrap(), None);
        assert_eq!(
            db.check_attempt("kbb_v2", "321").await.unwrap(),
            Some(false)
        );
    });
}
| name: Some("Annabelle".to_owned()), 547 | website: Some("corgi.com/foo".to_owned()), 548 | }]), 549 | vin: Some("123123123".to_owned()), 550 | stock_number: Some("123".to_owned()), 551 | comments: Some("this car is awesome".to_owned()), 552 | image_urls: Some(vec!["hello.com".to_owned(), "baz.com".to_owned()]), 553 | }; 554 | 555 | let db = Database::open_in_memory().await.unwrap(); 556 | assert_eq!(db.check_attempt("kbb", "123").await.unwrap(), None); 557 | assert_eq!(db.check_attempt("kbb", "321").await.unwrap(), None); 558 | db.add_failed_attempt("kbb", "123").await.unwrap(); 559 | let listing_id = db.add_listing(listing.clone()).await.unwrap().unwrap(); 560 | assert_eq!(db.check_attempt("kbb", "123").await.unwrap(), Some(false)); 561 | assert_eq!(db.check_attempt("kbb", "321").await.unwrap(), Some(true)); 562 | assert_eq!(db.listing_for_id(listing_id + 1).await.unwrap(), None); 563 | assert_eq!(db.listing_for_id(listing_id).await.unwrap(), Some(listing)); 564 | }); 565 | } 566 | } 567 | -------------------------------------------------------------------------------- /src/dedup_images.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::HashSet, 3 | fs::rename, 4 | path::{Path, PathBuf}, 5 | }; 6 | 7 | use crate::{ 8 | chan_util::recv_at_least_one, 9 | db::Database, 10 | image_util::{downsample_image, hash_image}, 11 | }; 12 | use clap::Parser; 13 | use image::ImageFormat; 14 | use tokio::{ 15 | fs::{create_dir_all, read_dir}, 16 | spawn, 17 | task::spawn_blocking, 18 | }; 19 | 20 | #[derive(Clone, Parser)] 21 | pub struct Args { 22 | #[clap(long, value_parser, default_value_t = 16)] 23 | hash_resolution: u32, 24 | 25 | #[clap(long, value_parser, default_value_t = 256)] 26 | out_resolution: u32, 27 | 28 | #[clap(long, value_parser, default_value_t = 4)] 29 | concurrency: usize, 30 | 31 | #[clap(value_parser)] 32 | db_path: String, 33 | 34 | #[clap(value_parser)] 35 | image_dir: String, 36 | 37 | 
#[clap(value_parser)] 38 | output_dir: String, 39 | } 40 | 41 | pub async fn main(args: Args) -> anyhow::Result<()> { 42 | create_hash_prefixes(&args.output_dir).await?; 43 | 44 | let (path_tx, path_rx) = async_channel::bounded(args.concurrency); 45 | let image_dir = args.image_dir.clone(); 46 | spawn(async move { 47 | for prefix in hash_prefixes() { 48 | let sub_dir: PathBuf = [&image_dir, &prefix].iter().collect(); 49 | let mut reader = read_dir(sub_dir).await.unwrap(); 50 | while let Some(path_info) = reader.next_entry().await.unwrap() { 51 | path_tx.send(path_info.path()).await.unwrap(); 52 | } 53 | } 54 | }); 55 | 56 | let db = Database::open(&args.db_path).await?; 57 | let completed: HashSet = db.completed_dedups().await?; 58 | 59 | let (hash_tx, hash_rx) = async_channel::bounded(100); 60 | for _ in 0..args.concurrency { 61 | let path_rx = path_rx.clone(); 62 | let hash_tx = hash_tx.clone(); 63 | let completed = completed.clone(); 64 | let args = args.clone(); 65 | spawn_blocking(move || { 66 | while let Ok(path) = path_rx.recv_blocking() { 67 | if completed.contains(&path_basename(&path)) { 68 | continue; 69 | } 70 | match hash_and_downsample(&args, &path) { 71 | Ok(hash) => hash_tx.send_blocking((path, hash)).unwrap(), 72 | Err(e) => eprintln!("error from {:?}: {}", path, e), 73 | } 74 | } 75 | }); 76 | } 77 | drop(hash_tx); 78 | 79 | let mut num_inserted: u64 = 0; 80 | while let Some(objs) = recv_at_least_one(&hash_rx).await { 81 | let batch_size = objs.len(); 82 | db.insert_phashes( 83 | objs.into_iter() 84 | .map(|(path, phash)| (path_basename(&path), phash)) 85 | .collect(), 86 | ) 87 | .await?; 88 | for _ in 0..batch_size { 89 | num_inserted += 1; 90 | if num_inserted % 100 == 0 { 91 | println!("inserted {} hashes", num_inserted); 92 | } 93 | } 94 | } 95 | Ok(()) 96 | } 97 | 98 | pub fn hash_prefixes() -> Vec { 99 | let chars = "0123456789abcdef"; 100 | chars 101 | .chars() 102 | .flat_map(|x| chars.chars().map(move |y| format!("{}{}", x, y))) 103 | 
/// Return the final component of `p` as an owned string, replacing any
/// invalid UTF-8 lossily.
///
/// Panics if `p` has no final component (`Path::file_name` returns `None`).
fn path_basename(p: &Path) -> String {
    let name = p.file_name().unwrap();
    String::from(name.to_string_lossy())
}
/// Print how many listings have been seen and how many were kept.
///
/// `frac` is `used / seen` as a percentage; it is reported as `0.00` when
/// nothing has been seen yet, rather than the `NaN` that `0 / 0` produces.
fn print_stats(seen: usize, used: usize) {
    let percent = if seen == 0 {
        0.0
    } else {
        100.0 * (used as f64) / (seen as f64)
    };
    println!("total={} used={} (frac={:.02}%)", seen, used, percent);
}
scale < 1.0 { 10 | img.resize( 11 | ((in_width as f64) * scale) as u32, 12 | ((in_height as f64) * scale) as u32, 13 | FilterType::Lanczos3, 14 | ) 15 | } else { 16 | img 17 | } 18 | } 19 | 20 | pub fn hash_image(resolution: u32, img: &DynamicImage) -> String { 21 | let orig_width = img.width(); 22 | let orig_height = img.height(); 23 | let small_img = img 24 | .resize_exact(resolution, resolution, FilterType::Lanczos3) 25 | .into_rgb8(); 26 | 27 | let mut bytes = Vec::new(); 28 | for px in small_img.as_bytes() { 29 | // Quantize each color to allow some wiggle room. 30 | bytes.push(px >> 4); 31 | } 32 | 33 | // Bin the aspect ratio to make sure we don't match very 34 | // differently sized images. 35 | let log_aspect_ratio = ((((orig_width as f64) / (orig_height as f64)).log2()) 36 | .clamp(-4.0, 4.0) 37 | .round() 38 | + 4.0) as u8; 39 | bytes.push(log_aspect_ratio); 40 | 41 | let mut hasher = sha2::Sha256::new(); 42 | hasher.update(&bytes); 43 | let mut res = String::with_capacity(64); 44 | for ch in hasher.finalize() { 45 | write!(&mut res, "{:02x}", ch).unwrap(); 46 | } 47 | res 48 | } 49 | -------------------------------------------------------------------------------- /src/kbb.rs: -------------------------------------------------------------------------------- 1 | use crate::types::{Listing, OwnerInfo, Price, PriceUnit}; 2 | use std::{collections::HashMap, future::Future, path::PathBuf, pin::Pin, time::Duration}; 3 | 4 | use crate::parse_util::{inner_text, FromJSON}; 5 | use reqwest::{RequestBuilder, Response}; 6 | use scraper::{Html, Selector}; 7 | use serde_json::Value; 8 | use tokio::{fs::File, io::AsyncWriteExt, time::sleep}; 9 | 10 | pub struct Client { 11 | client: reqwest::Client, 12 | num_retries: i32, 13 | } 14 | 15 | impl Client { 16 | pub fn new(num_retries: i32) -> Client { 17 | Client { 18 | client: reqwest::Client::new(), 19 | num_retries: num_retries, 20 | } 21 | } 22 | 23 | pub async fn run(&mut self, req: R) -> anyhow::Result { 24 | let mut 
last_err: anyhow::Error = anyhow::Error::msg("UNREACHABLE"); 25 | for i in 0..self.num_retries { 26 | let builder = req 27 | .build_request(&self) 28 | .timeout(Duration::from_secs(30)) 29 | .header("host", "www.kbb.com") 30 | .header("user-agent", format!("curl/1")); 31 | let result = builder.send().await; 32 | match result { 33 | Err(e) => { 34 | last_err = e.into(); 35 | self.client = reqwest::Client::new(); 36 | if i + 1 < self.num_retries { 37 | sleep(Duration::from_secs(10)).await; 38 | } 39 | } 40 | Ok(resp) => { 41 | let output = req.handle_response(resp).await; 42 | match output { 43 | Err(e) => { 44 | last_err = e.into(); 45 | } 46 | Ok(x) => { 47 | return Ok(x); 48 | } 49 | } 50 | } 51 | }; 52 | } 53 | Err(last_err) 54 | } 55 | } 56 | 57 | pub trait Request { 58 | type Output; 59 | type Err: Into; 60 | 61 | fn build_request(&self, client: &Client) -> RequestBuilder; 62 | 63 | fn handle_response( 64 | &self, 65 | resp: Response, 66 | ) -> Pin>>>; 67 | } 68 | 69 | // A request for fetching information about an individual car listing. 70 | pub struct ListingRequest(pub String); 71 | 72 | impl Request for ListingRequest { 73 | type Output = Option; 74 | type Err = anyhow::Error; 75 | 76 | fn build_request(&self, client: &Client) -> RequestBuilder { 77 | client.client.get(format!( 78 | "https://www.kbb.com/cars-for-sale/vehicledetails.xhtml?listingId={}", 79 | self.0 80 | )) 81 | } 82 | 83 | fn handle_response( 84 | &self, 85 | resp: Response, 86 | ) -> Pin>>> { 87 | let id = self.0.clone(); 88 | Box::pin(async { 89 | let text = resp.text().await?; 90 | let doc = Html::parse_fragment(&text); 91 | let titles: Vec<_> = doc.select(&Selector::parse("h1").unwrap()).collect(); 92 | if titles.len() == 0 { 93 | // The "car no longer available" page. 
94 | return Ok(None); 95 | } else if titles.len() != 1 { 96 | return Err(anyhow::Error::msg("no title heading found on listing page")); 97 | } 98 | 99 | let doc_info = extract_doc_json(&doc)?; 100 | let inventory_item = 101 | >::extract_from_json(&doc_info, "initialState.inventory") 102 | .ok() 103 | .and_then(|x| x.into_values().next()); 104 | 105 | Ok(Some(Listing { 106 | website: "kbb.com".to_owned(), 107 | website_id: id, 108 | title: inner_text(&titles[0]), 109 | price: { 110 | f64::extract_from_json( 111 | &doc_info, 112 | "initialState.birf.pageData.page.vehicle.price", 113 | ) 114 | .ok() 115 | .map(|x| Price { 116 | value: 100 * (x as u64), 117 | unit: PriceUnit::Cents, 118 | }) 119 | }, 120 | make: { 121 | >::extract_from_json( 122 | &doc_info, 123 | "initialState.birf.pageData.page.vehicle.makeName", 124 | ) 125 | .ok() 126 | .and_then(vec_into_first) 127 | }, 128 | model: { 129 | >::extract_from_json( 130 | &doc_info, 131 | "initialState.birf.pageData.page.vehicle.modelName", 132 | ) 133 | .ok() 134 | .and_then(vec_into_first) 135 | }, 136 | year: { 137 | u64::extract_from_json( 138 | &doc_info, 139 | "initialState.birf.pageData.page.vehicle.car_year", 140 | ) 141 | .ok() 142 | }, 143 | odometer: { 144 | String::extract_from_json( 145 | &doc_info, 146 | "initialState.birf.pageData.page.vehicle.odometer", 147 | ) 148 | .ok() 149 | .and_then(|x| x.parse().ok()) 150 | }, 151 | engine_description: inventory_item 152 | .as_ref() 153 | .and_then(|x| String::extract_from_json(&x, "engine").ok()), 154 | exterior_color: { 155 | inventory_item 156 | .as_ref() 157 | .and_then(|x| String::extract_from_json(&x, "exteriorColorSimple").ok()) 158 | .or_else(|| { 159 | >::extract_from_json( 160 | &doc_info, 161 | "initialState.birf.pageData.page.vehicle.color", 162 | ) 163 | .ok() 164 | .and_then(vec_into_first) 165 | }) 166 | }, 167 | interior_color: { 168 | inventory_item 169 | .as_ref() 170 | .and_then(|x| String::extract_from_json(&x, "interiorColorSimple").ok()) 
171 | }, 172 | drive_type: { 173 | inventory_item 174 | .as_ref() 175 | .and_then(|x| String::extract_from_json(&x, "driveGroup").ok()) 176 | .and_then(|x| x.parse().ok()) 177 | }, 178 | fuel_type: { 179 | inventory_item 180 | .as_ref() 181 | .and_then(|x| String::extract_from_json(&x, "fuelType").ok()) 182 | .and_then(|x| x.parse().ok()) 183 | }, 184 | fuel_economy: { 185 | >::extract_from_json( 186 | &doc_info, 187 | "initialState.birf.pageData.page.vehicle.fuelEconomy", 188 | ) 189 | .ok() 190 | }, 191 | owners: { 192 | >::extract_from_json(&doc_info, "initialState.owners") 193 | .ok() 194 | .map(|x| { 195 | let mut result = Vec::new(); 196 | for (owner_id, owner_info) in x.into_iter() { 197 | let name = String::extract_from_json(&owner_info, "name").ok(); 198 | let website = 199 | String::extract_from_json(&owner_info, "website.href").ok(); 200 | result.push(OwnerInfo { 201 | id: owner_id, 202 | name, 203 | website, 204 | }); 205 | } 206 | result 207 | }) 208 | }, 209 | vin: String::extract_from_json( 210 | &doc_info, 211 | "initialState.birf.pageData.page.vehicle.vin", 212 | ) 213 | .ok(), 214 | stock_number: String::extract_from_json( 215 | &doc_info, 216 | "initialState.birf.pageData.page.vehicle.stockNumber", 217 | ) 218 | .ok(), 219 | comments: { 220 | inventory_item 221 | .as_ref() 222 | .and_then(|x| { 223 | String::extract_from_json(&x, "additionalInfo.vehicleDescription").ok() 224 | }) 225 | .map(|x| x.replace("
/// Consume `list` and return its first element, or `None` if it is empty.
fn vec_into_first<T>(list: Vec<T>) -> Option<T> {
    // Replaces a `for` loop that returned unconditionally on its first
    // iteration.
    list.into_iter().next()
}
300 | Box::pin(async move { 301 | let mut out_file = File::create(&out_path).await?; 302 | while let Some(chunk) = resp.chunk().await? { 303 | out_file.write_all(&chunk).await?; 304 | } 305 | out_file.flush().await?; 306 | Ok(()) 307 | }) 308 | } 309 | } 310 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::process::ExitCode; 2 | 3 | use clap::Parser; 4 | 5 | mod chan_util; 6 | mod db; 7 | mod dedup_images; 8 | mod export_data; 9 | mod image_util; 10 | mod kbb; 11 | mod make_models; 12 | mod parse_util; 13 | mod scrape_kbb; 14 | mod task_queue; 15 | mod types; 16 | 17 | #[derive(Parser, Clone)] 18 | #[clap(author, version, about, long_about = None)] 19 | enum Args { 20 | ScrapeKbb { 21 | #[clap(flatten)] 22 | args: scrape_kbb::Args, 23 | }, 24 | DedupImages { 25 | #[clap(flatten)] 26 | args: dedup_images::Args, 27 | }, 28 | ExportData { 29 | #[clap(flatten)] 30 | args: export_data::Args, 31 | }, 32 | MakeModels { 33 | #[clap(flatten)] 34 | args: make_models::Args, 35 | }, 36 | } 37 | 38 | #[tokio::main] 39 | async fn main() -> ExitCode { 40 | let args = Args::parse(); 41 | if let Err(e) = match args { 42 | Args::ScrapeKbb { args } => scrape_kbb::main(args).await, 43 | Args::DedupImages { args } => dedup_images::main(args).await, 44 | Args::ExportData { args } => export_data::main(args).await, 45 | Args::MakeModels { args } => make_models::main(args).await, 46 | } { 47 | eprintln!("{}", e); 48 | ExitCode::FAILURE 49 | } else { 50 | ExitCode::SUCCESS 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/make_models.rs: -------------------------------------------------------------------------------- 1 | use crate::db::Database; 2 | use clap::Parser; 3 | 4 | #[derive(Clone, Parser)] 5 | pub struct Args { 6 | #[clap(value_parser)] 7 | db_path: String, 8 | } 9 | 10 | pub async fn 
main(args: Args) -> anyhow::Result<()> { 11 | let db = Database::open(args.db_path).await?; 12 | let counts = db.make_model_counts().await?; 13 | 14 | let total: i64 = counts.iter().map(|(_, _, count)| count).sum(); 15 | 16 | for i in 1..10 { 17 | let sub_idx = (((counts.len() * i) as f64) / 10.0).round() as usize; 18 | let sub_total: i64 = counts[0..sub_idx].iter().map(|(_, _, count)| count).sum(); 19 | let (_, _, sub_count) = &counts[sub_idx]; 20 | println!( 21 | "{}-percentile: {:.02}% (total items: {}) (per-entry {})", 22 | i * 10, 23 | (sub_total as f64) / (total as f64) * 100.0, 24 | sub_idx, 25 | sub_count 26 | ); 27 | } 28 | 29 | Ok(()) 30 | } 31 | -------------------------------------------------------------------------------- /src/parse_util.rs: -------------------------------------------------------------------------------- 1 | use scraper::ElementRef; 2 | use serde_json::Value; 3 | use std::{collections::HashMap, fmt::Write}; 4 | 5 | pub fn inner_text(obj: &ElementRef) -> String { 6 | let mut result = String::new(); 7 | for x in obj.text() { 8 | write!(&mut result, "{} ", x).unwrap(); 9 | } 10 | result.trim().to_owned() 11 | } 12 | 13 | pub trait FromJSON 14 | where 15 | Self: Sized, 16 | { 17 | fn from_json(value: &Value) -> anyhow::Result; 18 | 19 | fn extract_from_json(root: &Value, path: &str) -> anyhow::Result { 20 | let mut cur_obj = root; 21 | for part in path.split(".") { 22 | if let Value::Object(obj) = cur_obj { 23 | if let Some(x) = obj.get(part) { 24 | cur_obj = x; 25 | } else { 26 | return Err(anyhow::Error::msg(format!( 27 | "object path not found: {}", 28 | path 29 | ))); 30 | } 31 | } else { 32 | return Err(anyhow::Error::msg(format!( 33 | "incorrect type in object path: {}", 34 | path 35 | ))); 36 | } 37 | } 38 | match Self::from_json(cur_obj) { 39 | Ok(x) => Ok(x), 40 | Err(e) => Err(anyhow::Error::msg(format!( 41 | "error for object path {}: {}", 42 | path, e 43 | ))), 44 | } 45 | } 46 | } 47 | 48 | impl FromJSON for Value { 49 | fn 
from_json(value: &Value) -> anyhow::Result { 50 | Ok(value.clone()) 51 | } 52 | } 53 | 54 | impl FromJSON for f64 { 55 | fn from_json(value: &Value) -> anyhow::Result { 56 | match value { 57 | Value::Number(x) => { 58 | if let Some(f) = x.as_f64() { 59 | Ok(f) 60 | } else { 61 | Err(anyhow::Error::msg(format!("{} is not an f64", x))) 62 | } 63 | } 64 | _ => Err(anyhow::Error::msg(format!("{} is not a number", value))), 65 | } 66 | } 67 | } 68 | 69 | impl FromJSON for u64 { 70 | fn from_json(value: &Value) -> anyhow::Result { 71 | match value { 72 | Value::Number(x) => { 73 | if let Some(f) = x.as_u64() { 74 | Ok(f) 75 | } else { 76 | Err(anyhow::Error::msg(format!("{} is not a u64", x))) 77 | } 78 | } 79 | _ => Err(anyhow::Error::msg(format!("{} is not a number", value))), 80 | } 81 | } 82 | } 83 | 84 | impl FromJSON for String { 85 | fn from_json(value: &Value) -> anyhow::Result { 86 | match value { 87 | Value::String(x) => Ok(x.clone()), 88 | _ => Err(anyhow::Error::msg(format!("{} is not a string", value))), 89 | } 90 | } 91 | } 92 | 93 | impl FromJSON for Vec { 94 | fn from_json(value: &Value) -> anyhow::Result { 95 | match value { 96 | Value::Array(x) => x 97 | .iter() 98 | .map(|x| T::from_json(x)) 99 | .collect::>>(), 100 | _ => Err(anyhow::Error::msg(format!("{} is not an array", value))), 101 | } 102 | } 103 | } 104 | 105 | impl FromJSON for HashMap { 106 | fn from_json(value: &Value) -> anyhow::Result { 107 | match value { 108 | Value::Object(x) => x 109 | .iter() 110 | .map(|(k, v)| T::from_json(v).map(|x| (k.clone(), x))) 111 | .collect::>>(), 112 | _ => Err(anyhow::Error::msg(format!("{} is not an object", value))), 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/scrape_kbb.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::HashSet, 3 | path::{Path, PathBuf}, 4 | }; 5 | 6 | use crate::{ 7 | db::{hash_image_url, 
// Command-line options for the KBB scraper.
#[derive(Clone, Parser)]
pub struct Args {
    // First listing ID to try (inclusive start of the `min..max` range
    // that is shuffled into the work queue).
    #[clap(long, value_parser, default_value_t = 660000000)]
    min_id: i64,

    // End of the listing ID range (exclusive, since the queue is built
    // from `min..max`).
    #[clap(long, value_parser, default_value_t = 668000000)]
    max_id: i64,

    // Passed to `Client::new` as the retry count for each request.
    #[clap(short, long, value_parser, default_value_t = 15)]
    num_retries: i32,

    // Number of fetcher tasks spawned by `main`; also the capacity of the
    // channel on which they report their results.
    #[clap(short, long, value_parser, default_value_t = 8)]
    concurrency: usize,

    // Size passed to the image downsampler after each download; 0 skips
    // resizing and keeps the downloaded file as-is.
    #[clap(short, long, value_parser, default_value_t = 256)]
    resize_images: u32,

    // Path handed to `Database::open`.
    #[clap(value_parser)]
    db_path: String,

    // Directory under which hash-prefix sub-directories are created and
    // downloaded images are stored.
    #[clap(value_parser)]
    image_dir: String,
}
| exc?; 73 | } 74 | 75 | Ok(()) 76 | } 77 | 78 | async fn fetch_listings(db: Database, perm: TaskQueue, args: Args) -> anyhow::Result<()> { 79 | let mut client = Client::new(args.num_retries); 80 | while let Some((id, remaining)) = perm.pop().await { 81 | let id_str = format!("{}", id); 82 | if db.check_attempt(KBB_WEBSITE_NAME, &id_str).await?.is_some() { 83 | continue; 84 | } 85 | if let Some(listing) = client.run(ListingRequest(id_str.clone())).await? { 86 | download_listing_images(&mut client, &args.image_dir, args.resize_images, &listing) 87 | .await?; 88 | db.add_listing(listing).await?; 89 | } else { 90 | db.add_failed_attempt(KBB_WEBSITE_NAME, &id_str).await?; 91 | } 92 | 93 | let completed = perm.orig_len() - remaining; 94 | if completed % 100 == 0 { 95 | let start = Instant::now(); 96 | let (num_listings, total_attempts) = db.counts().await?; 97 | let counts_duration = start.elapsed(); 98 | eprintln!( 99 | "scraped={:.04}% hit_rate={:.02}% hit_total={} db_latency={:.05}", 100 | 100.0 * (completed as f64) / (perm.orig_len() as f64), 101 | 100.0 * (num_listings as f64) / (total_attempts as f64), 102 | num_listings, 103 | counts_duration.as_secs_f64(), 104 | ); 105 | } 106 | } 107 | Ok(()) 108 | } 109 | 110 | async fn download_listing_images( 111 | client: &mut Client, 112 | image_path: &str, 113 | resize_images: u32, 114 | listing: &Listing, 115 | ) -> anyhow::Result<()> { 116 | if let Some(urls) = &listing.image_urls { 117 | for url in urls { 118 | let image_hash = hash_image_url(&url); 119 | let out_path: PathBuf = [image_path, &image_hash[0..2], &image_hash] 120 | .iter() 121 | .collect(); 122 | if tokio::fs::metadata(&out_path).await.is_ok() { 123 | // Skip for already-downloaded image URL 124 | continue; 125 | } 126 | // Download+rename to atomically write the file. 
127 | let tmp_out_path: PathBuf = [ 128 | image_path, 129 | &format!("{}.{}", image_hash, listing.website_id), 130 | ] 131 | .iter() 132 | .collect(); 133 | client 134 | .run(ImageDownloadRequest { 135 | url: url.clone(), 136 | out_path: tmp_out_path.clone(), 137 | }) 138 | .await?; 139 | if resize_images != 0 { 140 | spawn_blocking(move || resize_or_rename(resize_images, tmp_out_path, out_path)) 141 | .await??; 142 | } else { 143 | tokio::fs::rename(tmp_out_path, out_path).await?; 144 | } 145 | } 146 | } 147 | Ok(()) 148 | } 149 | 150 | fn resize_or_rename>(size: u32, src: T, dst: T) -> anyhow::Result<()> { 151 | if attempt_resize(size, &src, &dst).is_err() { 152 | std::fs::rename(src, dst)?; 153 | } 154 | Ok(()) 155 | } 156 | 157 | fn attempt_resize>(size: u32, src: T, dst: T) -> anyhow::Result<()> { 158 | let img = downsample_image( 159 | size, 160 | image::io::Reader::open(&src)? 161 | .with_guessed_format()? 162 | .decode()?, 163 | ); 164 | let tmp_tmp_path = format!("{}_writing", src.as_ref().to_string_lossy()); 165 | img.save_with_format(&tmp_tmp_path, ImageFormat::Jpeg)?; 166 | std::fs::rename(tmp_tmp_path, dst)?; 167 | Ok(()) 168 | } 169 | 170 | fn generate_permutation(min: i64, max: i64) -> TaskQueue { 171 | let mut v: Vec = (min..max).collect(); 172 | v.shuffle(&mut rand::thread_rng()); 173 | v.into() 174 | } 175 | -------------------------------------------------------------------------------- /src/task_queue.rs: -------------------------------------------------------------------------------- 1 | // Adapted from map-dump: 2 | // https://github.com/unixpickle/map-dump/blob/e5997309cd40a32c63d5fa461746d9dabc1dfea2/src/task_queue.rs 3 | 4 | use std::{mem::take, ops::DerefMut, sync::Arc}; 5 | 6 | use tokio::sync::Mutex; 7 | 8 | pub struct TaskQueue { 9 | queue: Arc>>, 10 | orig_len: usize, 11 | } 12 | 13 | impl Clone for TaskQueue { 14 | fn clone(&self) -> TaskQueue { 15 | TaskQueue { 16 | queue: self.queue.clone(), 17 | orig_len: self.orig_len, 18 | } 19 | 
} 20 | } 21 | 22 | impl> From for TaskQueue { 23 | fn from(x: I) -> TaskQueue { 24 | let v: Vec<_> = x.into_iter().collect(); 25 | let orig_len = v.len(); 26 | let queue = Arc::new(Mutex::new(v)); 27 | TaskQueue { 28 | queue: queue, 29 | orig_len: orig_len, 30 | } 31 | } 32 | } 33 | 34 | impl TaskQueue { 35 | pub async fn pop(&self) -> Option<(T, usize)> { 36 | let mut locked = self.queue.lock().await; 37 | locked.pop().map(|x| (x, locked.len())) 38 | } 39 | 40 | pub fn orig_len(&self) -> usize { 41 | self.orig_len 42 | } 43 | 44 | pub async fn filter bool>(&self, p: P) { 45 | let mut locked = self.queue.lock().await; 46 | *locked = take(locked.deref_mut()).into_iter().filter(p).collect(); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/types.rs: -------------------------------------------------------------------------------- 1 | use std::convert::Infallible; 2 | use std::fmt::Display; 3 | use std::str::FromStr; 4 | 5 | use rusqlite::types::{FromSql, FromSqlError, ToSqlOutput, ValueRef}; 6 | use rusqlite::ToSql; 7 | 8 | #[derive(Clone, Debug, Default, PartialEq, Eq)] 9 | pub enum PriceUnit { 10 | #[default] 11 | Cents, 12 | } 13 | 14 | #[derive(Clone, Debug, PartialEq, Eq)] 15 | pub struct Price { 16 | pub value: u64, 17 | pub unit: PriceUnit, 18 | } 19 | 20 | impl FromStr for Price { 21 | type Err = ::Err; 22 | 23 | fn from_str(s: &str) -> Result { 24 | let mut processed = s.trim().replace(",", ""); 25 | let mut unit = PriceUnit::default(); 26 | if processed.starts_with("$") { 27 | unit = PriceUnit::Cents; 28 | processed = processed.replace("$", ""); 29 | } 30 | Ok(Price { 31 | value: (f64::from_str(&processed)? 
* 100.0).round() as u64, 32 | unit: unit, 33 | }) 34 | } 35 | } 36 | 37 | impl Display for Price { 38 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 39 | match self.unit { 40 | PriceUnit::Cents => { 41 | write!(f, "${:.02}", ((self.value as f64) / 100.0))?; 42 | } 43 | } 44 | Ok(()) 45 | } 46 | } 47 | 48 | #[derive(Clone, Debug, Default, PartialEq, Eq)] 49 | pub enum DistanceUnit { 50 | #[default] 51 | Miles, 52 | } 53 | 54 | #[derive(Clone, Debug, PartialEq, Eq)] 55 | pub struct Distance { 56 | pub value: u64, 57 | pub unit: DistanceUnit, 58 | } 59 | 60 | impl FromStr for Distance { 61 | type Err = ::Err; 62 | 63 | fn from_str(s: &str) -> Result { 64 | let mut processed = s.trim().replace(",", ""); 65 | let mut unit = DistanceUnit::default(); 66 | if processed.ends_with(" mi") { 67 | unit = DistanceUnit::Miles; 68 | processed = processed.replace(" mi", ""); 69 | } 70 | Ok(Distance { 71 | value: (f64::from_str(&processed)?).round() as u64, 72 | unit: unit, 73 | }) 74 | } 75 | } 76 | 77 | impl Display for Distance { 78 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 79 | match self.unit { 80 | DistanceUnit::Miles => { 81 | write!(f, "{} mi", self.value)?; 82 | } 83 | } 84 | Ok(()) 85 | } 86 | } 87 | 88 | #[derive(Clone, Debug, PartialEq, Eq)] 89 | pub enum DriveType { 90 | TwoWheelFront, 91 | TwoWheelRear, 92 | FourWheel, 93 | Other(String), 94 | } 95 | 96 | impl FromStr for DriveType { 97 | type Err = Infallible; 98 | 99 | fn from_str(s: &str) -> Result { 100 | Ok(match s { 101 | "FWD" => DriveType::TwoWheelFront, 102 | "RWD" => DriveType::TwoWheelRear, 103 | "AWD4WD" => DriveType::FourWheel, 104 | x => DriveType::Other(x.to_owned()), 105 | }) 106 | } 107 | } 108 | 109 | impl Display for DriveType { 110 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 111 | match self { 112 | Self::TwoWheelFront => write!(f, "FWD")?, 113 | Self::TwoWheelRear => write!(f, "RWD")?, 114 | Self::FourWheel => write!(f, 
"AWD4WD")?, 115 | Self::Other(x) => write!(f, "{}", x)?, 116 | } 117 | Ok(()) 118 | } 119 | } 120 | 121 | #[derive(Clone, Debug, PartialEq, Eq)] 122 | pub enum FuelType { 123 | Gasoline, 124 | Hybrid, 125 | Diesel, 126 | Electric, 127 | Hydrogen, 128 | Alternative, 129 | } 130 | 131 | impl FromStr for FuelType { 132 | type Err = Infallible; 133 | 134 | fn from_str(s: &str) -> Result { 135 | Ok(match s { 136 | "Gasoline" => FuelType::Gasoline, 137 | "Hybrid" => FuelType::Hybrid, 138 | "Diesel" => FuelType::Diesel, 139 | "Electric" => FuelType::Electric, 140 | "Hydrogen" => FuelType::Hydrogen, 141 | _ => FuelType::Alternative, 142 | }) 143 | } 144 | } 145 | 146 | impl Display for FuelType { 147 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 148 | write!( 149 | f, 150 | "{}", 151 | match self { 152 | FuelType::Gasoline => "Gasoline", 153 | FuelType::Hybrid => "Hybrid", 154 | FuelType::Diesel => "Diesel", 155 | FuelType::Electric => "Electric", 156 | FuelType::Hydrogen => "Hydrogen", 157 | FuelType::Alternative => "Alternative", 158 | } 159 | ) 160 | } 161 | } 162 | 163 | #[derive(Clone, Debug, PartialEq, Eq)] 164 | pub struct OwnerInfo { 165 | pub id: String, 166 | pub name: Option, 167 | pub website: Option, 168 | } 169 | 170 | #[derive(Clone, Debug, PartialEq, Eq)] 171 | pub struct Listing { 172 | pub website: String, 173 | pub website_id: String, 174 | pub title: String, 175 | pub price: Option, 176 | pub make: Option, 177 | pub model: Option, 178 | pub year: Option, 179 | pub odometer: Option, 180 | pub engine_description: Option, 181 | pub exterior_color: Option, 182 | pub interior_color: Option, 183 | pub drive_type: Option, 184 | pub fuel_type: Option, 185 | pub fuel_economy: Option>, 186 | pub owners: Option>, 187 | pub vin: Option, 188 | pub stock_number: Option, 189 | pub comments: Option, 190 | pub image_urls: Option>, 191 | } 192 | 193 | macro_rules! 
sql_string_obj { 194 | ($data_type:ty) => { 195 | impl ToSql for $data_type { 196 | fn to_sql(&self) -> rusqlite::Result> { 197 | Ok(ToSqlOutput::Owned(rusqlite::types::Value::Text(format!( 198 | "{}", 199 | self 200 | )))) 201 | } 202 | } 203 | 204 | impl FromSql for $data_type { 205 | fn column_result( 206 | value: rusqlite::types::ValueRef<'_>, 207 | ) -> rusqlite::types::FromSqlResult { 208 | match value { 209 | ValueRef::Text(x) => String::from_utf8(Vec::from(x)) 210 | .map_err(|x| FromSqlError::Other(Box::new(x)))? 211 | .parse() 212 | .map_err(|x| FromSqlError::Other(Box::new(x))), 213 | _ => Err(FromSqlError::InvalidType), 214 | } 215 | } 216 | } 217 | }; 218 | } 219 | 220 | sql_string_obj!(Price); 221 | sql_string_obj!(Distance); 222 | sql_string_obj!(DriveType); 223 | sql_string_obj!(FuelType); 224 | --------------------------------------------------------------------------------