├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── car_data
    ├── __init__.py
    ├── constants.py
    ├── dataset.py
    ├── graphics.py
    ├── lin_features.py
    ├── losses.py
    ├── model.py
    ├── scripts
    │   ├── baseline.py
    │   ├── classify.py
    │   ├── classify_viz.py
    │   ├── classify_viz_animate.py
    │   ├── clip_features.py
    │   ├── data_bench.py
    │   ├── data_viz.py
    │   ├── filter_index.py
    │   ├── plot_runs.py
    │   ├── recalibrate.py
    │   ├── train.py
    │   ├── train_distill.py
    │   └── train_filter.py
    └── train_loop.py
├── setup.py
└── src
    ├── chan_util.rs
    ├── db.rs
    ├── dedup_images.rs
    ├── export_data.rs
    ├── image_util.rs
    ├── kbb.rs
    ├── main.rs
    ├── make_models.rs
    ├── parse_util.rs
    ├── scrape_kbb.rs
    ├── task_queue.rs
    └── types.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | data
3 | *.egg-info
4 | __pycache__
5 | 


--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
   1 | # This file is automatically @generated by Cargo.
   2 | # It is not intended for manual editing.
   3 | version = 3
   4 | 
   5 | [[package]]
   6 | name = "adler"
   7 | version = "1.0.2"
   8 | source = "registry+https://github.com/rust-lang/crates.io-index"
   9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
  10 | 
  11 | [[package]]
  12 | name = "aes"
  13 | version = "0.7.5"
  14 | source = "registry+https://github.com/rust-lang/crates.io-index"
  15 | checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8"
  16 | dependencies = [
  17 |  "cfg-if",
  18 |  "cipher",
  19 |  "cpufeatures",
  20 |  "opaque-debug",
  21 | ]
  22 | 
  23 | [[package]]
  24 | name = "ahash"
  25 | version = "0.7.6"
  26 | source = "registry+https://github.com/rust-lang/crates.io-index"
  27 | checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
  28 | dependencies = [
  29 |  "getrandom 0.2.8",
  30 |  "once_cell",
  31 |  "version_check",
  32 | ]
  33 | 
  34 | [[package]]
  35 | name = "anyhow"
  36 | version = "1.0.66"
  37 | source = "registry+https://github.com/rust-lang/crates.io-index"
  38 | checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6"
  39 | 
  40 | [[package]]
  41 | name = "async-channel"
  42 | version = "1.7.1"
  43 | source = "registry+https://github.com/rust-lang/crates.io-index"
  44 | checksum = "e14485364214912d3b19cc3435dde4df66065127f05fa0d75c712f36f12c2f28"
  45 | dependencies = [
  46 |  "concurrent-queue",
  47 |  "event-listener",
  48 |  "futures-core",
  49 | ]
  50 | 
  51 | [[package]]
  52 | name = "atty"
  53 | version = "0.2.14"
  54 | source = "registry+https://github.com/rust-lang/crates.io-index"
  55 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
  56 | dependencies = [
  57 |  "hermit-abi",
  58 |  "libc",
  59 |  "winapi",
  60 | ]
  61 | 
  62 | [[package]]
  63 | name = "autocfg"
  64 | version = "1.1.0"
  65 | source = "registry+https://github.com/rust-lang/crates.io-index"
  66 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
  67 | 
  68 | [[package]]
  69 | name = "base64"
  70 | version = "0.13.1"
  71 | source = "registry+https://github.com/rust-lang/crates.io-index"
  72 | checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
  73 | 
  74 | [[package]]
  75 | name = "base64ct"
  76 | version = "1.5.3"
  77 | source = "registry+https://github.com/rust-lang/crates.io-index"
  78 | checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf"
  79 | 
  80 | [[package]]
  81 | name = "bit_field"
  82 | version = "0.10.1"
  83 | source = "registry+https://github.com/rust-lang/crates.io-index"
  84 | checksum = "dcb6dd1c2376d2e096796e234a70e17e94cc2d5d54ff8ce42b28cef1d0d359a4"
  85 | 
  86 | [[package]]
  87 | name = "bitflags"
  88 | version = "1.3.2"
  89 | source = "registry+https://github.com/rust-lang/crates.io-index"
  90 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
  91 | 
  92 | [[package]]
  93 | name = "block-buffer"
  94 | version = "0.10.3"
  95 | source = "registry+https://github.com/rust-lang/crates.io-index"
  96 | checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e"
  97 | dependencies = [
  98 |  "generic-array",
  99 | ]
 100 | 
 101 | [[package]]
 102 | name = "bumpalo"
 103 | version = "3.11.1"
 104 | source = "registry+https://github.com/rust-lang/crates.io-index"
 105 | checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"
 106 | 
 107 | [[package]]
 108 | name = "bytemuck"
 109 | version = "1.12.3"
 110 | source = "registry+https://github.com/rust-lang/crates.io-index"
 111 | checksum = "aaa3a8d9a1ca92e282c96a32d6511b695d7d994d1d102ba85d279f9b2756947f"
 112 | 
 113 | [[package]]
 114 | name = "byteorder"
 115 | version = "1.4.3"
 116 | source = "registry+https://github.com/rust-lang/crates.io-index"
 117 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
 118 | 
 119 | [[package]]
 120 | name = "bytes"
 121 | version = "1.3.0"
 122 | source = "registry+https://github.com/rust-lang/crates.io-index"
 123 | checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c"
 124 | 
 125 | [[package]]
 126 | name = "bzip2"
 127 | version = "0.4.3"
 128 | source = "registry+https://github.com/rust-lang/crates.io-index"
 129 | checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0"
 130 | dependencies = [
 131 |  "bzip2-sys",
 132 |  "libc",
 133 | ]
 134 | 
 135 | [[package]]
 136 | name = "bzip2-sys"
 137 | version = "0.1.11+1.0.8"
 138 | source = "registry+https://github.com/rust-lang/crates.io-index"
 139 | checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
 140 | dependencies = [
 141 |  "cc",
 142 |  "libc",
 143 |  "pkg-config",
 144 | ]
 145 | 
 146 | [[package]]
 147 | name = "cache-padded"
 148 | version = "1.2.0"
 149 | source = "registry+https://github.com/rust-lang/crates.io-index"
 150 | checksum = "c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c"
 151 | 
 152 | [[package]]
 153 | name = "car-data"
 154 | version = "0.1.0"
 155 | dependencies = [
 156 |  "anyhow",
 157 |  "async-channel",
 158 |  "clap",
 159 |  "image",
 160 |  "npy-writer",
 161 |  "rand 0.8.5",
 162 |  "reqwest",
 163 |  "rusqlite",
 164 |  "scraper",
 165 |  "serde_json",
 166 |  "sha2",
 167 |  "tokio",
 168 | ]
 169 | 
 170 | [[package]]
 171 | name = "cc"
 172 | version = "1.0.77"
 173 | source = "registry+https://github.com/rust-lang/crates.io-index"
 174 | checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4"
 175 | dependencies = [
 176 |  "jobserver",
 177 | ]
 178 | 
 179 | [[package]]
 180 | name = "cfg-if"
 181 | version = "1.0.0"
 182 | source = "registry+https://github.com/rust-lang/crates.io-index"
 183 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 184 | 
 185 | [[package]]
 186 | name = "cipher"
 187 | version = "0.3.0"
 188 | source = "registry+https://github.com/rust-lang/crates.io-index"
 189 | checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7"
 190 | dependencies = [
 191 |  "generic-array",
 192 | ]
 193 | 
 194 | [[package]]
 195 | name = "clap"
 196 | version = "3.2.23"
 197 | source = "registry+https://github.com/rust-lang/crates.io-index"
 198 | checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5"
 199 | dependencies = [
 200 |  "atty",
 201 |  "bitflags",
 202 |  "clap_derive",
 203 |  "clap_lex",
 204 |  "indexmap",
 205 |  "once_cell",
 206 |  "strsim",
 207 |  "termcolor",
 208 |  "textwrap",
 209 | ]
 210 | 
 211 | [[package]]
 212 | name = "clap_derive"
 213 | version = "3.2.18"
 214 | source = "registry+https://github.com/rust-lang/crates.io-index"
 215 | checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65"
 216 | dependencies = [
 217 |  "heck",
 218 |  "proc-macro-error",
 219 |  "proc-macro2",
 220 |  "quote",
 221 |  "syn",
 222 | ]
 223 | 
 224 | [[package]]
 225 | name = "clap_lex"
 226 | version = "0.2.4"
 227 | source = "registry+https://github.com/rust-lang/crates.io-index"
 228 | checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
 229 | dependencies = [
 230 |  "os_str_bytes",
 231 | ]
 232 | 
 233 | [[package]]
 234 | name = "color_quant"
 235 | version = "1.1.0"
 236 | source = "registry+https://github.com/rust-lang/crates.io-index"
 237 | checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
 238 | 
 239 | [[package]]
 240 | name = "concurrent-queue"
 241 | version = "1.2.4"
 242 | source = "registry+https://github.com/rust-lang/crates.io-index"
 243 | checksum = "af4780a44ab5696ea9e28294517f1fffb421a83a25af521333c838635509db9c"
 244 | dependencies = [
 245 |  "cache-padded",
 246 | ]
 247 | 
 248 | [[package]]
 249 | name = "constant_time_eq"
 250 | version = "0.1.5"
 251 | source = "registry+https://github.com/rust-lang/crates.io-index"
 252 | checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
 253 | 
 254 | [[package]]
 255 | name = "convert_case"
 256 | version = "0.4.0"
 257 | source = "registry+https://github.com/rust-lang/crates.io-index"
 258 | checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
 259 | 
 260 | [[package]]
 261 | name = "core-foundation"
 262 | version = "0.9.3"
 263 | source = "registry+https://github.com/rust-lang/crates.io-index"
 264 | checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146"
 265 | dependencies = [
 266 |  "core-foundation-sys",
 267 |  "libc",
 268 | ]
 269 | 
 270 | [[package]]
 271 | name = "core-foundation-sys"
 272 | version = "0.8.3"
 273 | source = "registry+https://github.com/rust-lang/crates.io-index"
 274 | checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
 275 | 
 276 | [[package]]
 277 | name = "cpufeatures"
 278 | version = "0.2.5"
 279 | source = "registry+https://github.com/rust-lang/crates.io-index"
 280 | checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320"
 281 | dependencies = [
 282 |  "libc",
 283 | ]
 284 | 
 285 | [[package]]
 286 | name = "crc32fast"
 287 | version = "1.3.2"
 288 | source = "registry+https://github.com/rust-lang/crates.io-index"
 289 | checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
 290 | dependencies = [
 291 |  "cfg-if",
 292 | ]
 293 | 
 294 | [[package]]
 295 | name = "crossbeam-channel"
 296 | version = "0.5.6"
 297 | source = "registry+https://github.com/rust-lang/crates.io-index"
 298 | checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
 299 | dependencies = [
 300 |  "cfg-if",
 301 |  "crossbeam-utils",
 302 | ]
 303 | 
 304 | [[package]]
 305 | name = "crossbeam-deque"
 306 | version = "0.8.2"
 307 | source = "registry+https://github.com/rust-lang/crates.io-index"
 308 | checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
 309 | dependencies = [
 310 |  "cfg-if",
 311 |  "crossbeam-epoch",
 312 |  "crossbeam-utils",
 313 | ]
 314 | 
 315 | [[package]]
 316 | name = "crossbeam-epoch"
 317 | version = "0.9.13"
 318 | source = "registry+https://github.com/rust-lang/crates.io-index"
 319 | checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a"
 320 | dependencies = [
 321 |  "autocfg",
 322 |  "cfg-if",
 323 |  "crossbeam-utils",
 324 |  "memoffset",
 325 |  "scopeguard",
 326 | ]
 327 | 
 328 | [[package]]
 329 | name = "crossbeam-utils"
 330 | version = "0.8.14"
 331 | source = "registry+https://github.com/rust-lang/crates.io-index"
 332 | checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
 333 | dependencies = [
 334 |  "cfg-if",
 335 | ]
 336 | 
 337 | [[package]]
 338 | name = "crunchy"
 339 | version = "0.2.2"
 340 | source = "registry+https://github.com/rust-lang/crates.io-index"
 341 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
 342 | 
 343 | [[package]]
 344 | name = "crypto-common"
 345 | version = "0.1.6"
 346 | source = "registry+https://github.com/rust-lang/crates.io-index"
 347 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
 348 | dependencies = [
 349 |  "generic-array",
 350 |  "typenum",
 351 | ]
 352 | 
 353 | [[package]]
 354 | name = "cssparser"
 355 | version = "0.27.2"
 356 | source = "registry+https://github.com/rust-lang/crates.io-index"
 357 | checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
 358 | dependencies = [
 359 |  "cssparser-macros",
 360 |  "dtoa-short",
 361 |  "itoa 0.4.8",
 362 |  "matches",
 363 |  "phf 0.8.0",
 364 |  "proc-macro2",
 365 |  "quote",
 366 |  "smallvec",
 367 |  "syn",
 368 | ]
 369 | 
 370 | [[package]]
 371 | name = "cssparser-macros"
 372 | version = "0.6.0"
 373 | source = "registry+https://github.com/rust-lang/crates.io-index"
 374 | checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
 375 | dependencies = [
 376 |  "quote",
 377 |  "syn",
 378 | ]
 379 | 
 380 | [[package]]
 381 | name = "derive_more"
 382 | version = "0.99.17"
 383 | source = "registry+https://github.com/rust-lang/crates.io-index"
 384 | checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
 385 | dependencies = [
 386 |  "convert_case",
 387 |  "proc-macro2",
 388 |  "quote",
 389 |  "rustc_version",
 390 |  "syn",
 391 | ]
 392 | 
 393 | [[package]]
 394 | name = "digest"
 395 | version = "0.10.6"
 396 | source = "registry+https://github.com/rust-lang/crates.io-index"
 397 | checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f"
 398 | dependencies = [
 399 |  "block-buffer",
 400 |  "crypto-common",
 401 |  "subtle",
 402 | ]
 403 | 
 404 | [[package]]
 405 | name = "dtoa"
 406 | version = "0.4.8"
 407 | source = "registry+https://github.com/rust-lang/crates.io-index"
 408 | checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0"
 409 | 
 410 | [[package]]
 411 | name = "dtoa-short"
 412 | version = "0.3.3"
 413 | source = "registry+https://github.com/rust-lang/crates.io-index"
 414 | checksum = "bde03329ae10e79ede66c9ce4dc930aa8599043b0743008548680f25b91502d6"
 415 | dependencies = [
 416 |  "dtoa",
 417 | ]
 418 | 
 419 | [[package]]
 420 | name = "ego-tree"
 421 | version = "0.6.2"
 422 | source = "registry+https://github.com/rust-lang/crates.io-index"
 423 | checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
 424 | 
 425 | [[package]]
 426 | name = "either"
 427 | version = "1.8.0"
 428 | source = "registry+https://github.com/rust-lang/crates.io-index"
 429 | checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
 430 | 
 431 | [[package]]
 432 | name = "encoding_rs"
 433 | version = "0.8.31"
 434 | source = "registry+https://github.com/rust-lang/crates.io-index"
 435 | checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b"
 436 | dependencies = [
 437 |  "cfg-if",
 438 | ]
 439 | 
 440 | [[package]]
 441 | name = "event-listener"
 442 | version = "2.5.3"
 443 | source = "registry+https://github.com/rust-lang/crates.io-index"
 444 | checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
 445 | 
 446 | [[package]]
 447 | name = "exr"
 448 | version = "1.5.2"
 449 | source = "registry+https://github.com/rust-lang/crates.io-index"
 450 | checksum = "8eb5f255b5980bb0c8cf676b675d1a99be40f316881444f44e0462eaf5df5ded"
 451 | dependencies = [
 452 |  "bit_field",
 453 |  "flume",
 454 |  "half",
 455 |  "lebe",
 456 |  "miniz_oxide",
 457 |  "smallvec",
 458 |  "threadpool",
 459 | ]
 460 | 
 461 | [[package]]
 462 | name = "fallible-iterator"
 463 | version = "0.2.0"
 464 | source = "registry+https://github.com/rust-lang/crates.io-index"
 465 | checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
 466 | 
 467 | [[package]]
 468 | name = "fallible-streaming-iterator"
 469 | version = "0.1.9"
 470 | source = "registry+https://github.com/rust-lang/crates.io-index"
 471 | checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
 472 | 
 473 | [[package]]
 474 | name = "flate2"
 475 | version = "1.0.25"
 476 | source = "registry+https://github.com/rust-lang/crates.io-index"
 477 | checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841"
 478 | dependencies = [
 479 |  "crc32fast",
 480 |  "miniz_oxide",
 481 | ]
 482 | 
 483 | [[package]]
 484 | name = "flume"
 485 | version = "0.10.14"
 486 | source = "registry+https://github.com/rust-lang/crates.io-index"
 487 | checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577"
 488 | dependencies = [
 489 |  "futures-core",
 490 |  "futures-sink",
 491 |  "nanorand",
 492 |  "pin-project",
 493 |  "spin 0.9.4",
 494 | ]
 495 | 
 496 | [[package]]
 497 | name = "fnv"
 498 | version = "1.0.7"
 499 | source = "registry+https://github.com/rust-lang/crates.io-index"
 500 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
 501 | 
 502 | [[package]]
 503 | name = "form_urlencoded"
 504 | version = "1.1.0"
 505 | source = "registry+https://github.com/rust-lang/crates.io-index"
 506 | checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8"
 507 | dependencies = [
 508 |  "percent-encoding",
 509 | ]
 510 | 
 511 | [[package]]
 512 | name = "futf"
 513 | version = "0.1.5"
 514 | source = "registry+https://github.com/rust-lang/crates.io-index"
 515 | checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
 516 | dependencies = [
 517 |  "mac",
 518 |  "new_debug_unreachable",
 519 | ]
 520 | 
 521 | [[package]]
 522 | name = "futures-channel"
 523 | version = "0.3.25"
 524 | source = "registry+https://github.com/rust-lang/crates.io-index"
 525 | checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed"
 526 | dependencies = [
 527 |  "futures-core",
 528 | ]
 529 | 
 530 | [[package]]
 531 | name = "futures-core"
 532 | version = "0.3.25"
 533 | source = "registry+https://github.com/rust-lang/crates.io-index"
 534 | checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac"
 535 | 
 536 | [[package]]
 537 | name = "futures-sink"
 538 | version = "0.3.25"
 539 | source = "registry+https://github.com/rust-lang/crates.io-index"
 540 | checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9"
 541 | 
 542 | [[package]]
 543 | name = "futures-task"
 544 | version = "0.3.25"
 545 | source = "registry+https://github.com/rust-lang/crates.io-index"
 546 | checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea"
 547 | 
 548 | [[package]]
 549 | name = "futures-util"
 550 | version = "0.3.25"
 551 | source = "registry+https://github.com/rust-lang/crates.io-index"
 552 | checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6"
 553 | dependencies = [
 554 |  "futures-core",
 555 |  "futures-task",
 556 |  "pin-project-lite",
 557 |  "pin-utils",
 558 | ]
 559 | 
 560 | [[package]]
 561 | name = "fxhash"
 562 | version = "0.2.1"
 563 | source = "registry+https://github.com/rust-lang/crates.io-index"
 564 | checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
 565 | dependencies = [
 566 |  "byteorder",
 567 | ]
 568 | 
 569 | [[package]]
 570 | name = "generic-array"
 571 | version = "0.14.6"
 572 | source = "registry+https://github.com/rust-lang/crates.io-index"
 573 | checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
 574 | dependencies = [
 575 |  "typenum",
 576 |  "version_check",
 577 | ]
 578 | 
 579 | [[package]]
 580 | name = "getopts"
 581 | version = "0.2.21"
 582 | source = "registry+https://github.com/rust-lang/crates.io-index"
 583 | checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
 584 | dependencies = [
 585 |  "unicode-width",
 586 | ]
 587 | 
 588 | [[package]]
 589 | name = "getrandom"
 590 | version = "0.1.16"
 591 | source = "registry+https://github.com/rust-lang/crates.io-index"
 592 | checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
 593 | dependencies = [
 594 |  "cfg-if",
 595 |  "libc",
 596 |  "wasi 0.9.0+wasi-snapshot-preview1",
 597 | ]
 598 | 
 599 | [[package]]
 600 | name = "getrandom"
 601 | version = "0.2.8"
 602 | source = "registry+https://github.com/rust-lang/crates.io-index"
 603 | checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
 604 | dependencies = [
 605 |  "cfg-if",
 606 |  "js-sys",
 607 |  "libc",
 608 |  "wasi 0.11.0+wasi-snapshot-preview1",
 609 |  "wasm-bindgen",
 610 | ]
 611 | 
 612 | [[package]]
 613 | name = "gif"
 614 | version = "0.11.4"
 615 | source = "registry+https://github.com/rust-lang/crates.io-index"
 616 | checksum = "3edd93c6756b4dfaf2709eafcc345ba2636565295c198a9cfbf75fa5e3e00b06"
 617 | dependencies = [
 618 |  "color_quant",
 619 |  "weezl",
 620 | ]
 621 | 
 622 | [[package]]
 623 | name = "h2"
 624 | version = "0.3.15"
 625 | source = "registry+https://github.com/rust-lang/crates.io-index"
 626 | checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4"
 627 | dependencies = [
 628 |  "bytes",
 629 |  "fnv",
 630 |  "futures-core",
 631 |  "futures-sink",
 632 |  "futures-util",
 633 |  "http",
 634 |  "indexmap",
 635 |  "slab",
 636 |  "tokio",
 637 |  "tokio-util",
 638 |  "tracing",
 639 | ]
 640 | 
 641 | [[package]]
 642 | name = "half"
 643 | version = "2.1.0"
 644 | source = "registry+https://github.com/rust-lang/crates.io-index"
 645 | checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554"
 646 | dependencies = [
 647 |  "crunchy",
 648 | ]
 649 | 
 650 | [[package]]
 651 | name = "hashbrown"
 652 | version = "0.12.3"
 653 | source = "registry+https://github.com/rust-lang/crates.io-index"
 654 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 655 | dependencies = [
 656 |  "ahash",
 657 | ]
 658 | 
 659 | [[package]]
 660 | name = "hashlink"
 661 | version = "0.8.1"
 662 | source = "registry+https://github.com/rust-lang/crates.io-index"
 663 | checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa"
 664 | dependencies = [
 665 |  "hashbrown",
 666 | ]
 667 | 
 668 | [[package]]
 669 | name = "heck"
 670 | version = "0.4.0"
 671 | source = "registry+https://github.com/rust-lang/crates.io-index"
 672 | checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
 673 | 
 674 | [[package]]
 675 | name = "hermit-abi"
 676 | version = "0.1.19"
 677 | source = "registry+https://github.com/rust-lang/crates.io-index"
 678 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
 679 | dependencies = [
 680 |  "libc",
 681 | ]
 682 | 
 683 | [[package]]
 684 | name = "hmac"
 685 | version = "0.12.1"
 686 | source = "registry+https://github.com/rust-lang/crates.io-index"
 687 | checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
 688 | dependencies = [
 689 |  "digest",
 690 | ]
 691 | 
 692 | [[package]]
 693 | name = "html5ever"
 694 | version = "0.26.0"
 695 | source = "registry+https://github.com/rust-lang/crates.io-index"
 696 | checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
 697 | dependencies = [
 698 |  "log",
 699 |  "mac",
 700 |  "markup5ever",
 701 |  "proc-macro2",
 702 |  "quote",
 703 |  "syn",
 704 | ]
 705 | 
 706 | [[package]]
 707 | name = "http"
 708 | version = "0.2.8"
 709 | source = "registry+https://github.com/rust-lang/crates.io-index"
 710 | checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399"
 711 | dependencies = [
 712 |  "bytes",
 713 |  "fnv",
 714 |  "itoa 1.0.4",
 715 | ]
 716 | 
 717 | [[package]]
 718 | name = "http-body"
 719 | version = "0.4.5"
 720 | source = "registry+https://github.com/rust-lang/crates.io-index"
 721 | checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
 722 | dependencies = [
 723 |  "bytes",
 724 |  "http",
 725 |  "pin-project-lite",
 726 | ]
 727 | 
 728 | [[package]]
 729 | name = "httparse"
 730 | version = "1.8.0"
 731 | source = "registry+https://github.com/rust-lang/crates.io-index"
 732 | checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"
 733 | 
 734 | [[package]]
 735 | name = "httpdate"
 736 | version = "1.0.2"
 737 | source = "registry+https://github.com/rust-lang/crates.io-index"
 738 | checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
 739 | 
 740 | [[package]]
 741 | name = "hyper"
 742 | version = "0.14.23"
 743 | source = "registry+https://github.com/rust-lang/crates.io-index"
 744 | checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c"
 745 | dependencies = [
 746 |  "bytes",
 747 |  "futures-channel",
 748 |  "futures-core",
 749 |  "futures-util",
 750 |  "h2",
 751 |  "http",
 752 |  "http-body",
 753 |  "httparse",
 754 |  "httpdate",
 755 |  "itoa 1.0.4",
 756 |  "pin-project-lite",
 757 |  "socket2",
 758 |  "tokio",
 759 |  "tower-service",
 760 |  "tracing",
 761 |  "want",
 762 | ]
 763 | 
 764 | [[package]]
 765 | name = "hyper-rustls"
 766 | version = "0.23.1"
 767 | source = "registry+https://github.com/rust-lang/crates.io-index"
 768 | checksum = "59df7c4e19c950e6e0e868dcc0a300b09a9b88e9ec55bd879ca819087a77355d"
 769 | dependencies = [
 770 |  "http",
 771 |  "hyper",
 772 |  "rustls",
 773 |  "tokio",
 774 |  "tokio-rustls",
 775 | ]
 776 | 
 777 | [[package]]
 778 | name = "idna"
 779 | version = "0.3.0"
 780 | source = "registry+https://github.com/rust-lang/crates.io-index"
 781 | checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6"
 782 | dependencies = [
 783 |  "unicode-bidi",
 784 |  "unicode-normalization",
 785 | ]
 786 | 
 787 | [[package]]
 788 | name = "image"
 789 | version = "0.24.5"
 790 | source = "registry+https://github.com/rust-lang/crates.io-index"
 791 | checksum = "69b7ea949b537b0fd0af141fff8c77690f2ce96f4f41f042ccb6c69c6c965945"
 792 | dependencies = [
 793 |  "bytemuck",
 794 |  "byteorder",
 795 |  "color_quant",
 796 |  "exr",
 797 |  "gif",
 798 |  "jpeg-decoder",
 799 |  "num-rational",
 800 |  "num-traits",
 801 |  "png",
 802 |  "scoped_threadpool",
 803 |  "tiff",
 804 | ]
 805 | 
 806 | [[package]]
 807 | name = "indexmap"
 808 | version = "1.9.2"
 809 | source = "registry+https://github.com/rust-lang/crates.io-index"
 810 | checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
 811 | dependencies = [
 812 |  "autocfg",
 813 |  "hashbrown",
 814 | ]
 815 | 
 816 | [[package]]
 817 | name = "ipnet"
 818 | version = "2.5.1"
 819 | source = "registry+https://github.com/rust-lang/crates.io-index"
 820 | checksum = "f88c5561171189e69df9d98bcf18fd5f9558300f7ea7b801eb8a0fd748bd8745"
 821 | 
 822 | [[package]]
 823 | name = "itoa"
 824 | version = "0.4.8"
 825 | source = "registry+https://github.com/rust-lang/crates.io-index"
 826 | checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
 827 | 
 828 | [[package]]
 829 | name = "itoa"
 830 | version = "1.0.4"
 831 | source = "registry+https://github.com/rust-lang/crates.io-index"
 832 | checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
 833 | 
 834 | [[package]]
 835 | name = "jobserver"
 836 | version = "0.1.25"
 837 | source = "registry+https://github.com/rust-lang/crates.io-index"
 838 | checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b"
 839 | dependencies = [
 840 |  "libc",
 841 | ]
 842 | 
 843 | [[package]]
 844 | name = "jpeg-decoder"
 845 | version = "0.3.0"
 846 | source = "registry+https://github.com/rust-lang/crates.io-index"
 847 | checksum = "bc0000e42512c92e31c2252315bda326620a4e034105e900c98ec492fa077b3e"
 848 | dependencies = [
 849 |  "rayon",
 850 | ]
 851 | 
 852 | [[package]]
 853 | name = "js-sys"
 854 | version = "0.3.60"
 855 | source = "registry+https://github.com/rust-lang/crates.io-index"
 856 | checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
 857 | dependencies = [
 858 |  "wasm-bindgen",
 859 | ]
 860 | 
 861 | [[package]]
 862 | name = "lazy_static"
 863 | version = "1.4.0"
 864 | source = "registry+https://github.com/rust-lang/crates.io-index"
 865 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 866 | 
 867 | [[package]]
 868 | name = "lebe"
 869 | version = "0.5.2"
 870 | source = "registry+https://github.com/rust-lang/crates.io-index"
 871 | checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
 872 | 
 873 | [[package]]
 874 | name = "libc"
 875 | version = "0.2.137"
 876 | source = "registry+https://github.com/rust-lang/crates.io-index"
 877 | checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89"
 878 | 
 879 | [[package]]
 880 | name = "libsqlite3-sys"
 881 | version = "0.25.2"
 882 | source = "registry+https://github.com/rust-lang/crates.io-index"
 883 | checksum = "29f835d03d717946d28b1d1ed632eb6f0e24a299388ee623d0c23118d3e8a7fa"
 884 | dependencies = [
 885 |  "cc",
 886 |  "pkg-config",
 887 |  "vcpkg",
 888 | ]
 889 | 
 890 | [[package]]
 891 | name = "lock_api"
 892 | version = "0.4.9"
 893 | source = "registry+https://github.com/rust-lang/crates.io-index"
 894 | checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
 895 | dependencies = [
 896 |  "autocfg",
 897 |  "scopeguard",
 898 | ]
 899 | 
 900 | [[package]]
 901 | name = "log"
 902 | version = "0.4.17"
 903 | source = "registry+https://github.com/rust-lang/crates.io-index"
 904 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
 905 | dependencies = [
 906 |  "cfg-if",
 907 | ]
 908 | 
 909 | [[package]]
 910 | name = "mac"
 911 | version = "0.1.1"
 912 | source = "registry+https://github.com/rust-lang/crates.io-index"
 913 | checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
 914 | 
 915 | [[package]]
 916 | name = "markup5ever"
 917 | version = "0.11.0"
 918 | source = "registry+https://github.com/rust-lang/crates.io-index"
 919 | checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
 920 | dependencies = [
 921 |  "log",
 922 |  "phf 0.10.1",
 923 |  "phf_codegen 0.10.0",
 924 |  "string_cache",
 925 |  "string_cache_codegen",
 926 |  "tendril",
 927 | ]
 928 | 
 929 | [[package]]
 930 | name = "matches"
 931 | version = "0.1.9"
 932 | source = "registry+https://github.com/rust-lang/crates.io-index"
 933 | checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f"
 934 | 
 935 | [[package]]
 936 | name = "memchr"
 937 | version = "2.5.0"
 938 | source = "registry+https://github.com/rust-lang/crates.io-index"
 939 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
 940 | 
 941 | [[package]]
 942 | name = "memoffset"
 943 | version = "0.7.1"
 944 | source = "registry+https://github.com/rust-lang/crates.io-index"
 945 | checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
 946 | dependencies = [
 947 |  "autocfg",
 948 | ]
 949 | 
 950 | [[package]]
 951 | name = "mime"
 952 | version = "0.3.16"
 953 | source = "registry+https://github.com/rust-lang/crates.io-index"
 954 | checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
 955 | 
 956 | [[package]]
 957 | name = "miniz_oxide"
 958 | version = "0.6.2"
 959 | source = "registry+https://github.com/rust-lang/crates.io-index"
 960 | checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
 961 | dependencies = [
 962 |  "adler",
 963 | ]
 964 | 
 965 | [[package]]
 966 | name = "mio"
 967 | version = "0.8.5"
 968 | source = "registry+https://github.com/rust-lang/crates.io-index"
 969 | checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de"
 970 | dependencies = [
 971 |  "libc",
 972 |  "log",
 973 |  "wasi 0.11.0+wasi-snapshot-preview1",
 974 |  "windows-sys 0.42.0",
 975 | ]
 976 | 
 977 | [[package]]
 978 | name = "nanorand"
 979 | version = "0.7.0"
 980 | source = "registry+https://github.com/rust-lang/crates.io-index"
 981 | checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3"
 982 | dependencies = [
 983 |  "getrandom 0.2.8",
 984 | ]
 985 | 
 986 | [[package]]
 987 | name = "new_debug_unreachable"
 988 | version = "1.0.4"
 989 | source = "registry+https://github.com/rust-lang/crates.io-index"
 990 | checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
 991 | 
 992 | [[package]]
 993 | name = "nodrop"
 994 | version = "0.1.14"
 995 | source = "registry+https://github.com/rust-lang/crates.io-index"
 996 | checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
 997 | 
 998 | [[package]]
 999 | name = "npy-writer"
1000 | version = "0.1.0"
1001 | source = "registry+https://github.com/rust-lang/crates.io-index"
1002 | checksum = "a08beb23d1e6dfaf8c1e306d6eb24e4f5ad6c9507be26aecadf1e9305b883358"
1003 | dependencies = [
1004 |  "zip",
1005 | ]
1006 | 
1007 | [[package]]
1008 | name = "num-integer"
1009 | version = "0.1.45"
1010 | source = "registry+https://github.com/rust-lang/crates.io-index"
1011 | checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
1012 | dependencies = [
1013 |  "autocfg",
1014 |  "num-traits",
1015 | ]
1016 | 
1017 | [[package]]
1018 | name = "num-rational"
1019 | version = "0.4.1"
1020 | source = "registry+https://github.com/rust-lang/crates.io-index"
1021 | checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
1022 | dependencies = [
1023 |  "autocfg",
1024 |  "num-integer",
1025 |  "num-traits",
1026 | ]
1027 | 
1028 | [[package]]
1029 | name = "num-traits"
1030 | version = "0.2.15"
1031 | source = "registry+https://github.com/rust-lang/crates.io-index"
1032 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
1033 | dependencies = [
1034 |  "autocfg",
1035 | ]
1036 | 
1037 | [[package]]
1038 | name = "num_cpus"
1039 | version = "1.14.0"
1040 | source = "registry+https://github.com/rust-lang/crates.io-index"
1041 | checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5"
1042 | dependencies = [
1043 |  "hermit-abi",
1044 |  "libc",
1045 | ]
1046 | 
1047 | [[package]]
1048 | name = "once_cell"
1049 | version = "1.16.0"
1050 | source = "registry+https://github.com/rust-lang/crates.io-index"
1051 | checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
1052 | 
1053 | [[package]]
1054 | name = "opaque-debug"
1055 | version = "0.3.0"
1056 | source = "registry+https://github.com/rust-lang/crates.io-index"
1057 | checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
1058 | 
1059 | [[package]]
1060 | name = "openssl-probe"
1061 | version = "0.1.5"
1062 | source = "registry+https://github.com/rust-lang/crates.io-index"
1063 | checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
1064 | 
1065 | [[package]]
1066 | name = "os_str_bytes"
1067 | version = "6.4.1"
1068 | source = "registry+https://github.com/rust-lang/crates.io-index"
1069 | checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee"
1070 | 
1071 | [[package]]
1072 | name = "parking_lot"
1073 | version = "0.12.1"
1074 | source = "registry+https://github.com/rust-lang/crates.io-index"
1075 | checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
1076 | dependencies = [
1077 |  "lock_api",
1078 |  "parking_lot_core",
1079 | ]
1080 | 
1081 | [[package]]
1082 | name = "parking_lot_core"
1083 | version = "0.9.4"
1084 | source = "registry+https://github.com/rust-lang/crates.io-index"
1085 | checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0"
1086 | dependencies = [
1087 |  "cfg-if",
1088 |  "libc",
1089 |  "redox_syscall",
1090 |  "smallvec",
1091 |  "windows-sys 0.42.0",
1092 | ]
1093 | 
1094 | [[package]]
1095 | name = "password-hash"
1096 | version = "0.4.2"
1097 | source = "registry+https://github.com/rust-lang/crates.io-index"
1098 | checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700"
1099 | dependencies = [
1100 |  "base64ct",
1101 |  "rand_core 0.6.4",
1102 |  "subtle",
1103 | ]
1104 | 
1105 | [[package]]
1106 | name = "pbkdf2"
1107 | version = "0.11.0"
1108 | source = "registry+https://github.com/rust-lang/crates.io-index"
1109 | checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917"
1110 | dependencies = [
1111 |  "digest",
1112 |  "hmac",
1113 |  "password-hash",
1114 |  "sha2",
1115 | ]
1116 | 
1117 | [[package]]
1118 | name = "percent-encoding"
1119 | version = "2.2.0"
1120 | source = "registry+https://github.com/rust-lang/crates.io-index"
1121 | checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
1122 | 
1123 | [[package]]
1124 | name = "phf"
1125 | version = "0.8.0"
1126 | source = "registry+https://github.com/rust-lang/crates.io-index"
1127 | checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
1128 | dependencies = [
1129 |  "phf_macros",
1130 |  "phf_shared 0.8.0",
1131 |  "proc-macro-hack",
1132 | ]
1133 | 
1134 | [[package]]
1135 | name = "phf"
1136 | version = "0.10.1"
1137 | source = "registry+https://github.com/rust-lang/crates.io-index"
1138 | checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
1139 | dependencies = [
1140 |  "phf_shared 0.10.0",
1141 | ]
1142 | 
1143 | [[package]]
1144 | name = "phf_codegen"
1145 | version = "0.8.0"
1146 | source = "registry+https://github.com/rust-lang/crates.io-index"
1147 | checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
1148 | dependencies = [
1149 |  "phf_generator 0.8.0",
1150 |  "phf_shared 0.8.0",
1151 | ]
1152 | 
1153 | [[package]]
1154 | name = "phf_codegen"
1155 | version = "0.10.0"
1156 | source = "registry+https://github.com/rust-lang/crates.io-index"
1157 | checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
1158 | dependencies = [
1159 |  "phf_generator 0.10.0",
1160 |  "phf_shared 0.10.0",
1161 | ]
1162 | 
1163 | [[package]]
1164 | name = "phf_generator"
1165 | version = "0.8.0"
1166 | source = "registry+https://github.com/rust-lang/crates.io-index"
1167 | checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
1168 | dependencies = [
1169 |  "phf_shared 0.8.0",
1170 |  "rand 0.7.3",
1171 | ]
1172 | 
1173 | [[package]]
1174 | name = "phf_generator"
1175 | version = "0.10.0"
1176 | source = "registry+https://github.com/rust-lang/crates.io-index"
1177 | checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
1178 | dependencies = [
1179 |  "phf_shared 0.10.0",
1180 |  "rand 0.8.5",
1181 | ]
1182 | 
1183 | [[package]]
1184 | name = "phf_macros"
1185 | version = "0.8.0"
1186 | source = "registry+https://github.com/rust-lang/crates.io-index"
1187 | checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c"
1188 | dependencies = [
1189 |  "phf_generator 0.8.0",
1190 |  "phf_shared 0.8.0",
1191 |  "proc-macro-hack",
1192 |  "proc-macro2",
1193 |  "quote",
1194 |  "syn",
1195 | ]
1196 | 
1197 | [[package]]
1198 | name = "phf_shared"
1199 | version = "0.8.0"
1200 | source = "registry+https://github.com/rust-lang/crates.io-index"
1201 | checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
1202 | dependencies = [
1203 |  "siphasher",
1204 | ]
1205 | 
1206 | [[package]]
1207 | name = "phf_shared"
1208 | version = "0.10.0"
1209 | source = "registry+https://github.com/rust-lang/crates.io-index"
1210 | checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
1211 | dependencies = [
1212 |  "siphasher",
1213 | ]
1214 | 
1215 | [[package]]
1216 | name = "pin-project"
1217 | version = "1.0.12"
1218 | source = "registry+https://github.com/rust-lang/crates.io-index"
1219 | checksum = "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc"
1220 | dependencies = [
1221 |  "pin-project-internal",
1222 | ]
1223 | 
1224 | [[package]]
1225 | name = "pin-project-internal"
1226 | version = "1.0.12"
1227 | source = "registry+https://github.com/rust-lang/crates.io-index"
1228 | checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55"
1229 | dependencies = [
1230 |  "proc-macro2",
1231 |  "quote",
1232 |  "syn",
1233 | ]
1234 | 
1235 | [[package]]
1236 | name = "pin-project-lite"
1237 | version = "0.2.9"
1238 | source = "registry+https://github.com/rust-lang/crates.io-index"
1239 | checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
1240 | 
1241 | [[package]]
1242 | name = "pin-utils"
1243 | version = "0.1.0"
1244 | source = "registry+https://github.com/rust-lang/crates.io-index"
1245 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
1246 | 
1247 | [[package]]
1248 | name = "pkg-config"
1249 | version = "0.3.26"
1250 | source = "registry+https://github.com/rust-lang/crates.io-index"
1251 | checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
1252 | 
1253 | [[package]]
1254 | name = "png"
1255 | version = "0.17.7"
1256 | source = "registry+https://github.com/rust-lang/crates.io-index"
1257 | checksum = "5d708eaf860a19b19ce538740d2b4bdeeb8337fa53f7738455e706623ad5c638"
1258 | dependencies = [
1259 |  "bitflags",
1260 |  "crc32fast",
1261 |  "flate2",
1262 |  "miniz_oxide",
1263 | ]
1264 | 
1265 | [[package]]
1266 | name = "ppv-lite86"
1267 | version = "0.2.17"
1268 | source = "registry+https://github.com/rust-lang/crates.io-index"
1269 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
1270 | 
1271 | [[package]]
1272 | name = "precomputed-hash"
1273 | version = "0.1.1"
1274 | source = "registry+https://github.com/rust-lang/crates.io-index"
1275 | checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
1276 | 
1277 | [[package]]
1278 | name = "proc-macro-error"
1279 | version = "1.0.4"
1280 | source = "registry+https://github.com/rust-lang/crates.io-index"
1281 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
1282 | dependencies = [
1283 |  "proc-macro-error-attr",
1284 |  "proc-macro2",
1285 |  "quote",
1286 |  "syn",
1287 |  "version_check",
1288 | ]
1289 | 
1290 | [[package]]
1291 | name = "proc-macro-error-attr"
1292 | version = "1.0.4"
1293 | source = "registry+https://github.com/rust-lang/crates.io-index"
1294 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
1295 | dependencies = [
1296 |  "proc-macro2",
1297 |  "quote",
1298 |  "version_check",
1299 | ]
1300 | 
1301 | [[package]]
1302 | name = "proc-macro-hack"
1303 | version = "0.5.19"
1304 | source = "registry+https://github.com/rust-lang/crates.io-index"
1305 | checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
1306 | 
1307 | [[package]]
1308 | name = "proc-macro2"
1309 | version = "1.0.47"
1310 | source = "registry+https://github.com/rust-lang/crates.io-index"
1311 | checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725"
1312 | dependencies = [
1313 |  "unicode-ident",
1314 | ]
1315 | 
1316 | [[package]]
1317 | name = "quote"
1318 | version = "1.0.21"
1319 | source = "registry+https://github.com/rust-lang/crates.io-index"
1320 | checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
1321 | dependencies = [
1322 |  "proc-macro2",
1323 | ]
1324 | 
1325 | [[package]]
1326 | name = "rand"
1327 | version = "0.7.3"
1328 | source = "registry+https://github.com/rust-lang/crates.io-index"
1329 | checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
1330 | dependencies = [
1331 |  "getrandom 0.1.16",
1332 |  "libc",
1333 |  "rand_chacha 0.2.2",
1334 |  "rand_core 0.5.1",
1335 |  "rand_hc",
1336 |  "rand_pcg",
1337 | ]
1338 | 
1339 | [[package]]
1340 | name = "rand"
1341 | version = "0.8.5"
1342 | source = "registry+https://github.com/rust-lang/crates.io-index"
1343 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
1344 | dependencies = [
1345 |  "libc",
1346 |  "rand_chacha 0.3.1",
1347 |  "rand_core 0.6.4",
1348 | ]
1349 | 
1350 | [[package]]
1351 | name = "rand_chacha"
1352 | version = "0.2.2"
1353 | source = "registry+https://github.com/rust-lang/crates.io-index"
1354 | checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
1355 | dependencies = [
1356 |  "ppv-lite86",
1357 |  "rand_core 0.5.1",
1358 | ]
1359 | 
1360 | [[package]]
1361 | name = "rand_chacha"
1362 | version = "0.3.1"
1363 | source = "registry+https://github.com/rust-lang/crates.io-index"
1364 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
1365 | dependencies = [
1366 |  "ppv-lite86",
1367 |  "rand_core 0.6.4",
1368 | ]
1369 | 
1370 | [[package]]
1371 | name = "rand_core"
1372 | version = "0.5.1"
1373 | source = "registry+https://github.com/rust-lang/crates.io-index"
1374 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
1375 | dependencies = [
1376 |  "getrandom 0.1.16",
1377 | ]
1378 | 
1379 | [[package]]
1380 | name = "rand_core"
1381 | version = "0.6.4"
1382 | source = "registry+https://github.com/rust-lang/crates.io-index"
1383 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
1384 | dependencies = [
1385 |  "getrandom 0.2.8",
1386 | ]
1387 | 
1388 | [[package]]
1389 | name = "rand_hc"
1390 | version = "0.2.0"
1391 | source = "registry+https://github.com/rust-lang/crates.io-index"
1392 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
1393 | dependencies = [
1394 |  "rand_core 0.5.1",
1395 | ]
1396 | 
1397 | [[package]]
1398 | name = "rand_pcg"
1399 | version = "0.2.1"
1400 | source = "registry+https://github.com/rust-lang/crates.io-index"
1401 | checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
1402 | dependencies = [
1403 |  "rand_core 0.5.1",
1404 | ]
1405 | 
1406 | [[package]]
1407 | name = "rayon"
1408 | version = "1.6.0"
1409 | source = "registry+https://github.com/rust-lang/crates.io-index"
1410 | checksum = "1e060280438193c554f654141c9ea9417886713b7acd75974c85b18a69a88e0b"
1411 | dependencies = [
1412 |  "crossbeam-deque",
1413 |  "either",
1414 |  "rayon-core",
1415 | ]
1416 | 
1417 | [[package]]
1418 | name = "rayon-core"
1419 | version = "1.10.1"
1420 | source = "registry+https://github.com/rust-lang/crates.io-index"
1421 | checksum = "cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3"
1422 | dependencies = [
1423 |  "crossbeam-channel",
1424 |  "crossbeam-deque",
1425 |  "crossbeam-utils",
1426 |  "num_cpus",
1427 | ]
1428 | 
1429 | [[package]]
1430 | name = "redox_syscall"
1431 | version = "0.2.16"
1432 | source = "registry+https://github.com/rust-lang/crates.io-index"
1433 | checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
1434 | dependencies = [
1435 |  "bitflags",
1436 | ]
1437 | 
1438 | [[package]]
1439 | name = "reqwest"
1440 | version = "0.11.13"
1441 | source = "registry+https://github.com/rust-lang/crates.io-index"
1442 | checksum = "68cc60575865c7831548863cc02356512e3f1dc2f3f82cb837d7fc4cc8f3c97c"
1443 | dependencies = [
1444 |  "base64",
1445 |  "bytes",
1446 |  "encoding_rs",
1447 |  "futures-core",
1448 |  "futures-util",
1449 |  "h2",
1450 |  "http",
1451 |  "http-body",
1452 |  "hyper",
1453 |  "hyper-rustls",
1454 |  "ipnet",
1455 |  "js-sys",
1456 |  "log",
1457 |  "mime",
1458 |  "once_cell",
1459 |  "percent-encoding",
1460 |  "pin-project-lite",
1461 |  "rustls",
1462 |  "rustls-native-certs",
1463 |  "rustls-pemfile",
1464 |  "serde",
1465 |  "serde_json",
1466 |  "serde_urlencoded",
1467 |  "tokio",
1468 |  "tokio-rustls",
1469 |  "tower-service",
1470 |  "url",
1471 |  "wasm-bindgen",
1472 |  "wasm-bindgen-futures",
1473 |  "web-sys",
1474 |  "winreg",
1475 | ]
1476 | 
1477 | [[package]]
1478 | name = "ring"
1479 | version = "0.16.20"
1480 | source = "registry+https://github.com/rust-lang/crates.io-index"
1481 | checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
1482 | dependencies = [
1483 |  "cc",
1484 |  "libc",
1485 |  "once_cell",
1486 |  "spin 0.5.2",
1487 |  "untrusted",
1488 |  "web-sys",
1489 |  "winapi",
1490 | ]
1491 | 
1492 | [[package]]
1493 | name = "rusqlite"
1494 | version = "0.28.0"
1495 | source = "registry+https://github.com/rust-lang/crates.io-index"
1496 | checksum = "01e213bc3ecb39ac32e81e51ebe31fd888a940515173e3a18a35f8c6e896422a"
1497 | dependencies = [
1498 |  "bitflags",
1499 |  "fallible-iterator",
1500 |  "fallible-streaming-iterator",
1501 |  "hashlink",
1502 |  "libsqlite3-sys",
1503 |  "smallvec",
1504 | ]
1505 | 
1506 | [[package]]
1507 | name = "rustc_version"
1508 | version = "0.4.0"
1509 | source = "registry+https://github.com/rust-lang/crates.io-index"
1510 | checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
1511 | dependencies = [
1512 |  "semver",
1513 | ]
1514 | 
1515 | [[package]]
1516 | name = "rustls"
1517 | version = "0.20.7"
1518 | source = "registry+https://github.com/rust-lang/crates.io-index"
1519 | checksum = "539a2bfe908f471bfa933876bd1eb6a19cf2176d375f82ef7f99530a40e48c2c"
1520 | dependencies = [
1521 |  "log",
1522 |  "ring",
1523 |  "sct",
1524 |  "webpki",
1525 | ]
1526 | 
1527 | [[package]]
1528 | name = "rustls-native-certs"
1529 | version = "0.6.2"
1530 | source = "registry+https://github.com/rust-lang/crates.io-index"
1531 | checksum = "0167bac7a9f490495f3c33013e7722b53cb087ecbe082fb0c6387c96f634ea50"
1532 | dependencies = [
1533 |  "openssl-probe",
1534 |  "rustls-pemfile",
1535 |  "schannel",
1536 |  "security-framework",
1537 | ]
1538 | 
1539 | [[package]]
1540 | name = "rustls-pemfile"
1541 | version = "1.0.1"
1542 | source = "registry+https://github.com/rust-lang/crates.io-index"
1543 | checksum = "0864aeff53f8c05aa08d86e5ef839d3dfcf07aeba2db32f12db0ef716e87bd55"
1544 | dependencies = [
1545 |  "base64",
1546 | ]
1547 | 
1548 | [[package]]
1549 | name = "ryu"
1550 | version = "1.0.11"
1551 | source = "registry+https://github.com/rust-lang/crates.io-index"
1552 | checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
1553 | 
1554 | [[package]]
1555 | name = "schannel"
1556 | version = "0.1.20"
1557 | source = "registry+https://github.com/rust-lang/crates.io-index"
1558 | checksum = "88d6731146462ea25d9244b2ed5fd1d716d25c52e4d54aa4fb0f3c4e9854dbe2"
1559 | dependencies = [
1560 |  "lazy_static",
1561 |  "windows-sys 0.36.1",
1562 | ]
1563 | 
1564 | [[package]]
1565 | name = "scoped_threadpool"
1566 | version = "0.1.9"
1567 | source = "registry+https://github.com/rust-lang/crates.io-index"
1568 | checksum = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8"
1569 | 
1570 | [[package]]
1571 | name = "scopeguard"
1572 | version = "1.1.0"
1573 | source = "registry+https://github.com/rust-lang/crates.io-index"
1574 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
1575 | 
1576 | [[package]]
1577 | name = "scraper"
1578 | version = "0.13.0"
1579 | source = "registry+https://github.com/rust-lang/crates.io-index"
1580 | checksum = "5684396b456f3eb69ceeb34d1b5cb1a2f6acf7ca4452131efa3ba0ee2c2d0a70"
1581 | dependencies = [
1582 |  "cssparser",
1583 |  "ego-tree",
1584 |  "getopts",
1585 |  "html5ever",
1586 |  "matches",
1587 |  "selectors",
1588 |  "smallvec",
1589 |  "tendril",
1590 | ]
1591 | 
1592 | [[package]]
1593 | name = "sct"
1594 | version = "0.7.0"
1595 | source = "registry+https://github.com/rust-lang/crates.io-index"
1596 | checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4"
1597 | dependencies = [
1598 |  "ring",
1599 |  "untrusted",
1600 | ]
1601 | 
1602 | [[package]]
1603 | name = "security-framework"
1604 | version = "2.7.0"
1605 | source = "registry+https://github.com/rust-lang/crates.io-index"
1606 | checksum = "2bc1bb97804af6631813c55739f771071e0f2ed33ee20b68c86ec505d906356c"
1607 | dependencies = [
1608 |  "bitflags",
1609 |  "core-foundation",
1610 |  "core-foundation-sys",
1611 |  "libc",
1612 |  "security-framework-sys",
1613 | ]
1614 | 
1615 | [[package]]
1616 | name = "security-framework-sys"
1617 | version = "2.6.1"
1618 | source = "registry+https://github.com/rust-lang/crates.io-index"
1619 | checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556"
1620 | dependencies = [
1621 |  "core-foundation-sys",
1622 |  "libc",
1623 | ]
1624 | 
1625 | [[package]]
1626 | name = "selectors"
1627 | version = "0.22.0"
1628 | source = "registry+https://github.com/rust-lang/crates.io-index"
1629 | checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe"
1630 | dependencies = [
1631 |  "bitflags",
1632 |  "cssparser",
1633 |  "derive_more",
1634 |  "fxhash",
1635 |  "log",
1636 |  "matches",
1637 |  "phf 0.8.0",
1638 |  "phf_codegen 0.8.0",
1639 |  "precomputed-hash",
1640 |  "servo_arc",
1641 |  "smallvec",
1642 |  "thin-slice",
1643 | ]
1644 | 
1645 | [[package]]
1646 | name = "semver"
1647 | version = "1.0.14"
1648 | source = "registry+https://github.com/rust-lang/crates.io-index"
1649 | checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4"
1650 | 
1651 | [[package]]
1652 | name = "serde"
1653 | version = "1.0.147"
1654 | source = "registry+https://github.com/rust-lang/crates.io-index"
1655 | checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965"
1656 | 
1657 | [[package]]
1658 | name = "serde_json"
1659 | version = "1.0.89"
1660 | source = "registry+https://github.com/rust-lang/crates.io-index"
1661 | checksum = "020ff22c755c2ed3f8cf162dbb41a7268d934702f3ed3631656ea597e08fc3db"
1662 | dependencies = [
1663 |  "itoa 1.0.4",
1664 |  "ryu",
1665 |  "serde",
1666 | ]
1667 | 
1668 | [[package]]
1669 | name = "serde_urlencoded"
1670 | version = "0.7.1"
1671 | source = "registry+https://github.com/rust-lang/crates.io-index"
1672 | checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
1673 | dependencies = [
1674 |  "form_urlencoded",
1675 |  "itoa 1.0.4",
1676 |  "ryu",
1677 |  "serde",
1678 | ]
1679 | 
1680 | [[package]]
1681 | name = "servo_arc"
1682 | version = "0.1.1"
1683 | source = "registry+https://github.com/rust-lang/crates.io-index"
1684 | checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432"
1685 | dependencies = [
1686 |  "nodrop",
1687 |  "stable_deref_trait",
1688 | ]
1689 | 
1690 | [[package]]
1691 | name = "sha1"
1692 | version = "0.10.5"
1693 | source = "registry+https://github.com/rust-lang/crates.io-index"
1694 | checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3"
1695 | dependencies = [
1696 |  "cfg-if",
1697 |  "cpufeatures",
1698 |  "digest",
1699 | ]
1700 | 
1701 | [[package]]
1702 | name = "sha2"
1703 | version = "0.10.6"
1704 | source = "registry+https://github.com/rust-lang/crates.io-index"
1705 | checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0"
1706 | dependencies = [
1707 |  "cfg-if",
1708 |  "cpufeatures",
1709 |  "digest",
1710 | ]
1711 | 
1712 | [[package]]
1713 | name = "signal-hook-registry"
1714 | version = "1.4.0"
1715 | source = "registry+https://github.com/rust-lang/crates.io-index"
1716 | checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
1717 | dependencies = [
1718 |  "libc",
1719 | ]
1720 | 
1721 | [[package]]
1722 | name = "siphasher"
1723 | version = "0.3.10"
1724 | source = "registry+https://github.com/rust-lang/crates.io-index"
1725 | checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
1726 | 
1727 | [[package]]
1728 | name = "slab"
1729 | version = "0.4.7"
1730 | source = "registry+https://github.com/rust-lang/crates.io-index"
1731 | checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef"
1732 | dependencies = [
1733 |  "autocfg",
1734 | ]
1735 | 
1736 | [[package]]
1737 | name = "smallvec"
1738 | version = "1.10.0"
1739 | source = "registry+https://github.com/rust-lang/crates.io-index"
1740 | checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
1741 | 
1742 | [[package]]
1743 | name = "socket2"
1744 | version = "0.4.7"
1745 | source = "registry+https://github.com/rust-lang/crates.io-index"
1746 | checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd"
1747 | dependencies = [
1748 |  "libc",
1749 |  "winapi",
1750 | ]
1751 | 
1752 | [[package]]
1753 | name = "spin"
1754 | version = "0.5.2"
1755 | source = "registry+https://github.com/rust-lang/crates.io-index"
1756 | checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
1757 | 
1758 | [[package]]
1759 | name = "spin"
1760 | version = "0.9.4"
1761 | source = "registry+https://github.com/rust-lang/crates.io-index"
1762 | checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09"
1763 | dependencies = [
1764 |  "lock_api",
1765 | ]
1766 | 
1767 | [[package]]
1768 | name = "stable_deref_trait"
1769 | version = "1.2.0"
1770 | source = "registry+https://github.com/rust-lang/crates.io-index"
1771 | checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
1772 | 
1773 | [[package]]
1774 | name = "string_cache"
1775 | version = "0.8.4"
1776 | source = "registry+https://github.com/rust-lang/crates.io-index"
1777 | checksum = "213494b7a2b503146286049378ce02b482200519accc31872ee8be91fa820a08"
1778 | dependencies = [
1779 |  "new_debug_unreachable",
1780 |  "once_cell",
1781 |  "parking_lot",
1782 |  "phf_shared 0.10.0",
1783 |  "precomputed-hash",
1784 |  "serde",
1785 | ]
1786 | 
1787 | [[package]]
1788 | name = "string_cache_codegen"
1789 | version = "0.5.2"
1790 | source = "registry+https://github.com/rust-lang/crates.io-index"
1791 | checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
1792 | dependencies = [
1793 |  "phf_generator 0.10.0",
1794 |  "phf_shared 0.10.0",
1795 |  "proc-macro2",
1796 |  "quote",
1797 | ]
1798 | 
1799 | [[package]]
1800 | name = "strsim"
1801 | version = "0.10.0"
1802 | source = "registry+https://github.com/rust-lang/crates.io-index"
1803 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
1804 | 
1805 | [[package]]
1806 | name = "subtle"
1807 | version = "2.4.1"
1808 | source = "registry+https://github.com/rust-lang/crates.io-index"
1809 | checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
1810 | 
1811 | [[package]]
1812 | name = "syn"
1813 | version = "1.0.103"
1814 | source = "registry+https://github.com/rust-lang/crates.io-index"
1815 | checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d"
1816 | dependencies = [
1817 |  "proc-macro2",
1818 |  "quote",
1819 |  "unicode-ident",
1820 | ]
1821 | 
1822 | [[package]]
1823 | name = "tendril"
1824 | version = "0.4.3"
1825 | source = "registry+https://github.com/rust-lang/crates.io-index"
1826 | checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
1827 | dependencies = [
1828 |  "futf",
1829 |  "mac",
1830 |  "utf-8",
1831 | ]
1832 | 
1833 | [[package]]
1834 | name = "termcolor"
1835 | version = "1.1.3"
1836 | source = "registry+https://github.com/rust-lang/crates.io-index"
1837 | checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
1838 | dependencies = [
1839 |  "winapi-util",
1840 | ]
1841 | 
1842 | [[package]]
1843 | name = "textwrap"
1844 | version = "0.16.0"
1845 | source = "registry+https://github.com/rust-lang/crates.io-index"
1846 | checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
1847 | 
1848 | [[package]]
1849 | name = "thin-slice"
1850 | version = "0.1.1"
1851 | source = "registry+https://github.com/rust-lang/crates.io-index"
1852 | checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
1853 | 
1854 | [[package]]
1855 | name = "threadpool"
1856 | version = "1.8.1"
1857 | source = "registry+https://github.com/rust-lang/crates.io-index"
1858 | checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa"
1859 | dependencies = [
1860 |  "num_cpus",
1861 | ]
1862 | 
1863 | [[package]]
1864 | name = "tiff"
1865 | version = "0.8.0"
1866 | source = "registry+https://github.com/rust-lang/crates.io-index"
1867 | checksum = "f17def29300a156c19ae30814710d9c63cd50288a49c6fd3a10ccfbe4cf886fd"
1868 | dependencies = [
1869 |  "flate2",
1870 |  "jpeg-decoder",
1871 |  "weezl",
1872 | ]
1873 | 
1874 | [[package]]
1875 | name = "time"
1876 | version = "0.3.17"
1877 | source = "registry+https://github.com/rust-lang/crates.io-index"
1878 | checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376"
1879 | dependencies = [
1880 |  "itoa 1.0.4",
1881 |  "serde",
1882 |  "time-core",
1883 |  "time-macros",
1884 | ]
1885 | 
1886 | [[package]]
1887 | name = "time-core"
1888 | version = "0.1.0"
1889 | source = "registry+https://github.com/rust-lang/crates.io-index"
1890 | checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd"
1891 | 
1892 | [[package]]
1893 | name = "time-macros"
1894 | version = "0.2.6"
1895 | source = "registry+https://github.com/rust-lang/crates.io-index"
1896 | checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2"
1897 | dependencies = [
1898 |  "time-core",
1899 | ]
1900 | 
1901 | [[package]]
1902 | name = "tinyvec"
1903 | version = "1.6.0"
1904 | source = "registry+https://github.com/rust-lang/crates.io-index"
1905 | checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
1906 | dependencies = [
1907 |  "tinyvec_macros",
1908 | ]
1909 | 
1910 | [[package]]
1911 | name = "tinyvec_macros"
1912 | version = "0.1.0"
1913 | source = "registry+https://github.com/rust-lang/crates.io-index"
1914 | checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
1915 | 
1916 | [[package]]
1917 | name = "tokio"
1918 | version = "1.22.0"
1919 | source = "registry+https://github.com/rust-lang/crates.io-index"
1920 | checksum = "d76ce4a75fb488c605c54bf610f221cea8b0dafb53333c1a67e8ee199dcd2ae3"
1921 | dependencies = [
1922 |  "autocfg",
1923 |  "bytes",
1924 |  "libc",
1925 |  "memchr",
1926 |  "mio",
1927 |  "num_cpus",
1928 |  "parking_lot",
1929 |  "pin-project-lite",
1930 |  "signal-hook-registry",
1931 |  "socket2",
1932 |  "tokio-macros",
1933 |  "winapi",
1934 | ]
1935 | 
1936 | [[package]]
1937 | name = "tokio-macros"
1938 | version = "1.8.0"
1939 | source = "registry+https://github.com/rust-lang/crates.io-index"
1940 | checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484"
1941 | dependencies = [
1942 |  "proc-macro2",
1943 |  "quote",
1944 |  "syn",
1945 | ]
1946 | 
1947 | [[package]]
1948 | name = "tokio-rustls"
1949 | version = "0.23.4"
1950 | source = "registry+https://github.com/rust-lang/crates.io-index"
1951 | checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59"
1952 | dependencies = [
1953 |  "rustls",
1954 |  "tokio",
1955 |  "webpki",
1956 | ]
1957 | 
1958 | [[package]]
1959 | name = "tokio-util"
1960 | version = "0.7.4"
1961 | source = "registry+https://github.com/rust-lang/crates.io-index"
1962 | checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740"
1963 | dependencies = [
1964 |  "bytes",
1965 |  "futures-core",
1966 |  "futures-sink",
1967 |  "pin-project-lite",
1968 |  "tokio",
1969 |  "tracing",
1970 | ]
1971 | 
1972 | [[package]]
1973 | name = "tower-service"
1974 | version = "0.3.2"
1975 | source = "registry+https://github.com/rust-lang/crates.io-index"
1976 | checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
1977 | 
1978 | [[package]]
1979 | name = "tracing"
1980 | version = "0.1.37"
1981 | source = "registry+https://github.com/rust-lang/crates.io-index"
1982 | checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
1983 | dependencies = [
1984 |  "cfg-if",
1985 |  "pin-project-lite",
1986 |  "tracing-core",
1987 | ]
1988 | 
1989 | [[package]]
1990 | name = "tracing-core"
1991 | version = "0.1.30"
1992 | source = "registry+https://github.com/rust-lang/crates.io-index"
1993 | checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a"
1994 | dependencies = [
1995 |  "once_cell",
1996 | ]
1997 | 
1998 | [[package]]
1999 | name = "try-lock"
2000 | version = "0.2.3"
2001 | source = "registry+https://github.com/rust-lang/crates.io-index"
2002 | checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
2003 | 
2004 | [[package]]
2005 | name = "typenum"
2006 | version = "1.15.0"
2007 | source = "registry+https://github.com/rust-lang/crates.io-index"
2008 | checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
2009 | 
2010 | [[package]]
2011 | name = "unicode-bidi"
2012 | version = "0.3.8"
2013 | source = "registry+https://github.com/rust-lang/crates.io-index"
2014 | checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"
2015 | 
2016 | [[package]]
2017 | name = "unicode-ident"
2018 | version = "1.0.5"
2019 | source = "registry+https://github.com/rust-lang/crates.io-index"
2020 | checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3"
2021 | 
2022 | [[package]]
2023 | name = "unicode-normalization"
2024 | version = "0.1.22"
2025 | source = "registry+https://github.com/rust-lang/crates.io-index"
2026 | checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921"
2027 | dependencies = [
2028 |  "tinyvec",
2029 | ]
2030 | 
2031 | [[package]]
2032 | name = "unicode-width"
2033 | version = "0.1.10"
2034 | source = "registry+https://github.com/rust-lang/crates.io-index"
2035 | checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
2036 | 
2037 | [[package]]
2038 | name = "untrusted"
2039 | version = "0.7.1"
2040 | source = "registry+https://github.com/rust-lang/crates.io-index"
2041 | checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
2042 | 
2043 | [[package]]
2044 | name = "url"
2045 | version = "2.3.1"
2046 | source = "registry+https://github.com/rust-lang/crates.io-index"
2047 | checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643"
2048 | dependencies = [
2049 |  "form_urlencoded",
2050 |  "idna",
2051 |  "percent-encoding",
2052 | ]
2053 | 
2054 | [[package]]
2055 | name = "utf-8"
2056 | version = "0.7.6"
2057 | source = "registry+https://github.com/rust-lang/crates.io-index"
2058 | checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
2059 | 
2060 | [[package]]
2061 | name = "vcpkg"
2062 | version = "0.2.15"
2063 | source = "registry+https://github.com/rust-lang/crates.io-index"
2064 | checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
2065 | 
2066 | [[package]]
2067 | name = "version_check"
2068 | version = "0.9.4"
2069 | source = "registry+https://github.com/rust-lang/crates.io-index"
2070 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
2071 | 
2072 | [[package]]
2073 | name = "want"
2074 | version = "0.3.0"
2075 | source = "registry+https://github.com/rust-lang/crates.io-index"
2076 | checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0"
2077 | dependencies = [
2078 |  "log",
2079 |  "try-lock",
2080 | ]
2081 | 
2082 | [[package]]
2083 | name = "wasi"
2084 | version = "0.9.0+wasi-snapshot-preview1"
2085 | source = "registry+https://github.com/rust-lang/crates.io-index"
2086 | checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
2087 | 
2088 | [[package]]
2089 | name = "wasi"
2090 | version = "0.11.0+wasi-snapshot-preview1"
2091 | source = "registry+https://github.com/rust-lang/crates.io-index"
2092 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
2093 | 
2094 | [[package]]
2095 | name = "wasm-bindgen"
2096 | version = "0.2.83"
2097 | source = "registry+https://github.com/rust-lang/crates.io-index"
2098 | checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
2099 | dependencies = [
2100 |  "cfg-if",
2101 |  "wasm-bindgen-macro",
2102 | ]
2103 | 
2104 | [[package]]
2105 | name = "wasm-bindgen-backend"
2106 | version = "0.2.83"
2107 | source = "registry+https://github.com/rust-lang/crates.io-index"
2108 | checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
2109 | dependencies = [
2110 |  "bumpalo",
2111 |  "log",
2112 |  "once_cell",
2113 |  "proc-macro2",
2114 |  "quote",
2115 |  "syn",
2116 |  "wasm-bindgen-shared",
2117 | ]
2118 | 
2119 | [[package]]
2120 | name = "wasm-bindgen-futures"
2121 | version = "0.4.33"
2122 | source = "registry+https://github.com/rust-lang/crates.io-index"
2123 | checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d"
2124 | dependencies = [
2125 |  "cfg-if",
2126 |  "js-sys",
2127 |  "wasm-bindgen",
2128 |  "web-sys",
2129 | ]
2130 | 
2131 | [[package]]
2132 | name = "wasm-bindgen-macro"
2133 | version = "0.2.83"
2134 | source = "registry+https://github.com/rust-lang/crates.io-index"
2135 | checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
2136 | dependencies = [
2137 |  "quote",
2138 |  "wasm-bindgen-macro-support",
2139 | ]
2140 | 
2141 | [[package]]
2142 | name = "wasm-bindgen-macro-support"
2143 | version = "0.2.83"
2144 | source = "registry+https://github.com/rust-lang/crates.io-index"
2145 | checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
2146 | dependencies = [
2147 |  "proc-macro2",
2148 |  "quote",
2149 |  "syn",
2150 |  "wasm-bindgen-backend",
2151 |  "wasm-bindgen-shared",
2152 | ]
2153 | 
2154 | [[package]]
2155 | name = "wasm-bindgen-shared"
2156 | version = "0.2.83"
2157 | source = "registry+https://github.com/rust-lang/crates.io-index"
2158 | checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
2159 | 
2160 | [[package]]
2161 | name = "web-sys"
2162 | version = "0.3.60"
2163 | source = "registry+https://github.com/rust-lang/crates.io-index"
2164 | checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f"
2165 | dependencies = [
2166 |  "js-sys",
2167 |  "wasm-bindgen",
2168 | ]
2169 | 
2170 | [[package]]
2171 | name = "webpki"
2172 | version = "0.22.0"
2173 | source = "registry+https://github.com/rust-lang/crates.io-index"
2174 | checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd"
2175 | dependencies = [
2176 |  "ring",
2177 |  "untrusted",
2178 | ]
2179 | 
2180 | [[package]]
2181 | name = "weezl"
2182 | version = "0.1.7"
2183 | source = "registry+https://github.com/rust-lang/crates.io-index"
2184 | checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb"
2185 | 
2186 | [[package]]
2187 | name = "winapi"
2188 | version = "0.3.9"
2189 | source = "registry+https://github.com/rust-lang/crates.io-index"
2190 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
2191 | dependencies = [
2192 |  "winapi-i686-pc-windows-gnu",
2193 |  "winapi-x86_64-pc-windows-gnu",
2194 | ]
2195 | 
2196 | [[package]]
2197 | name = "winapi-i686-pc-windows-gnu"
2198 | version = "0.4.0"
2199 | source = "registry+https://github.com/rust-lang/crates.io-index"
2200 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
2201 | 
2202 | [[package]]
2203 | name = "winapi-util"
2204 | version = "0.1.5"
2205 | source = "registry+https://github.com/rust-lang/crates.io-index"
2206 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
2207 | dependencies = [
2208 |  "winapi",
2209 | ]
2210 | 
2211 | [[package]]
2212 | name = "winapi-x86_64-pc-windows-gnu"
2213 | version = "0.4.0"
2214 | source = "registry+https://github.com/rust-lang/crates.io-index"
2215 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
2216 | 
2217 | [[package]]
2218 | name = "windows-sys"
2219 | version = "0.36.1"
2220 | source = "registry+https://github.com/rust-lang/crates.io-index"
2221 | checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
2222 | dependencies = [
2223 |  "windows_aarch64_msvc 0.36.1",
2224 |  "windows_i686_gnu 0.36.1",
2225 |  "windows_i686_msvc 0.36.1",
2226 |  "windows_x86_64_gnu 0.36.1",
2227 |  "windows_x86_64_msvc 0.36.1",
2228 | ]
2229 | 
2230 | [[package]]
2231 | name = "windows-sys"
2232 | version = "0.42.0"
2233 | source = "registry+https://github.com/rust-lang/crates.io-index"
2234 | checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
2235 | dependencies = [
2236 |  "windows_aarch64_gnullvm",
2237 |  "windows_aarch64_msvc 0.42.0",
2238 |  "windows_i686_gnu 0.42.0",
2239 |  "windows_i686_msvc 0.42.0",
2240 |  "windows_x86_64_gnu 0.42.0",
2241 |  "windows_x86_64_gnullvm",
2242 |  "windows_x86_64_msvc 0.42.0",
2243 | ]
2244 | 
2245 | [[package]]
2246 | name = "windows_aarch64_gnullvm"
2247 | version = "0.42.0"
2248 | source = "registry+https://github.com/rust-lang/crates.io-index"
2249 | checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e"
2250 | 
2251 | [[package]]
2252 | name = "windows_aarch64_msvc"
2253 | version = "0.36.1"
2254 | source = "registry+https://github.com/rust-lang/crates.io-index"
2255 | checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
2256 | 
2257 | [[package]]
2258 | name = "windows_aarch64_msvc"
2259 | version = "0.42.0"
2260 | source = "registry+https://github.com/rust-lang/crates.io-index"
2261 | checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4"
2262 | 
2263 | [[package]]
2264 | name = "windows_i686_gnu"
2265 | version = "0.36.1"
2266 | source = "registry+https://github.com/rust-lang/crates.io-index"
2267 | checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
2268 | 
2269 | [[package]]
2270 | name = "windows_i686_gnu"
2271 | version = "0.42.0"
2272 | source = "registry+https://github.com/rust-lang/crates.io-index"
2273 | checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7"
2274 | 
2275 | [[package]]
2276 | name = "windows_i686_msvc"
2277 | version = "0.36.1"
2278 | source = "registry+https://github.com/rust-lang/crates.io-index"
2279 | checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
2280 | 
2281 | [[package]]
2282 | name = "windows_i686_msvc"
2283 | version = "0.42.0"
2284 | source = "registry+https://github.com/rust-lang/crates.io-index"
2285 | checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246"
2286 | 
2287 | [[package]]
2288 | name = "windows_x86_64_gnu"
2289 | version = "0.36.1"
2290 | source = "registry+https://github.com/rust-lang/crates.io-index"
2291 | checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
2292 | 
2293 | [[package]]
2294 | name = "windows_x86_64_gnu"
2295 | version = "0.42.0"
2296 | source = "registry+https://github.com/rust-lang/crates.io-index"
2297 | checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed"
2298 | 
2299 | [[package]]
2300 | name = "windows_x86_64_gnullvm"
2301 | version = "0.42.0"
2302 | source = "registry+https://github.com/rust-lang/crates.io-index"
2303 | checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028"
2304 | 
2305 | [[package]]
2306 | name = "windows_x86_64_msvc"
2307 | version = "0.36.1"
2308 | source = "registry+https://github.com/rust-lang/crates.io-index"
2309 | checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
2310 | 
2311 | [[package]]
2312 | name = "windows_x86_64_msvc"
2313 | version = "0.42.0"
2314 | source = "registry+https://github.com/rust-lang/crates.io-index"
2315 | checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5"
2316 | 
2317 | [[package]]
2318 | name = "winreg"
2319 | version = "0.10.1"
2320 | source = "registry+https://github.com/rust-lang/crates.io-index"
2321 | checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
2322 | dependencies = [
2323 |  "winapi",
2324 | ]
2325 | 
2326 | [[package]]
2327 | name = "zip"
2328 | version = "0.6.3"
2329 | source = "registry+https://github.com/rust-lang/crates.io-index"
2330 | checksum = "537ce7411d25e54e8ae21a7ce0b15840e7bfcff15b51d697ec3266cc76bdf080"
2331 | dependencies = [
2332 |  "aes",
2333 |  "byteorder",
2334 |  "bzip2",
2335 |  "constant_time_eq",
2336 |  "crc32fast",
2337 |  "crossbeam-utils",
2338 |  "flate2",
2339 |  "hmac",
2340 |  "pbkdf2",
2341 |  "sha1",
2342 |  "time",
2343 |  "zstd",
2344 | ]
2345 | 
2346 | [[package]]
2347 | name = "zstd"
2348 | version = "0.11.2+zstd.1.5.2"
2349 | source = "registry+https://github.com/rust-lang/crates.io-index"
2350 | checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
2351 | dependencies = [
2352 |  "zstd-safe",
2353 | ]
2354 | 
2355 | [[package]]
2356 | name = "zstd-safe"
2357 | version = "5.0.2+zstd.1.5.2"
2358 | source = "registry+https://github.com/rust-lang/crates.io-index"
2359 | checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
2360 | dependencies = [
2361 |  "libc",
2362 |  "zstd-sys",
2363 | ]
2364 | 
2365 | [[package]]
2366 | name = "zstd-sys"
2367 | version = "2.0.4+zstd.1.5.2"
2368 | source = "registry+https://github.com/rust-lang/crates.io-index"
2369 | checksum = "4fa202f2ef00074143e219d15b62ffc317d17cc33909feac471c044087cad7b0"
2370 | dependencies = [
2371 |  "cc",
2372 |  "libc",
2373 | ]
2374 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "car-data"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | 
 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 7 | 
 8 | [dependencies]
 9 | anyhow = { version="1.0" }
10 | async-channel = { version="1.7.1" }
11 | clap = { version="3.2.20", features=["derive"] }
12 | image = { version="0.24.5" }
13 | npy-writer = { version="0.1.0", features=["zip"] }
14 | rand = { version="0.8.5", features=["std_rng"] }
15 | reqwest = { version="0.11.11", default-features = false, features = ["rustls-tls-native-roots"] }
16 | rusqlite = { version="0.28.0", features = ["bundled"] }
17 | scraper = { version="0.13.0" }
18 | serde_json = { version="1.0" }
19 | sha2 = { version="0.10.6" }
20 | tokio = { version="1.20.1", features=["full"] }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2022 Alexander Quinn Nichol
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # car-data
  2 | 
  3 | This is a hobby project to predict various attributes of cars from photos. 
  4 | 
  5 | Demo and blog post:
  6 | 
  7 |  * [Blog post](https://blog.aqnichol.com/2022/12/31/large-scale-vehicle-classification/)
  8 |  * [Gradio demo](https://huggingface.co/spaces/unixpickle/car-data)
  9 | 
 10 | # Usage
 11 | 
 12 | First, you should:
 13 | 
 14 |  * Compile the scraper with `cargo build --release`.
 15 |  * Install the Python package with `pip install -e .`.
 16 | 
 17 | ## Scraping data
 18 | 
 19 | To run the scraper, run:
 20 | 
 21 | ```
 22 | ./target/release/car-data scrape-kbb /path/to/db.db /path/to/images
 23 | ```
 24 | 
 25 | In the above command, `/path/to/db.db` is the path where the metadata will be saved, stored as a sqlite3 database. The `/path/to/images` directory will be used to dump raw images.
 26 | 
 27 | To deduplicate and downsample the downloaded images, run:
 28 | 
 29 | ```
 30 | ./target/release/car-data dedup-images \
 31 |     /path/to/db.db \
 32 |     /path/to/images \
 33 |     /path/to/dedup
 34 | ```
 35 | 
 36 | From here on out, we will use the `/path/to/dedup` directory instead of `/path/to/images`, since the former directory contains all of the images we will actually use for training.
 37 | 
 38 | To export the resulting dataset as a `.npz` file to load in Python, run:
 39 | 
 40 | ```
 41 | ./target/release/car-data export-data \
 42 |     /path/to/db.db \
 43 |     /path/to/index.npz
 44 | ```
 45 | 
 46 | ## Filtering the dataset
 47 | 
 48 | To filter the dataset, first compute feature vectors for the entire dataset. The features are exported to a directory as a set of sharded `.npz` files. You can do this with the following command:
 49 | 
 50 | ```
 51 | python3 -m car_data.scripts.clip_features \
 52 |     /path/to/dedup \
 53 |     /path/to/features
 54 | ```
 55 | 
 56 | Once you have labeled some images for the filter, you can train it quickly like so:
 57 | 
 58 | ```
 59 | python3 -m car_data.scripts.train_filter \
 60 |     --positive_dirs /path/to/positive_dir \
 61 |     --negative_dirs /path/to/negative_dir \
 62 |     --model_out /path/to/filter.pt
 63 | ```
 64 | 
 65 | To filter the dataset `.npz` file using the filter, you can use this command:
 66 | 
 67 | ```
 68 | python3 -m car_data.scripts.filter_index \
 69 |     --index /path/to/index.npz \
 70 |     --feature_dir /path/to/features \
 71 |     --classifier_path /path/to/filter.pt \
 72 |     --output_path /path/to/index_filtered.npz
 73 | ```
 74 | 
 75 | ## Training a model
 76 | 
 77 | To train a MobileNetV2 with auxiliary losses:
 78 | 
 79 | ```
 80 | python3 -m car_data.scripts.train \
 81 |     --index_path /path/to/index_filtered.npz \
 82 |     --image_dir /path/to/dedup \
 83 |     --save_dir /path/to/mobilenetv2_save_dir \
 84 |     --lr 1e-4 \
 85 |     --batch_size 64 \
 86 |     --eval_interval 1 \
 87 |     --use_data_aug \
 88 |     --model mobilenetv2
 89 | ```
 90 | 
 91 | To finetune CLIP with auxiliary losses:
 92 | 
 93 | ```
 94 | python3 -m car_data.scripts.train \
 95 |     --index_path /path/to/index_filtered.npz \
 96 |     --image_dir /path/to/dedup \
 97 |     --save_dir /path/to/clip_save_dir \
 98 |     --lr 1e-5 \
 99 |     --batch_size 64 \
100 |     --microbatch 16 \
101 |     --eval_interval 1 \
102 |     --use_data_aug \
103 |     --model clip
104 | ```
105 | 


--------------------------------------------------------------------------------
/car_data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/unixpickle/car-data/5e496b4190767fa24a88d135613eb620d7929499/car_data/__init__.py


--------------------------------------------------------------------------------
/car_data/constants.py:
--------------------------------------------------------------------------------
  1 | PRICE_CUTOFFS = [
  2 |     10_000.0,
  3 |     15_000.0,
  4 |     20_000.0,
  5 |     25_000.0,
  6 |     30_000.0,
  7 |     35_000.0,
  8 |     40_000.0,
  9 |     50_000.0,
 10 |     60_000.0,
 11 | ]
 12 | 
 13 | NUM_PRICE_BINS = len(PRICE_CUTOFFS) + 1
 14 | 
 15 | PRICE_BIN_LABELS = [
 16 |     "$0-$10,000",
 17 |     "$10,000-$15,000",
 18 |     "$15,000-$20,000",
 19 |     "$20,000-$25,000",
 20 |     "$25,000-$30,000",
 21 |     "$30,000-$35,000",
 22 |     "$35,000-$40,000",
 23 |     "$40,000-$50,000",
 24 |     "$50,000-$60,000",
 25 |     "$60,000+",
 26 | ]
 27 | 
 28 | MEDIAN_PRICE_SCALE = 30000.0
 29 | 
 30 | YEARS = list(range(1983, 2024))
 31 | NUM_YEARS = len(YEARS) + 1
 32 | 
 33 | MAKES_MODELS = (
 34 |     ("Ford", "F150"),
 35 |     ("Chevrolet", "Silverado 1500"),
 36 |     ("RAM", "1500"),
 37 |     ("Jeep", "Wrangler"),
 38 |     ("Ford", "Explorer"),
 39 |     ("Nissan", "Rogue"),
 40 |     ("Jeep", "Grand Cherokee"),
 41 |     ("Chevrolet", "Equinox"),
 42 |     ("GMC", "Sierra 1500"),
 43 |     ("Ford", "Escape"),
 44 |     ("Honda", "Accord"),
 45 |     ("Toyota", "Camry"),
 46 |     ("Toyota", "RAV4"),
 47 |     ("Honda", "Civic"),
 48 |     ("Honda", "CR-V"),
 49 |     ("MAZDA", "CX-5"),
 50 |     ("Toyota", "Tacoma"),
 51 |     ("Ford", "F250"),
 52 |     ("Toyota", "Corolla"),
 53 |     ("Toyota", "Highlander"),
 54 |     ("Jeep", "Cherokee"),
 55 |     ("Nissan", "Altima"),
 56 |     ("Subaru", "Outback"),
 57 |     ("RAM", "2500"),
 58 |     ("Honda", "Pilot"),
 59 |     ("Chevrolet", "Malibu"),
 60 |     ("Hyundai", "Tucson"),
 61 |     ("Ford", "Mustang"),
 62 |     ("Chevrolet", "Traverse"),
 63 |     ("Hyundai", "Santa Fe"),
 64 |     ("Hyundai", "Elantra"),
 65 |     ("Jeep", "Compass"),
 66 |     ("Chevrolet", "Silverado 2500"),
 67 |     ("Ford", "Edge"),
 68 |     ("Nissan", "Frontier"),
 69 |     ("Chevrolet", "Tahoe"),
 70 |     ("GMC", "Terrain"),
 71 |     ("Toyota", "Tundra"),
 72 |     ("GMC", "Acadia"),
 73 |     ("Volkswagen", "Tiguan"),
 74 |     ("Hyundai", "Sonata"),
 75 |     ("Subaru", "Forester"),
 76 |     ("Jeep", "Gladiator"),
 77 |     ("Chevrolet", "Colorado"),
 78 |     ("Nissan", "Pathfinder"),
 79 |     ("Toyota", "4Runner"),
 80 |     ("Ford", "Fusion"),
 81 |     ("Nissan", "Sentra"),
 82 |     ("Kia", "Sorento"),
 83 |     ("GMC", "Sierra 2500"),
 84 |     ("Ford", "F350"),
 85 |     ("Subaru", "Crosstrek"),
 86 |     ("Kia", "Sportage"),
 87 |     ("Honda", "HR-V"),
 88 |     ("Kia", "Forte"),
 89 |     ("Honda", "Odyssey"),
 90 |     ("Ford", "Bronco Sport"),
 91 |     ("Dodge", "Challenger"),
 92 |     ("Dodge", "Charger"),
 93 |     ("Buick", "Enclave"),
 94 |     ("Chevrolet", "Blazer"),
 95 |     ("Acura", "MDX"),
 96 |     ("Audi", "Q5"),
 97 |     ("Volkswagen", "Atlas"),
 98 |     ("Buick", "Envision"),
 99 |     ("Kia", "Soul"),
100 |     ("Chrysler", "Pacifica"),
101 |     ("Hyundai", "Kona"),
102 |     ("Chevrolet", "Camaro"),
103 |     ("Jeep", "Grand Cherokee L"),
104 |     ("MAZDA", "CX-9"),
105 |     ("Dodge", "Durango"),
106 |     ("Nissan", "Murano"),
107 |     ("Chevrolet", "Trax"),
108 |     ("GMC", "Yukon"),
109 |     ("Volkswagen", "Jetta"),
110 |     ("BMW", "X5"),
111 |     ("Chevrolet", "Suburban"),
112 |     ("Ford", "Expedition"),
113 |     ("Nissan", "Rogue Sport"),
114 |     ("RAM", "3500"),
115 |     ("Ford", "Bronco"),
116 |     ("Honda", "Ridgeline"),
117 |     ("Chevrolet", "Corvette"),
118 |     ("Cadillac", "XT5"),
119 |     ("Toyota", "Sienna"),
120 |     ("Mitsubishi", "Outlander"),
121 |     ("Kia", "Telluride"),
122 |     ("Buick", "Encore"),
123 |     ("Mercedes-Benz", "C 300"),
124 |     ("BMW", "X3"),
125 |     ("Subaru", "Ascent"),
126 |     ("Honda", "Passport"),
127 |     ("MAZDA", "MAZDA3"),
128 |     ("Buick", "Encore GX"),
129 |     ("Volvo", "XC90"),
130 |     ("Mercedes-Benz", "GLC 300"),
131 |     ("Ford", "Ranger"),
132 |     ("Jeep", "Renegade"),
133 |     ("Lexus", "RX 350"),
134 |     ("Volvo", "XC60"),
135 |     ("Kia", "Optima"),
136 |     ("Chevrolet", "Silverado 3500"),
137 |     ("Dodge", "Grand Caravan"),
138 |     ("INFINITI", "QX60"),
139 |     ("Nissan", "Titan"),
140 |     ("Subaru", "WRX"),
141 |     ("GMC", "Canyon"),
142 |     ("Tesla", "Model 3"),
143 |     ("Chevrolet", "Cruze"),
144 |     ("Lexus", "ES 350"),
145 |     ("Nissan", "Armada"),
146 |     ("GMC", "Yukon XL"),
147 |     ("GMC", "Sierra 3500"),
148 |     ("Hyundai", "Palisade"),
149 |     ("Ford", "Focus"),
150 |     ("Kia", "Niro"),
151 |     ("Toyota", "Prius"),
152 |     ("INFINITI", "QX80"),
153 |     ("Porsche", "Macan"),
154 |     ("Chevrolet", "TrailBlazer"),
155 |     ("Cadillac", "XT4"),
156 |     ("MAZDA", "CX-50"),
157 |     ("Lincoln", "Corsair"),
158 |     ("Audi", "Q7"),
159 |     ("Ford", "Expedition Max"),
160 |     ("Cadillac", "Escalade"),
161 |     ("MINI", "Cooper"),
162 |     ("Acura", "RDX"),
163 |     ("Subaru", "Impreza"),
164 |     ("Audi", "A4"),
165 |     ("Nissan", "Kicks"),
166 |     ("Nissan", "Maxima"),
167 |     ("Porsche", "Cayenne"),
168 |     ("Dodge", "Journey"),
169 |     ("Porsche", "911"),
170 |     ("RAM", "ProMaster"),
171 |     ("Mercedes-Benz", "GLE 350"),
172 |     ("Ford", "EcoSport"),
173 |     ("Volkswagen", "Taos"),
174 |     ("MAZDA", "CX-30"),
175 |     ("Lincoln", "Nautilus"),
176 |     ("Land Rover", "Range Rover"),
177 |     ("Mitsubishi", "Outlander Sport"),
178 |     ("Lexus", "GX 460"),
179 |     ("Volkswagen", "Passat"),
180 |     ("Land Rover", "Range Rover Sport"),
181 |     ("Nissan", "Versa"),
182 |     ("Volvo", "XC40"),
183 |     ("Mercedes-Benz", "E 350"),
184 |     ("Chrysler", "300"),
185 |     ("Chevrolet", "Impala"),
186 |     ("Subaru", "Legacy"),
187 |     ("Acura", "TLX"),
188 |     ("Mercedes-Benz", "Sprinter"),
189 |     ("Cadillac", "CT5"),
190 |     ("Mercedes-Benz", "GLA 250"),
191 |     ("Hyundai", "Santa Cruz"),
192 |     ("Tesla", "Model S"),
193 |     ("Mercedes-Benz", "GLB 250"),
194 |     ("INFINITI", "Q50"),
195 |     ("Kia", "K5"),
196 |     ("Cadillac", "XT6"),
197 |     ("Audi", "Q3"),
198 |     ("INFINITI", "QX50"),
199 |     ("Ford", "Transit 250"),
200 |     ("Ford", "Mustang Mach-E"),
201 |     ("Kia", "Seltos"),
202 |     ("MAZDA", "MX-5 Miata"),
203 |     ("Audi", "A5"),
204 |     ("Lincoln", "Aviator"),
205 |     ("BMW", "X1"),
206 |     ("Kia", "Rio"),
207 |     ("Chevrolet", "Express 2500"),
208 |     ("Ford", "Transit 350"),
209 |     ("Toyota", "Venza"),
210 |     ("Mercedes-Benz", "S 500"),
211 |     ("Cadillac", "Escalade ESV"),
212 |     ("Jeep", "Wagoneer"),
213 |     ("Chevrolet", "Bolt"),
214 |     ("MINI", "Cooper Countryman"),
215 |     ("Toyota", "Sequoia"),
216 |     ("Mercedes-Benz", "CLA 250"),
217 |     ("BMW", "X7"),
218 |     ("Cadillac", "CTS"),
219 |     ("Hyundai", "Venue"),
220 |     ("Volkswagen", "ID.4"),
221 |     ("Toyota", "Avalon"),
222 |     ("Jeep", "Patriot"),
223 |     ("Tesla", "Model Y"),
224 |     ("Nissan", "Leaf"),
225 |     ("Audi", "A3"),
226 |     ("Acura", "Integra"),
227 |     ("Ford", "Transit Connect"),
228 |     ("Lexus", "NX 300"),
229 |     ("Audi", "A6"),
230 |     ("Mercedes-Benz", "EQS 450+"),
231 |     ("Chevrolet", "Spark"),
232 |     ("Jaguar", "F-PACE"),
233 |     ("Mercedes-Benz", "S 580"),
234 |     ("Chevrolet", "Sonic"),
235 |     ("Lincoln", "Navigator"),
236 |     ("Toyota", "C-HR"),
237 |     ("Ford", "Fiesta"),
238 |     ("RAM", "ProMaster City"),
239 |     ("Volvo", "S60"),
240 |     ("BMW", "330i xDrive"),
241 |     ("Ford", "Flex"),
242 |     ("MAZDA", "MAZDA6"),
243 |     ("Toyota", "Corolla Cross"),
244 |     ("Lincoln", "MKZ"),
245 |     ("Chevrolet", "Express 3500"),
246 |     ("Hyundai", "Accent"),
247 |     ("Land Rover", "Discovery Sport"),
248 |     ("Tesla", "Model X"),
249 |     ("Honda", "Fit"),
250 |     ("Alfa Romeo", "Stelvio"),
251 |     ("Chrysler", "200"),
252 |     ("Volkswagen", "Beetle"),
253 |     ("Cadillac", "CT4"),
254 |     ("Ford", "Maverick"),
255 |     ("Volkswagen", "GTI"),
256 |     ("Lincoln", "MKC"),
257 |     ("Porsche", "Panamera"),
258 |     ("Ford", "F450"),
259 |     ("Lexus", "NX 350"),
260 |     ("Chrysler", "Town & Country"),
261 |     ("Kia", "Stinger"),
262 |     ("Land Rover", "Range Rover Velar"),
263 |     ("Audi", "S5"),
264 |     ("BMW", "330i"),
265 |     ("Volkswagen", "Golf"),
266 |     ("Mercedes-Benz", "GLS 450"),
267 |     ("Lexus", "IS 350"),
268 |     ("Land Rover", "Range Rover Evoque"),
269 |     ("Toyota", "Prius Prime"),
270 |     ("Acura", "ILX"),
271 |     ("Genesis", "G70"),
272 |     ("Ford", "Taurus"),
273 |     ("Hyundai", "Veloster"),
274 |     ("Lexus", "IS 300"),
275 |     ("Land Rover", "Defender"),
276 |     ("Genesis", "GV80"),
277 |     ("Alfa Romeo", "Giulia"),
278 |     ("BMW", "X6"),
279 |     ("Hyundai", "Ioniq 5"),
280 |     ("Audi", "SQ5"),
281 |     ("BMW", "328i"),
282 |     ("BMW", "i3"),
283 |     ("Cadillac", "ATS"),
284 |     ("Mercedes-Benz", "S 550"),
285 |     ("Lincoln", "Navigator L"),
286 |     ("Mercedes-Benz", "E 450"),
287 |     ("Buick", "LaCrosse"),
288 |     ("Ford", "E-350 and Econoline 350"),
289 |     ("BMW", "M3"),
290 |     ("Mercedes-Benz", "GLE 53 AMG"),
291 |     ("Lexus", "IS 250"),
292 |     ("Mercedes-Benz", "E 300"),
293 |     ("Cadillac", "SRX"),
294 |     ("GMC", "Savana 2500"),
295 |     ("INFINITI", "QX55"),
296 |     ("Mitsubishi", "Eclipse Cross"),
297 |     ("Audi", "Q8"),
298 |     ("INFINITI", "Q60"),
299 |     ("Kia", "Sedona"),
300 |     ("Lincoln", "MKX"),
301 |     ("Audi", "e-tron"),
302 |     ("Chevrolet", "Volt"),
303 |     ("BMW", "X4"),
304 |     ("Chevrolet", "Bolt EUV"),
305 |     ("Volvo", "C40"),
306 |     ("Maserati", "Ghibli"),
307 |     ("Lexus", "ES 300h"),
308 |     ("Jaguar", "F-TYPE"),
309 |     ("Cadillac", "XTS"),
310 |     ("Genesis", "GV70"),
311 |     ("BMW", "430i xDrive"),
312 |     ("BMW", "430i"),
313 |     ("BMW", "Z4"),
314 |     ("BMW", "M4"),
315 |     ("Land Rover", "Discovery"),
316 |     ("Lexus", "GS 350"),
317 |     ("Mercedes-Benz", "A 220"),
318 |     ("Dodge", "Ram 1500 Truck"),
319 |     ("Ford", "F550"),
320 |     ("Hyundai", "Ioniq"),
321 |     ("Mercedes-Benz", "ML 350"),
322 |     ("Genesis", "G80"),
323 |     ("MINI", "Cooper Clubman"),
324 |     ("Maserati", "Levante"),
325 |     ("Mercedes-Benz", "AMG GT"),
326 |     ("BMW", "530i xDrive"),
327 |     ("Lincoln", "Continental"),
328 |     ("Chrysler", "Voyager"),
329 |     ("Lexus", "LS 460"),
330 |     ("MAZDA", "MX-5 Miata RF"),
331 |     ("FIAT", "500"),
332 |     ("Cadillac", "CT6"),
333 |     ("MAZDA", "CX-3"),
334 |     ("BMW", "M5"),
335 |     ("BMW", "328i xDrive"),
336 |     ("Hyundai", "Genesis"),
337 |     ("Kia", "EV6"),
338 |     ("INFINITI", "G37"),
339 |     ("Audi", "A8"),
340 |     ("Audi", "S4"),
341 |     ("BMW", "X2"),
342 |     ("BMW", "530i"),
343 |     ("Lexus", "UX 250h"),
344 |     ("Lexus", "RX 350L"),
345 |     ("Mercedes-Benz", "G 63 AMG"),
346 |     ("Nissan", "Juke"),
347 |     ("Volkswagen", "Arteon"),
348 |     ("Honda", "Insight"),
349 |     ("Lexus", "RC 350"),
350 |     ("RAM", "5500"),
351 |     ("Audi", "A7"),
352 |     ("Lexus", "NX 200t"),
353 |     ("Nissan", "370Z"),
354 |     ("Porsche", "Boxster"),
355 |     ("BMW", "540i"),
356 |     ("Buick", "Regal"),
357 |     ("Dodge", "Dart"),
358 |     ("BMW", "540i xDrive"),
359 |     ("Mercedes-Benz", "GLE 450"),
360 |     ("Ford", "Expedition EL"),
361 |     ("Jeep", "Grand Wagoneer"),
362 |     ("Bentley", "Continental"),
363 |     ("Dodge", "Ram 2500 Truck"),
364 |     ("Jeep", "Liberty"),
365 |     ("Kia", "Carnival"),
366 |     ("Mitsubishi", "Mirage G4"),
367 |     ("Mercedes-Benz", "GL 450"),
368 |     ("Mitsubishi", "Mirage"),
369 |     ("Lexus", "RX 450h"),
370 |     ("Porsche", "Taycan"),
371 |     ("Acura", "TL"),
372 |     ("Lexus", "CT 200h"),
373 |     ("Nissan", "NV"),
374 |     ("BMW", "440i xDrive"),
375 |     ("Mercedes-Benz", "C 43 AMG"),
376 |     ("Mercedes-Benz", "EQS 580"),
377 |     ("Toyota", "Supra"),
378 |     ("Mercedes-Benz", "GLK 350"),
379 |     ("Lexus", "LS 500"),
380 |     ("Toyota", "Prius C"),
381 |     ("Toyota", "Yaris"),
382 |     ("Jaguar", "XF"),
383 |     ("Nissan", "Versa Note"),
384 |     ("BMW", "335i"),
385 |     ("Nissan", "Xterra"),
386 |     ("Lexus", "NX 250"),
387 |     ("Toyota", "FJ Cruiser"),
388 |     ("Audi", "RS 5"),
389 |     ("Volvo", "V60"),
390 |     ("Audi", "S3"),
391 |     ("BMW", "740i"),
392 |     ("BMW", "128i"),
393 |     ("Buick", "Verano"),
394 |     ("Subaru", "BRZ"),
395 |     ("Audi", "Q4 e-tron"),
396 |     ("Chevrolet", "Avalanche"),
397 |     ("Mercedes-Benz", "SL 550"),
398 |     ("Ford", "C-MAX"),
399 |     ("Toyota", "GR86"),
400 |     ("BMW", "750i xDrive"),
401 |     ("Ford", "Transit 150"),
402 |     ("Mercedes-Benz", "Metris"),
403 |     ("Mercedes-Benz", "S 560"),
404 |     ("Nissan", "NV200"),
405 |     ("Volkswagen", "Golf R"),
406 |     ("Mercedes-Benz", "SL 63 AMG"),
407 |     ("BMW", "M850i xDrive"),
408 |     ("Lexus", "LX 570"),
409 |     ("Mercedes-Benz", "G 550"),
410 |     ("Ford", "E-450 and Econoline 450"),
411 |     ("Ford", "E-Transit"),
412 |     ("Mercedes-Benz", "C 250"),
413 |     ("Mercedes-Benz", "CLS 450"),
414 |     ("Mercedes-Benz", "S 63 AMG"),
415 |     ("BMW", "530e"),
416 |     ("BMW", "428i"),
417 |     ("Mercedes-Benz", "GLC 43 AMG"),
418 |     ("Volvo", "S90"),
419 |     ("Dodge", "Avenger"),
420 |     ("Lexus", "NX 300h"),
421 |     ("Mercedes-Benz", "GLE 43 AMG"),
422 |     ("Mercedes-Benz", "E 400"),
423 |     ("Toyota", "Prius V"),
424 |     ("BMW", "X5 M"),
425 |     ("GMC", "Savana 3500"),
426 |     ("Scion", "tC"),
427 |     ("Volkswagen", "CC"),
428 |     ("Acura", "TSX"),
429 |     ("BMW", "228i xDrive"),
430 |     ("BMW", "535i xDrive"),
431 |     ("Porsche", "Cayman"),
432 |     ("Subaru", "Impreza WRX"),
433 |     ("BMW", "535i"),
434 |     ("BMW", "M8"),
435 |     ("Bentley", "Bentayga"),
436 |     ("Maserati", "Quattroporte"),
437 |     ("BMW", "M550i xDrive"),
438 |     ("Jaguar", "XE"),
439 |     ("Hyundai", "Kona N"),
440 |     ("Porsche", "718 Cayman"),
441 |     ("BMW", "M2"),
442 |     ("Mercedes-Benz", "C 63 AMG"),
443 |     ("BMW", "M340i"),
444 |     ("Hyundai", "Elantra N"),
445 |     ("BMW", "528i"),
446 |     ("Ford", "E-250 and Econoline 250"),
447 |     ("BMW", "i4"),
448 |     ("FIAT", "500X"),
449 |     ("BMW", "iX"),
450 |     ("Audi", "TT"),
451 |     ("Lexus", "IS 200t"),
452 |     ("Maserati", "GranTurismo"),
453 |     ("Dodge", "Ram 3500 Truck"),
454 |     ("BMW", "650i"),
455 |     ("Lexus", "UX 200"),
456 |     ("Dodge", "Dakota"),
457 |     ("INFINITI", "QX30"),
458 |     ("Mercedes-Benz", "GLE 63 AMG"),
459 |     ("Volkswagen", "Touareg"),
460 |     ("Volkswagen", "e-Golf"),
461 |     ("Lamborghini", "Huracan"),
462 |     ("Lexus", "LC 500"),
463 |     ("Land Rover", "LR4"),
464 |     ("Lexus", "NX 350h"),
465 |     ("BMW", "428i xDrive"),
466 |     ("Jaguar", "XJ"),
467 |     ("Lexus", "RC 300"),
468 |     ("Toyota", "Mirai"),
469 |     ("BMW", "330e"),
470 |     ("Genesis", "G90"),
471 |     ("Jaguar", "E-PACE"),
472 |     ("Lamborghini", "Urus"),
473 |     ("BMW", "M340i xDrive"),
474 |     ("Audi", "RS 7"),
475 |     ("Lexus", "ES 250"),
476 |     ("Mercedes-Benz", "SL 55 AMG"),
477 |     ("BMW", "320i"),
478 |     ("Toyota", "Land Cruiser"),
479 |     ("Ford", "Thunderbird"),
480 |     ("Honda", "Element"),
481 |     ("Scion", "xB"),
482 |     ("BMW", "530e xDrive"),
483 |     ("Porsche", "718 Boxster"),
484 |     ("Buick", "Lucerne"),
485 |     ("Mercedes-Benz", "E 53 AMG"),
486 |     ("Mitsubishi", "Lancer"),
487 |     ("Polestar", "Polestar 2"),
488 |     ("RAM", "4500"),
489 |     ("Scion", "FR-S"),
490 |     ("Mercedes-Benz", "E 550"),
491 |     ("Nissan", "GT-R"),
492 |     ("BMW", "X6 M"),
493 |     ("INFINITI", "Q70"),
494 |     ("Audi", "R8"),
495 |     ("Honda", "Clarity"),
496 |     ("Mercedes-Benz", "E 63 AMG"),
497 |     ("BMW", "320i xDrive"),
498 |     ("Ford", "E-150 and Econoline 150"),
499 |     ("Lexus", "GX 470"),
500 |     ("Lincoln", "MKS"),
501 |     ("BMW", "135i"),
502 |     ("Mercedes-Benz", "GL 550"),
503 |     ("Toyota", "86"),
504 |     ("smart", "fortwo"),
505 |     ("Chevrolet", "Express 1500"),
506 |     ("BMW", "528i xDrive"),
507 |     ("BMW", "M440i"),
508 |     ("BMW", "230i"),
509 |     ("INFINITI", "G35"),
510 |     ("Mercedes-Benz", "S 450"),
511 |     ("Mercedes-Benz", "SL 500"),
512 |     ("BMW", "435i xDrive"),
513 |     ("FIAT", "124 Spider"),
514 |     ("Mercedes-Benz", "CLS 550"),
515 |     ("Mercedes-Benz", "EQE 350+"),
516 |     ("Mercury", "Grand Marquis"),
517 |     ("Volkswagen", "Eos"),
518 |     ("Chrysler", "PT Cruiser"),
519 |     ("Lexus", "SC 430"),
520 |     ("Lincoln", "Town Car"),
521 |     ("Nissan", "Quest"),
522 |     ("Audi", "S8"),
523 |     ("BMW", "435i"),
524 |     ("HUMMER", "H2"),
525 |     ("Kia", "Cadenza"),
526 |     ("BMW", "228i"),
527 |     ("Chrysler", "Sebring"),
528 |     ("Volvo", "XC70"),
529 |     ("BMW", "335i xDrive"),
530 |     ("Chevrolet", "Captiva Sport"),
531 |     ("Ferrari", "California"),
532 |     ("Ford", "Excursion"),
533 |     ("BMW", "440i"),
534 |     ("Chevrolet", "HHR"),
535 |     ("INFINITI", "QX56"),
536 |     ("INFINITI", "QX70"),
537 |     ("MAZDA", "MAZDA5"),
538 |     ("Pontiac", "G6"),
539 |     ("Chevrolet", "Cobalt"),
540 |     ("Rivian", "R1T"),
541 |     ("Audi", "S6"),
542 |     ("BMW", "750i"),
543 |     ("BMW", "M240i xDrive"),
544 |     ("BMW", "i8"),
545 | )
546 | 
# Lookup table from each (make, model) pair to its position in MAKES_MODELS.
MAKE_MODEL_TO_INDEX = dict(zip(MAKES_MODELS, range(len(MAKES_MODELS))))

# Number of make/model classes: one per table entry plus one extra index
# located just past the end of the table.
NUM_MAKE_MODELS = 1 + len(MAKE_MODEL_TO_INDEX)
550 | 


--------------------------------------------------------------------------------
/car_data/dataset.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import traceback
  3 | from dataclasses import dataclass
  4 | from typing import Iterator, List, Optional
  5 | 
  6 | import numpy as np
  7 | import torch
  8 | import torchvision.transforms as transforms
  9 | from PIL import Image
 10 | from torch.utils.data import DataLoader, Dataset, Sampler
 11 | 
 12 | 
 13 | def looping_loader(
 14 |     index_path: str,
 15 |     image_dir: str,
 16 |     batch_size: int,
 17 |     train: bool = True,
 18 |     use_data_aug: bool = False,
 19 |     last_seen_phash: Optional[str] = None,
 20 | ) -> Iterator[List["CarImage"]]:
 21 |     dataset = CarImageDataset(
 22 |         index_path, image_dir, train=train, use_data_aug=use_data_aug
 23 |     )
 24 |     sampler = CarImageDatasetSampler(dataset, last_seen_phash=last_seen_phash)
 25 |     loader = DataLoader(
 26 |         dataset,
 27 |         batch_size=batch_size,
 28 |         sampler=sampler,
 29 |         num_workers=4,
 30 |         collate_fn=lambda x: x,
 31 |     )
 32 |     while True:
 33 |         yield from loader
 34 | 
 35 | 
 36 | def image_transform(use_data_aug: bool) -> transforms.Compose:
 37 |     if use_data_aug:
 38 |         image_ops = [
 39 |             transforms.RandomResizedCrop(224, scale=(0.8, 1.0), ratio=(1.0, 1.0)),
 40 |             transforms.RandomHorizontalFlip(),
 41 |             transforms.ColorJitter(0.4, 0.4, 0.4),
 42 |         ]
 43 |     else:
 44 |         image_ops = [
 45 |             transforms.Resize(224),
 46 |             transforms.CenterCrop(224),
 47 |         ]
 48 |     return transforms.Compose(
 49 |         [
 50 |             *image_ops,
 51 |             transforms.ToTensor(),
 52 |             transforms.Normalize(
 53 |                 (0.48145466, 0.4578275, 0.40821073),
 54 |                 (0.26862954, 0.26130258, 0.27577711),
 55 |             ),
 56 |         ]
 57 |     )
 58 | 
 59 | 
@dataclass
class CarImage:
    """A single dataset sample: a transformed image plus its metadata."""

    # Image tensor produced by the dataset's transform pipeline.
    image: torch.Tensor
    # Hash string identifying the image; the dataset also uses it to build
    # the image's path on disk.
    phash: str
    # Price associated with the image.
    price: float
    # The dataset stores None in the fields below when the index holds a
    # falsy value (e.g. an empty string or 0) for them.
    make: Optional[str]
    model: Optional[str]
    year: Optional[int]
 68 | 
 69 | 
 70 | class CarImageDataset(Dataset):
 71 |     def __init__(
 72 |         self,
 73 |         index_path: str,
 74 |         image_dir: str,
 75 |         train: bool = True,
 76 |         use_data_aug: bool = False,
 77 |     ):
 78 |         super().__init__()
 79 |         self.index_path = index_path
 80 |         self.image_dir = image_dir
 81 |         with open(index_path, "rb") as f:
 82 |             obj = np.load(f)
 83 |             phashes = obj["phashes"]
 84 |             ordering = np.argsort(phashes)  # sorting hashes => random order
 85 | 
 86 |             test_count = len(ordering) // 10
 87 |             if train:
 88 |                 ordering = ordering[test_count:]
 89 |             else:
 90 |                 ordering = ordering[:test_count]
 91 | 
 92 |             self.phashes = phashes[ordering]
 93 |             self.prices = obj["prices"][ordering]
 94 |             self.makes = obj["makes"][ordering].tolist()
 95 |             self.models = obj["models"][ordering].tolist()
 96 |             self.years = obj["years"][ordering]
 97 |         self.transform = image_transform(use_data_aug)
 98 | 
 99 |     def __len__(self) -> int:
100 |         return len(self.phashes)
101 | 
102 |     def __getitem__(self, idx: int) -> CarImage:
103 |         phash = self.phashes[idx]
104 |         img_path = os.path.join(self.image_dir, phash[:2], phash)
105 |         try:
106 |             img = Image.open(img_path).convert("RGB")
107 |         except:
108 |             # Don't kill the job due to a single missing or corrupted image.
109 |             print(f"error loading: {img_path}")
110 |             traceback.print_exc()
111 |             img = Image.new("RGB", (256, 256))
112 |         return CarImage(
113 |             image=self.transform(img),
114 |             phash=self.phashes[idx].tolist(),
115 |             price=self.prices[idx],
116 |             make=self.makes[idx] or None,
117 |             model=self.models[idx] or None,
118 |             year=self.years[idx] or None,
119 |         )
120 | 
121 | 
class CarImageDatasetSampler(Sampler):
    """
    Sampler that walks the dataset sequentially, optionally starting from the
    position of a given hash and wrapping around to cover every index once.
    """

    def __init__(
        self, data_source: CarImageDataset, last_seen_phash: Optional[str] = None
    ):
        self.data_source = data_source
        if last_seen_phash is None:
            self._start_idx = 0
        else:
            # NOTE(review): with the default side, an exact match resumes AT
            # the last seen hash rather than after it -- confirm intended.
            self._start_idx = np.searchsorted(data_source.phashes, last_seen_phash)

    def __len__(self) -> int:
        """Return the number of indices produced by one full pass."""
        return len(self.data_source)

    def __iter__(self) -> Iterator[int]:
        """Yield every dataset index once, beginning at the start offset."""
        total = len(self.data_source)
        yield from ((step + self._start_idx) % total for step in range(total))
138 | 


--------------------------------------------------------------------------------
/car_data/graphics.py:
--------------------------------------------------------------------------------
  1 | """
  2 | APIs for drawing predictions with Cairo.
  3 | """
  4 | 
  5 | import io
  6 | import math
  7 | import os
  8 | from abc import ABC, abstractmethod
  9 | from contextlib import contextmanager
 10 | from typing import Dict, Iterator, List, Tuple
 11 | 
 12 | import cairo
 13 | import numpy as np
 14 | import torch
 15 | import torch.nn.functional as F
 16 | from PIL import Image
 17 | 
 18 | from car_data.constants import MAKES_MODELS, PRICE_BIN_LABELS, YEARS
 19 | 
 20 | PANEL_WIDTH = 550
 21 | IMAGE_SIZE = 224
 22 | 
 23 | 
 24 | @contextmanager
 25 | def open_context(path: str, width: int, height: int) -> Iterator[cairo.Context]:
 26 |     _, ext = os.path.splitext(path)
 27 |     if ext.lower() == ".svg":
 28 |         with cairo.SVGSurface(path, width, height) as surface:
 29 |             ctx = cairo.Context(surface)
 30 |             yield ctx
 31 |     else:
 32 |         with cairo.ImageSurface(
 33 |             cairo.Format.RGB24, math.ceil(width), math.ceil(height)
 34 |         ) as surface:
 35 |             ctx = cairo.Context(surface)
 36 |             yield ctx
 37 |             surface.write_to_png(path)
 38 | 
 39 | 
 40 | def prediction_element_size() -> Tuple[int, int]:
 41 |     with cairo.SVGSurface(io.BytesIO(), PANEL_WIDTH, 10000) as surface:
 42 |         ctx = cairo.Context(surface)
 43 |         element = prediction_element(
 44 |             ctx=ctx,
 45 |             idx=0,
 46 |             img=Image.fromarray(np.zeros((IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.uint8)),
 47 |             outputs=dict(
 48 |                 price_median=torch.tensor(0.0),
 49 |                 price_bin=torch.zeros(1, len(PRICE_BIN_LABELS)),
 50 |                 make_model=torch.zeros(1, len(MAKES_MODELS) + 1),
 51 |                 year=torch.zeros(1, len(YEARS) + 1),
 52 |             ),
 53 |         )
 54 |     return math.ceil(element.width), math.ceil(element.height)
 55 | 
 56 | 
 57 | def prediction_element(
 58 |     ctx: cairo.Context, idx: int, img: Image.Image, outputs: Dict[str, torch.Tensor]
 59 | ) -> "Element":
 60 |     content = VStack(
 61 |         Empty(width=0.0, height=16.0),
 62 |         pad_to_width(ImageElement(crop_image(img)), PANEL_WIDTH),
 63 |         Padded(Separator(PANEL_WIDTH - 40.0), horiz=20, vert=16),
 64 |         pad_to_width(
 65 |             HStack(
 66 |                 Text(ctx, "Price prediction:", font_size=30.0),
 67 |                 Empty(width=10, height=1),
 68 |                 Text(
 69 |                     ctx,
 70 |                     f"${int(round(outputs['price_median'].item()))}",
 71 |                     font_size=30.0,
 72 |                     bold=True,
 73 |                 ),
 74 |             ),
 75 |             PANEL_WIDTH,
 76 |         ),
 77 |         Padded(Separator(PANEL_WIDTH - 40.0), horiz=20, vert=16),
 78 |         Empty(width=PANEL_WIDTH, height=16),
 79 |         HStack(
 80 |             TopN(
 81 |                 ctx,
 82 |                 PANEL_WIDTH / 2,
 83 |                 "Price",
 84 |                 PRICE_BIN_LABELS,
 85 |                 F.softmax(outputs["price_bin"], dim=-1)[0].tolist(),
 86 |                 4,
 87 |             ),
 88 |             TopN(
 89 |                 ctx,
 90 |                 PANEL_WIDTH / 2,
 91 |                 "Year",
 92 |                 [str(year) for year in YEARS] + ["Unknown"],
 93 |                 F.softmax(outputs["year"], dim=-1)[0].tolist(),
 94 |                 4,
 95 |             ),
 96 |         ),
 97 |         Empty(width=PANEL_WIDTH, height=16),
 98 |         pad_to_width(
 99 |             TopN(
100 |                 ctx,
101 |                 PANEL_WIDTH * 0.8,
102 |                 "Make/Model",
103 |                 [f"{make} {model}" for make, model in MAKES_MODELS] + ["Unknown"],
104 |                 F.softmax(outputs["make_model"], dim=-1)[0].tolist(),
105 |                 5,
106 |             ),
107 |             PANEL_WIDTH,
108 |         ),
109 |         Empty(width=PANEL_WIDTH, height=16),
110 |     )
111 |     return Overlay(Background(idx, PANEL_WIDTH, content.height), content)
112 | 
113 | 
def crop_image(img: Image.Image) -> Image.Image:
    """
    Take the centered square of an image and resize it to
    IMAGE_SIZE x IMAGE_SIZE.
    """
    width, height = img.size
    side = min(width, height)
    left, top = (width - side) // 2, (height - side) // 2
    square = img.crop((left, top, left + side, top + side))
    return square.resize((IMAGE_SIZE, IMAGE_SIZE))
121 | 
122 | 
class Element(ABC):
    """Base class for drawable UI elements with a fixed width and height."""

    def __init__(self, width: float, height: float):
        self.width, self.height = width, height

    @abstractmethod
    def draw_at(self, ctx: cairo.Context, x: float, y: float):
        """Draw the UI element at the coordinates."""
        return None
132 | 
133 | 
class Combination(Element):
    """
    A group of child elements drawn one after another.

    Along an axis that is stacked (`horiz` for x, `vert` for y), the size is
    the sum of the children's sizes and each child is offset past the
    previous ones. Along an axis that is not stacked, the size is the largest
    child size and all children share the same origin.

    With no children the element has zero size and draws nothing.
    """

    def __init__(self, *children: Element, horiz: bool = False, vert: bool = False):
        widths = [child.width for child in children]
        heights = [child.height for child in children]
        super().__init__(
            # default=0 keeps an empty combination valid instead of letting
            # max() raise ValueError on an empty sequence.
            width=sum(widths) if horiz else max(widths, default=0),
            height=sum(heights) if vert else max(heights, default=0),
        )
        self.horiz = horiz
        self.vert = vert
        self.children = children

    def draw_at(self, ctx: cairo.Context, x: float, y: float):
        """Draw every child, advancing the origin along the stacked axes."""
        for child in self.children:
            child.draw_at(ctx, x, y)
            if self.horiz:
                x += child.width
            if self.vert:
                y += child.height
151 | 
152 | 
class VStack(Combination):
    """Children stacked top to bottom."""

    def __init__(self, *children: Element):
        super().__init__(*children, vert=True)
159 | 
160 | 
class HStack(Combination):
    """Children placed side by side, left to right."""

    def __init__(self, *children: Element):
        super().__init__(*children, horiz=True)
167 | 
168 | 
class Overlay(Combination):
    """Children drawn at the same origin, in the order given."""

    def __init__(self, *children: Element):
        super().__init__(*children)
174 | 
175 | 
class Padded(Element):
    """
    Wrap an element with equal padding on the left/right (`horiz`) and on
    the top/bottom (`vert`).
    """

    def __init__(self, contained: Element, horiz: float = 0.0, vert: float = 0.0):
        self.contained = contained
        self.horiz = horiz
        self.vert = vert
        super().__init__(
            width=contained.width + 2 * horiz,
            height=contained.height + 2 * vert,
        )

    def draw_at(self, ctx: cairo.Context, x: float, y: float):
        """Draw the wrapped element shifted inward by the padding."""
        inner_x = x + self.horiz
        inner_y = y + self.vert
        self.contained.draw_at(ctx, inner_x, inner_y)
188 | 
189 | 
class Empty(Element):
    """An invisible element that only occupies space."""

    def __init__(self, width: float, height: float):
        super().__init__(width=width, height=height)

    def draw_at(self, ctx: cairo.Context, x: float, y: float):
        """Draw nothing."""
        return None
196 | 
197 | 
class Separator(Element):
    """A light gray horizontal line of the given width and height 1."""

    def __init__(self, width: float):
        super().__init__(width=width, height=1)

    def draw_at(self, ctx: cairo.Context, x: float, y: float):
        """Stroke the line from (x, y + 0.5) across the element's width."""
        # NOTE(review): the 0.5 offset presumably centers the 1-unit stroke
        # on a single pixel row -- confirm.
        line_y = y + 0.5
        ctx.set_source_rgb(0.8, 0.8, 0.8)
        ctx.set_line_width(1.0)
        ctx.move_to(x, line_y)
        ctx.line_to(x + self.width, line_y)
        ctx.stroke()
208 | 
209 | 
class Background(Element):
    """
    A filled rectangle whose shade alternates with the panel index:
    light gray for odd indices and white for even ones.
    """

    def __init__(self, idx: int, width: float, height: float):
        super().__init__(width=width, height=height)
        self.idx = idx

    def draw_at(self, ctx: cairo.Context, x: float, y: float):
        """Fill the element's rectangle with the shade chosen by the index."""
        shade = 0.97 if self.idx % 2 else 1
        ctx.set_source_rgb(shade, shade, shade)
        ctx.rectangle(x, y, self.width, self.height)
        ctx.fill()
225 | 
226 | 
class ImageElement(Element):
    """An element that paints a PIL image at its native size."""

    def __init__(self, img: Image.Image):
        super().__init__(*img.size)

        # Round-trip through an in-memory PNG to obtain a Cairo surface.
        buffer = io.BytesIO()
        img.save(buffer, format="PNG")
        buffer.seek(0)
        self.source = cairo.ImageSurface.create_from_png(buffer)

    def draw_at(self, ctx: cairo.Context, x: float, y: float):
        """Paint the image with its top-left corner at (x, y)."""
        ctx.set_source_surface(self.source, x, y)
        ctx.paint()
244 | 
245 | 
class Text(Element):
    """
    A single line of black Arial text.

    The width is measured from the text itself; the height is measured from
    a fixed alphabet string so that labels with the same font size share the
    same height.
    """

    def __init__(
        self, ctx: cairo.Context, text: str, font_size: float, bold: bool = False
    ):
        self.text = text
        self.font_size = font_size
        self.bold = bold
        self._apply_font(ctx)
        text_width = ctx.text_extents(text).width
        # Height should not depend on text to make consecutive labels
        # line up perfectly.
        line_height = ctx.text_extents(
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        ).height
        super().__init__(text_width, line_height)

    def _apply_font(self, ctx: cairo.Context):
        """Configure the context with this label's size and weight."""
        ctx.set_font_size(self.font_size)
        weight = cairo.FONT_WEIGHT_BOLD if self.bold else cairo.FONT_WEIGHT_NORMAL
        ctx.select_font_face("Arial", cairo.FONT_SLANT_NORMAL, weight)

    def draw_at(self, ctx: cairo.Context, x: float, y: float):
        """Render the text with its box's top-left corner at (x, y)."""
        self._apply_font(ctx)
        ctx.set_source_rgb(0, 0, 0)
        hacky_height_centerer = 0.9  # found empirically to center vertically
        baseline_y = y + self.height * hacky_height_centerer
        ctx.move_to(x, baseline_y)
        ctx.show_text(self.text)
        ctx.stroke()
279 | 
280 | 
class ProbabilityBar(Element):
    """
    A titled horizontal bar showing a probability, with the percentage
    written to the right of the bar.
    """

    def __init__(
        self, ctx: cairo.Context, width: float, title: str, probability: float
    ):
        self.title_element = Text(ctx, title, font_size=22.0)
        self.prob_text = Text(ctx, f"{(probability*100):2.1f}%", font_size=18.0)
        self.probability = probability
        super().__init__(width, self.title_element.height + 30)

    def draw_at(self, ctx: cairo.Context, x: float, y: float):
        """Draw the title, the percentage label and the bar beneath the title."""
        title, percent = self.title_element, self.prob_text
        title.draw_at(ctx, x, y)

        bar_top = y + title.height + 5
        track_width = self.width - percent.width - 8
        percent.draw_at(ctx, x + self.width - percent.width, bar_top)

        # Gray track first, then the colored portion scaled by probability.
        layers = (
            ((0.9, 0.9, 0.9), track_width),
            ((0x65 / 0xFF, 0xBC / 0xFF, 0xD4 / 0xFF), track_width * self.probability),
        )
        for color, length in layers:
            ctx.set_source_rgb(*color)
            ctx.rectangle(x, bar_top, length, 20)
            ctx.fill()
307 | 
308 | 
class TopN(VStack):
    """
    A titled column showing the `n` most probable labels as probability
    bars, most probable first.

    :param ctx: Cairo context used to measure text.
    :param width: total width of the column.
    :param title: heading displayed above the bars.
    :param labels: label for each class, indexed like `probs`.
    :param probs: probability of each class.
    :param n: maximum number of bars to show.
    """

    def __init__(
        self,
        ctx: cairo.Context,
        width: float,
        title: str,
        labels: List[str],
        probs: List[float],
        n: int,
    ):
        header = pad_to_width(Text(ctx, title, font_size=24, bold=True), width)
        rule = Padded(Separator(width - 32), horiz=16, vert=8)

        ranked = np.argsort(probs)[::-1][:n]
        bars = [ProbabilityBar(ctx, width - 40, labels[i], probs[i]) for i in ranked]

        # Give every percentage label the same width so that all bars end at
        # the same x position. default=0.0 keeps the header-only case (no
        # bars) from raising ValueError in max().
        widest = max((bar.prob_text.width for bar in bars), default=0.0)
        for bar in bars:
            bar.prob_text.width = widest

        rows = [Padded(bar, horiz=20, vert=4) for bar in bars]
        super().__init__(header, rule, *rows)
330 | 
331 | 
def pad_to_width(e: Element, width: float) -> Padded:
    """
    Center an element horizontally inside `width` by padding both sides
    equally. No padding is added if the element is already at least that wide.
    """
    slack = (width - e.width) / 2
    return Padded(e, horiz=slack if slack > 0 else 0)
334 | 


--------------------------------------------------------------------------------
/car_data/lin_features.py:
--------------------------------------------------------------------------------
 1 | from multiprocessing.pool import ThreadPool
 2 | from typing import Callable, Iterator, List
 3 | 
 4 | import numpy as np
 5 | import torch
 6 | import torch.nn as nn
 7 | from PIL import Image
 8 | 
 9 | 
def compute_pooled_features(
    device: torch.device,
    model: nn.Module,
    preprocess: Callable[[Image.Image], torch.Tensor],
    paths: List[str],
    batch_size: int = 64,
) -> np.ndarray:
    """
    Compute one unit-norm feature vector per image path.

    Each image is cut into three crops, every crop is encoded with
    `model.encode_image`, the three encodings are averaged, and the result is
    divided by its L2 norm.

    :param device: device the preprocessed batch is moved to.
    :param model: module exposing an `encode_image` method.
    :param preprocess: maps a PIL image crop to a tensor.
    :param paths: image file paths.
    :param batch_size: number of images processed per model call.
    :return: array with one row per path, in the order given.
    """
    pooled_batches = []
    with ThreadPool(8) as pool:
        for batch_paths in chunk_filenames(paths, batch_size):
            crops_per_image = pool.map(image_crops, batch_paths)
            flat_crops = [crop for crops in crops_per_image for crop in crops]
            inputs = torch.stack(pool.map(preprocess, flat_crops), dim=0).to(device)
            with torch.no_grad():
                embeddings = model.encode_image(inputs)
                # Group the three crops of each image and average them.
                pooled = embeddings.reshape([len(batch_paths), 3, -1]).mean(1)
                pooled /= torch.linalg.norm(pooled, dim=-1, keepdim=True)
            pooled_batches.append(pooled.cpu().numpy())
    return np.concatenate(pooled_batches, axis=0)
29 | 
30 | 
def chunk_filenames(paths: List[str], batch_size: int) -> Iterator[List[str]]:
    """
    Split `paths` into consecutive slices of at most `batch_size` items,
    preserving order. The final slice may be shorter.
    """
    starts = range(0, len(paths), batch_size)
    yield from (paths[begin : begin + batch_size] for begin in starts)
34 | 
35 | 
def image_crops(path: str):
    """
    Open an image and return three square crops whose side equals the
    image's shorter dimension.

    For landscape images the crops are taken at the left, center and right;
    otherwise at the top, center and bottom. For a square image all three
    crops cover the whole image.

    :param path: path of the image file.
    :return: list of three cropped PIL images in the order described above.
    """
    img = Image.open(path)
    width, height = img.size
    if width > height:
        # Offset that centers a height x height square horizontally.
        offset = (width - height) // 2
        boxes = [
            (0, 0, height, height),
            (offset, 0, offset + height, height),
            (width - height, 0, width, height),
        ]
    else:
        # Offset that centers a width x width square vertically.
        offset = (height - width) // 2
        boxes = [
            (0, 0, width, width),
            (0, offset, width, offset + width),
            (0, height - width, width, height),
        ]
    return [img.crop(box=box) for box in boxes]
58 | 


--------------------------------------------------------------------------------
/car_data/losses.py:
--------------------------------------------------------------------------------
  1 | from dataclasses import dataclass
  2 | from typing import Dict, List, Sequence
  3 | 
  4 | import numpy as np
  5 | import torch
  6 | import torch.nn.functional as F
  7 | 
  8 | from .constants import MAKE_MODEL_TO_INDEX, MEDIAN_PRICE_SCALE, PRICE_CUTOFFS, YEARS
  9 | from .dataset import CarImage
 10 | 
 11 | 
 12 | @dataclass
 13 | class LossWeights:
 14 |     price_ce: float = 1.0
 15 |     price_mae: float = 1.0
 16 |     year_ce: float = 1.0
 17 |     make_model_ce: float = 1.0
 18 | 
 19 |     @classmethod
 20 |     def parse(cls, x: str) -> "LossWeights":
 21 |         presets = {
 22 |             "default": LossWeights(),
 23 |             "price_ce_only": LossWeights(
 24 |                 price_ce=1.0, price_mae=0.0, year_ce=0.0, make_model_ce=0.0
 25 |             ),
 26 |         }
 27 |         if x in presets:
 28 |             return presets[x]
 29 | 
 30 |         res = {}
 31 |         for part in x.split(","):
 32 |             pair = part.split("=")
 33 |             if len(pair) != 2:
 34 |                 raise ValueError(f"expected pairs of k=v, but got token `{pair}`")
 35 |             res[pair[0]] = float(pair[1])
 36 |         return cls(**res)
 37 | 
 38 | 
 39 | @dataclass
 40 | class LossTargets:
 41 |     prices: torch.Tensor
 42 |     price_bins: torch.Tensor
 43 |     years: torch.Tensor
 44 |     make_models: torch.Tensor
 45 | 
 46 |     @classmethod
 47 |     def cat(cls, items: Sequence["LossTargets"]) -> "LossTargets":
 48 |         return LossTargets(
 49 |             prices=torch.cat([x.prices for x in items]),
 50 |             price_bins=torch.cat([x.price_bins for x in items]),
 51 |             years=torch.cat([x.years for x in items]),
 52 |             make_models=torch.cat([x.make_models for x in items]),
 53 |         )
 54 | 
 55 |     @classmethod
 56 |     def from_batch(cls, batch: List[CarImage], device: torch.device) -> "LossTargets":
 57 |         return cls(
 58 |             prices=torch.tensor(
 59 |                 [x.price for x in batch], dtype=torch.float32, device=device
 60 |             ),
 61 |             price_bins=torch.tensor([bin_price(x.price) for x in batch], device=device),
 62 |             years=torch.tensor([bin_year(x.year) for x in batch], device=device),
 63 |             make_models=torch.tensor(
 64 |                 [bin_make_model(x.make, x.model) for x in batch], device=device
 65 |             ),
 66 |         )
 67 | 
 68 |     @classmethod
 69 |     def from_model_out(cls, outputs: Dict[str, torch.Tensor]) -> "LossTargets":
 70 |         return cls(
 71 |             prices=outputs["price_median"],
 72 |             price_bins=F.softmax(outputs["price_bin"], dim=-1),
 73 |             years=F.softmax(outputs["year"], dim=-1),
 74 |             make_models=F.softmax(outputs["make_model"], dim=-1),
 75 |         )
 76 | 
 77 |     def metrics(
 78 |         self, weights: LossWeights, outputs: Dict[str, torch.Tensor]
 79 |     ) -> torch.Tensor:
 80 |         metrics = dict(
 81 |             price_ce=F.cross_entropy(outputs["price_bin"], self.price_bins),
 82 |             price_acc=(
 83 |                 (outputs["price_bin"].argmax(-1) == self.price_bins).float().mean()
 84 |             ),
 85 |             price_mae=(outputs["price_median"] - self.prices).abs().float().mean(),
 86 |             year_ce=F.cross_entropy(outputs["year"], self.years),
 87 |             year_acc=((outputs["year"].argmax(-1) == self.years).float().mean()),
 88 |             make_model_ce=F.cross_entropy(outputs["make_model"], self.make_models),
 89 |             make_model_acc=(
 90 |                 (outputs["make_model"].argmax(-1) == self.make_models).float().mean()
 91 |             ),
 92 |         )
 93 |         metrics["loss"] = (
 94 |             (weights.price_ce * metrics["price_ce"])
 95 |             + (weights.price_mae * metrics["price_mae"] / MEDIAN_PRICE_SCALE)
 96 |             + (weights.year_ce * metrics["year_ce"])
 97 |             + (weights.make_model_ce * metrics["make_model_ce"])
 98 |         )
 99 |         return metrics
100 | 
101 | 
def bin_price(price: float) -> int:
    """
    Return the index of the first cutoff in PRICE_CUTOFFS that is greater
    than or equal to `price`, or len(PRICE_CUTOFFS) if there is none.
    """
    matches = (i for i, cutoff in enumerate(PRICE_CUTOFFS) if price <= cutoff)
    return next(matches, len(PRICE_CUTOFFS))
107 | 
108 | 
def bin_prices(prices: np.ndarray) -> np.ndarray:
    """
    Vectorized price binning: for every price, the index of the first cutoff
    in PRICE_CUTOFFS that is greater than or equal to it.
    """
    bin_indices = np.searchsorted(PRICE_CUTOFFS, prices, side="left")
    return bin_indices
111 | 
112 | 
def bin_make_model(make: str, model: str) -> int:
    """
    Return the class index of a (make, model) pair. Pairs missing from
    MAKE_MODEL_TO_INDEX map to the index just past the end of the table.
    """
    key = (make, model)
    if key in MAKE_MODEL_TO_INDEX:
        return MAKE_MODEL_TO_INDEX[key]
    return len(MAKE_MODEL_TO_INDEX)
115 | 
116 | 
def bin_year(year: int) -> int:
    """
    Return the position of `year` within YEARS, or len(YEARS) if the year is
    not listed.
    """
    try:
        return YEARS.index(year)
    except ValueError:
        return len(YEARS)
121 | 


--------------------------------------------------------------------------------
/car_data/model.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Optional
 2 | 
 3 | import clip
 4 | import torch
 5 | import torch.nn as nn
 6 | 
 7 | from .constants import MEDIAN_PRICE_SCALE, NUM_MAKE_MODELS, NUM_PRICE_BINS, NUM_YEARS
 8 | 
 9 | 
def create_model(name: str, device: torch.device, download_root: Optional[str] = None):
    """
    Construct a model by name.

    :param name: "clip" or "mobilenetv2".
    :param device: device on which the model is created.
    :param download_root: optional directory forwarded to the model
                          constructor for storing downloaded weights.
    :return: the constructed model.
    :raises ValueError: if `name` is not a known model name.
    """
    if name not in ("clip", "mobilenetv2"):
        raise ValueError(f"unknown model name: {name}")
    model_cls = CLIPModel if name == "clip" else MobileNetV2Model
    return model_cls(device, download_root=download_root)
17 | 
18 | 
class CLIPModel(nn.Module):
    """
    CLIP ViT-B/16 image encoder followed by an OutputLayer that consumes its
    512-dimensional image features.
    """

    def __init__(self, device: torch.device, download_root: Optional[str] = None):
        super().__init__()
        self.device = device
        # clip.load returns a pair; only the model (first item) is kept.
        backbone, _ = clip.load(
            "ViT-B/16", device=device, download_root=download_root
        )
        self.clip = backbone
        self.clip.float()
        self.output = OutputLayer(512, device=device)

    def output_layer(self) -> "OutputLayer":
        """Return the prediction head."""
        return self.output

    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
        """Encode a batch of images and apply the prediction head."""
        return self.output(self.clip.encode_image(x))
33 | 
34 | 
class MobileNetV2Model(nn.Module):
    """
    A pretrained torchvision MobileNetV2 whose final classifier layer is
    replaced by an OutputLayer on its 1280-dimensional features.
    """

    def __init__(self, device: torch.device, download_root: Optional[str] = None):
        """
        :param device: the device to place the model on.
        :param download_root: if given, torch.hub's directory is temporarily
                              pointed here while the weights are loaded.
        """
        super().__init__()
        previous_hub_dir = None
        if download_root is not None:
            previous_hub_dir = torch.hub.get_dir()
            torch.hub.set_dir(download_root)
        try:
            self.model = torch.hub.load(
                "pytorch/vision:v0.10.0", "mobilenet_v2", pretrained=True
            ).to(device)
        finally:
            # Restore the global hub directory even if loading fails, so a
            # failed download does not leave process-wide state modified.
            if download_root is not None:
                torch.hub.set_dir(previous_hub_dir)
        self.model.classifier[1] = OutputLayer(1280, device=device)

    def output_layer(self) -> "OutputLayer":
        """
        Return the prediction heads of this model.
        """
        return self.model.classifier[1]

    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
        """
        Run the backbone and output heads on a batch of images.
        """
        return self.model(x)
53 | 
54 | 
class OutputLayer(nn.Module):
    """
    Four independent linear heads on top of a shared feature vector:
    price bin logits, a scalar median price, make/model logits and year
    logits.
    """

    def __init__(self, n_features: int, device: torch.device):
        super().__init__()

        def head(n_outputs: int) -> nn.Linear:
            return nn.Linear(n_features, n_outputs, device=device)

        self.price_bin = head(NUM_PRICE_BINS)
        self.price_median = head(1)
        self.make_model = head(NUM_MAKE_MODELS)
        self.year = head(NUM_YEARS)

    def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
        """
        Apply every head to `x` and return the results keyed by head name.
        """
        # Drop the trailing singleton dimension of the regression head and
        # multiply by MEDIAN_PRICE_SCALE.
        median = self.price_median(x)[..., 0] * MEDIAN_PRICE_SCALE
        return {
            "price_bin": self.price_bin(x),
            "price_median": median,
            "make_model": self.make_model(x),
            "year": self.year(x),
        }

    def scale_outputs(self, scales: Dict[str, float]):
        """
        Multiply the weight and bias of the named heads in place.

        :param scales: maps a head attribute name to its multiplier.
        """
        for head_name, factor in scales.items():
            head = getattr(self, head_name)
            with torch.no_grad():
                head.weight.mul_(factor)
                head.bias.mul_(factor)
77 | 


--------------------------------------------------------------------------------
/car_data/scripts/baseline.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Compute the baseline accuracy of a dummy classifier.
 3 | """
 4 | 
 5 | import argparse
 6 | from collections import Counter
 7 | 
 8 | import numpy as np
 9 | from car_data.losses import bin_prices
10 | 
11 | 
def main():
    """
    Print the fraction of samples that fall in the most common price bin.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("index_path", type=str)
    options = arg_parser.parse_args()

    index = np.load(options.index_path)
    prices = index["prices"]
    bin_histogram = Counter(bin_prices(prices))
    largest_bin = max(bin_histogram.values())
    print(largest_bin / len(prices))
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     main()
24 | 


--------------------------------------------------------------------------------
/car_data/scripts/classify.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Run the classifier on an image.
 3 | """
 4 | 
 5 | import argparse
 6 | from typing import Iterator, List
 7 | 
 8 | import numpy as np
 9 | import torch
10 | import torch.nn.functional as F
11 | import torchvision.transforms as transforms
12 | from car_data.constants import MAKES_MODELS, PRICE_BIN_LABELS, YEARS
13 | from car_data.model import create_model
14 | from PIL import Image
15 | 
16 | 
def main():
    """
    Load a checkpoint, classify one image, and print the predicted median
    price, the price bin distribution, and the top make/model and year
    predictions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="clip")
    parser.add_argument("checkpoint", type=str)
    parser.add_argument("image", type=str)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = create_model(args.model_name, device)
    model.load_state_dict(torch.load(args.checkpoint, map_location=device))
    model.eval()

    transform = transforms.Compose(
        [
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(
                (0.48145466, 0.4578275, 0.40821073),
                (0.26862954, 0.26130258, 0.27577711),
            ),
        ]
    )
    # Normalize is configured for three channels, so force RGB; otherwise
    # grayscale or RGBA files fail inside the transform.
    image = Image.open(args.image).convert("RGB")
    image_tensor = transform(image).to(device)

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(image_tensor[None])

    print("---- Price ----")
    # Fixed-point formatting; a bare ".02" would print two significant
    # digits in general format (e.g. 2.3e+04).
    print(f"median: {outputs['price_median'].item():.02f}")
    price_probs = F.softmax(outputs["price_bin"], dim=-1)[0].tolist()
    for label, prob in zip(pad_labels(PRICE_BIN_LABELS), price_probs):
        print(f"{label}: {(prob*100):.04}%")

    print("---- Make/model ----")
    make_model_probs = F.softmax(outputs["make_model"], dim=-1)[0].tolist()
    print_top_n(
        [f"{make} {model}" for make, model in MAKES_MODELS] + ["Unknown"],
        make_model_probs,
    )

    print("---- year ----")
    year_probs = F.softmax(outputs["year"], dim=-1)[0].tolist()
    print_top_n([str(year) for year in YEARS] + ["Unknown"], year_probs)
59 | 
60 | 
def print_top_n(labels: List[str], probs: List[float], n: int = 5):
    """
    Print the `n` most probable labels, one per line, as right-aligned
    "label: percent%" rows in descending order of probability.
    """
    ranking = np.argsort(-np.array(probs))[:n]
    top_labels = []
    top_probs = []
    for idx in ranking:
        top_labels.append(labels[idx])
        top_probs.append(probs[idx])
    for label, prob in zip(pad_labels(top_labels), top_probs):
        print(f"{label}: {(prob*100):.04}%")
67 | 
68 | 
def pad_labels(labels: List[str]) -> Iterator[str]:
    """
    Yield every label left-padded with spaces to the length of the longest
    label, so that the labels line up when printed.
    """
    width = max(len(text) for text in labels)
    for text in labels:
        yield text.rjust(width)
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     main()
79 | 


--------------------------------------------------------------------------------
/car_data/scripts/classify_viz.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Create an SVG with a visual depiction of a model's predictions for a batch of
 3 | input images.
 4 | """
 5 | 
 6 | import argparse
 7 | 
 8 | import torch
 9 | from PIL import Image
10 | 
11 | from car_data.dataset import image_transform
12 | from car_data.graphics import (
13 |     HStack,
14 |     open_context,
15 |     prediction_element,
16 |     prediction_element_size,
17 | )
18 | from car_data.model import create_model
19 | 
20 | 
def main():
    """
    Run the model on each input image and draw the prediction panels side
    by side into a single output file.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--output", type=str, default="viz.svg")
    arg_parser.add_argument("--model_name", type=str, default="clip")
    arg_parser.add_argument("--checkpoint", type=str, required=True)
    arg_parser.add_argument("images", type=str, nargs="+")
    args = arg_parser.parse_args()

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = create_model(args.model_name, device)
    state = torch.load(args.checkpoint, map_location=device)
    model.load_state_dict(state)
    model.eval()
    transform = image_transform(False)

    panel_width, panel_height = prediction_element_size()
    # One panel per image, laid out horizontally.
    canvas_width = panel_width * len(args.images)
    with open_context(args.output, canvas_width, panel_height) as ctx:
        panels = []
        for index, path in enumerate(args.images):
            picture = Image.open(path).convert("RGB")
            batch = transform(picture)[None].to(device)
            predictions = model(batch)
            panels.append(prediction_element(ctx, index, picture, predictions))
        HStack(*panels).draw_at(ctx, 0, 0)
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     main()
47 | 


--------------------------------------------------------------------------------
/car_data/scripts/classify_viz_animate.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Create an animation of classification results as a crop is moved from one side
 3 | of a rectangular image to the other.
 4 | """
 5 | 
 6 | import argparse
 7 | import io
 8 | from typing import Iterator
 9 | 
10 | import cairo
11 | import torch
12 | from PIL import Image
13 | 
14 | from car_data.dataset import image_transform
15 | from car_data.graphics import HStack, prediction_element, prediction_element_size
16 | from car_data.model import create_model
17 | 
18 | 
def main():
    """
    Render one frame per crop position for the given images and save the
    frames as an animated image.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--output", type=str, default="viz.gif")
    arg_parser.add_argument("--num_frames", type=int, default=10)
    arg_parser.add_argument("--frame_rate", type=float, default=10.0)
    arg_parser.add_argument("--model_name", type=str, default="clip")
    arg_parser.add_argument("--checkpoint", type=str, required=True)
    arg_parser.add_argument("images", type=str, nargs="+")
    args = arg_parser.parse_args()

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = create_model(args.model_name, device)
    state = torch.load(args.checkpoint, map_location=device)
    model.load_state_dict(state)
    model.eval()
    transform = image_transform(False)

    sources = [Image.open(path).convert("RGB") for path in args.images]
    # Advance all images in lockstep: each step yields one crop per image.
    per_image_crops = [crops_of_image(src, args.num_frames) for src in sources]

    frames = []
    panel_width, panel_height = prediction_element_size()
    canvas_width = panel_width * len(sources)
    for crops in zip(*per_image_crops):
        with cairo.ImageSurface(
            cairo.Format.RGB24, canvas_width, panel_height
        ) as surface:
            ctx = cairo.Context(surface)
            panels = []
            for index, crop in enumerate(crops):
                batch = transform(crop)[None].to(device)
                panels.append(prediction_element(ctx, index, crop, model(batch)))
            HStack(*panels).draw_at(ctx, 0, 0)

            # Round-trip through an in-memory PNG to obtain a Pillow image.
            png_buffer = io.BytesIO()
            surface.write_to_png(png_buffer)
            png_buffer.seek(0)
            frames.append(Image.open(png_buffer))

    first_frame, later_frames = frames[0], frames[1:]
    frame_ms = round(1000 / args.frame_rate)
    first_frame.save(
        args.output,
        save_all=True,
        append_images=later_frames,
        duration=frame_ms,
        loop=0,
    )
64 | 
65 | 
def crops_of_image(img: Image.Image, n: int) -> Iterator[Image.Image]:
    """
    Yield `n` square crops that slide from the top-left of the image to the
    bottom-right along its longer side.

    The crop side equals the shorter image dimension. For n >= 2 the first
    crop starts at offset 0 and the last one ends at the far edge. For
    n == 1 a single crop at offset 0 is produced (previously this raised
    ZeroDivisionError).

    :param img: the source image.
    :param n: the number of crops to produce.
    :return: an iterator over the cropped images.
    """
    width, height = img.size
    min_size = min(width, height)

    # With a single frame there is no travel to divide up; avoid dividing
    # by zero and simply keep the crop at the origin.
    steps = max(n - 1, 1)
    dx = (width - min_size) / steps
    dy = (height - min_size) / steps

    for i in range(n):
        x = round(i * dx)
        y = round(i * dy)
        yield img.crop((x, y, x + min_size, y + min_size))
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     main()
81 | 


--------------------------------------------------------------------------------
/car_data/scripts/clip_features.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Compute three-crop CLIP features for all the images in a dataset to apply to
  3 | filtering.
  4 | """
  5 | 
  6 | import argparse
  7 | import itertools
  8 | import os
  9 | from collections import defaultdict
 10 | from typing import Dict, Iterator, List, Optional, Tuple
 11 | 
 12 | import clip
 13 | import numpy as np
 14 | import torch
 15 | from car_data.lin_features import compute_pooled_features
 16 | 
 17 | 
 18 | def main():
 19 |     parser = argparse.ArgumentParser()
 20 |     parser.add_argument("--shard_digits", type=int, default=4)
 21 |     parser.add_argument("--download_root", type=str, default=None)
 22 |     parser.add_argument("--batch_size", type=int, default=64)
 23 |     parser.add_argument("--old_feature_dir", type=str, default=None)
 24 |     parser.add_argument("image_dir", type=str)
 25 |     parser.add_argument("output_dir", type=str)
 26 |     args = parser.parse_args()
 27 | 
 28 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 29 |     model, preprocess = clip.load(
 30 |         "ViT-B/16", device=device, download_root=args.download_root
 31 |     )
 32 | 
 33 |     print("reading paths...")
 34 |     prefixes = ["".join(x) for x in itertools.product(*(["0123456789abcdef"] * 2))]
 35 |     listing = sorted(
 36 |         x
 37 |         for prefix in prefixes
 38 |         for x in os.listdir(os.path.join(args.image_dir, prefix))
 39 |     )
 40 |     print("iterating...")
 41 |     for shard_id, filenames in group_by_prefix(listing, args.shard_digits):
 42 |         out_path = os.path.join(args.output_dir, f"{shard_id}.npz")
 43 |         if os.path.exists(out_path):
 44 |             continue
 45 | 
 46 |         old_features = None
 47 |         if args.old_feature_dir:
 48 |             old_path = os.path.join(args.old_feature_dir, f"{shard_id}.npz")
 49 |             if os.path.exists(old_path):
 50 |                 old_features = dict(np.load(old_path))
 51 | 
 52 |         filenames = filter_out_existing_filenames(old_features, filenames)
 53 |         if not len(filenames):
 54 |             assert old_features is not None
 55 |             out_dict = old_features
 56 |         else:
 57 |             print(f"working on {out_path}...")
 58 |             features = compute_pooled_features(
 59 |                 device,
 60 |                 model,
 61 |                 preprocess,
 62 |                 [os.path.join(args.image_dir, x[:2], x) for x in filenames],
 63 |                 batch_size=args.batch_size,
 64 |             )
 65 |             out_dict = combine_existing_features(old_features, filenames, features)
 66 |         np.savez(out_path + ".tmp.npz", **out_dict)
 67 |         os.rename(out_path + ".tmp.npz", out_path)
 68 | 
 69 | 
 70 | def group_by_prefix(
 71 |     listing: List[str], prefix_len: int
 72 | ) -> Iterator[Tuple[str, List[str]]]:
 73 |     groups = defaultdict(list)
 74 |     for item in listing:
 75 |         if len(item) < prefix_len or item.startswith("."):
 76 |             continue
 77 |         groups[item[:prefix_len]].append(item)
 78 |     for k in sorted(groups.keys()):
 79 |         yield k, groups[k]
 80 | 
 81 | 
 82 | def filter_out_existing_filenames(
 83 |     old_features: Optional[Dict[str, np.ndarray]], filenames: List[str]
 84 | ) -> List[str]:
 85 |     if old_features is None:
 86 |         return filenames
 87 |     old_set = set(old_features["filenames"].tolist())
 88 |     return [x for x in filenames if x not in old_set]
 89 | 
 90 | 
 91 | def combine_existing_features(
 92 |     old_features: Optional[Dict[str, np.ndarray]],
 93 |     new_filenames: List[str],
 94 |     new_features: np.ndarray,
 95 | ) -> Dict[str, np.ndarray]:
 96 |     if old_features is None:
 97 |         return dict(features=new_features, filenames=new_filenames)
 98 |     all_filenames = np.array(old_features["filenames"].tolist() + new_filenames)
 99 |     all_features = np.concatenate([old_features["features"], new_features])
100 |     sorted_indices = np.argsort(all_filenames)
101 |     return dict(
102 |         features=all_features[sorted_indices], filenames=all_filenames[sorted_indices]
103 |     )
104 | 
105 | 
106 | if __name__ == "__main__":
107 |     main()
108 | 


--------------------------------------------------------------------------------
/car_data/scripts/data_bench.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Benchmark the data loader.
 3 | """
 4 | 
 5 | import argparse
 6 | 
 7 | from car_data.dataset import looping_loader
 8 | from tqdm.auto import tqdm
 9 | 
10 | 
def main():
    """
    Iterate over the looping data loader under a tqdm progress bar so that
    its throughput can be read off the bar.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--index_path", type=str, required=True)
    arg_parser.add_argument("--image_dir", type=str, required=True)
    options = arg_parser.parse_args()

    loader = looping_loader(
        index_path=options.index_path,
        image_dir=options.image_dir,
        batch_size=64,
    )
    # Fetch one batch before the progress bar starts.
    next(loader)
    progress = tqdm(loader)
    for _batch in progress:
        continue
23 | 
24 | 
25 | if __name__ == "__main__":
26 |     main()
27 | 


--------------------------------------------------------------------------------
/car_data/scripts/data_viz.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Dump some images from the data loader to make sure it is working.
 3 | """
 4 | 
 5 | import argparse
 6 | import os
 7 | 
 8 | import torch
 9 | from car_data.dataset import looping_loader
10 | from PIL import Image
11 | 
12 | 
def main():
    """
    Pull a few samples from the data loader, undo the input normalization,
    and save each image as a PNG named after its index and price.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--test", action="store_true", default=False)
    arg_parser.add_argument("--use_data_aug", action="store_true", default=False)
    arg_parser.add_argument("--count", type=int, default=10)
    arg_parser.add_argument("--index_path", type=str, required=True)
    arg_parser.add_argument("--image_dir", type=str, required=True)
    arg_parser.add_argument("--output_dir", type=str, required=True)
    args = arg_parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    loader = looping_loader(
        args.index_path,
        args.image_dir,
        1,
        train=not args.test,
        use_data_aug=args.use_data_aug,
    )

    # Per-channel normalization statistics, shaped to broadcast over a
    # channels-first image.
    mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).view(-1, 1, 1)
    std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).view(-1, 1, 1)

    for i in range(args.count):
        sample = next(loader)[0]
        # Invert the normalization and scale to the 8-bit range.
        pixels = ((sample.image * std) + mean) * 255.99
        # Channels-first to channels-last, then clamp and convert to uint8.
        array = pixels.permute(1, 2, 0).clamp(0, 255).to(torch.uint8).cpu().numpy()
        out_path = os.path.join(args.output_dir, f"{i}_{sample.price}.png")
        Image.fromarray(array).save(out_path)
45 | 
46 | 
47 | if __name__ == "__main__":
48 |     main()
49 | 


--------------------------------------------------------------------------------
/car_data/scripts/filter_index.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Filter a dataset index using a filter and pre-computed feature vectors.
 3 | """
 4 | 
 5 | import argparse
 6 | import os
 7 | 
 8 | import numpy as np
 9 | import torch
10 | from tqdm.auto import tqdm
11 | 
12 | 
def main():
    """
    Score every pre-computed feature vector with the classifier, collect the
    IDs whose decision value exceeds the threshold, and write a copy of the
    index restricted to entries whose phash is among those IDs.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--index_path", type=str, required=True)
    arg_parser.add_argument("--feature_dir", type=str, required=True)
    arg_parser.add_argument("--classifier_path", type=str, required=True)
    arg_parser.add_argument("--threshold", type=float, default=-0.5)
    arg_parser.add_argument("--output_path", type=str, required=True)
    args = arg_parser.parse_args()

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = torch.jit.load(args.classifier_path).to(device)

    print("listing feature filenames...")
    shard_names = []
    for entry in os.listdir(args.feature_dir):
        if entry.startswith(".") or not entry.endswith(".npz"):
            continue
        shard_names.append(entry)

    print("computing kept IDs...")
    positive_ids = set()
    total_ids = 0
    for shard_name in tqdm(shard_names):
        shard = np.load(os.path.join(args.feature_dir, shard_name))
        features = torch.from_numpy(shard["features"]).float().to(device)
        ids = shard["filenames"].tolist()
        with torch.no_grad():
            scores = model.decision_function(features)
            keep_flags = (scores > args.threshold).cpu().numpy().tolist()
        total_ids += len(ids)
        positive_ids.update(
            item_id for keep, item_id in zip(keep_flags, ids) if keep
        )

    print(f"filtering index; kept {len(positive_ids)}/{total_ids}...")

    index = np.load(args.index_path)
    # Boolean mask over index rows, applied to every array in the index.
    use_indices = np.array([x.tolist() in positive_ids for x in index["phashes"]])
    filtered = {key: index[key][use_indices] for key in index.keys()}
    np.savez(args.output_path, **filtered)
56 | 
57 | 
58 | if __name__ == "__main__":
59 |     main()
60 | 


--------------------------------------------------------------------------------
/car_data/scripts/plot_runs.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Plot runs via their log files.
 3 | """
 4 | 
 5 | import argparse
 6 | from typing import Dict, List, Optional
 7 | 
 8 | import matplotlib.pyplot as plt
 9 | import numpy as np
10 | 
11 | 
def main():
    """
    Plot a smoothed metric against the training step for one or more runs.

    The positional arguments alternate between a display name and the path
    of that run's log file.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--ema_rate", type=float, default=0.99)
    arg_parser.add_argument("--max_step", type=int, default=None)
    arg_parser.add_argument("--key", type=str, default="loss")
    arg_parser.add_argument("--output_path", type=str, default="plot.png")
    arg_parser.add_argument("names_and_paths", nargs="+", type=str)
    args = arg_parser.parse_args()

    # Even positions hold names, odd positions hold the matching paths.
    run_names = args.names_and_paths[0::2]
    run_paths = args.names_and_paths[1::2]

    plt.figure()
    for run_name, run_path in zip(run_names, run_paths):
        records = read_log_lines(run_path, args.max_step)
        steps = np.array([rec["step"] for rec in records])
        values = np.array([rec[args.key] for rec in records])
        plt.plot(steps, smooth(values, args.ema_rate), label=run_name)
    plt.legend()
    plt.xlabel("step")
    plt.ylabel(args.key)
    plt.savefig(args.output_path)
35 | 
36 | 
def read_log_lines(path: str, max_step: Optional[int]) -> List[Dict[str, float]]:
    """
    Parse "key=value" log lines from a training log.

    Only lines containing "step=" are considered. Each whitespace-separated
    "key=value" token with a numeric value becomes an entry of that line's
    dict; tokens without "=" or with a value that is not a float (for
    example a path in an echoed command line) are ignored rather than
    aborting the whole parse. Lines that do not define "step" are dropped.

    :param path: path of the log file.
    :param max_step: if not None, only keep lines whose step is strictly
                     smaller than this value.
    :return: one dict per distinct step, sorted by step. When a step occurs
             several times, the last occurrence wins.
    """
    # map step to log dict, to allow restarts to overwrite old steps
    lines = {}

    with open(path, "r") as f:
        for line in f:
            if "step=" not in line:
                continue
            obj = {}
            for item in line.split():
                # Split on the first "=" only, so values that themselves
                # contain "=" do not break unpacking.
                k, sep, v = item.partition("=")
                if not sep:
                    continue
                try:
                    obj[k] = float(v)
                except ValueError:
                    # Non-numeric value: not a metric, skip this token.
                    continue
            if "step" in obj and (max_step is None or obj["step"] < max_step):
                lines[obj["step"]] = obj

    return sorted(lines.values(), key=lambda x: x["step"])
56 | 
57 | 
def smooth(data: np.ndarray, ema_rate: float) -> np.ndarray:
    """
    Smooth a sequence with an exponential moving average.

    Both the weighted sum and the sum of weights are tracked, and their
    ratio is reported, so early outputs are not pulled toward the zero
    initial state.
    """
    weighted_sum = 0.0
    weight_total = 0.0
    smoothed = []
    for value in data:
        weighted_sum = ema_rate * weighted_sum + (1 - ema_rate) * value
        weight_total = ema_rate * weight_total + (1 - ema_rate)
        smoothed.append(weighted_sum / weight_total)
    return np.array(smoothed)
67 | 
68 | 
69 | if __name__ == "__main__":
70 |     main()
71 | 


--------------------------------------------------------------------------------
/car_data/scripts/recalibrate.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Re-scale output heads of the model to minimize validation loss.
 3 | This can help recalibrate overconfident model predictions after a model has
 4 | overfit to the training data.
 5 | """
 6 | 
 7 | import argparse
 8 | from typing import Iterator, List
 9 | 
10 | import numpy as np
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 | import torchvision.transforms as transforms
15 | from PIL import Image
16 | from tqdm.auto import tqdm
17 | 
18 | from car_data.dataset import looping_loader
19 | from car_data.losses import LossTargets, LossWeights
20 | from car_data.model import create_model
21 | 
22 | 
def main():
    """
    Fit one scalar multiplier per model output on held-out batches so as to
    minimize the loss, then bake the multipliers into the output heads and
    save the resulting checkpoint.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--index_path", type=str, required=True)
    parser.add_argument("--image_dir", type=str, required=True)
    parser.add_argument("--num_images", type=int, default=1024)
    parser.add_argument("--batch_size", type=int, default=8)
    parser.add_argument("--iterations", type=int, default=5000)
    parser.add_argument("--model_name", type=str, default="clip")
    parser.add_argument("checkpoint", type=str)
    parser.add_argument("checkpoint_out", type=str)
    args = parser.parse_args()

    print("creating model...")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = create_model(args.model_name, device)
    model.load_state_dict(torch.load(args.checkpoint, map_location=device))
    model.eval()

    print("loading data...")
    dataset = looping_loader(
        index_path=args.index_path,
        image_dir=args.image_dir,
        batch_size=args.batch_size,
        train=False,
    )
    outputs = []
    targets = []
    # Run the frozen model once and cache its outputs; only the scales are
    # optimized afterwards.
    for _ in tqdm(range(args.num_images // args.batch_size)):
        batch = next(dataset)
        with torch.no_grad():
            outputs.append(
                model(torch.stack([x.image for x in batch], dim=0).to(device))
            )
            targets.append(LossTargets.from_batch(batch, device))

    outputs = {k: torch.cat([x[k] for x in outputs]) for k in outputs[0].keys()}
    targets = LossTargets.cat(targets)

    print("recalibrating...")

    def loss_fn(scales: torch.Tensor) -> torch.Tensor:
        # scales[i] multiplies the i-th output, in dict iteration order.
        scaled_outputs = {k: v * scales[i] for i, (k, v) in enumerate(outputs.items())}
        return targets.metrics(LossWeights(), scaled_outputs)["loss"]

    scales = nn.Parameter(torch.ones(len(outputs.keys()), device=device))
    loss_fn = torch.jit.trace(loss_fn, scales)

    init_loss = loss_fn(scales).item()

    opt = torch.optim.Adam([scales], lr=1e-2)
    # Stays None when --iterations is 0, so the summary below can fall back
    # to the initial loss instead of hitting an undefined variable.
    loss = None
    for _ in tqdm(range(args.iterations)):
        loss = loss_fn(scales)
        opt.zero_grad()
        loss.backward()
        opt.step()
    final_loss = init_loss if loss is None else loss.item()

    scale_dict = dict(zip(outputs.keys(), scales.detach().cpu().tolist()))

    print("scales:")
    print()
    for name, scale in scale_dict.items():
        print(f"  {name}: {scale}")
    print()
    print(f"loss went from {init_loss} => {final_loss}")

    # Output names match the head attribute names on the output layer, so
    # the learned scales can be applied directly to the head parameters.
    model.output_layer().scale_outputs(scale_dict)
    torch.save(model.state_dict(), args.checkpoint_out)
90 | 
91 | 
92 | if __name__ == "__main__":
93 |     main()
94 | 


--------------------------------------------------------------------------------
/car_data/scripts/train.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Entrypoint for training. Pass hyperparameters and dataset as flags.
 3 | """
 4 | 
 5 | import argparse
 6 | import shlex
 7 | import sys
 8 | 
 9 | import torch
10 | from car_data.model import create_model
11 | from car_data.train_loop import TrainLoop, add_training_args, training_args_dict
12 | 
13 | 
def main():
    """
    Parse training flags, build the model and the training loop, and run
    training steps until the process is stopped.
    """
    arg_parser = argparse.ArgumentParser()
    add_training_args(arg_parser)
    arg_parser.add_argument("--model_name", type=str, default="clip")
    args = arg_parser.parse_args()
    loop_kwargs = training_args_dict(args)

    # Echo the exact invocation into the log.
    print(f"COMMAND: {shlex.join(sys.argv)}")

    print("creating model...")
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = create_model(args.model_name, device)

    print("creating trainer...")
    trainer = TrainLoop(**loop_kwargs, model=model, device=device)
    run_step = trainer.run_step
    while True:
        run_step()
35 | 
36 | 
37 | if __name__ == "__main__":
38 |     main()
39 | 


--------------------------------------------------------------------------------
/car_data/scripts/train_distill.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Entrypoint for distillation of one model into another.
 3 | Similar to train.py, but pass --teacher_model_name and --teacher_model_path.
 4 | """
 5 | 
 6 | import argparse
 7 | import shlex
 8 | import sys
 9 | 
10 | import torch
11 | from car_data.model import create_model
12 | from car_data.train_loop import (
13 |     DistillationTrainLoop,
14 |     add_training_args,
15 |     training_args_dict,
16 | )
17 | 
18 | 
def main():
    """
    Parse training flags, build the student model, load the teacher model
    from its checkpoint, and run distillation steps until the process is
    stopped.
    """
    arg_parser = argparse.ArgumentParser()
    add_training_args(arg_parser)
    arg_parser.add_argument("--model_name", type=str, default="clip")
    arg_parser.add_argument("--teacher_model_name", type=str, required=True)
    arg_parser.add_argument("--teacher_model_path", type=str, required=True)
    args = arg_parser.parse_args()
    loop_kwargs = training_args_dict(args)

    # Echo the exact invocation into the log.
    print(f"COMMAND: {shlex.join(sys.argv)}")

    print("creating model...")
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = create_model(args.model_name, device)

    print("creating teacher model...")
    teacher = create_model(args.teacher_model_name, device)
    teacher_state = torch.load(args.teacher_model_path, map_location=device)
    teacher.load_state_dict(teacher_state)

    print("creating trainer...")
    trainer = DistillationTrainLoop(
        **loop_kwargs, teacher=teacher, model=model, device=device
    )
    run_step = trainer.run_step
    while True:
        run_step()
47 | 
48 | 
49 | if __name__ == "__main__":
50 |     main()
51 | 


--------------------------------------------------------------------------------
/car_data/scripts/train_filter.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Train a simple classifier on pooled features.
  3 | """
  4 | 
  5 | import argparse
  6 | import os
  7 | from collections import defaultdict
  8 | from typing import Any, Iterator, List, Tuple
  9 | 
 10 | import clip
 11 | import numpy as np
 12 | import sk2torch
 13 | import torch
 14 | from car_data.lin_features import compute_pooled_features
 15 | from sklearn.model_selection import cross_val_score
 16 | from sklearn.svm import SVC
 17 | 
 18 | 
 19 | def main():
 20 |     parser = argparse.ArgumentParser()
 21 |     parser.add_argument("--positive_dirs", type=str, nargs="+", required=True)
 22 |     parser.add_argument("--negative_dirs", type=str, nargs="+", required=True)
 23 |     parser.add_argument("--download_root", type=str, default=None)
 24 |     parser.add_argument("--model_out", type=str, required=True)
 25 |     args = parser.parse_args()
 26 | 
 27 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 28 |     model, preprocess = clip.load(
 29 |         "ViT-B/16", device=device, download_root=args.download_root
 30 |     )
 31 | 
 32 |     print("computing positive features...")
 33 |     positive_features = compute_pooled_features(
 34 |         device=device,
 35 |         model=model,
 36 |         preprocess=preprocess,
 37 |         paths=list(list_dirs(args.positive_dirs)),
 38 |     )
 39 | 
 40 |     print("computing negative features...")
 41 |     negative_features = compute_pooled_features(
 42 |         device=device,
 43 |         model=model,
 44 |         preprocess=preprocess,
 45 |         paths=list(list_dirs(args.negative_dirs)),
 46 |     )
 47 |     inputs = np.concatenate([positive_features, negative_features], axis=0)
 48 |     labels = np.array(
 49 |         [True] * len(positive_features) + [False] * len(negative_features)
 50 |     )
 51 | 
 52 |     clf = SVC(random_state=0)
 53 | 
 54 |     print("validating...")
 55 |     preds = cross_val_preds(clf, inputs, labels, folds=10)
 56 |     for threshold in np.linspace(-2.0, 2.0, num=31):
 57 |         acc = np.mean((preds > threshold) == labels)
 58 |         false_neg = np.mean(((preds > threshold) != labels)[labels])
 59 |         filter_frac = np.mean(((preds > threshold) == labels)[~labels])
 60 |         print(
 61 |             f"threshold {threshold:.02}: acc={acc:.03f} false_neg={false_neg:.03f} filter_frac={filter_frac:.03f}"
 62 |         )
 63 | 
 64 |     print("training...")
 65 |     clf.fit(inputs, labels)
 66 | 
 67 |     print("saving...")
 68 |     save_model = torch.jit.script(sk2torch.wrap(clf).float())
 69 |     torch.jit.save(save_model, args.model_out)
 70 | 
 71 | 
 72 | def cross_val_preds(
 73 |     model: Any, xs: np.ndarray, ys: np.ndarray, folds: int
 74 | ) -> np.ndarray:
 75 |     """
 76 |     Compute out-of-fold decision function outputs for all of the samples.
 77 |     """
 78 |     perm = np.random.permutation(len(xs))
 79 |     chunk_size = len(xs) // folds
 80 |     chunk_sizes = [chunk_size + int(i < len(xs) % folds) for i in range(folds)]
 81 |     index_chunks = np.split(perm, np.cumsum(chunk_sizes)[:-1])
 82 | 
 83 |     all_outs = np.zeros(len(xs), dtype=np.float32)
 84 |     for val_indices in index_chunks:
 85 |         mask = np.ones(len(xs), dtype=bool)
 86 |         mask[val_indices] = False
 87 |         model.fit(xs[mask], ys[mask])
 88 |         all_outs[~mask] = model.decision_function(xs[~mask])
 89 |     return all_outs
 90 | 
 91 | 
 92 | def list_dirs(dirs: List[str]) -> Iterator[str]:
 93 |     for sub_dir in dirs:
 94 |         for x in os.listdir(sub_dir):
 95 |             if not x.startswith("."):
 96 |                 yield os.path.join(sub_dir, x)
 97 | 
 98 | 
 99 | if __name__ == "__main__":
100 |     main()
101 | 


--------------------------------------------------------------------------------
/car_data/train_loop.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import json
  3 | import os
  4 | from abc import ABC, abstractmethod
  5 | from collections import defaultdict
  6 | from typing import Any, Dict, Iterator, List
  7 | 
  8 | import torch
  9 | import torch.nn as nn
 10 | import torch.nn.functional as F
 11 | import torch.optim as optim
 12 | 
 13 | from .dataset import CarImage, looping_loader
 14 | from .losses import LossTargets, LossWeights
 15 | 
 16 | 
 17 | class TrainLoopBase(ABC):
 18 |     def __init__(
 19 |         self,
 20 |         *,
 21 |         index_path: str,
 22 |         image_dir: str,
 23 |         use_data_aug: bool,
 24 |         save_dir: str,
 25 |         batch_size: int,
 26 |         microbatch: int,
 27 |         eval_interval: int,
 28 |         save_interval: int,
 29 |         lr: float,
 30 |         weight_decay: float,
 31 |         model: nn.Module,
 32 |         device: torch.device,
 33 |         loss_weights: LossWeights,
 34 |     ):
 35 |         self.index_path = index_path
 36 |         self.image_dir = image_dir
 37 |         self.use_data_aug = use_data_aug
 38 |         self.save_dir = save_dir
 39 |         self.batch_size = batch_size
 40 |         self.microbatch = microbatch
 41 |         self.eval_interval = eval_interval
 42 |         self.save_interval = save_interval
 43 |         self.model = model
 44 |         self.device = device
 45 |         self.loss_weights = loss_weights
 46 | 
 47 |         os.makedirs(save_dir, exist_ok=True)
 48 | 
 49 |         self.dataset_state_path = os.path.join(save_dir, "dataset_state.json")
 50 |         if os.path.exists(self.dataset_state_path):
 51 |             print("loading dataset state:", self.dataset_state_path)
 52 |             with open(self.dataset_state_path, "rb") as f:
 53 |                 self.dataset_state = json.load(f)
 54 |         else:
 55 |             self.dataset_state = dict(test=None, train=None)
 56 | 
 57 |         self.train_dataset = looping_loader(
 58 |             index_path,
 59 |             image_dir,
 60 |             batch_size=self.batch_size,
 61 |             train=True,
 62 |             use_data_aug=use_data_aug,
 63 |             last_seen_phash=self.dataset_state["train"],
 64 |         )
 65 |         self.test_dataset = looping_loader(
 66 |             index_path,
 67 |             image_dir,
 68 |             batch_size=self.batch_size,
 69 |             train=False,
 70 |             last_seen_phash=self.dataset_state["test"],
 71 |         )
 72 | 
 73 |         self.opt = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
 74 |         self.opt_state_path = os.path.join(save_dir, "opt.pt")
 75 |         if os.path.exists(self.opt_state_path):
 76 |             print("loading optimizer:", self.opt_state_path)
 77 |             self.opt.load_state_dict(
 78 |                 torch.load(self.opt_state_path, map_location=device)
 79 |             )
 80 | 
 81 |         self.model_state_path = os.path.join(save_dir, "model.pt")
 82 |         if os.path.exists(self.model_state_path):
 83 |             print("loading model:", self.model_state_path)
 84 |             model.load_state_dict(
 85 |                 torch.load(self.model_state_path, map_location=device)
 86 |             )
 87 | 
 88 |         self.step = 0
 89 |         self.step_state_path = os.path.join(save_dir, "step.json")
 90 |         if os.path.exists(self.step_state_path):
 91 |             print("loading step:", self.step_state_path)
 92 |             with open(self.step_state_path, "rb") as f:
 93 |                 self.step = json.load(f)
 94 | 
 95 |     def run_step(self):
 96 |         results = LossAverage()
 97 | 
 98 |         if self.step % self.eval_interval == 0:
 99 |             with torch.no_grad():
100 |                 batch = next(self.test_dataset)
101 |                 for microbatch in self._microbatches(batch):
102 |                     results.add(
103 |                         {
104 |                             f"eval_{k}": v
105 |                             for k, v in self.compute_losses(microbatch).items()
106 |                         },
107 |                         len(microbatch),
108 |                     )
109 |                 self.dataset_state["test"] = batch[-1].phash
110 | 
111 |         batch = next(self.train_dataset)
112 |         self.opt.zero_grad()
113 |         for microbatch in self._microbatches(batch):
114 |             losses = self.compute_losses(microbatch)
115 |             batch_frac = len(microbatch) / len(batch)
116 |             (batch_frac * losses["loss"]).backward()
117 |             results.add(losses, len(microbatch))
118 |         self.opt.step()
119 |         self.dataset_state["train"] = batch[-1].phash
120 | 
121 |         print(results.format(self.step))
122 | 
123 |         self.step += 1
124 | 
125 |         if not self.step % self.save_interval:
126 |             print(f"saving at step {self.step}...")
127 |             self.save()
128 | 
129 |     def _microbatches(self, batch: List[CarImage]) -> Iterator[List[CarImage]]:
130 |         if not self.microbatch:
131 |             yield batch
132 |         else:
133 |             for i in range(0, len(batch), self.microbatch):
134 |                 yield batch[i : i + self.microbatch]
135 | 
136 |     @abstractmethod
137 |     def compute_losses(self, batch: List[CarImage]) -> Dict[str, torch.Tensor]:
138 |         """
139 |         Compute a dict of loss scalars for the batch of images.
140 |         """
141 | 
142 |     def save(self):
143 |         torch.save(self.model.state_dict(), _tmp_path(self.model_state_path))
144 |         torch.save(self.opt.state_dict(), _tmp_path(self.opt_state_path))
145 |         with open(_tmp_path(self.step_state_path), "w") as f:
146 |             json.dump(self.step, f)
147 |         with open(_tmp_path(self.dataset_state_path), "w") as f:
148 |             json.dump(self.dataset_state, f)
149 |         _rename_from_tmp(self.model_state_path)
150 |         _rename_from_tmp(self.opt_state_path)
151 |         _rename_from_tmp(self.step_state_path)
152 |         _rename_from_tmp(self.dataset_state_path)
153 | 
154 | 
class TrainLoop(TrainLoopBase):
    """
    Training loop that supervises the model with targets built from the batch.
    """

    def compute_losses(self, batch: List[CarImage]) -> Dict[str, torch.Tensor]:
        """
        Stack the batch's images, run the model on them, and return the
        metrics of the outputs against the batch-derived targets.
        """
        stacked = torch.stack([item.image for item in batch], dim=0)
        images = stacked.to(self.device)
        targets = LossTargets.from_batch(batch, self.device)
        predictions = self.model(images)
        return targets.metrics(self.loss_weights, predictions)
161 | 
162 | 
class DistillationTrainLoop(TrainLoopBase):
    """
    Training loop that optimizes the model against targets produced by a
    teacher network, while still reporting metrics against the batch-derived
    targets.
    """

    def __init__(self, *, teacher: nn.Module, **kwargs):
        """
        :param teacher: model whose outputs are converted into training
                        targets for the student.
        :param kwargs: forwarded unchanged to TrainLoopBase.
        """
        super().__init__(**kwargs)
        self.teacher = teacher

    def compute_losses(self, batch: List[CarImage]) -> Dict[str, torch.Tensor]:
        """
        Compute metrics of the student's outputs against both the batch
        targets (without gradients, for logging only) and the teacher-derived
        targets (with gradients).

        The returned "loss" entry is the loss against the teacher targets;
        the remaining teacher metrics are reported under a "teacher_" prefix.
        """
        images = torch.stack([x.image for x in batch], dim=0).to(self.device)
        targets = LossTargets.from_batch(batch, self.device)
        with torch.no_grad():
            teacher_out = self.teacher(images)
            teacher_targets = LossTargets.from_model_out(teacher_out)
        outputs = self.model(images)
        # Metrics against the batch targets are informational only, so they
        # are computed without tracking gradients.
        with torch.no_grad():
            metrics = targets.metrics(self.loss_weights, outputs)
        teacher_metrics = teacher_targets.metrics(self.loss_weights, outputs)
        # metrics() returns a dict, so it must be iterated with .items();
        # the previous .item() call raised AttributeError on every step.
        metrics.update({f"teacher_{k}": v for k, v in teacher_metrics.items()})
        # Train on the distillation loss instead of the batch-target loss.
        metrics["loss"] = metrics.pop("teacher_loss")
        return metrics
185 | 
186 | 
class LossAverage:
    """
    Accumulate count-weighted sums of named scalar losses and report their
    per-key weighted averages.
    """

    def __init__(self):
        # Weighted sum of values and total weight, both tracked per key.
        self.results = defaultdict(float)
        self.counts = defaultdict(int)

    def add(self, losses: Dict[str, torch.Tensor], count: int):
        """
        Add every scalar in `losses`, weighting each by `count` samples.
        """
        for name, value in losses.items():
            scalar = value.item()
            self.results[name] += scalar * count
            self.counts[name] += count

    def average(self) -> Dict[str, float]:
        """
        Return the weighted mean of every key added so far.
        """
        averages = {}
        for name, total in self.results.items():
            averages[name] = total / self.counts[name]
        return averages

    def format(self, step: int) -> str:
        """
        Render the step number followed by the averages, sorted by key, as
        space-separated key=value fields.
        """
        averages = self.average()
        fields = [f"step={step}"]
        fields.extend(f"{name}={averages[name]:.04f}" for name in sorted(averages))
        return " ".join(fields)
206 | 
207 | 
208 | def _tmp_path(orig_path: str) -> str:
209 |     return orig_path + ".tmp"
210 | 
211 | 
def _rename_from_tmp(path: str) -> None:
    """
    Move the temporary file for `path` (see _tmp_path) over `path` itself.

    os.replace() is used instead of os.rename() so that an existing
    checkpoint is overwritten on every platform; os.rename() raises on
    Windows when the destination already exists, which would make every save
    after the first one fail.
    """
    os.replace(_tmp_path(path), path)
214 | 
215 | 
def add_training_args(parser: argparse.ArgumentParser):
    """
    Register the command-line flags shared by the training scripts on
    `parser`: tunable options with defaults, the --use_data_aug switch, and
    the required dataset/checkpoint paths.
    """
    options_with_defaults = (
        ("--loss_weights", str, "default"),
        ("--lr", float, 1e-4),
        ("--weight_decay", float, 1e-3),
        ("--batch_size", int, 4),
        ("--microbatch", int, 0),
        ("--eval_interval", int, 5),
        ("--save_interval", int, 1000),
    )
    for flag, value_type, default in options_with_defaults:
        parser.add_argument(flag, type=value_type, default=default)

    parser.add_argument("--use_data_aug", action="store_true")

    for required_flag in ("--index_path", "--image_dir", "--save_dir"):
        parser.add_argument(required_flag, type=str, required=True)
228 | 
229 | 
def training_args_dict(args: argparse.Namespace) -> Dict[str, Any]:
    """
    Collect the parsed training flags into a dict keyed by argument name,
    with the --loss_weights string replaced by a parsed LossWeights value.
    """
    passthrough_keys = (
        "lr",
        "weight_decay",
        "batch_size",
        "microbatch",
        "eval_interval",
        "save_interval",
        "use_data_aug",
        "index_path",
        "image_dir",
        "save_dir",
    )
    res = {key: getattr(args, key) for key in passthrough_keys}
    res["loss_weights"] = LossWeights.parse(args.loss_weights)
    return res
247 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | setup(
 4 |     name="car-data",
 5 |     version="0.0.1",
 6 |     description="Train a car price estimator.",
 7 |     packages=["car_data"],
 8 |     install_requires=[
 9 |         "torch",
10 |         "torchvision",
11 |         "sk2torch",
12 |         "clip @ git+https://github.com/openai/CLIP.git",
13 |         "pycairo",
14 |     ],
15 |     author="Alex Nichol",
16 |     author_email="unixpickle@gmail.com",
17 |     url="https://github.com/unixpickle/car-data",
18 |     license="MIT",
19 |     classifiers=[
20 |         "License :: OSI Approved :: MIT License",
21 |         "Programming Language :: Python :: 3",
22 |         "Programming Language :: Python :: 3.7",
23 |     ],
24 | )
25 | 


--------------------------------------------------------------------------------
/src/chan_util.rs:
--------------------------------------------------------------------------------
 1 | use async_channel::Receiver;
 2 | 
 3 | pub async fn recv_at_least_one<T>(rx: &Receiver<T>) -> Option<Vec<T>> {
 4 |     if let Ok(obj) = rx.recv().await {
 5 |         let mut buffer = vec![obj];
 6 |         loop {
 7 |             match rx.try_recv() {
 8 |                 Ok(obj) => buffer.push(obj),
 9 |                 _ => return Some(buffer),
10 |             }
11 |         }
12 |     } else {
13 |         None
14 |     }
15 | }
16 | 
17 | pub fn recv_at_least_one_blocking<T>(rx: &Receiver<T>) -> Option<Vec<T>> {
18 |     if let Ok(obj) = rx.recv_blocking() {
19 |         let mut buffer = vec![obj];
20 |         loop {
21 |             match rx.try_recv() {
22 |                 Ok(obj) => buffer.push(obj),
23 |                 _ => return Some(buffer),
24 |             }
25 |         }
26 |     } else {
27 |         None
28 |     }
29 | }
30 | 


--------------------------------------------------------------------------------
/src/db.rs:
--------------------------------------------------------------------------------
  1 | use crate::chan_util::recv_at_least_one_blocking;
  2 | use std::fmt::Write;
  3 | use std::path::Path;
  4 | 
  5 | use async_channel::{bounded, Receiver, Sender};
  6 | use rusqlite::{Connection, Transaction};
  7 | use sha2::Digest;
  8 | use tokio::{spawn, task::spawn_blocking};
  9 | 
 10 | use crate::types::{Listing, OwnerInfo};
 11 | 
 12 | #[derive(Clone)]
 13 | pub struct Database {
 14 |     req_chan: Sender<
 15 |         Box<
 16 |             dyn Send
 17 |                 + FnOnce(
 18 |                     anyhow::Result<&mut Transaction>,
 19 |                 ) -> Box<dyn Send + FnOnce(anyhow::Result<()>)>,
 20 |         >,
 21 |     >,
 22 | }
 23 | 
 24 | impl Database {
 25 |     pub async fn open<P: AsRef<Path>>(path: P) -> anyhow::Result<Database> {
 26 |         let path = path.as_ref().to_owned();
 27 |         spawn_blocking(move || -> anyhow::Result<Database> {
 28 |             let conn = Connection::open(path)?;
 29 |             Database::new_with_conn(conn)
 30 |         })
 31 |         .await?
 32 |     }
 33 | 
 34 |     #[allow(dead_code)]
 35 |     pub async fn open_in_memory() -> anyhow::Result<Database> {
 36 |         spawn_blocking(move || -> anyhow::Result<Database> {
 37 |             let conn = Connection::open_in_memory()?;
 38 |             Database::new_with_conn(conn)
 39 |         })
 40 |         .await?
 41 |     }
 42 | 
 43 |     fn new_with_conn(conn: Connection) -> anyhow::Result<Database> {
 44 |         create_tables(&conn)?;
 45 |         let (tx, rx) = bounded(100);
 46 |         spawn_blocking(move || Database::transaction_worker(conn, rx));
 47 |         Ok(Database { req_chan: tx })
 48 |     }
 49 | 
 50 |     pub async fn check_attempt(
 51 |         &self,
 52 |         website: &str,
 53 |         website_id: &str,
 54 |     ) -> anyhow::Result<Option<bool>> {
 55 |         let website = website.to_owned();
 56 |         let website_id = website_id.to_owned();
 57 |         self.with_conn(move |conn| {
 58 |             let mut stmt =
 59 |                 conn.prepare("SELECT success FROM attempt_ids WHERE website=?1 AND website_id=?2")?;
 60 |             let mut result_it = stmt.query_map::<bool, _, _>((&website, &website_id), |x| {
 61 |                 Ok(x.get::<_, i8>(0)? == 1)
 62 |             })?;
 63 |             Ok(match result_it.next() {
 64 |                 None => None,
 65 |                 Some(x) => Some(x?),
 66 |             })
 67 |         })
 68 |         .await
 69 |     }
 70 | 
 71 |     pub async fn add_failed_attempt(&self, website: &str, website_id: &str) -> anyhow::Result<()> {
 72 |         let website = website.to_owned();
 73 |         let website_id = website_id.to_owned();
 74 |         self.with_conn(move |conn| {
 75 |             conn.execute(
 76 |                 "INSERT OR IGNORE INTO attempt_ids (website, website_id, success) VALUES (?1, ?2, ?3)",
 77 |                 (&website, &website_id, 0),
 78 |             )?;
 79 |             Ok(())
 80 |         })
 81 |         .await
 82 |     }
 83 | 
 84 |     pub async fn add_listing(&self, listing: Listing) -> anyhow::Result<Option<i64>> {
 85 |         self.with_conn(move |conn| {
 86 |             let tx = conn.savepoint()?;
 87 |             if tx.execute("INSERT OR IGNORE INTO attempt_ids (website, website_id, success) VALUES (?1, ?2, 1)", (&listing.website, &listing.website_id))? != 1 {
 88 |                 return Ok(None);
 89 |             }
 90 |             tx.execute(
 91 |                 "INSERT INTO listings (website, website_id, title, price, make, model, year, odometer, engine, exterior_color, interior_color, drive_type, fuel_type, fuel_economy_0, fuel_economy_1, vin, stock_number, comments) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18)",
 92 |                 rusqlite::params![
 93 |                     &listing.website,
 94 |                     &listing.website_id,
 95 |                     &listing.title,
 96 |                     &listing.price,
 97 |                     &listing.make,
 98 |                     &listing.model,
 99 |                     &listing.year,
100 |                     &listing.odometer,
101 |                     &listing.engine_description,
102 |                     &listing.exterior_color,
103 |                     &listing.interior_color,
104 |                     &listing.drive_type,
105 |                     &listing.fuel_type,
106 |                     &maybe_list_entry(&listing.fuel_economy, 0),
107 |                     &maybe_list_entry(&listing.fuel_economy, 1),
108 |                     &listing.vin,
109 |                     &listing.stock_number,
110 |                     &listing.comments
111 |                 ],
112 |             )?;
113 |             let last_id = tx.last_insert_rowid();
114 |             if let Some(image_urls) = &listing.image_urls {
115 |                 for (i, image_url) in image_urls.iter().enumerate() {
116 |                     tx.execute(
117 |                         "INSERT INTO images (listing_id, image_index, url, hash) VALUES (?1, ?2, ?3, ?4)",
118 |                         rusqlite::params![&last_id, &i, &image_url, &hash_image_url(&image_url)],
119 |                     )?;
120 |                 }
121 |             }
122 |             if let Some(owners) = &listing.owners {
123 |                 for (i, owner) in owners.iter().enumerate() {
124 |                     tx.execute(
125 |                         "INSERT INTO owners (listing_id, owner_index, website_id, name, website) VALUES (?1, ?2, ?3, ?4, ?5)",
126 |                         rusqlite::params![&last_id, &i, &owner.id, &owner.name, &owner.website],
127 |                     )?;
128 |                 }
129 |             }
130 |             tx.commit()?;
131 |             Ok(Some(last_id))
132 |         }).await
133 |     }
134 | 
135 |     #[allow(dead_code)]
136 |     pub async fn listing_for_id(&self, id: i64) -> anyhow::Result<Option<Listing>> {
137 |         self.with_conn(move |tx| Ok(retrieve_listing(tx, id)?))
138 |             .await
139 |     }
140 | 
141 |     pub async fn insert_phashes(
142 |         &self,
143 |         hash_and_phash: Vec<(String, String)>,
144 |     ) -> anyhow::Result<()> {
145 |         self.with_conn(move |conn| {
146 |             let tx = conn.savepoint()?;
147 |             for (image_hash, phash) in hash_and_phash {
148 |                 tx.execute(
149 |                     "INSERT OR IGNORE INTO phashes (hash, phash, hash_count) VALUES
150 |                      (?1, ?2, (SELECT COUNT(*) from images WHERE hash=?1))",
151 |                     (&image_hash, &phash),
152 |                 )?;
153 |             }
154 |             tx.commit()?;
155 |             Ok(())
156 |         })
157 |         .await
158 |     }
159 | 
160 |     pub async fn counts(&self) -> anyhow::Result<(i64, i64)> {
161 |         self.with_conn(move |tx| {
162 |             let listing_count: i64 =
163 |                 tx.query_row("SELECT COUNT(*) FROM listings", (), |row| row.get(0))?;
164 |             let attempt_count: i64 =
165 |                 tx.query_row("SELECT COUNT(*) FROM attempt_ids", (), |row| row.get(0))?;
166 |             Ok((listing_count, attempt_count))
167 |         })
168 |         .await
169 |     }
170 | 
171 |     pub async fn make_model_counts(&self) -> anyhow::Result<Vec<(String, String, i64)>> {
172 |         self.with_conn(move |tx| {
173 |             let mut stmt = tx.prepare(
174 |                 "SELECT
175 |                     make,
176 |                     model,
177 |                     COUNT(*)
178 |                 FROM listings
179 |                 GROUP BY make, model
180 |                 ORDER BY -COUNT(*)",
181 |             )?;
182 |             let results = stmt.query_map((), |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))?;
183 |             Ok(results.into_iter().collect::<rusqlite::Result<_>>()?)
184 |         })
185 |         .await
186 |     }
187 | 
188 |     pub async fn completed_dedups<C: 'static + Send + FromIterator<String>>(
189 |         &self,
190 |     ) -> anyhow::Result<C> {
191 |         self.with_conn(move |tx| {
192 |             let mut stmt = tx.prepare("SELECT hash FROM phashes")?;
193 |             let results = stmt.query_map((), |row| Ok(row.get(0)?))?;
194 |             Ok(results.into_iter().collect::<rusqlite::Result<C>>()?)
195 |         })
196 |         .await
197 |     }
198 | 
199 |     pub async fn get_attempt_ids<C: 'static + Send + FromIterator<String>>(
200 |         &self,
201 |         website: &str,
202 |     ) -> anyhow::Result<C> {
203 |         let website = website.to_owned();
204 |         self.with_conn(move |tx| {
205 |             let mut stmt = tx.prepare("SELECT website_id FROM attempt_ids WHERE website = ?1")?;
206 |             let results = stmt.query_map((website,), |row| Ok(row.get(0)?))?;
207 |             Ok(results.into_iter().collect::<rusqlite::Result<C>>()?)
208 |         })
209 |         .await
210 |     }
211 | 
212 |     pub fn unique_phashes(&self) -> Receiver<anyhow::Result<(String, Listing)>> {
213 |         let (tx, rx) = bounded(100);
214 |         let db_clone = self.clone();
215 |         spawn(async move {
216 |             let tx_clone = tx.clone();
217 |             let res = db_clone
218 |                 .with_conn(move |conn| {
219 |                     let mut stmt = conn.prepare(
220 |                         "SELECT
221 |                             phashes.phash,
222 |                             images.listing_id
223 |                         FROM phashes
224 |                         LEFT JOIN images ON images.hash = phashes.hash
225 |                         RIGHT JOIN listings ON listings.id == images.listing_id
226 |                         WHERE (SELECT SUM(hash_count) FROM phashes phashes2 WHERE phashes2.phash = phashes.phash) == 1",
227 |                     )?;
228 |                     let results = stmt.query_map((), |row| {
229 |                         Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
230 |                     })?;
231 |                     for row in results {
232 |                         let (phash, listing_id) = row?;
233 |                         if let Some(listing) = retrieve_listing(conn, listing_id)? {
234 |                             if tx.send_blocking(Ok((phash, listing))).is_err() {
235 |                                 return Ok(());
236 |                             }
237 |                         } else {
238 |                             return Err(anyhow::Error::msg(format!(
239 |                                 "no listing found for ID {}",
240 |                                 listing_id
241 |                             )));
242 |                         }
243 |                     }
244 |                     Ok(())
245 |                 })
246 |                 .await;
247 |             if let Err(e) = res {
248 |                 tx_clone.send(Err(e)).await.ok();
249 |             }
250 |         });
251 |         rx
252 |     }
253 | 
254 |     async fn with_conn<
255 |         T: 'static + Send,
256 |         F: 'static + Send + FnOnce(&mut Transaction) -> anyhow::Result<T>,
257 |     >(
258 |         &self,
259 |         f: F,
260 |     ) -> anyhow::Result<T> {
261 |         let (res_tx, res_rx) = bounded(1);
262 |         let res = self
263 |             .req_chan
264 |             .send(Box::new(move |maybe_tx| match maybe_tx {
265 |                 Ok(tx) => {
266 |                     let res = f(tx);
267 |                     Box::new(move |commit_res| {
268 |                         if res.is_ok() && !commit_res.is_ok() {
269 |                             res_tx.send_blocking(Err(commit_res.unwrap_err())).ok();
270 |                         } else {
271 |                             res_tx.send_blocking(res).ok();
272 |                         }
273 |                     })
274 |                 }
275 |                 Err(e) => Box::new(move |_| {
276 |                     res_tx.send_blocking(Err(e)).ok();
277 |                 }),
278 |             }))
279 |             .await;
280 |         if res.is_err() {
281 |             // The true error contains the argument we tried to send,
282 |             // which we cannot wrap in anyhow for some reason.
283 |             Err(anyhow::Error::msg("connection worker has died"))
284 |         } else {
285 |             res_rx.recv().await?
286 |         }
287 |     }
288 | 
289 |     fn transaction_worker(
290 |         mut conn: Connection,
291 |         rx: Receiver<
292 |             Box<
293 |                 dyn Send
294 |                     + FnOnce(
295 |                         anyhow::Result<&mut Transaction>,
296 |                     ) -> Box<dyn Send + FnOnce(anyhow::Result<()>)>,
297 |             >,
298 |         >,
299 |     ) {
300 |         while let Some(reqs) = recv_at_least_one_blocking(&rx) {
301 |             match conn.transaction() {
302 |                 Ok(mut tx) => {
303 |                     let mut done_fns = Vec::new();
304 |                     for req in reqs {
305 |                         done_fns.push(req(Ok(&mut tx)));
306 |                     }
307 |                     if let Err(e) = tx.commit() {
308 |                         let msg = format!("{}", e);
309 |                         for done_fn in done_fns {
310 |                             done_fn(Err(anyhow::Error::msg(msg.clone())));
311 |                         }
312 |                     } else {
313 |                         for done_fn in done_fns {
314 |                             done_fn(Ok(()));
315 |                         }
316 |                     }
317 |                 }
318 |                 Err(e) => {
319 |                     let msg = format!("{}", e);
320 |                     for req in reqs {
321 |                         req(Err(anyhow::Error::msg(msg.clone())))(Ok(()))
322 |                     }
323 |                 }
324 |             }
325 |         }
326 |     }
327 | }
328 | 
329 | pub fn hash_image_url(url: &str) -> String {
330 |     let mut hasher = sha2::Sha256::new();
331 |     hasher.update(url);
332 |     let mut res = String::with_capacity(64);
333 |     for ch in hasher.finalize() {
334 |         write!(&mut res, "{:02x}", ch).unwrap();
335 |     }
336 |     res
337 | }
338 | 
339 | fn create_tables(conn: &Connection) -> anyhow::Result<()> {
340 |     conn.execute(
341 |         "CREATE TABLE if not exists attempt_ids (
342 |             website     CHAR(16),
343 |             website_id  CHAR(64),
344 |             success     INT,
345 |             PRIMARY KEY (website, website_id)
346 |         )",
347 |         (),
348 |     )?;
349 |     conn.execute(
350 |         "CREATE TABLE if not exists listings (
351 |             id             INTEGER PRIMARY KEY,
352 |             website        TEXT not null,
353 |             website_id     TEXT not null,
354 |             title          TEXT,
355 |             price          TEXT,
356 |             make           TEXT,
357 |             model          TEXT,
358 |             year           INT,
359 |             odometer       TEXT,
360 |             engine         TEXT,
361 |             exterior_color TEXT,
362 |             interior_color TEXT,
363 |             drive_type     TEXT,
364 |             fuel_type      TEXT,
365 |             fuel_economy_0 TEXT,
366 |             fuel_economy_1 TEXT,
367 |             vin            TEXT,
368 |             stock_number   TEXT,
369 |             comments       TEXT
370 |         )",
371 |         (),
372 |     )?;
373 |     conn.execute(
374 |         "CREATE TABLE if not exists owners (
375 |             id             INTEGER PRIMARY KEY,
376 |             listing_id     INT not null,
377 |             owner_index    INT not null,
378 |             website_id     TEXT,
379 |             name           TEXT,
380 |             website        TEXT
381 |         )",
382 |         (),
383 |     )?;
384 |     conn.execute(
385 |         "CREATE TABLE if not exists images (
386 |             id             INTEGER PRIMARY KEY,
387 |             listing_id     INT not null,
388 |             image_index    INT not null,
389 |             url            TEXT not null,
390 |             hash           CHAR(64) not null
391 |         )",
392 |         (),
393 |     )?;
394 |     conn.execute(
395 |         "CREATE TABLE if not exists phashes (
396 |             id             INTEGER PRIMARY KEY,
397 |             hash           CHAR(64) not null,
398 |             hash_count     INT not null,
399 |             phash          CHAR(64) not null,
400 |             UNIQUE (hash)
401 |         )",
402 |         (),
403 |     )?;
404 |     conn.execute(
405 |         "CREATE INDEX if not exists phashindex ON phashes(phash)",
406 |         (),
407 |     )?;
408 |     conn.execute(
409 |         "CREATE INDEX if not exists phasheshashindex ON phashes(hash)",
410 |         (),
411 |     )?;
412 |     conn.execute(
413 |         "CREATE INDEX if not exists imageshashindex ON images(hash)",
414 |         (),
415 |     )?;
416 |     conn.execute(
417 |         "CREATE INDEX if not exists owners_listingid ON owners(listing_id)",
418 |         (),
419 |     )?;
420 |     conn.execute(
421 |         "CREATE INDEX if not exists images_listingid ON images(listing_id)",
422 |         (),
423 |     )?;
424 |     Ok(())
425 | }
426 | 
/// Returns a reference to element `i` of the optional list, or `None` when
/// the list is absent or has no element at that index.
fn maybe_list_entry<T>(x: &Option<Vec<T>>, i: usize) -> Option<&T> {
    let items = x.as_ref()?;
    items.get(i)
}
430 | 
/// Rebuilds an optional list from its first two optional entries.
///
/// Returns `None` when `x` is absent (any `y` is then discarded); otherwise
/// the list contains `x`, followed by `y` if present.
fn maybe_build_list(x: Option<String>, y: Option<String>) -> Option<Vec<String>> {
    let first = x?;
    Some(std::iter::once(first).chain(y).collect())
}
442 | 
443 | fn retrieve_listing(tx: &Transaction, id: i64) -> rusqlite::Result<Option<Listing>> {
444 |     let row = tx.query_row_and_then(
445 |         "SELECT website, website_id, title, price, make, model, year, odometer, engine, exterior_color, interior_color, drive_type, fuel_type, fuel_economy_0, fuel_economy_1, vin, stock_number, comments FROM listings WHERE id=?1",
446 |         (id,),
447 |         |row| -> rusqlite::Result<Listing> {
448 |             Ok(Listing{
449 |                 website: row.get(0)?,
450 |                 website_id: row.get(1)?,
451 |                 title: row.get(2)?,
452 |                 price: row.get(3)?,
453 |                 make: row.get(4)?,
454 |                 model: row.get(5)?,
455 |                 year: row.get(6)?,
456 |                 odometer: row.get(7)?,
457 |                 engine_description: row.get(8)?,
458 |                 exterior_color: row.get(9)?,
459 |                 interior_color: row.get(10)?,
460 |                 drive_type: row.get(11)?,
461 |                 fuel_type: row.get(12)?,
462 |                 fuel_economy: maybe_build_list(row.get(13)?, row.get(14)?),
463 |                 owners: None,
464 |                 vin: row.get(15)?,
465 |                 stock_number: row.get(16)?,
466 |                 comments: row.get(17)?,
467 |                 image_urls: None,
468 |             })
469 |         },
470 |     );
471 |     match row {
472 |         Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
473 |         Err(e) => Err(e),
474 |         Ok(mut x) => {
475 |             let mut images = Vec::new();
476 |             for row in tx
477 |                 .prepare("SELECT url FROM images WHERE listing_id=?1 ORDER BY image_index")?
478 |                 .query_map((&id,), |x| Ok(x.get::<_, String>(0)?))?
479 |             {
480 |                 images.push(row?);
481 |             }
482 |             if images.len() > 0 {
483 |                 x.image_urls = Some(images);
484 |             }
485 |             let mut owners = Vec::new();
486 |             for row in tx
487 |                 .prepare("SELECT website_id, name, website FROM owners WHERE listing_id=?1 ORDER BY owner_index")?
488 |                 .query_map((&id,), |x| Ok(OwnerInfo{id: x.get(0)?, name: x.get(1)?, website: x.get(2)?}))?
489 |             {
490 |                 owners.push(row?);
491 |             }
492 |             if owners.len() > 0 {
493 |                 x.owners = Some(owners);
494 |             }
495 |             Ok(Some(x))
496 |         }
497 |     }
498 | }
499 | 
#[cfg(test)]
mod tests {
    use crate::types::{Listing, OwnerInfo};

    use super::Database;

    /// Drives `fut` to completion on a freshly created Tokio runtime.
    fn block_on<F: std::future::Future>(fut: F) -> F::Output {
        tokio::runtime::Runtime::new().unwrap().block_on(fut)
    }

    /// Failed attempts are tracked per (website, id) pair and do not leak
    /// across websites or ids.
    #[test]
    fn attempt_ids() {
        block_on(async {
            let db = Database::open_in_memory().await.unwrap();
            assert_eq!(db.check_attempt("kbb", "123").await.unwrap(), None);
            assert_eq!(db.check_attempt("kbb", "321").await.unwrap(), None);

            db.add_failed_attempt("kbb", "123").await.unwrap();
            assert_eq!(db.check_attempt("kbb", "123").await.unwrap(), Some(false));
            assert_eq!(db.check_attempt("kbb", "321").await.unwrap(), None);
            assert_eq!(db.check_attempt("kbb_v2", "123").await.unwrap(), None);

            db.add_failed_attempt("kbb_v2", "321").await.unwrap();
            assert_eq!(db.check_attempt("kbb", "123").await.unwrap(), Some(false));
            assert_eq!(db.check_attempt("kbb", "321").await.unwrap(), None);
            assert_eq!(
                db.check_attempt("kbb_v2", "321").await.unwrap(),
                Some(false)
            );
        });
    }

    /// Builds the fully populated listing used by the round-trip test.
    fn sample_listing() -> Listing {
        let owner = OwnerInfo {
            id: "1".to_owned(),
            name: Some("Annabelle".to_owned()),
            website: Some("corgi.com/foo".to_owned()),
        };
        Listing {
            website: "kbb".to_owned(),
            website_id: "321".to_owned(),
            title: "Car Listing".to_owned(),
            price: Some("$12.98".parse().unwrap()),
            make: Some("Nissan".to_owned()),
            model: Some("Altima".to_owned()),
            year: Some(2019),
            odometer: Some("52 mi".parse().unwrap()),
            engine_description: Some("fast boi".to_owned()),
            exterior_color: Some("Red".to_owned()),
            interior_color: None,
            drive_type: Some("RWD".parse().unwrap()),
            fuel_type: Some("Gasoline".parse().unwrap()),
            fuel_economy: Some(vec!["hello".to_owned(), "world".to_owned()]),
            owners: Some(vec![owner]),
            vin: Some("123123123".to_owned()),
            stock_number: Some("123".to_owned()),
            comments: Some("this car is awesome".to_owned()),
            image_urls: Some(vec!["hello.com".to_owned(), "baz.com".to_owned()]),
        }
    }

    /// A stored listing marks its id as successfully attempted and reads back
    /// unchanged; a neighbouring id yields nothing.
    #[test]
    fn add_listing() {
        block_on(async {
            let listing = sample_listing();

            let db = Database::open_in_memory().await.unwrap();
            assert_eq!(db.check_attempt("kbb", "123").await.unwrap(), None);
            assert_eq!(db.check_attempt("kbb", "321").await.unwrap(), None);

            db.add_failed_attempt("kbb", "123").await.unwrap();
            let listing_id = db.add_listing(listing.clone()).await.unwrap().unwrap();
            assert_eq!(db.check_attempt("kbb", "123").await.unwrap(), Some(false));
            assert_eq!(db.check_attempt("kbb", "321").await.unwrap(), Some(true));
            assert_eq!(db.listing_for_id(listing_id + 1).await.unwrap(), None);
            assert_eq!(db.listing_for_id(listing_id).await.unwrap(), Some(listing));
        });
    }
}
567 | 


--------------------------------------------------------------------------------
/src/dedup_images.rs:
--------------------------------------------------------------------------------
  1 | use std::{
  2 |     collections::HashSet,
  3 |     fs::rename,
  4 |     path::{Path, PathBuf},
  5 | };
  6 | 
  7 | use crate::{
  8 |     chan_util::recv_at_least_one,
  9 |     db::Database,
 10 |     image_util::{downsample_image, hash_image},
 11 | };
 12 | use clap::Parser;
 13 | use image::ImageFormat;
 14 | use tokio::{
 15 |     fs::{create_dir_all, read_dir},
 16 |     spawn,
 17 |     task::spawn_blocking,
 18 | };
 19 | 
// Command-line arguments for the image de-duplication subcommand.
//
// Plain `//` comments are used deliberately: `///` doc comments on a
// clap-derived struct are picked up as help text and would change the CLI.
#[derive(Clone, Parser)]
pub struct Args {
    // Resolution handed to `hash_image` when hashing each image.
    #[clap(long, value_parser, default_value_t = 16)]
    hash_resolution: u32,

    // Resolution handed to `downsample_image` before the image is written out.
    #[clap(long, value_parser, default_value_t = 256)]
    out_resolution: u32,

    // Number of blocking worker tasks; also the capacity of the path channel.
    #[clap(long, value_parser, default_value_t = 4)]
    concurrency: usize,

    // Path of the database that is opened and receives the computed hashes.
    #[clap(value_parser)]
    db_path: String,

    // Directory whose two-hex-digit sub-directories are scanned for images.
    #[clap(value_parser)]
    image_dir: String,

    // Directory that receives the downsampled images, grouped by hash prefix.
    #[clap(value_parser)]
    output_dir: String,
}
 40 | 
 41 | pub async fn main(args: Args) -> anyhow::Result<()> {
 42 |     create_hash_prefixes(&args.output_dir).await?;
 43 | 
 44 |     let (path_tx, path_rx) = async_channel::bounded(args.concurrency);
 45 |     let image_dir = args.image_dir.clone();
 46 |     spawn(async move {
 47 |         for prefix in hash_prefixes() {
 48 |             let sub_dir: PathBuf = [&image_dir, &prefix].iter().collect();
 49 |             let mut reader = read_dir(sub_dir).await.unwrap();
 50 |             while let Some(path_info) = reader.next_entry().await.unwrap() {
 51 |                 path_tx.send(path_info.path()).await.unwrap();
 52 |             }
 53 |         }
 54 |     });
 55 | 
 56 |     let db = Database::open(&args.db_path).await?;
 57 |     let completed: HashSet<String> = db.completed_dedups().await?;
 58 | 
 59 |     let (hash_tx, hash_rx) = async_channel::bounded(100);
 60 |     for _ in 0..args.concurrency {
 61 |         let path_rx = path_rx.clone();
 62 |         let hash_tx = hash_tx.clone();
 63 |         let completed = completed.clone();
 64 |         let args = args.clone();
 65 |         spawn_blocking(move || {
 66 |             while let Ok(path) = path_rx.recv_blocking() {
 67 |                 if completed.contains(&path_basename(&path)) {
 68 |                     continue;
 69 |                 }
 70 |                 match hash_and_downsample(&args, &path) {
 71 |                     Ok(hash) => hash_tx.send_blocking((path, hash)).unwrap(),
 72 |                     Err(e) => eprintln!("error from {:?}: {}", path, e),
 73 |                 }
 74 |             }
 75 |         });
 76 |     }
 77 |     drop(hash_tx);
 78 | 
 79 |     let mut num_inserted: u64 = 0;
 80 |     while let Some(objs) = recv_at_least_one(&hash_rx).await {
 81 |         let batch_size = objs.len();
 82 |         db.insert_phashes(
 83 |             objs.into_iter()
 84 |                 .map(|(path, phash)| (path_basename(&path), phash))
 85 |                 .collect(),
 86 |         )
 87 |         .await?;
 88 |         for _ in 0..batch_size {
 89 |             num_inserted += 1;
 90 |             if num_inserted % 100 == 0 {
 91 |                 println!("inserted {} hashes", num_inserted);
 92 |             }
 93 |         }
 94 |     }
 95 |     Ok(())
 96 | }
 97 | 
 98 | pub fn hash_prefixes() -> Vec<String> {
 99 |     let chars = "0123456789abcdef";
100 |     chars
101 |         .chars()
102 |         .flat_map(|x| chars.chars().map(move |y| format!("{}{}", x, y)))
103 |         .collect()
104 | }
105 | 
106 | pub async fn create_hash_prefixes<P: AsRef<Path>>(p: P) -> anyhow::Result<()> {
107 |     for prefix in hash_prefixes() {
108 |         let full_path: PathBuf = [p.as_ref(), &Path::new(&prefix)].iter().collect();
109 |         create_dir_all(full_path).await?;
110 |     }
111 |     Ok(())
112 | }
113 | 
114 | fn hash_and_downsample(args: &Args, path: &Path) -> anyhow::Result<String> {
115 |     let mut img = image::io::Reader::open(path)?
116 |         .with_guessed_format()?
117 |         .decode()?;
118 |     let hash = hash_image(args.hash_resolution, &img);
119 |     img = downsample_image(args.out_resolution, img);
120 | 
121 |     let out_path: PathBuf = [&args.output_dir, &hash[0..2], &hash].iter().collect();
122 |     let tmp_out_path: PathBuf = [
123 |         &args.output_dir,
124 |         &hash[0..2],
125 |         &format!("tmp_{}", path_basename(path)),
126 |     ]
127 |     .iter()
128 |     .collect();
129 |     img.save_with_format(&tmp_out_path, ImageFormat::Jpeg)?;
130 |     rename(tmp_out_path, out_path)?;
131 |     Ok(hash)
132 | }
133 | 
/// Returns the final component of `p` as a `String`, replacing any invalid
/// UTF-8 lossily.
///
/// # Panics
///
/// Panics if `p` has no file name (for example when it ends in `..`).
fn path_basename(p: &Path) -> String {
    let name = p.file_name().unwrap();
    name.to_string_lossy().to_string()
}
137 | 


--------------------------------------------------------------------------------
/src/export_data.rs:
--------------------------------------------------------------------------------
 1 | use clap::Parser;
 2 | use tokio::task::spawn_blocking;
 3 | 
 4 | use npy_writer::NpzWriter;
 5 | 
 6 | use crate::{
 7 |     db::Database,
 8 |     types::{Price, PriceUnit},
 9 | };
10 | 
// Command-line arguments for the data export subcommand.
//
// Plain `//` comments are used deliberately: `///` doc comments on a
// clap-derived struct are picked up as help text and would change the CLI.
#[derive(Clone, Parser)]
pub struct Args {
    // Minimum number of image URLs a listing needs in order to be exported.
    #[clap(long, value_parser, default_value_t = 1)]
    min_images: usize,

    // Path of the database to read from.
    #[clap(value_parser)]
    db_path: String,

    // Path handed to `NpzWriter::new` for the exported archive.
    #[clap(value_parser)]
    output_path: String,
}
22 | 
23 | pub async fn main(args: Args) -> anyhow::Result<()> {
24 |     let db = Database::open(&args.db_path).await?;
25 |     let results = db.unique_phashes();
26 | 
27 |     let mut phashes = Vec::new();
28 |     let mut prices = Vec::new();
29 |     let mut makes = Vec::new();
30 |     let mut models = Vec::new();
31 |     let mut years = Vec::new();
32 | 
33 |     let mut seen = 0usize;
34 |     let mut used = 0usize;
35 | 
36 |     while let Ok(item) = results.recv().await {
37 |         let (phash, listing) = item?;
38 |         seen += 1;
39 |         if listing.image_urls.map(|x| x.len()).unwrap_or_default() >= args.min_images {
40 |             if let Some(dollars) = get_dollar_amount(listing.price) {
41 |                 used += 1;
42 |                 phashes.push(phash);
43 |                 prices.push(dollars);
44 |                 makes.push(listing.make.unwrap_or_default());
45 |                 models.push(listing.model.unwrap_or_default());
46 |                 years.push(listing.year.unwrap_or_default());
47 |             }
48 |         }
49 |         if seen % 1000 == 0 {
50 |             print_stats(seen, used);
51 |         }
52 |     }
53 |     print_stats(seen, used);
54 |     spawn_blocking(|| -> anyhow::Result<()> {
55 |         let mut writer = NpzWriter::new(args.output_path)?;
56 |         writer.write("phashes", phashes)?;
57 |         writer.write("prices", prices)?;
58 |         writer.write("makes", makes)?;
59 |         writer.write("models", models)?;
60 |         writer.write("years", years)?;
61 |         writer.close()?;
62 |         Ok(())
63 |     })
64 |     .await??;
65 |     Ok(())
66 | }
67 | 
/// Prints how many items were seen, how many were used, and the used share as
/// a percentage.
///
/// When nothing has been seen yet the share is reported as `0.00%` rather than
/// the `NaN` that a 0/0 division would produce.
fn print_stats(seen: usize, used: usize) {
    let percent_used = if seen == 0 {
        0.0
    } else {
        100.0 * (used as f64) / (seen as f64)
    };
    println!(
        "total={} used={} (frac={:.02}%)",
        seen, used, percent_used,
    );
}
76 | 
77 | fn get_dollar_amount(price: Option<Price>) -> Option<f64> {
78 |     if let Some(p) = price {
79 |         if p.unit == PriceUnit::Cents && p.value > 0 {
80 |             return Some((p.value as f64) / 100.0);
81 |         }
82 |     }
83 |     None
84 | }
85 | 


--------------------------------------------------------------------------------
/src/image_util.rs:
--------------------------------------------------------------------------------
 1 | use image::{imageops::FilterType, DynamicImage, EncodableLayout};
 2 | use sha2::Digest;
 3 | use std::fmt::Write;
 4 | 
 5 | pub fn downsample_image(out_resolution: u32, img: DynamicImage) -> DynamicImage {
 6 |     let in_width = img.width();
 7 |     let in_height = img.height();
 8 |     let scale = (out_resolution as f64) / (in_width.min(in_height) as f64);
 9 |     if scale < 1.0 {
10 |         img.resize(
11 |             ((in_width as f64) * scale) as u32,
12 |             ((in_height as f64) * scale) as u32,
13 |             FilterType::Lanczos3,
14 |         )
15 |     } else {
16 |         img
17 |     }
18 | }
19 | 
20 | pub fn hash_image(resolution: u32, img: &DynamicImage) -> String {
21 |     let orig_width = img.width();
22 |     let orig_height = img.height();
23 |     let small_img = img
24 |         .resize_exact(resolution, resolution, FilterType::Lanczos3)
25 |         .into_rgb8();
26 | 
27 |     let mut bytes = Vec::new();
28 |     for px in small_img.as_bytes() {
29 |         // Quantize each color to allow some wiggle room.
30 |         bytes.push(px >> 4);
31 |     }
32 | 
33 |     // Bin the aspect ratio to make sure we don't match very
34 |     // differently sized images.
35 |     let log_aspect_ratio = ((((orig_width as f64) / (orig_height as f64)).log2())
36 |         .clamp(-4.0, 4.0)
37 |         .round()
38 |         + 4.0) as u8;
39 |     bytes.push(log_aspect_ratio);
40 | 
41 |     let mut hasher = sha2::Sha256::new();
42 |     hasher.update(&bytes);
43 |     let mut res = String::with_capacity(64);
44 |     for ch in hasher.finalize() {
45 |         write!(&mut res, "{:02x}", ch).unwrap();
46 |     }
47 |     res
48 | }
49 | 


--------------------------------------------------------------------------------
/src/kbb.rs:
--------------------------------------------------------------------------------
  1 | use crate::types::{Listing, OwnerInfo, Price, PriceUnit};
  2 | use std::{collections::HashMap, future::Future, path::PathBuf, pin::Pin, time::Duration};
  3 | 
  4 | use crate::parse_util::{inner_text, FromJSON};
  5 | use reqwest::{RequestBuilder, Response};
  6 | use scraper::{Html, Selector};
  7 | use serde_json::Value;
  8 | use tokio::{fs::File, io::AsyncWriteExt, time::sleep};
  9 | 
/// HTTP client wrapper that runs [`Request`]s with a bounded number of
/// attempts.
pub struct Client {
    /// Underlying HTTP client; `run` replaces it with a fresh one after a send
    /// error.
    client: reqwest::Client,
    /// Total number of attempts `run` makes per request.
    num_retries: i32,
}
 14 | 
 15 | impl Client {
 16 |     pub fn new(num_retries: i32) -> Client {
 17 |         Client {
 18 |             client: reqwest::Client::new(),
 19 |             num_retries: num_retries,
 20 |         }
 21 |     }
 22 | 
 23 |     pub async fn run<R: Request>(&mut self, req: R) -> anyhow::Result<R::Output> {
 24 |         let mut last_err: anyhow::Error = anyhow::Error::msg("UNREACHABLE");
 25 |         for i in 0..self.num_retries {
 26 |             let builder = req
 27 |                 .build_request(&self)
 28 |                 .timeout(Duration::from_secs(30))
 29 |                 .header("host", "www.kbb.com")
 30 |                 .header("user-agent", format!("curl/1"));
 31 |             let result = builder.send().await;
 32 |             match result {
 33 |                 Err(e) => {
 34 |                     last_err = e.into();
 35 |                     self.client = reqwest::Client::new();
 36 |                     if i + 1 < self.num_retries {
 37 |                         sleep(Duration::from_secs(10)).await;
 38 |                     }
 39 |                 }
 40 |                 Ok(resp) => {
 41 |                     let output = req.handle_response(resp).await;
 42 |                     match output {
 43 |                         Err(e) => {
 44 |                             last_err = e.into();
 45 |                         }
 46 |                         Ok(x) => {
 47 |                             return Ok(x);
 48 |                         }
 49 |                     }
 50 |                 }
 51 |             };
 52 |         }
 53 |         Err(last_err)
 54 |     }
 55 | }
 56 | 
/// A request that [`Client::run`] knows how to send and interpret.
pub trait Request {
    /// Value produced from a successfully handled response.
    type Output;
    /// Error produced by `handle_response`; must convert into `anyhow::Error`.
    type Err: Into<anyhow::Error>;

    /// Builds the HTTP request using the given client. `Client::run` adds a
    /// timeout and headers to the returned builder before sending it.
    fn build_request(&self, client: &Client) -> RequestBuilder;

    /// Consumes the response and returns a boxed future that turns it into
    /// `Self::Output`. The return type has no lifetime tied to `&self`, so
    /// implementations copy whatever they need out of `self` first.
    fn handle_response(
        &self,
        resp: Response,
    ) -> Pin<Box<dyn Send + Future<Output = Result<Self::Output, Self::Err>>>>;
}
 68 | 
/// A request for fetching information about an individual car listing.
///
/// The wrapped string is the listing id; it is substituted into the
/// `listingId` query parameter of the details URL and stored as the resulting
/// listing's `website_id`.
pub struct ListingRequest(pub String);
 71 | 
 72 | impl Request for ListingRequest {
 73 |     type Output = Option<Listing>;
 74 |     type Err = anyhow::Error;
 75 | 
 76 |     fn build_request(&self, client: &Client) -> RequestBuilder {
 77 |         client.client.get(format!(
 78 |             "https://www.kbb.com/cars-for-sale/vehicledetails.xhtml?listingId={}",
 79 |             self.0
 80 |         ))
 81 |     }
 82 | 
 83 |     fn handle_response(
 84 |         &self,
 85 |         resp: Response,
 86 |     ) -> Pin<Box<dyn Send + Future<Output = anyhow::Result<Self::Output>>>> {
 87 |         let id = self.0.clone();
 88 |         Box::pin(async {
 89 |             let text = resp.text().await?;
 90 |             let doc = Html::parse_fragment(&text);
 91 |             let titles: Vec<_> = doc.select(&Selector::parse("h1").unwrap()).collect();
 92 |             if titles.len() == 0 {
 93 |                 // The "car no longer available" page.
 94 |                 return Ok(None);
 95 |             } else if titles.len() != 1 {
 96 |                 return Err(anyhow::Error::msg("no title heading found on listing page"));
 97 |             }
 98 | 
 99 |             let doc_info = extract_doc_json(&doc)?;
100 |             let inventory_item =
101 |                 <HashMap<String, Value>>::extract_from_json(&doc_info, "initialState.inventory")
102 |                     .ok()
103 |                     .and_then(|x| x.into_values().next());
104 | 
105 |             Ok(Some(Listing {
106 |                 website: "kbb.com".to_owned(),
107 |                 website_id: id,
108 |                 title: inner_text(&titles[0]),
109 |                 price: {
110 |                     f64::extract_from_json(
111 |                         &doc_info,
112 |                         "initialState.birf.pageData.page.vehicle.price",
113 |                     )
114 |                     .ok()
115 |                     .map(|x| Price {
116 |                         value: 100 * (x as u64),
117 |                         unit: PriceUnit::Cents,
118 |                     })
119 |                 },
120 |                 make: {
121 |                     <Vec<String>>::extract_from_json(
122 |                         &doc_info,
123 |                         "initialState.birf.pageData.page.vehicle.makeName",
124 |                     )
125 |                     .ok()
126 |                     .and_then(vec_into_first)
127 |                 },
128 |                 model: {
129 |                     <Vec<String>>::extract_from_json(
130 |                         &doc_info,
131 |                         "initialState.birf.pageData.page.vehicle.modelName",
132 |                     )
133 |                     .ok()
134 |                     .and_then(vec_into_first)
135 |                 },
136 |                 year: {
137 |                     u64::extract_from_json(
138 |                         &doc_info,
139 |                         "initialState.birf.pageData.page.vehicle.car_year",
140 |                     )
141 |                     .ok()
142 |                 },
143 |                 odometer: {
144 |                     String::extract_from_json(
145 |                         &doc_info,
146 |                         "initialState.birf.pageData.page.vehicle.odometer",
147 |                     )
148 |                     .ok()
149 |                     .and_then(|x| x.parse().ok())
150 |                 },
151 |                 engine_description: inventory_item
152 |                     .as_ref()
153 |                     .and_then(|x| String::extract_from_json(&x, "engine").ok()),
154 |                 exterior_color: {
155 |                     inventory_item
156 |                         .as_ref()
157 |                         .and_then(|x| String::extract_from_json(&x, "exteriorColorSimple").ok())
158 |                         .or_else(|| {
159 |                             <Vec<String>>::extract_from_json(
160 |                                 &doc_info,
161 |                                 "initialState.birf.pageData.page.vehicle.color",
162 |                             )
163 |                             .ok()
164 |                             .and_then(vec_into_first)
165 |                         })
166 |                 },
167 |                 interior_color: {
168 |                     inventory_item
169 |                         .as_ref()
170 |                         .and_then(|x| String::extract_from_json(&x, "interiorColorSimple").ok())
171 |                 },
172 |                 drive_type: {
173 |                     inventory_item
174 |                         .as_ref()
175 |                         .and_then(|x| String::extract_from_json(&x, "driveGroup").ok())
176 |                         .and_then(|x| x.parse().ok())
177 |                 },
178 |                 fuel_type: {
179 |                     inventory_item
180 |                         .as_ref()
181 |                         .and_then(|x| String::extract_from_json(&x, "fuelType").ok())
182 |                         .and_then(|x| x.parse().ok())
183 |                 },
184 |                 fuel_economy: {
185 |                     <Vec<String>>::extract_from_json(
186 |                         &doc_info,
187 |                         "initialState.birf.pageData.page.vehicle.fuelEconomy",
188 |                     )
189 |                     .ok()
190 |                 },
191 |                 owners: {
192 |                     <HashMap<String, Value>>::extract_from_json(&doc_info, "initialState.owners")
193 |                         .ok()
194 |                         .map(|x| {
195 |                             let mut result = Vec::new();
196 |                             for (owner_id, owner_info) in x.into_iter() {
197 |                                 let name = String::extract_from_json(&owner_info, "name").ok();
198 |                                 let website =
199 |                                     String::extract_from_json(&owner_info, "website.href").ok();
200 |                                 result.push(OwnerInfo {
201 |                                     id: owner_id,
202 |                                     name,
203 |                                     website,
204 |                                 });
205 |                             }
206 |                             result
207 |                         })
208 |                 },
209 |                 vin: String::extract_from_json(
210 |                     &doc_info,
211 |                     "initialState.birf.pageData.page.vehicle.vin",
212 |                 )
213 |                 .ok(),
214 |                 stock_number: String::extract_from_json(
215 |                     &doc_info,
216 |                     "initialState.birf.pageData.page.vehicle.stockNumber",
217 |                 )
218 |                 .ok(),
219 |                 comments: {
220 |                     inventory_item
221 |                         .as_ref()
222 |                         .and_then(|x| {
223 |                             String::extract_from_json(&x, "additionalInfo.vehicleDescription").ok()
224 |                         })
225 |                         .map(|x| x.replace("<br>", "\n"))
226 |                         .map(|x| inner_text(&Html::parse_fragment(&x).root_element()))
227 |                 },
228 |                 image_urls: {
229 |                     inventory_item
230 |                         .as_ref()
231 |                         .and_then(|x| extract_image_urls(x).ok())
232 |                 },
233 |             }))
234 |         })
235 |     }
236 | }
237 | 
238 | fn extract_doc_json(body: &Html) -> anyhow::Result<serde_json::Value> {
239 |     let preamble = "window.__BONNET_DATA__=";
240 |     for x in body.select(&Selector::parse("script").unwrap()) {
241 |         let contents = inner_text(&x);
242 |         if !contents.starts_with(preamble) {
243 |             continue;
244 |         }
245 |         return Ok(serde_json::from_str(&contents[preamble.len()..])?);
246 |     }
247 |     Err(anyhow::Error::msg("could not find JSON data in document"))
248 | }
249 | 
250 | fn extract_image_urls(inventory_item: &Value) -> anyhow::Result<Vec<String>> {
251 |     let mut raw_result = <Vec<Value>>::extract_from_json(inventory_item, "images.sources")?;
252 | 
253 |     // Re-order so that the primary image URL is first.
254 |     if let Ok(primary) = u64::extract_from_json(inventory_item, "images.primary") {
255 |         let primary = primary as usize;
256 |         if primary < raw_result.len() {
257 |             let x = raw_result.remove(primary);
258 |             raw_result.insert(0, x);
259 |         }
260 |     }
261 | 
262 |     Ok(raw_result
263 |         .into_iter()
264 |         .filter_map(|x| String::extract_from_json(&x, "src").ok())
265 |         .map(|x| {
266 |             if x.starts_with("//") {
267 |                 format!("https://{}", x)
268 |             } else {
269 |                 x
270 |             }
271 |         })
272 |         .collect())
273 | }
274 | 
/// Consumes `list` and returns its first element, or `None` if it is empty.
fn vec_into_first<T>(list: Vec<T>) -> Option<T> {
    list.into_iter().next()
}
281 | 
/// A request that downloads `url` and writes the response body to `out_path`.
pub struct ImageDownloadRequest {
    /// URL to fetch with a GET request.
    pub url: String,
    /// File that is created (or truncated) and filled with the response body.
    pub out_path: PathBuf,
}
286 | 
287 | impl Request for ImageDownloadRequest {
288 |     type Output = ();
289 |     type Err = anyhow::Error;
290 | 
291 |     fn build_request(&self, client: &Client) -> RequestBuilder {
292 |         client.client.get(&self.url)
293 |     }
294 | 
295 |     fn handle_response(
296 |         &self,
297 |         mut resp: Response,
298 |     ) -> Pin<Box<dyn Send + Future<Output = anyhow::Result<Self::Output>>>> {
299 |         let out_path = self.out_path.clone();
300 |         Box::pin(async move {
301 |             let mut out_file = File::create(&out_path).await?;
302 |             while let Some(chunk) = resp.chunk().await? {
303 |                 out_file.write_all(&chunk).await?;
304 |             }
305 |             out_file.flush().await?;
306 |             Ok(())
307 |         })
308 |     }
309 | }
310 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
 1 | use std::process::ExitCode;
 2 | 
 3 | use clap::Parser;
 4 | 
 5 | mod chan_util;
 6 | mod db;
 7 | mod dedup_images;
 8 | mod export_data;
 9 | mod image_util;
10 | mod kbb;
11 | mod make_models;
12 | mod parse_util;
13 | mod scrape_kbb;
14 | mod task_queue;
15 | mod types;
16 | 
// Top-level command line: one variant per subcommand, each flattening the
// argument struct of the module that implements it.
//
// Plain `//` comments are used deliberately: `///` doc comments on a
// clap-derived type are picked up as help text and would change the CLI.
#[derive(Parser, Clone)]
#[clap(author, version, about, long_about = None)]
enum Args {
    // Handled by `scrape_kbb::main`.
    ScrapeKbb {
        #[clap(flatten)]
        args: scrape_kbb::Args,
    },
    // Handled by `dedup_images::main`.
    DedupImages {
        #[clap(flatten)]
        args: dedup_images::Args,
    },
    // Handled by `export_data::main`.
    ExportData {
        #[clap(flatten)]
        args: export_data::Args,
    },
    // Handled by `make_models::main`.
    MakeModels {
        #[clap(flatten)]
        args: make_models::Args,
    },
}
37 | 
38 | #[tokio::main]
39 | async fn main() -> ExitCode {
40 |     let args = Args::parse();
41 |     if let Err(e) = match args {
42 |         Args::ScrapeKbb { args } => scrape_kbb::main(args).await,
43 |         Args::DedupImages { args } => dedup_images::main(args).await,
44 |         Args::ExportData { args } => export_data::main(args).await,
45 |         Args::MakeModels { args } => make_models::main(args).await,
46 |     } {
47 |         eprintln!("{}", e);
48 |         ExitCode::FAILURE
49 |     } else {
50 |         ExitCode::SUCCESS
51 |     }
52 | }
53 | 


--------------------------------------------------------------------------------
/src/make_models.rs:
--------------------------------------------------------------------------------
 1 | use crate::db::Database;
 2 | use clap::Parser;
 3 | 
// Command-line arguments for the make/model statistics subcommand.
//
// Plain `//` comments are used deliberately: `///` doc comments on a
// clap-derived struct are picked up as help text and would change the CLI.
#[derive(Clone, Parser)]
pub struct Args {
    // Path of the database to read make/model counts from.
    #[clap(value_parser)]
    db_path: String,
}
 9 | 
10 | pub async fn main(args: Args) -> anyhow::Result<()> {
11 |     let db = Database::open(args.db_path).await?;
12 |     let counts = db.make_model_counts().await?;
13 | 
14 |     let total: i64 = counts.iter().map(|(_, _, count)| count).sum();
15 | 
16 |     for i in 1..10 {
17 |         let sub_idx = (((counts.len() * i) as f64) / 10.0).round() as usize;
18 |         let sub_total: i64 = counts[0..sub_idx].iter().map(|(_, _, count)| count).sum();
19 |         let (_, _, sub_count) = &counts[sub_idx];
20 |         println!(
21 |             "{}-percentile: {:.02}% (total items: {}) (per-entry {})",
22 |             i * 10,
23 |             (sub_total as f64) / (total as f64) * 100.0,
24 |             sub_idx,
25 |             sub_count
26 |         );
27 |     }
28 | 
29 |     Ok(())
30 | }
31 | 


--------------------------------------------------------------------------------
/src/parse_util.rs:
--------------------------------------------------------------------------------
  1 | use scraper::ElementRef;
  2 | use serde_json::Value;
  3 | use std::{collections::HashMap, fmt::Write};
  4 | 
  5 | pub fn inner_text(obj: &ElementRef) -> String {
  6 |     let mut result = String::new();
  7 |     for x in obj.text() {
  8 |         write!(&mut result, "{} ", x).unwrap();
  9 |     }
 10 |     result.trim().to_owned()
 11 | }
 12 | 
 13 | pub trait FromJSON
 14 | where
 15 |     Self: Sized,
 16 | {
 17 |     fn from_json(value: &Value) -> anyhow::Result<Self>;
 18 | 
 19 |     fn extract_from_json(root: &Value, path: &str) -> anyhow::Result<Self> {
 20 |         let mut cur_obj = root;
 21 |         for part in path.split(".") {
 22 |             if let Value::Object(obj) = cur_obj {
 23 |                 if let Some(x) = obj.get(part) {
 24 |                     cur_obj = x;
 25 |                 } else {
 26 |                     return Err(anyhow::Error::msg(format!(
 27 |                         "object path not found: {}",
 28 |                         path
 29 |                     )));
 30 |                 }
 31 |             } else {
 32 |                 return Err(anyhow::Error::msg(format!(
 33 |                     "incorrect type in object path: {}",
 34 |                     path
 35 |                 )));
 36 |             }
 37 |         }
 38 |         match Self::from_json(cur_obj) {
 39 |             Ok(x) => Ok(x),
 40 |             Err(e) => Err(anyhow::Error::msg(format!(
 41 |                 "error for object path {}: {}",
 42 |                 path, e
 43 |             ))),
 44 |         }
 45 |     }
 46 | }
 47 | 
 48 | impl FromJSON for Value {
 49 |     fn from_json(value: &Value) -> anyhow::Result<Self> {
 50 |         Ok(value.clone())
 51 |     }
 52 | }
 53 | 
 54 | impl FromJSON for f64 {
 55 |     fn from_json(value: &Value) -> anyhow::Result<Self> {
 56 |         match value {
 57 |             Value::Number(x) => {
 58 |                 if let Some(f) = x.as_f64() {
 59 |                     Ok(f)
 60 |                 } else {
 61 |                     Err(anyhow::Error::msg(format!("{} is not an f64", x)))
 62 |                 }
 63 |             }
 64 |             _ => Err(anyhow::Error::msg(format!("{} is not a number", value))),
 65 |         }
 66 |     }
 67 | }
 68 | 
 69 | impl FromJSON for u64 {
 70 |     fn from_json(value: &Value) -> anyhow::Result<Self> {
 71 |         match value {
 72 |             Value::Number(x) => {
 73 |                 if let Some(f) = x.as_u64() {
 74 |                     Ok(f)
 75 |                 } else {
 76 |                     Err(anyhow::Error::msg(format!("{} is not a u64", x)))
 77 |                 }
 78 |             }
 79 |             _ => Err(anyhow::Error::msg(format!("{} is not a number", value))),
 80 |         }
 81 |     }
 82 | }
 83 | 
 84 | impl FromJSON for String {
 85 |     fn from_json(value: &Value) -> anyhow::Result<Self> {
 86 |         match value {
 87 |             Value::String(x) => Ok(x.clone()),
 88 |             _ => Err(anyhow::Error::msg(format!("{} is not a string", value))),
 89 |         }
 90 |     }
 91 | }
 92 | 
 93 | impl<T: FromJSON> FromJSON for Vec<T> {
 94 |     fn from_json(value: &Value) -> anyhow::Result<Self> {
 95 |         match value {
 96 |             Value::Array(x) => x
 97 |                 .iter()
 98 |                 .map(|x| T::from_json(x))
 99 |                 .collect::<anyhow::Result<Vec<T>>>(),
100 |             _ => Err(anyhow::Error::msg(format!("{} is not an array", value))),
101 |         }
102 |     }
103 | }
104 | 
105 | impl<T: FromJSON> FromJSON for HashMap<String, T> {
106 |     fn from_json(value: &Value) -> anyhow::Result<Self> {
107 |         match value {
108 |             Value::Object(x) => x
109 |                 .iter()
110 |                 .map(|(k, v)| T::from_json(v).map(|x| (k.clone(), x)))
111 |                 .collect::<anyhow::Result<HashMap<String, T>>>(),
112 |             _ => Err(anyhow::Error::msg(format!("{} is not an object", value))),
113 |         }
114 |     }
115 | }
116 | 


--------------------------------------------------------------------------------
/src/scrape_kbb.rs:
--------------------------------------------------------------------------------
  1 | use std::{
  2 |     collections::HashSet,
  3 |     path::{Path, PathBuf},
  4 | };
  5 | 
  6 | use crate::{
  7 |     db::{hash_image_url, Database},
  8 |     dedup_images::create_hash_prefixes,
  9 |     image_util::downsample_image,
 10 |     kbb::{Client, ImageDownloadRequest, ListingRequest},
 11 |     task_queue::TaskQueue,
 12 |     types::Listing,
 13 | };
 14 | use clap::Parser;
 15 | use image::ImageFormat;
 16 | use rand::seq::SliceRandom;
 17 | use tokio::{spawn, sync::mpsc::channel, task::spawn_blocking, time::Instant};
 18 | 
// Website identifier passed to the database's attempt-tracking calls
// (`get_attempt_ids`, `check_attempt`, `add_failed_attempt`) in this module.
const KBB_WEBSITE_NAME: &str = "kbb.com";
 20 | 
// Command-line arguments for the KBB scraping subcommand.
//
// Plain `//` comments are used on purpose: `///` doc comments on a clap
// derive struct are picked up as help text and would change `--help` output.
#[derive(Clone, Parser)]
pub struct Args {
    // First listing ID to try (inclusive start of the `min..max` range).
    #[clap(long, value_parser, default_value_t = 660000000)]
    min_id: i64,

    // End of the listing ID range (exclusive, since the range is `min..max`).
    #[clap(long, value_parser, default_value_t = 668000000)]
    max_id: i64,

    // Passed to `Client::new`.
    #[clap(short, long, value_parser, default_value_t = 15)]
    num_retries: i32,

    // Number of worker tasks spawned; also used as the result channel capacity.
    #[clap(short, long, value_parser, default_value_t = 8)]
    concurrency: usize,

    // Size passed to `downsample_image`; 0 skips resizing and keeps the
    // downloaded file as-is.
    #[clap(short, long, value_parser, default_value_t = 256)]
    resize_images: u32,

    // Positional argument; the value is handed to `Database::open`.
    #[clap(value_parser)]
    db_path: String,

    // Positional argument; directory under which downloaded images are stored.
    #[clap(value_parser)]
    image_dir: String,
}
 44 | 
 45 | pub async fn main(args: Args) -> anyhow::Result<()> {
 46 |     create_hash_prefixes(&args.image_dir).await?;
 47 | 
 48 |     println!("connecting database...");
 49 |     let db = Database::open(&args.db_path).await?;
 50 |     println!("creating permutation...");
 51 |     let perm = generate_permutation(args.min_id, args.max_id);
 52 |     println!("filtering permutation...");
 53 |     let used_ids: HashSet<_> = db.get_attempt_ids(KBB_WEBSITE_NAME).await?;
 54 |     perm.filter(|x| !used_ids.contains(&format!("{}", x))).await;
 55 |     println!("scraping...");
 56 | 
 57 |     let (tx, mut rx) = channel(args.concurrency);
 58 |     for _ in 0..args.concurrency {
 59 |         let local_db = db.clone();
 60 |         let local_perm = perm.clone();
 61 |         let local_args = args.clone();
 62 |         let local_tx = tx.clone();
 63 |         spawn(async move {
 64 |             local_tx
 65 |                 .send(fetch_listings(local_db, local_perm, local_args).await)
 66 |                 .await
 67 |                 .unwrap();
 68 |         });
 69 |     }
 70 | 
 71 |     while let Some(exc) = rx.recv().await {
 72 |         exc?;
 73 |     }
 74 | 
 75 |     Ok(())
 76 | }
 77 | 
 78 | async fn fetch_listings(db: Database, perm: TaskQueue<i64>, args: Args) -> anyhow::Result<()> {
 79 |     let mut client = Client::new(args.num_retries);
 80 |     while let Some((id, remaining)) = perm.pop().await {
 81 |         let id_str = format!("{}", id);
 82 |         if db.check_attempt(KBB_WEBSITE_NAME, &id_str).await?.is_some() {
 83 |             continue;
 84 |         }
 85 |         if let Some(listing) = client.run(ListingRequest(id_str.clone())).await? {
 86 |             download_listing_images(&mut client, &args.image_dir, args.resize_images, &listing)
 87 |                 .await?;
 88 |             db.add_listing(listing).await?;
 89 |         } else {
 90 |             db.add_failed_attempt(KBB_WEBSITE_NAME, &id_str).await?;
 91 |         }
 92 | 
 93 |         let completed = perm.orig_len() - remaining;
 94 |         if completed % 100 == 0 {
 95 |             let start = Instant::now();
 96 |             let (num_listings, total_attempts) = db.counts().await?;
 97 |             let counts_duration = start.elapsed();
 98 |             eprintln!(
 99 |                 "scraped={:.04}% hit_rate={:.02}% hit_total={} db_latency={:.05}",
100 |                 100.0 * (completed as f64) / (perm.orig_len() as f64),
101 |                 100.0 * (num_listings as f64) / (total_attempts as f64),
102 |                 num_listings,
103 |                 counts_duration.as_secs_f64(),
104 |             );
105 |         }
106 |     }
107 |     Ok(())
108 | }
109 | 
110 | async fn download_listing_images(
111 |     client: &mut Client,
112 |     image_path: &str,
113 |     resize_images: u32,
114 |     listing: &Listing,
115 | ) -> anyhow::Result<()> {
116 |     if let Some(urls) = &listing.image_urls {
117 |         for url in urls {
118 |             let image_hash = hash_image_url(&url);
119 |             let out_path: PathBuf = [image_path, &image_hash[0..2], &image_hash]
120 |                 .iter()
121 |                 .collect();
122 |             if tokio::fs::metadata(&out_path).await.is_ok() {
123 |                 // Skip for already-downloaded image URL
124 |                 continue;
125 |             }
126 |             // Download+rename to atomically write the file.
127 |             let tmp_out_path: PathBuf = [
128 |                 image_path,
129 |                 &format!("{}.{}", image_hash, listing.website_id),
130 |             ]
131 |             .iter()
132 |             .collect();
133 |             client
134 |                 .run(ImageDownloadRequest {
135 |                     url: url.clone(),
136 |                     out_path: tmp_out_path.clone(),
137 |                 })
138 |                 .await?;
139 |             if resize_images != 0 {
140 |                 spawn_blocking(move || resize_or_rename(resize_images, tmp_out_path, out_path))
141 |                     .await??;
142 |             } else {
143 |                 tokio::fs::rename(tmp_out_path, out_path).await?;
144 |             }
145 |         }
146 |     }
147 |     Ok(())
148 | }
149 | 
150 | fn resize_or_rename<T: AsRef<Path>>(size: u32, src: T, dst: T) -> anyhow::Result<()> {
151 |     if attempt_resize(size, &src, &dst).is_err() {
152 |         std::fs::rename(src, dst)?;
153 |     }
154 |     Ok(())
155 | }
156 | 
157 | fn attempt_resize<T: AsRef<Path>>(size: u32, src: T, dst: T) -> anyhow::Result<()> {
158 |     let img = downsample_image(
159 |         size,
160 |         image::io::Reader::open(&src)?
161 |             .with_guessed_format()?
162 |             .decode()?,
163 |     );
164 |     let tmp_tmp_path = format!("{}_writing", src.as_ref().to_string_lossy());
165 |     img.save_with_format(&tmp_tmp_path, ImageFormat::Jpeg)?;
166 |     std::fs::rename(tmp_tmp_path, dst)?;
167 |     Ok(())
168 | }
169 | 
170 | fn generate_permutation(min: i64, max: i64) -> TaskQueue<i64> {
171 |     let mut v: Vec<i64> = (min..max).collect();
172 |     v.shuffle(&mut rand::thread_rng());
173 |     v.into()
174 | }
175 | 


--------------------------------------------------------------------------------
/src/task_queue.rs:
--------------------------------------------------------------------------------
 1 | // Adapted from map-dump:
 2 | // https://github.com/unixpickle/map-dump/blob/e5997309cd40a32c63d5fa461746d9dabc1dfea2/src/task_queue.rs
 3 | 
 4 | use std::{mem::take, ops::DerefMut, sync::Arc};
 5 | 
 6 | use tokio::sync::Mutex;
 7 | 
/// A queue of work items shared between its clones: every clone holds the
/// same `Arc<Mutex<Vec<T>>>`, so an item popped through one handle is gone
/// for all of them.
pub struct TaskQueue<T: Send> {
    /// Remaining items; `pop` removes from the end of the vector.
    queue: Arc<Mutex<Vec<T>>>,
    /// Number of items the queue was constructed with. It is copied into
    /// clones and is not changed by `pop` or `filter`.
    orig_len: usize,
}
12 | 
13 | impl<T: Send> Clone for TaskQueue<T> {
14 |     fn clone(&self) -> TaskQueue<T> {
15 |         TaskQueue {
16 |             queue: self.queue.clone(),
17 |             orig_len: self.orig_len,
18 |         }
19 |     }
20 | }
21 | 
22 | impl<T: Send, I: IntoIterator<Item = T>> From<I> for TaskQueue<T> {
23 |     fn from(x: I) -> TaskQueue<T> {
24 |         let v: Vec<_> = x.into_iter().collect();
25 |         let orig_len = v.len();
26 |         let queue = Arc::new(Mutex::new(v));
27 |         TaskQueue {
28 |             queue: queue,
29 |             orig_len: orig_len,
30 |         }
31 |     }
32 | }
33 | 
34 | impl<T: Send> TaskQueue<T> {
35 |     pub async fn pop(&self) -> Option<(T, usize)> {
36 |         let mut locked = self.queue.lock().await;
37 |         locked.pop().map(|x| (x, locked.len()))
38 |     }
39 | 
40 |     pub fn orig_len(&self) -> usize {
41 |         self.orig_len
42 |     }
43 | 
44 |     pub async fn filter<P: FnMut(&T) -> bool>(&self, p: P) {
45 |         let mut locked = self.queue.lock().await;
46 |         *locked = take(locked.deref_mut()).into_iter().filter(p).collect();
47 |     }
48 | }
49 | 


--------------------------------------------------------------------------------
/src/types.rs:
--------------------------------------------------------------------------------
  1 | use std::convert::Infallible;
  2 | use std::fmt::Display;
  3 | use std::str::FromStr;
  4 | 
  5 | use rusqlite::types::{FromSql, FromSqlError, ToSqlOutput, ValueRef};
  6 | use rusqlite::ToSql;
  7 | 
  8 | #[derive(Clone, Debug, Default, PartialEq, Eq)]
  9 | pub enum PriceUnit {
 10 |     #[default]
 11 |     Cents,
 12 | }
 13 | 
 14 | #[derive(Clone, Debug, PartialEq, Eq)]
 15 | pub struct Price {
 16 |     pub value: u64,
 17 |     pub unit: PriceUnit,
 18 | }
 19 | 
 20 | impl FromStr for Price {
 21 |     type Err = <f64 as FromStr>::Err;
 22 | 
 23 |     fn from_str(s: &str) -> Result<Self, Self::Err> {
 24 |         let mut processed = s.trim().replace(",", "");
 25 |         let mut unit = PriceUnit::default();
 26 |         if processed.starts_with("$") {
 27 |             unit = PriceUnit::Cents;
 28 |             processed = processed.replace("$", "");
 29 |         }
 30 |         Ok(Price {
 31 |             value: (f64::from_str(&processed)? * 100.0).round() as u64,
 32 |             unit: unit,
 33 |         })
 34 |     }
 35 | }
 36 | 
 37 | impl Display for Price {
 38 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 39 |         match self.unit {
 40 |             PriceUnit::Cents => {
 41 |                 write!(f, "${:.02}", ((self.value as f64) / 100.0))?;
 42 |             }
 43 |         }
 44 |         Ok(())
 45 |     }
 46 | }
 47 | 
 48 | #[derive(Clone, Debug, Default, PartialEq, Eq)]
 49 | pub enum DistanceUnit {
 50 |     #[default]
 51 |     Miles,
 52 | }
 53 | 
 54 | #[derive(Clone, Debug, PartialEq, Eq)]
 55 | pub struct Distance {
 56 |     pub value: u64,
 57 |     pub unit: DistanceUnit,
 58 | }
 59 | 
 60 | impl FromStr for Distance {
 61 |     type Err = <f64 as FromStr>::Err;
 62 | 
 63 |     fn from_str(s: &str) -> Result<Self, Self::Err> {
 64 |         let mut processed = s.trim().replace(",", "");
 65 |         let mut unit = DistanceUnit::default();
 66 |         if processed.ends_with(" mi") {
 67 |             unit = DistanceUnit::Miles;
 68 |             processed = processed.replace(" mi", "");
 69 |         }
 70 |         Ok(Distance {
 71 |             value: (f64::from_str(&processed)?).round() as u64,
 72 |             unit: unit,
 73 |         })
 74 |     }
 75 | }
 76 | 
 77 | impl Display for Distance {
 78 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 79 |         match self.unit {
 80 |             DistanceUnit::Miles => {
 81 |                 write!(f, "{} mi", self.value)?;
 82 |             }
 83 |         }
 84 |         Ok(())
 85 |     }
 86 | }
 87 | 
 88 | #[derive(Clone, Debug, PartialEq, Eq)]
 89 | pub enum DriveType {
 90 |     TwoWheelFront,
 91 |     TwoWheelRear,
 92 |     FourWheel,
 93 |     Other(String),
 94 | }
 95 | 
 96 | impl FromStr for DriveType {
 97 |     type Err = Infallible;
 98 | 
 99 |     fn from_str(s: &str) -> Result<Self, Self::Err> {
100 |         Ok(match s {
101 |             "FWD" => DriveType::TwoWheelFront,
102 |             "RWD" => DriveType::TwoWheelRear,
103 |             "AWD4WD" => DriveType::FourWheel,
104 |             x => DriveType::Other(x.to_owned()),
105 |         })
106 |     }
107 | }
108 | 
109 | impl Display for DriveType {
110 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111 |         match self {
112 |             Self::TwoWheelFront => write!(f, "FWD")?,
113 |             Self::TwoWheelRear => write!(f, "RWD")?,
114 |             Self::FourWheel => write!(f, "AWD4WD")?,
115 |             Self::Other(x) => write!(f, "{}", x)?,
116 |         }
117 |         Ok(())
118 |     }
119 | }
120 | 
/// Fuel category of a vehicle.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FuelType {
    Gasoline,
    Hybrid,
    Diesel,
    Electric,
    Hydrogen,
    /// Used by `from_str` for every name it does not recognize.
    Alternative,
}

impl FromStr for FuelType {
    type Err = Infallible;

    /// Maps the exact variant names to their variants; any other string
    /// becomes `Alternative`. Never fails.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = match s {
            "Gasoline" => Self::Gasoline,
            "Hybrid" => Self::Hybrid,
            "Diesel" => Self::Diesel,
            "Electric" => Self::Electric,
            "Hydrogen" => Self::Hydrogen,
            _ => Self::Alternative,
        };
        Ok(parsed)
    }
}

impl Display for FuelType {
    /// Writes the variant's name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::Gasoline => "Gasoline",
            Self::Hybrid => "Hybrid",
            Self::Diesel => "Diesel",
            Self::Electric => "Electric",
            Self::Hydrogen => "Hydrogen",
            Self::Alternative => "Alternative",
        };
        f.write_str(name)
    }
}
162 | 
/// Information about one owner attached to a [`Listing`].
///
/// Only `id` is mandatory; `name` and `website` may be absent.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct OwnerInfo {
    pub id: String,
    pub name: Option<String>,
    pub website: Option<String>,
}
169 | 
/// A single scraped vehicle listing.
///
/// `website`, `website_id` and `title` are always present; every other
/// attribute is optional.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Listing {
    // presumably the name of the site the listing came from (e.g. "kbb.com")
    // — confirm against the code that constructs listings
    pub website: String,
    // presumably the listing's identifier on that site — confirm
    pub website_id: String,
    pub title: String,
    pub price: Option<Price>,
    pub make: Option<String>,
    pub model: Option<String>,
    pub year: Option<u64>,
    pub odometer: Option<Distance>,
    pub engine_description: Option<String>,
    pub exterior_color: Option<String>,
    pub interior_color: Option<String>,
    pub drive_type: Option<DriveType>,
    pub fuel_type: Option<FuelType>,
    pub fuel_economy: Option<Vec<String>>,
    pub owners: Option<Vec<OwnerInfo>>,
    pub vin: Option<String>,
    pub stock_number: Option<String>,
    pub comments: Option<String>,
    pub image_urls: Option<Vec<String>>,
}
192 | 
193 | macro_rules! sql_string_obj {
194 |     ($data_type:ty) => {
195 |         impl ToSql for $data_type {
196 |             fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> {
197 |                 Ok(ToSqlOutput::Owned(rusqlite::types::Value::Text(format!(
198 |                     "{}",
199 |                     self
200 |                 ))))
201 |             }
202 |         }
203 | 
204 |         impl FromSql for $data_type {
205 |             fn column_result(
206 |                 value: rusqlite::types::ValueRef<'_>,
207 |             ) -> rusqlite::types::FromSqlResult<Self> {
208 |                 match value {
209 |                     ValueRef::Text(x) => String::from_utf8(Vec::from(x))
210 |                         .map_err(|x| FromSqlError::Other(Box::new(x)))?
211 |                         .parse()
212 |                         .map_err(|x| FromSqlError::Other(Box::new(x))),
213 |                     _ => Err(FromSqlError::InvalidType),
214 |                 }
215 |             }
216 |         }
217 |     };
218 | }
219 | 
220 | sql_string_obj!(Price);
221 | sql_string_obj!(Distance);
222 | sql_string_obj!(DriveType);
223 | sql_string_obj!(FuelType);
224 | 


--------------------------------------------------------------------------------