├── .gitignore ├── Makefile ├── README.md ├── amazon_review_pipeline ├── .env ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── Makefile ├── README.md ├── datalayers │ ├── .gitkeep │ ├── analytics │ │ └── .gitkeep │ ├── downloader.sh │ ├── insights │ │ └── .gitkeep │ └── landing │ │ └── .gitkeep ├── experiment │ └── data_fusion.rs ├── poetry.lock ├── pyproject.toml ├── pysrc │ └── main.py └── src │ └── main.rs ├── amazon_review_pipeline_polars ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── Makefile ├── datalayers │ ├── analytics │ │ └── .gitkeep │ ├── downloader.sh │ ├── insights │ │ └── .gitkeep │ └── landing │ │ └── .gitkeep ├── pysrc │ └── main.py └── src │ └── main.rs ├── diabetes_ml_pipeline ├── .dvc │ ├── .gitignore │ └── config ├── .dvcignore ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── README.md ├── data │ ├── interim │ │ └── .gitignore │ ├── processed │ │ └── .gitignore │ └── raw │ │ ├── .gitignore │ │ └── diabetes.csv.dvc ├── dvc.lock ├── dvc.yaml ├── model │ └── model.cbor ├── params.yaml └── src │ └── bin │ └── stages │ ├── preprocess.rs │ ├── serve.rs │ ├── test.rs │ └── train.rs ├── poetry.lock ├── pyproject.toml └── wine_pipeline ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── Makefile ├── README.md ├── config.yaml ├── datastore ├── wine.data ├── wine.names └── wine_download.sh ├── poetry.lock ├── pyproject.toml ├── pysrc └── main.py └── src └── main.rs /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .vscode 3 | temp 4 | *.code-workspace -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | poetry_env: 2 | poetry show -v 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data Pipeline in Rust 2 | 3 | The repository contains, data 
pipeline code written in Rust (Polars/DataFusion) and Python (Pandas/Modin/Dask/PySpark). The main intent was to write data pipelines in Rust instead of Python. To compare the CPU-time performance gain, the same pipelines are also written using Python frameworks. 4 | 5 | Each directory in the project is a separate data pipeline. Smaller datasets are included alongside the pipeline, but only if they have a free-to-use license. If the dataset is large, a bash script is provided in the data directory (`datastore` or `datalayers`) of the particular project to download the data. 6 | 7 | ## Project Structure: 8 | Each directory in the root is a separate data pipeline. Inside each directory there will be: 9 | - A Rust project setup (Cargo.toml) which you can use to build the project 10 | - A Poetry Python project setup (pyproject.toml) to create your virtual environment 11 | - A pysrc folder that contains the data pipeline written in Python 12 | - A src folder that contains the data pipeline written in Rust 13 | - A Makefile that wraps the frequently used bash commands; using it is optional 14 | 15 | ## Contribution 16 | If you want to add more data pipelines, or a more complex pipeline that would be helpful to others, feel free to fork the repository and send a pull request. **The only condition is that your pipeline must be written in Polars or another Rust dataframe framework. Do not use the Python bindings for Polars; this project is about democratizing Rust over Python.** 17 | 18 | ## Pipelines: 19 | - `wine_pipeline` is a small data pipeline written for the famous wine dataset, which has three different classes of wine and other measurements as feature variables. The dataset is quite popular in the ML community. The data pipeline is written with Polars and Pandas. 20 | - `Amazon Review Pipeline` is a data pipeline based on Amazon Games and Toys product review data. There is more than 800 MB of data. Each record in the JSON data is a product review. The pipeline is written with DataFusion and PySpark. 
-------------------------------------------------------------------------------- /amazon_review_pipeline/.env: -------------------------------------------------------------------------------- 1 | export PYSPARK_PYTHON="/home/datapsycho/.cache/pypoetry/virtualenvs/amazon-review-pipeline-VPgRp_l9-py3.9/bin/python" -------------------------------------------------------------------------------- /amazon_review_pipeline/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .vscode 3 | /datalayers/**/*.json 4 | /datalayers/**/*.csv 5 | /datalayers/**/.part* 6 | datalayers/**/part* 7 | datalayers/**/_SUCCESS 8 | datalayers/**/._SUCCESS* -------------------------------------------------------------------------------- /amazon_review_pipeline/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "ahash" 13 | version = "0.7.6" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" 16 | dependencies = [ 17 | "getrandom", 18 | "once_cell", 19 | "version_check", 20 | ] 21 | 22 | [[package]] 23 | name = "aho-corasick" 24 | version = "0.7.18" 25 | source = "registry+https://github.com/rust-lang/crates.io-index" 26 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" 27 | dependencies = [ 28 | "memchr", 29 | ] 30 | 31 | [[package]] 32 | name = "alloc-no-stdlib" 33 | version = "2.0.3" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = 
"35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" 36 | 37 | [[package]] 38 | name = "alloc-stdlib" 39 | version = "0.2.1" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" 42 | dependencies = [ 43 | "alloc-no-stdlib", 44 | ] 45 | 46 | [[package]] 47 | name = "amazon_review_pipeline" 48 | version = "0.1.0" 49 | dependencies = [ 50 | "arrow 17.0.0", 51 | "datafusion", 52 | "env_logger", 53 | "log", 54 | "tokio", 55 | ] 56 | 57 | [[package]] 58 | name = "arrayref" 59 | version = "0.3.6" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" 62 | 63 | [[package]] 64 | name = "arrayvec" 65 | version = "0.7.2" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" 68 | 69 | [[package]] 70 | name = "arrow" 71 | version = "15.0.0" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = "6510d919fa4c27880f54430510d09327d7c86699c3692664bc0bb7c314f71385" 74 | dependencies = [ 75 | "bitflags", 76 | "chrono", 77 | "comfy-table 5.0.1", 78 | "csv", 79 | "flatbuffers", 80 | "half 1.8.2", 81 | "hex", 82 | "indexmap", 83 | "lazy_static", 84 | "lexical-core", 85 | "multiversion", 86 | "num", 87 | "rand", 88 | "regex", 89 | "serde", 90 | "serde_derive", 91 | "serde_json", 92 | ] 93 | 94 | [[package]] 95 | name = "arrow" 96 | version = "17.0.0" 97 | source = "registry+https://github.com/rust-lang/crates.io-index" 98 | checksum = "20f88d9bd22ae87d3d9faa19316b51ebb692a3b402f8a5e02916ad33c2435d2b" 99 | dependencies = [ 100 | "bitflags", 101 | "chrono", 102 | "comfy-table 6.0.0", 103 | "csv", 104 | "flatbuffers", 105 | "half 2.0.0", 106 | "hex", 107 | "indexmap", 108 | "lazy_static", 109 | "lexical-core", 110 | "multiversion", 111 | "num", 112 | 
"rand", 113 | "regex", 114 | "serde", 115 | "serde_derive", 116 | "serde_json", 117 | ] 118 | 119 | [[package]] 120 | name = "async-trait" 121 | version = "0.1.56" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "96cf8829f67d2eab0b2dfa42c5d0ef737e0724e4a82b01b3e292456202b19716" 124 | dependencies = [ 125 | "proc-macro2", 126 | "quote", 127 | "syn", 128 | ] 129 | 130 | [[package]] 131 | name = "atty" 132 | version = "0.2.14" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 135 | dependencies = [ 136 | "hermit-abi", 137 | "libc", 138 | "winapi", 139 | ] 140 | 141 | [[package]] 142 | name = "autocfg" 143 | version = "1.1.0" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 146 | 147 | [[package]] 148 | name = "base64" 149 | version = "0.13.0" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" 152 | 153 | [[package]] 154 | name = "bitflags" 155 | version = "1.3.2" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 158 | 159 | [[package]] 160 | name = "blake2" 161 | version = "0.10.4" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "b9cf849ee05b2ee5fba5e36f97ff8ec2533916700fc0758d40d92136a42f3388" 164 | dependencies = [ 165 | "digest", 166 | ] 167 | 168 | [[package]] 169 | name = "blake3" 170 | version = "1.3.1" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f" 173 | dependencies = [ 174 | "arrayref", 175 | "arrayvec", 176 | "cc", 177 | "cfg-if", 178 | "constant_time_eq", 179 | 
"digest", 180 | ] 181 | 182 | [[package]] 183 | name = "block-buffer" 184 | version = "0.10.2" 185 | source = "registry+https://github.com/rust-lang/crates.io-index" 186 | checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" 187 | dependencies = [ 188 | "generic-array", 189 | ] 190 | 191 | [[package]] 192 | name = "brotli" 193 | version = "3.3.4" 194 | source = "registry+https://github.com/rust-lang/crates.io-index" 195 | checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" 196 | dependencies = [ 197 | "alloc-no-stdlib", 198 | "alloc-stdlib", 199 | "brotli-decompressor", 200 | ] 201 | 202 | [[package]] 203 | name = "brotli-decompressor" 204 | version = "2.3.2" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" 207 | dependencies = [ 208 | "alloc-no-stdlib", 209 | "alloc-stdlib", 210 | ] 211 | 212 | [[package]] 213 | name = "bstr" 214 | version = "0.2.17" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" 217 | dependencies = [ 218 | "lazy_static", 219 | "memchr", 220 | "regex-automata", 221 | "serde", 222 | ] 223 | 224 | [[package]] 225 | name = "byteorder" 226 | version = "1.4.3" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" 229 | 230 | [[package]] 231 | name = "bytes" 232 | version = "1.1.0" 233 | source = "registry+https://github.com/rust-lang/crates.io-index" 234 | checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" 235 | 236 | [[package]] 237 | name = "cc" 238 | version = "1.0.73" 239 | source = "registry+https://github.com/rust-lang/crates.io-index" 240 | checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" 241 | dependencies = [ 242 | "jobserver", 
243 | ] 244 | 245 | [[package]] 246 | name = "cfg-if" 247 | version = "1.0.0" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 250 | 251 | [[package]] 252 | name = "chrono" 253 | version = "0.4.19" 254 | source = "registry+https://github.com/rust-lang/crates.io-index" 255 | checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" 256 | dependencies = [ 257 | "libc", 258 | "num-integer", 259 | "num-traits", 260 | "winapi", 261 | ] 262 | 263 | [[package]] 264 | name = "comfy-table" 265 | version = "5.0.1" 266 | source = "registry+https://github.com/rust-lang/crates.io-index" 267 | checksum = "b103d85ca6e209388771bfb7aa6b68a7aeec4afbf6f0a0264bfbf50360e5212e" 268 | dependencies = [ 269 | "strum 0.23.0", 270 | "strum_macros 0.23.1", 271 | "unicode-width", 272 | ] 273 | 274 | [[package]] 275 | name = "comfy-table" 276 | version = "6.0.0" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "121d8a5b0346092c18a4b2fd6f620d7a06f0eb7ac0a45860939a0884bc579c56" 279 | dependencies = [ 280 | "strum 0.24.1", 281 | "strum_macros 0.24.2", 282 | "unicode-width", 283 | ] 284 | 285 | [[package]] 286 | name = "constant_time_eq" 287 | version = "0.1.5" 288 | source = "registry+https://github.com/rust-lang/crates.io-index" 289 | checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" 290 | 291 | [[package]] 292 | name = "cpufeatures" 293 | version = "0.2.2" 294 | source = "registry+https://github.com/rust-lang/crates.io-index" 295 | checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b" 296 | dependencies = [ 297 | "libc", 298 | ] 299 | 300 | [[package]] 301 | name = "crc32fast" 302 | version = "1.3.2" 303 | source = "registry+https://github.com/rust-lang/crates.io-index" 304 | checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" 305 | dependencies = [ 306 | 
"cfg-if", 307 | ] 308 | 309 | [[package]] 310 | name = "crypto-common" 311 | version = "0.1.4" 312 | source = "registry+https://github.com/rust-lang/crates.io-index" 313 | checksum = "5999502d32b9c48d492abe66392408144895020ec4709e549e840799f3bb74c0" 314 | dependencies = [ 315 | "generic-array", 316 | "typenum", 317 | ] 318 | 319 | [[package]] 320 | name = "csv" 321 | version = "1.1.6" 322 | source = "registry+https://github.com/rust-lang/crates.io-index" 323 | checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" 324 | dependencies = [ 325 | "bstr", 326 | "csv-core", 327 | "itoa 0.4.8", 328 | "ryu", 329 | "serde", 330 | ] 331 | 332 | [[package]] 333 | name = "csv-core" 334 | version = "0.1.10" 335 | source = "registry+https://github.com/rust-lang/crates.io-index" 336 | checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" 337 | dependencies = [ 338 | "memchr", 339 | ] 340 | 341 | [[package]] 342 | name = "datafusion" 343 | version = "9.0.0" 344 | source = "registry+https://github.com/rust-lang/crates.io-index" 345 | checksum = "9f912a89e5ad2f716fcbbad090b1b1bc4b57c07604de1da60925a6652a4b8219" 346 | dependencies = [ 347 | "ahash", 348 | "arrow 15.0.0", 349 | "async-trait", 350 | "chrono", 351 | "datafusion-common", 352 | "datafusion-data-access", 353 | "datafusion-expr", 354 | "datafusion-optimizer", 355 | "datafusion-physical-expr", 356 | "datafusion-row", 357 | "datafusion-sql", 358 | "futures", 359 | "glob", 360 | "hashbrown", 361 | "itertools", 362 | "lazy_static", 363 | "log", 364 | "num_cpus", 365 | "ordered-float 3.0.0", 366 | "parking_lot", 367 | "parquet", 368 | "paste", 369 | "pin-project-lite", 370 | "rand", 371 | "smallvec", 372 | "sqlparser", 373 | "tempfile", 374 | "tokio", 375 | "tokio-stream", 376 | "url", 377 | "uuid", 378 | ] 379 | 380 | [[package]] 381 | name = "datafusion-common" 382 | version = "9.0.0" 383 | source = "registry+https://github.com/rust-lang/crates.io-index" 384 | checksum = 
"ec26c175360423abaa97cf45f41c367d07d40f5b631f7772aba4948e1af19e5a" 385 | dependencies = [ 386 | "arrow 15.0.0", 387 | "ordered-float 3.0.0", 388 | "parquet", 389 | "sqlparser", 390 | ] 391 | 392 | [[package]] 393 | name = "datafusion-data-access" 394 | version = "9.0.0" 395 | source = "registry+https://github.com/rust-lang/crates.io-index" 396 | checksum = "35b28da52ca4e7eb160df15d6119780a7637f3added9e15844c884143d1c8bca" 397 | dependencies = [ 398 | "async-trait", 399 | "chrono", 400 | "futures", 401 | "parking_lot", 402 | "tempfile", 403 | "tokio", 404 | ] 405 | 406 | [[package]] 407 | name = "datafusion-expr" 408 | version = "9.0.0" 409 | source = "registry+https://github.com/rust-lang/crates.io-index" 410 | checksum = "1c4967ba29f27354745154be8d5a03c5236333666b45f3c09e91283021dbb3cf" 411 | dependencies = [ 412 | "ahash", 413 | "arrow 15.0.0", 414 | "datafusion-common", 415 | "sqlparser", 416 | ] 417 | 418 | [[package]] 419 | name = "datafusion-optimizer" 420 | version = "9.0.0" 421 | source = "registry+https://github.com/rust-lang/crates.io-index" 422 | checksum = "f5630b25a6473a58fb096fbbc0b1bf6d28b0b256e5c3d9142a07de270bd3e27b" 423 | dependencies = [ 424 | "arrow 15.0.0", 425 | "async-trait", 426 | "chrono", 427 | "datafusion-common", 428 | "datafusion-expr", 429 | "datafusion-physical-expr", 430 | "hashbrown", 431 | "log", 432 | ] 433 | 434 | [[package]] 435 | name = "datafusion-physical-expr" 436 | version = "9.0.0" 437 | source = "registry+https://github.com/rust-lang/crates.io-index" 438 | checksum = "ca0ed9091539791f406b3928e7802fe65163e4e78dd15d08ad7d67f19c6c6c7d" 439 | dependencies = [ 440 | "ahash", 441 | "arrow 15.0.0", 442 | "blake2", 443 | "blake3", 444 | "chrono", 445 | "datafusion-common", 446 | "datafusion-expr", 447 | "datafusion-row", 448 | "hashbrown", 449 | "lazy_static", 450 | "md-5", 451 | "ordered-float 3.0.0", 452 | "paste", 453 | "rand", 454 | "regex", 455 | "sha2", 456 | "unicode-segmentation", 457 | ] 458 | 459 | [[package]] 460 | 
name = "datafusion-row" 461 | version = "9.0.0" 462 | source = "registry+https://github.com/rust-lang/crates.io-index" 463 | checksum = "ad857586d0ffd7fbb12b7c9031dcf8801fdbe450b42bf049ef29bb7474c0d4ae" 464 | dependencies = [ 465 | "arrow 15.0.0", 466 | "datafusion-common", 467 | "paste", 468 | "rand", 469 | ] 470 | 471 | [[package]] 472 | name = "datafusion-sql" 473 | version = "9.0.0" 474 | source = "registry+https://github.com/rust-lang/crates.io-index" 475 | checksum = "f7817f26fbfb3db3310905a83643a99b7518e7f672d1801247d653349268db7b" 476 | dependencies = [ 477 | "ahash", 478 | "arrow 15.0.0", 479 | "datafusion-common", 480 | "datafusion-expr", 481 | "hashbrown", 482 | "sqlparser", 483 | "tokio", 484 | ] 485 | 486 | [[package]] 487 | name = "digest" 488 | version = "0.10.3" 489 | source = "registry+https://github.com/rust-lang/crates.io-index" 490 | checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" 491 | dependencies = [ 492 | "block-buffer", 493 | "crypto-common", 494 | "subtle", 495 | ] 496 | 497 | [[package]] 498 | name = "either" 499 | version = "1.7.0" 500 | source = "registry+https://github.com/rust-lang/crates.io-index" 501 | checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" 502 | 503 | [[package]] 504 | name = "env_logger" 505 | version = "0.9.0" 506 | source = "registry+https://github.com/rust-lang/crates.io-index" 507 | checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" 508 | dependencies = [ 509 | "atty", 510 | "humantime", 511 | "log", 512 | "regex", 513 | "termcolor", 514 | ] 515 | 516 | [[package]] 517 | name = "fastrand" 518 | version = "1.7.0" 519 | source = "registry+https://github.com/rust-lang/crates.io-index" 520 | checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" 521 | dependencies = [ 522 | "instant", 523 | ] 524 | 525 | [[package]] 526 | name = "flatbuffers" 527 | version = "2.1.2" 528 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 529 | checksum = "86b428b715fdbdd1c364b84573b5fdc0f84f8e423661b9f398735278bc7f2b6a" 530 | dependencies = [ 531 | "bitflags", 532 | "smallvec", 533 | "thiserror", 534 | ] 535 | 536 | [[package]] 537 | name = "flate2" 538 | version = "1.0.24" 539 | source = "registry+https://github.com/rust-lang/crates.io-index" 540 | checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" 541 | dependencies = [ 542 | "crc32fast", 543 | "miniz_oxide", 544 | ] 545 | 546 | [[package]] 547 | name = "form_urlencoded" 548 | version = "1.0.1" 549 | source = "registry+https://github.com/rust-lang/crates.io-index" 550 | checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" 551 | dependencies = [ 552 | "matches", 553 | "percent-encoding", 554 | ] 555 | 556 | [[package]] 557 | name = "futures" 558 | version = "0.3.21" 559 | source = "registry+https://github.com/rust-lang/crates.io-index" 560 | checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" 561 | dependencies = [ 562 | "futures-channel", 563 | "futures-core", 564 | "futures-executor", 565 | "futures-io", 566 | "futures-sink", 567 | "futures-task", 568 | "futures-util", 569 | ] 570 | 571 | [[package]] 572 | name = "futures-channel" 573 | version = "0.3.21" 574 | source = "registry+https://github.com/rust-lang/crates.io-index" 575 | checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" 576 | dependencies = [ 577 | "futures-core", 578 | "futures-sink", 579 | ] 580 | 581 | [[package]] 582 | name = "futures-core" 583 | version = "0.3.21" 584 | source = "registry+https://github.com/rust-lang/crates.io-index" 585 | checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" 586 | 587 | [[package]] 588 | name = "futures-executor" 589 | version = "0.3.21" 590 | source = "registry+https://github.com/rust-lang/crates.io-index" 591 | checksum = 
"9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" 592 | dependencies = [ 593 | "futures-core", 594 | "futures-task", 595 | "futures-util", 596 | ] 597 | 598 | [[package]] 599 | name = "futures-io" 600 | version = "0.3.21" 601 | source = "registry+https://github.com/rust-lang/crates.io-index" 602 | checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" 603 | 604 | [[package]] 605 | name = "futures-macro" 606 | version = "0.3.21" 607 | source = "registry+https://github.com/rust-lang/crates.io-index" 608 | checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" 609 | dependencies = [ 610 | "proc-macro2", 611 | "quote", 612 | "syn", 613 | ] 614 | 615 | [[package]] 616 | name = "futures-sink" 617 | version = "0.3.21" 618 | source = "registry+https://github.com/rust-lang/crates.io-index" 619 | checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" 620 | 621 | [[package]] 622 | name = "futures-task" 623 | version = "0.3.21" 624 | source = "registry+https://github.com/rust-lang/crates.io-index" 625 | checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" 626 | 627 | [[package]] 628 | name = "futures-util" 629 | version = "0.3.21" 630 | source = "registry+https://github.com/rust-lang/crates.io-index" 631 | checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" 632 | dependencies = [ 633 | "futures-channel", 634 | "futures-core", 635 | "futures-io", 636 | "futures-macro", 637 | "futures-sink", 638 | "futures-task", 639 | "memchr", 640 | "pin-project-lite", 641 | "pin-utils", 642 | "slab", 643 | ] 644 | 645 | [[package]] 646 | name = "generic-array" 647 | version = "0.14.5" 648 | source = "registry+https://github.com/rust-lang/crates.io-index" 649 | checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" 650 | dependencies = [ 651 | "typenum", 652 | "version_check", 653 | ] 654 | 655 | [[package]] 656 | name = "getrandom" 657 
| version = "0.2.7" 658 | source = "registry+https://github.com/rust-lang/crates.io-index" 659 | checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" 660 | dependencies = [ 661 | "cfg-if", 662 | "libc", 663 | "wasi", 664 | ] 665 | 666 | [[package]] 667 | name = "glob" 668 | version = "0.3.0" 669 | source = "registry+https://github.com/rust-lang/crates.io-index" 670 | checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" 671 | 672 | [[package]] 673 | name = "half" 674 | version = "1.8.2" 675 | source = "registry+https://github.com/rust-lang/crates.io-index" 676 | checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" 677 | 678 | [[package]] 679 | name = "half" 680 | version = "2.0.0" 681 | source = "registry+https://github.com/rust-lang/crates.io-index" 682 | checksum = "c207b0ee023c7fce79daf01828163aaf53a1ddd0be8b1ef9541da7d41f6fa63a" 683 | 684 | [[package]] 685 | name = "hashbrown" 686 | version = "0.12.1" 687 | source = "registry+https://github.com/rust-lang/crates.io-index" 688 | checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" 689 | dependencies = [ 690 | "ahash", 691 | ] 692 | 693 | [[package]] 694 | name = "heck" 695 | version = "0.3.3" 696 | source = "registry+https://github.com/rust-lang/crates.io-index" 697 | checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" 698 | dependencies = [ 699 | "unicode-segmentation", 700 | ] 701 | 702 | [[package]] 703 | name = "heck" 704 | version = "0.4.0" 705 | source = "registry+https://github.com/rust-lang/crates.io-index" 706 | checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" 707 | 708 | [[package]] 709 | name = "hermit-abi" 710 | version = "0.1.19" 711 | source = "registry+https://github.com/rust-lang/crates.io-index" 712 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 713 | dependencies = [ 714 | "libc", 715 | ] 716 | 717 | [[package]] 
718 | name = "hex" 719 | version = "0.4.3" 720 | source = "registry+https://github.com/rust-lang/crates.io-index" 721 | checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" 722 | 723 | [[package]] 724 | name = "humantime" 725 | version = "2.1.0" 726 | source = "registry+https://github.com/rust-lang/crates.io-index" 727 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 728 | 729 | [[package]] 730 | name = "idna" 731 | version = "0.2.3" 732 | source = "registry+https://github.com/rust-lang/crates.io-index" 733 | checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" 734 | dependencies = [ 735 | "matches", 736 | "unicode-bidi", 737 | "unicode-normalization", 738 | ] 739 | 740 | [[package]] 741 | name = "indexmap" 742 | version = "1.9.1" 743 | source = "registry+https://github.com/rust-lang/crates.io-index" 744 | checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" 745 | dependencies = [ 746 | "autocfg", 747 | "hashbrown", 748 | ] 749 | 750 | [[package]] 751 | name = "instant" 752 | version = "0.1.12" 753 | source = "registry+https://github.com/rust-lang/crates.io-index" 754 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" 755 | dependencies = [ 756 | "cfg-if", 757 | ] 758 | 759 | [[package]] 760 | name = "integer-encoding" 761 | version = "1.1.7" 762 | source = "registry+https://github.com/rust-lang/crates.io-index" 763 | checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" 764 | 765 | [[package]] 766 | name = "itertools" 767 | version = "0.10.3" 768 | source = "registry+https://github.com/rust-lang/crates.io-index" 769 | checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" 770 | dependencies = [ 771 | "either", 772 | ] 773 | 774 | [[package]] 775 | name = "itoa" 776 | version = "0.4.8" 777 | source = "registry+https://github.com/rust-lang/crates.io-index" 778 | checksum = 
"b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" 779 | 780 | [[package]] 781 | name = "itoa" 782 | version = "1.0.2" 783 | source = "registry+https://github.com/rust-lang/crates.io-index" 784 | checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" 785 | 786 | [[package]] 787 | name = "jobserver" 788 | version = "0.1.24" 789 | source = "registry+https://github.com/rust-lang/crates.io-index" 790 | checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" 791 | dependencies = [ 792 | "libc", 793 | ] 794 | 795 | [[package]] 796 | name = "lazy_static" 797 | version = "1.4.0" 798 | source = "registry+https://github.com/rust-lang/crates.io-index" 799 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 800 | 801 | [[package]] 802 | name = "lexical-core" 803 | version = "0.8.5" 804 | source = "registry+https://github.com/rust-lang/crates.io-index" 805 | checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" 806 | dependencies = [ 807 | "lexical-parse-float", 808 | "lexical-parse-integer", 809 | "lexical-util", 810 | "lexical-write-float", 811 | "lexical-write-integer", 812 | ] 813 | 814 | [[package]] 815 | name = "lexical-parse-float" 816 | version = "0.8.5" 817 | source = "registry+https://github.com/rust-lang/crates.io-index" 818 | checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" 819 | dependencies = [ 820 | "lexical-parse-integer", 821 | "lexical-util", 822 | "static_assertions", 823 | ] 824 | 825 | [[package]] 826 | name = "lexical-parse-integer" 827 | version = "0.8.6" 828 | source = "registry+https://github.com/rust-lang/crates.io-index" 829 | checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" 830 | dependencies = [ 831 | "lexical-util", 832 | "static_assertions", 833 | ] 834 | 835 | [[package]] 836 | name = "lexical-util" 837 | version = "0.8.5" 838 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 839 | checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" 840 | dependencies = [ 841 | "static_assertions", 842 | ] 843 | 844 | [[package]] 845 | name = "lexical-write-float" 846 | version = "0.8.5" 847 | source = "registry+https://github.com/rust-lang/crates.io-index" 848 | checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" 849 | dependencies = [ 850 | "lexical-util", 851 | "lexical-write-integer", 852 | "static_assertions", 853 | ] 854 | 855 | [[package]] 856 | name = "lexical-write-integer" 857 | version = "0.8.5" 858 | source = "registry+https://github.com/rust-lang/crates.io-index" 859 | checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" 860 | dependencies = [ 861 | "lexical-util", 862 | "static_assertions", 863 | ] 864 | 865 | [[package]] 866 | name = "libc" 867 | version = "0.2.126" 868 | source = "registry+https://github.com/rust-lang/crates.io-index" 869 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" 870 | 871 | [[package]] 872 | name = "lock_api" 873 | version = "0.4.7" 874 | source = "registry+https://github.com/rust-lang/crates.io-index" 875 | checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" 876 | dependencies = [ 877 | "autocfg", 878 | "scopeguard", 879 | ] 880 | 881 | [[package]] 882 | name = "log" 883 | version = "0.4.17" 884 | source = "registry+https://github.com/rust-lang/crates.io-index" 885 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 886 | dependencies = [ 887 | "cfg-if", 888 | ] 889 | 890 | [[package]] 891 | name = "lz4" 892 | version = "1.23.3" 893 | source = "registry+https://github.com/rust-lang/crates.io-index" 894 | checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" 895 | dependencies = [ 896 | "libc", 897 | "lz4-sys", 898 | ] 899 | 900 | [[package]] 901 | name = "lz4-sys" 902 | 
version = "1.9.3" 903 | source = "registry+https://github.com/rust-lang/crates.io-index" 904 | checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" 905 | dependencies = [ 906 | "cc", 907 | "libc", 908 | ] 909 | 910 | [[package]] 911 | name = "matches" 912 | version = "0.1.9" 913 | source = "registry+https://github.com/rust-lang/crates.io-index" 914 | checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" 915 | 916 | [[package]] 917 | name = "md-5" 918 | version = "0.10.1" 919 | source = "registry+https://github.com/rust-lang/crates.io-index" 920 | checksum = "658646b21e0b72f7866c7038ab086d3d5e1cd6271f060fd37defb241949d0582" 921 | dependencies = [ 922 | "digest", 923 | ] 924 | 925 | [[package]] 926 | name = "memchr" 927 | version = "2.5.0" 928 | source = "registry+https://github.com/rust-lang/crates.io-index" 929 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 930 | 931 | [[package]] 932 | name = "miniz_oxide" 933 | version = "0.5.3" 934 | source = "registry+https://github.com/rust-lang/crates.io-index" 935 | checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" 936 | dependencies = [ 937 | "adler", 938 | ] 939 | 940 | [[package]] 941 | name = "multiversion" 942 | version = "0.6.1" 943 | source = "registry+https://github.com/rust-lang/crates.io-index" 944 | checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" 945 | dependencies = [ 946 | "multiversion-macros", 947 | ] 948 | 949 | [[package]] 950 | name = "multiversion-macros" 951 | version = "0.6.1" 952 | source = "registry+https://github.com/rust-lang/crates.io-index" 953 | checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" 954 | dependencies = [ 955 | "proc-macro2", 956 | "quote", 957 | "syn", 958 | ] 959 | 960 | [[package]] 961 | name = "num" 962 | version = "0.4.0" 963 | source = "registry+https://github.com/rust-lang/crates.io-index" 964 | checksum = 
"43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" 965 | dependencies = [ 966 | "num-bigint", 967 | "num-complex", 968 | "num-integer", 969 | "num-iter", 970 | "num-rational", 971 | "num-traits", 972 | ] 973 | 974 | [[package]] 975 | name = "num-bigint" 976 | version = "0.4.3" 977 | source = "registry+https://github.com/rust-lang/crates.io-index" 978 | checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" 979 | dependencies = [ 980 | "autocfg", 981 | "num-integer", 982 | "num-traits", 983 | ] 984 | 985 | [[package]] 986 | name = "num-complex" 987 | version = "0.4.2" 988 | source = "registry+https://github.com/rust-lang/crates.io-index" 989 | checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19" 990 | dependencies = [ 991 | "num-traits", 992 | ] 993 | 994 | [[package]] 995 | name = "num-integer" 996 | version = "0.1.45" 997 | source = "registry+https://github.com/rust-lang/crates.io-index" 998 | checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" 999 | dependencies = [ 1000 | "autocfg", 1001 | "num-traits", 1002 | ] 1003 | 1004 | [[package]] 1005 | name = "num-iter" 1006 | version = "0.1.43" 1007 | source = "registry+https://github.com/rust-lang/crates.io-index" 1008 | checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" 1009 | dependencies = [ 1010 | "autocfg", 1011 | "num-integer", 1012 | "num-traits", 1013 | ] 1014 | 1015 | [[package]] 1016 | name = "num-rational" 1017 | version = "0.4.1" 1018 | source = "registry+https://github.com/rust-lang/crates.io-index" 1019 | checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" 1020 | dependencies = [ 1021 | "autocfg", 1022 | "num-bigint", 1023 | "num-integer", 1024 | "num-traits", 1025 | ] 1026 | 1027 | [[package]] 1028 | name = "num-traits" 1029 | version = "0.2.15" 1030 | source = "registry+https://github.com/rust-lang/crates.io-index" 1031 | checksum = 
"578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 1032 | dependencies = [ 1033 | "autocfg", 1034 | ] 1035 | 1036 | [[package]] 1037 | name = "num_cpus" 1038 | version = "1.13.1" 1039 | source = "registry+https://github.com/rust-lang/crates.io-index" 1040 | checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" 1041 | dependencies = [ 1042 | "hermit-abi", 1043 | "libc", 1044 | ] 1045 | 1046 | [[package]] 1047 | name = "once_cell" 1048 | version = "1.12.0" 1049 | source = "registry+https://github.com/rust-lang/crates.io-index" 1050 | checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" 1051 | 1052 | [[package]] 1053 | name = "ordered-float" 1054 | version = "1.1.1" 1055 | source = "registry+https://github.com/rust-lang/crates.io-index" 1056 | checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" 1057 | dependencies = [ 1058 | "num-traits", 1059 | ] 1060 | 1061 | [[package]] 1062 | name = "ordered-float" 1063 | version = "3.0.0" 1064 | source = "registry+https://github.com/rust-lang/crates.io-index" 1065 | checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2" 1066 | dependencies = [ 1067 | "num-traits", 1068 | ] 1069 | 1070 | [[package]] 1071 | name = "parking_lot" 1072 | version = "0.12.1" 1073 | source = "registry+https://github.com/rust-lang/crates.io-index" 1074 | checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" 1075 | dependencies = [ 1076 | "lock_api", 1077 | "parking_lot_core", 1078 | ] 1079 | 1080 | [[package]] 1081 | name = "parking_lot_core" 1082 | version = "0.9.3" 1083 | source = "registry+https://github.com/rust-lang/crates.io-index" 1084 | checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" 1085 | dependencies = [ 1086 | "cfg-if", 1087 | "libc", 1088 | "redox_syscall", 1089 | "smallvec", 1090 | "windows-sys", 1091 | ] 1092 | 1093 | [[package]] 1094 | name = "parquet" 1095 | version = 
"15.0.0" 1096 | source = "registry+https://github.com/rust-lang/crates.io-index" 1097 | checksum = "94d31dde60b151ef88ec2c847e3a8f66d42d7dbdaeefd05d13d79db676b0b56f" 1098 | dependencies = [ 1099 | "arrow 15.0.0", 1100 | "base64", 1101 | "brotli", 1102 | "byteorder", 1103 | "bytes", 1104 | "chrono", 1105 | "flate2", 1106 | "lz4", 1107 | "num", 1108 | "num-bigint", 1109 | "parquet-format", 1110 | "rand", 1111 | "snap", 1112 | "thrift", 1113 | "zstd", 1114 | ] 1115 | 1116 | [[package]] 1117 | name = "parquet-format" 1118 | version = "4.0.0" 1119 | source = "registry+https://github.com/rust-lang/crates.io-index" 1120 | checksum = "1f0c06cdcd5460967c485f9c40a821746f5955ad81990533c7fae95dbd9bc0b5" 1121 | dependencies = [ 1122 | "thrift", 1123 | ] 1124 | 1125 | [[package]] 1126 | name = "paste" 1127 | version = "1.0.7" 1128 | source = "registry+https://github.com/rust-lang/crates.io-index" 1129 | checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" 1130 | 1131 | [[package]] 1132 | name = "percent-encoding" 1133 | version = "2.1.0" 1134 | source = "registry+https://github.com/rust-lang/crates.io-index" 1135 | checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" 1136 | 1137 | [[package]] 1138 | name = "pin-project-lite" 1139 | version = "0.2.9" 1140 | source = "registry+https://github.com/rust-lang/crates.io-index" 1141 | checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" 1142 | 1143 | [[package]] 1144 | name = "pin-utils" 1145 | version = "0.1.0" 1146 | source = "registry+https://github.com/rust-lang/crates.io-index" 1147 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 1148 | 1149 | [[package]] 1150 | name = "ppv-lite86" 1151 | version = "0.2.16" 1152 | source = "registry+https://github.com/rust-lang/crates.io-index" 1153 | checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" 1154 | 1155 | [[package]] 1156 | name = "proc-macro2" 1157 | 
version = "1.0.40" 1158 | source = "registry+https://github.com/rust-lang/crates.io-index" 1159 | checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" 1160 | dependencies = [ 1161 | "unicode-ident", 1162 | ] 1163 | 1164 | [[package]] 1165 | name = "quote" 1166 | version = "1.0.20" 1167 | source = "registry+https://github.com/rust-lang/crates.io-index" 1168 | checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" 1169 | dependencies = [ 1170 | "proc-macro2", 1171 | ] 1172 | 1173 | [[package]] 1174 | name = "rand" 1175 | version = "0.8.5" 1176 | source = "registry+https://github.com/rust-lang/crates.io-index" 1177 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 1178 | dependencies = [ 1179 | "libc", 1180 | "rand_chacha", 1181 | "rand_core", 1182 | ] 1183 | 1184 | [[package]] 1185 | name = "rand_chacha" 1186 | version = "0.3.1" 1187 | source = "registry+https://github.com/rust-lang/crates.io-index" 1188 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 1189 | dependencies = [ 1190 | "ppv-lite86", 1191 | "rand_core", 1192 | ] 1193 | 1194 | [[package]] 1195 | name = "rand_core" 1196 | version = "0.6.3" 1197 | source = "registry+https://github.com/rust-lang/crates.io-index" 1198 | checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" 1199 | dependencies = [ 1200 | "getrandom", 1201 | ] 1202 | 1203 | [[package]] 1204 | name = "redox_syscall" 1205 | version = "0.2.13" 1206 | source = "registry+https://github.com/rust-lang/crates.io-index" 1207 | checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" 1208 | dependencies = [ 1209 | "bitflags", 1210 | ] 1211 | 1212 | [[package]] 1213 | name = "regex" 1214 | version = "1.5.6" 1215 | source = "registry+https://github.com/rust-lang/crates.io-index" 1216 | checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" 1217 | dependencies = [ 1218 | 
"aho-corasick", 1219 | "memchr", 1220 | "regex-syntax", 1221 | ] 1222 | 1223 | [[package]] 1224 | name = "regex-automata" 1225 | version = "0.1.10" 1226 | source = "registry+https://github.com/rust-lang/crates.io-index" 1227 | checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" 1228 | 1229 | [[package]] 1230 | name = "regex-syntax" 1231 | version = "0.6.26" 1232 | source = "registry+https://github.com/rust-lang/crates.io-index" 1233 | checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" 1234 | 1235 | [[package]] 1236 | name = "remove_dir_all" 1237 | version = "0.5.3" 1238 | source = "registry+https://github.com/rust-lang/crates.io-index" 1239 | checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" 1240 | dependencies = [ 1241 | "winapi", 1242 | ] 1243 | 1244 | [[package]] 1245 | name = "rustversion" 1246 | version = "1.0.7" 1247 | source = "registry+https://github.com/rust-lang/crates.io-index" 1248 | checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf" 1249 | 1250 | [[package]] 1251 | name = "ryu" 1252 | version = "1.0.10" 1253 | source = "registry+https://github.com/rust-lang/crates.io-index" 1254 | checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" 1255 | 1256 | [[package]] 1257 | name = "scopeguard" 1258 | version = "1.1.0" 1259 | source = "registry+https://github.com/rust-lang/crates.io-index" 1260 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 1261 | 1262 | [[package]] 1263 | name = "serde" 1264 | version = "1.0.138" 1265 | source = "registry+https://github.com/rust-lang/crates.io-index" 1266 | checksum = "1578c6245786b9d168c5447eeacfb96856573ca56c9d68fdcf394be134882a47" 1267 | 1268 | [[package]] 1269 | name = "serde_derive" 1270 | version = "1.0.138" 1271 | source = "registry+https://github.com/rust-lang/crates.io-index" 1272 | checksum = 
"023e9b1467aef8a10fb88f25611870ada9800ef7e22afce356bb0d2387b6f27c" 1273 | dependencies = [ 1274 | "proc-macro2", 1275 | "quote", 1276 | "syn", 1277 | ] 1278 | 1279 | [[package]] 1280 | name = "serde_json" 1281 | version = "1.0.82" 1282 | source = "registry+https://github.com/rust-lang/crates.io-index" 1283 | checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7" 1284 | dependencies = [ 1285 | "indexmap", 1286 | "itoa 1.0.2", 1287 | "ryu", 1288 | "serde", 1289 | ] 1290 | 1291 | [[package]] 1292 | name = "sha2" 1293 | version = "0.10.2" 1294 | source = "registry+https://github.com/rust-lang/crates.io-index" 1295 | checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" 1296 | dependencies = [ 1297 | "cfg-if", 1298 | "cpufeatures", 1299 | "digest", 1300 | ] 1301 | 1302 | [[package]] 1303 | name = "slab" 1304 | version = "0.4.6" 1305 | source = "registry+https://github.com/rust-lang/crates.io-index" 1306 | checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" 1307 | 1308 | [[package]] 1309 | name = "smallvec" 1310 | version = "1.8.1" 1311 | source = "registry+https://github.com/rust-lang/crates.io-index" 1312 | checksum = "cc88c725d61fc6c3132893370cac4a0200e3fedf5da8331c570664b1987f5ca2" 1313 | 1314 | [[package]] 1315 | name = "snap" 1316 | version = "1.0.5" 1317 | source = "registry+https://github.com/rust-lang/crates.io-index" 1318 | checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" 1319 | 1320 | [[package]] 1321 | name = "sqlparser" 1322 | version = "0.18.0" 1323 | source = "registry+https://github.com/rust-lang/crates.io-index" 1324 | checksum = "f531637a13132fa3d38c54d4cd8f115905e5dc3e72f6e77bd6160481f482e25d" 1325 | dependencies = [ 1326 | "log", 1327 | ] 1328 | 1329 | [[package]] 1330 | name = "static_assertions" 1331 | version = "1.1.0" 1332 | source = "registry+https://github.com/rust-lang/crates.io-index" 1333 | checksum = 
"a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" 1334 | 1335 | [[package]] 1336 | name = "strum" 1337 | version = "0.23.0" 1338 | source = "registry+https://github.com/rust-lang/crates.io-index" 1339 | checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" 1340 | 1341 | [[package]] 1342 | name = "strum" 1343 | version = "0.24.1" 1344 | source = "registry+https://github.com/rust-lang/crates.io-index" 1345 | checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" 1346 | 1347 | [[package]] 1348 | name = "strum_macros" 1349 | version = "0.23.1" 1350 | source = "registry+https://github.com/rust-lang/crates.io-index" 1351 | checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" 1352 | dependencies = [ 1353 | "heck 0.3.3", 1354 | "proc-macro2", 1355 | "quote", 1356 | "rustversion", 1357 | "syn", 1358 | ] 1359 | 1360 | [[package]] 1361 | name = "strum_macros" 1362 | version = "0.24.2" 1363 | source = "registry+https://github.com/rust-lang/crates.io-index" 1364 | checksum = "4faebde00e8ff94316c01800f9054fd2ba77d30d9e922541913051d1d978918b" 1365 | dependencies = [ 1366 | "heck 0.4.0", 1367 | "proc-macro2", 1368 | "quote", 1369 | "rustversion", 1370 | "syn", 1371 | ] 1372 | 1373 | [[package]] 1374 | name = "subtle" 1375 | version = "2.4.1" 1376 | source = "registry+https://github.com/rust-lang/crates.io-index" 1377 | checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" 1378 | 1379 | [[package]] 1380 | name = "syn" 1381 | version = "1.0.98" 1382 | source = "registry+https://github.com/rust-lang/crates.io-index" 1383 | checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" 1384 | dependencies = [ 1385 | "proc-macro2", 1386 | "quote", 1387 | "unicode-ident", 1388 | ] 1389 | 1390 | [[package]] 1391 | name = "tempfile" 1392 | version = "3.3.0" 1393 | source = "registry+https://github.com/rust-lang/crates.io-index" 1394 | checksum = 
"5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" 1395 | dependencies = [ 1396 | "cfg-if", 1397 | "fastrand", 1398 | "libc", 1399 | "redox_syscall", 1400 | "remove_dir_all", 1401 | "winapi", 1402 | ] 1403 | 1404 | [[package]] 1405 | name = "termcolor" 1406 | version = "1.1.3" 1407 | source = "registry+https://github.com/rust-lang/crates.io-index" 1408 | checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" 1409 | dependencies = [ 1410 | "winapi-util", 1411 | ] 1412 | 1413 | [[package]] 1414 | name = "thiserror" 1415 | version = "1.0.31" 1416 | source = "registry+https://github.com/rust-lang/crates.io-index" 1417 | checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" 1418 | dependencies = [ 1419 | "thiserror-impl", 1420 | ] 1421 | 1422 | [[package]] 1423 | name = "thiserror-impl" 1424 | version = "1.0.31" 1425 | source = "registry+https://github.com/rust-lang/crates.io-index" 1426 | checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" 1427 | dependencies = [ 1428 | "proc-macro2", 1429 | "quote", 1430 | "syn", 1431 | ] 1432 | 1433 | [[package]] 1434 | name = "threadpool" 1435 | version = "1.8.1" 1436 | source = "registry+https://github.com/rust-lang/crates.io-index" 1437 | checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" 1438 | dependencies = [ 1439 | "num_cpus", 1440 | ] 1441 | 1442 | [[package]] 1443 | name = "thrift" 1444 | version = "0.13.0" 1445 | source = "registry+https://github.com/rust-lang/crates.io-index" 1446 | checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b" 1447 | dependencies = [ 1448 | "byteorder", 1449 | "integer-encoding", 1450 | "log", 1451 | "ordered-float 1.1.1", 1452 | "threadpool", 1453 | ] 1454 | 1455 | [[package]] 1456 | name = "tinyvec" 1457 | version = "1.6.0" 1458 | source = "registry+https://github.com/rust-lang/crates.io-index" 1459 | checksum = 
"87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" 1460 | dependencies = [ 1461 | "tinyvec_macros", 1462 | ] 1463 | 1464 | [[package]] 1465 | name = "tinyvec_macros" 1466 | version = "0.1.0" 1467 | source = "registry+https://github.com/rust-lang/crates.io-index" 1468 | checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" 1469 | 1470 | [[package]] 1471 | name = "tokio" 1472 | version = "1.19.2" 1473 | source = "registry+https://github.com/rust-lang/crates.io-index" 1474 | checksum = "c51a52ed6686dd62c320f9b89299e9dfb46f730c7a48e635c19f21d116cb1439" 1475 | dependencies = [ 1476 | "num_cpus", 1477 | "once_cell", 1478 | "parking_lot", 1479 | "pin-project-lite", 1480 | "tokio-macros", 1481 | ] 1482 | 1483 | [[package]] 1484 | name = "tokio-macros" 1485 | version = "1.8.0" 1486 | source = "registry+https://github.com/rust-lang/crates.io-index" 1487 | checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" 1488 | dependencies = [ 1489 | "proc-macro2", 1490 | "quote", 1491 | "syn", 1492 | ] 1493 | 1494 | [[package]] 1495 | name = "tokio-stream" 1496 | version = "0.1.9" 1497 | source = "registry+https://github.com/rust-lang/crates.io-index" 1498 | checksum = "df54d54117d6fdc4e4fea40fe1e4e566b3505700e148a6827e59b34b0d2600d9" 1499 | dependencies = [ 1500 | "futures-core", 1501 | "pin-project-lite", 1502 | "tokio", 1503 | ] 1504 | 1505 | [[package]] 1506 | name = "typenum" 1507 | version = "1.15.0" 1508 | source = "registry+https://github.com/rust-lang/crates.io-index" 1509 | checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" 1510 | 1511 | [[package]] 1512 | name = "unicode-bidi" 1513 | version = "0.3.8" 1514 | source = "registry+https://github.com/rust-lang/crates.io-index" 1515 | checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" 1516 | 1517 | [[package]] 1518 | name = "unicode-ident" 1519 | version = "1.0.1" 1520 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1521 | checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" 1522 | 1523 | [[package]] 1524 | name = "unicode-normalization" 1525 | version = "0.1.21" 1526 | source = "registry+https://github.com/rust-lang/crates.io-index" 1527 | checksum = "854cbdc4f7bc6ae19c820d44abdc3277ac3e1b2b93db20a636825d9322fb60e6" 1528 | dependencies = [ 1529 | "tinyvec", 1530 | ] 1531 | 1532 | [[package]] 1533 | name = "unicode-segmentation" 1534 | version = "1.9.0" 1535 | source = "registry+https://github.com/rust-lang/crates.io-index" 1536 | checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" 1537 | 1538 | [[package]] 1539 | name = "unicode-width" 1540 | version = "0.1.9" 1541 | source = "registry+https://github.com/rust-lang/crates.io-index" 1542 | checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" 1543 | 1544 | [[package]] 1545 | name = "url" 1546 | version = "2.2.2" 1547 | source = "registry+https://github.com/rust-lang/crates.io-index" 1548 | checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" 1549 | dependencies = [ 1550 | "form_urlencoded", 1551 | "idna", 1552 | "matches", 1553 | "percent-encoding", 1554 | ] 1555 | 1556 | [[package]] 1557 | name = "uuid" 1558 | version = "1.1.2" 1559 | source = "registry+https://github.com/rust-lang/crates.io-index" 1560 | checksum = "dd6469f4314d5f1ffec476e05f17cc9a78bc7a27a6a857842170bdf8d6f98d2f" 1561 | dependencies = [ 1562 | "getrandom", 1563 | ] 1564 | 1565 | [[package]] 1566 | name = "version_check" 1567 | version = "0.9.4" 1568 | source = "registry+https://github.com/rust-lang/crates.io-index" 1569 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 1570 | 1571 | [[package]] 1572 | name = "wasi" 1573 | version = "0.11.0+wasi-snapshot-preview1" 1574 | source = "registry+https://github.com/rust-lang/crates.io-index" 1575 | checksum = 
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1576 | 1577 | [[package]] 1578 | name = "winapi" 1579 | version = "0.3.9" 1580 | source = "registry+https://github.com/rust-lang/crates.io-index" 1581 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1582 | dependencies = [ 1583 | "winapi-i686-pc-windows-gnu", 1584 | "winapi-x86_64-pc-windows-gnu", 1585 | ] 1586 | 1587 | [[package]] 1588 | name = "winapi-i686-pc-windows-gnu" 1589 | version = "0.4.0" 1590 | source = "registry+https://github.com/rust-lang/crates.io-index" 1591 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1592 | 1593 | [[package]] 1594 | name = "winapi-util" 1595 | version = "0.1.5" 1596 | source = "registry+https://github.com/rust-lang/crates.io-index" 1597 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 1598 | dependencies = [ 1599 | "winapi", 1600 | ] 1601 | 1602 | [[package]] 1603 | name = "winapi-x86_64-pc-windows-gnu" 1604 | version = "0.4.0" 1605 | source = "registry+https://github.com/rust-lang/crates.io-index" 1606 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1607 | 1608 | [[package]] 1609 | name = "windows-sys" 1610 | version = "0.36.1" 1611 | source = "registry+https://github.com/rust-lang/crates.io-index" 1612 | checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" 1613 | dependencies = [ 1614 | "windows_aarch64_msvc", 1615 | "windows_i686_gnu", 1616 | "windows_i686_msvc", 1617 | "windows_x86_64_gnu", 1618 | "windows_x86_64_msvc", 1619 | ] 1620 | 1621 | [[package]] 1622 | name = "windows_aarch64_msvc" 1623 | version = "0.36.1" 1624 | source = "registry+https://github.com/rust-lang/crates.io-index" 1625 | checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" 1626 | 1627 | [[package]] 1628 | name = "windows_i686_gnu" 1629 | version = "0.36.1" 1630 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1631 | checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" 1632 | 1633 | [[package]] 1634 | name = "windows_i686_msvc" 1635 | version = "0.36.1" 1636 | source = "registry+https://github.com/rust-lang/crates.io-index" 1637 | checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" 1638 | 1639 | [[package]] 1640 | name = "windows_x86_64_gnu" 1641 | version = "0.36.1" 1642 | source = "registry+https://github.com/rust-lang/crates.io-index" 1643 | checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" 1644 | 1645 | [[package]] 1646 | name = "windows_x86_64_msvc" 1647 | version = "0.36.1" 1648 | source = "registry+https://github.com/rust-lang/crates.io-index" 1649 | checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" 1650 | 1651 | [[package]] 1652 | name = "zstd" 1653 | version = "0.11.2+zstd.1.5.2" 1654 | source = "registry+https://github.com/rust-lang/crates.io-index" 1655 | checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" 1656 | dependencies = [ 1657 | "zstd-safe", 1658 | ] 1659 | 1660 | [[package]] 1661 | name = "zstd-safe" 1662 | version = "5.0.2+zstd.1.5.2" 1663 | source = "registry+https://github.com/rust-lang/crates.io-index" 1664 | checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" 1665 | dependencies = [ 1666 | "libc", 1667 | "zstd-sys", 1668 | ] 1669 | 1670 | [[package]] 1671 | name = "zstd-sys" 1672 | version = "2.0.1+zstd.1.5.2" 1673 | source = "registry+https://github.com/rust-lang/crates.io-index" 1674 | checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" 1675 | dependencies = [ 1676 | "cc", 1677 | "libc", 1678 | ] 1679 | -------------------------------------------------------------------------------- /amazon_review_pipeline/Cargo.toml: -------------------------------------------------------------------------------- 1 | 
[package] 2 | name = "amazon_review_pipeline" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | tokio = "1.19.2" 10 | arrow = {version = "17.0.0", features = ["prettyprint"]} 11 | datafusion = "9.0.0" 12 | log = "0.4.17" 13 | env_logger = "0.9.0" 14 | -------------------------------------------------------------------------------- /amazon_review_pipeline/Makefile: -------------------------------------------------------------------------------- 1 | run_rust_dev: 2 | RUST_LOG=info cargo run 3 | run_rust_release: 4 | RUST_LOG=info ./target/release/amazon_review_pipeline 5 | clean_dirs: 6 | rm -rf datalayers/insights/toys_n_game/ && rm -rf datalayers/analytics/toys_n_game/ 7 | run_pyspark_release: 8 | poetry run python pysrc/main.py 9 | -------------------------------------------------------------------------------- /amazon_review_pipeline/README.md: -------------------------------------------------------------------------------- 1 | # Amazon Review Pipeline (DataFusion) 2 | The data consists of Amazon product reviews for toys and games, 800+ megabytes in total. Each record in the data is a product review. 
3 | 4 | The following transformations are demonstrated in the data pipeline: 5 | - Reading JSON data with DataFusion 6 | - Select a subsection of the data 7 | - Add transformed timestamp, categorical, numerical columns 8 | - Replace null values with a fixed value 9 | - SQL-like CASE WHEN statement to create a categorical column 10 | - Filter and Sort the Data 11 | - Repartition the Data 12 | - Save the data with partition 13 | 14 | Frameworks used: 15 | - Rust: DataFusion 16 | - Python: PySpark 17 | 18 | ## Performance Logs: 19 | 20 | Logs for PySpark: 21 | ``` 22 | datapsycho@dataops:~/.../amazon_review_pipeline$ make run_pyspark_release 23 | poetry run python pysrc/main.py 24 | [2022-07-14 01:20:47,833 INFO amazon_review_pipeline] Data loading plan created successfully! 25 | [2022-07-14 01:20:47,920 INFO amazon_review_pipeline] Plan for processed layer created successfully! 26 | [2022-07-14 01:20:48,096 INFO amazon_review_pipeline] Plan for Aggregate Layer created successfully! 27 | [2022-07-14 01:21:00,630 INFO amazon_review_pipeline] Data Written successfully in analytics layer! 28 | [2022-07-14 01:21:11,641 INFO amazon_review_pipeline] Data Written successfully in insights layer! 29 | [2022-07-14 01:21:11,642 INFO amazon_review_pipeline] Pipeline executed successfully! 30 | [2022-07-14 01:21:11,642 INFO amazon_review_pipeline] Pipeline Execution time: 35.40674662590027s. 31 | ``` 32 | 33 | Logs for Rust DataFusion: 34 | ``` 35 | datapsycho@dataops:~/.../amazon_review_pipeline$ make run_rust_release 36 | RUST_LOG=info ./target/release/amazon_review_pipeline 37 | [2022-07-13T23:21:52Z INFO amazon_review_pipeline] Data loading plan created successfully! 38 | [2022-07-13T23:21:52Z INFO amazon_review_pipeline] Year month lenght column added plan created successfully! 39 | [2022-07-13T23:21:52Z INFO amazon_review_pipeline] Plan for processed layer created successfully! 40 | [2022-07-13T23:21:52Z INFO amazon_review_pipeline] Plan for Aggregate Layer created successfully! 
41 | [2022-07-13T23:22:01Z INFO amazon_review_pipeline] Data Written successfully in analytics layer! 42 | [2022-07-13T23:22:12Z INFO amazon_review_pipeline] Data Written successfully in analytics layer! 43 | [2022-07-13T23:22:12Z INFO amazon_review_pipeline] Pipeline executed successfully! 44 | [2022-07-13T23:22:12Z INFO amazon_review_pipeline] Pipeline Execution time: 20.104681726s 45 | ``` 46 | 47 | ## Setup Configuration: 48 | The CPU used to run the pipelines is an Intel Core i7 (8th Gen) with 16 GB RAM, running Linux Mint OS. -------------------------------------------------------------------------------- /amazon_review_pipeline/datalayers/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrDataPsycho/data-pipelines-in-rust/06319272f26745c5de61bab73ff4d5d133771568/amazon_review_pipeline/datalayers/.gitkeep -------------------------------------------------------------------------------- /amazon_review_pipeline/datalayers/analytics/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrDataPsycho/data-pipelines-in-rust/06319272f26745c5de61bab73ff4d5d133771568/amazon_review_pipeline/datalayers/analytics/.gitkeep -------------------------------------------------------------------------------- /amazon_review_pipeline/datalayers/downloader.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | wget -P datalayers/landing http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/Toys_and_Games_5.json.gz 4 | gzip -dk datalayers/landing/Toys_and_Games_5.json.gz 5 | rm -rf datalayers/landing/Toys_and_Games_5.json.gz -------------------------------------------------------------------------------- /amazon_review_pipeline/datalayers/insights/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MrDataPsycho/data-pipelines-in-rust/06319272f26745c5de61bab73ff4d5d133771568/amazon_review_pipeline/datalayers/insights/.gitkeep -------------------------------------------------------------------------------- /amazon_review_pipeline/datalayers/landing/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrDataPsycho/data-pipelines-in-rust/06319272f26745c5de61bab73ff4d5d133771568/amazon_review_pipeline/datalayers/landing/.gitkeep -------------------------------------------------------------------------------- /amazon_review_pipeline/experiment/data_fusion.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Add; 2 | // use chrono::prelude::*; 3 | use datafusion::prelude::*; 4 | 5 | #[tokio::main] 6 | async fn main() -> datafusion::error::Result<()> { 7 | // let file_path = "datalayers/landing/Toys_and_Games_5.json"; 8 | let file_path = "datalayers/landing/test_file.json"; 9 | // let selected_columns = vec!["asin", "vote", "verified", "unixReviewTime", "reviewText"]; 10 | 11 | let mut ctx = SessionContext::new(); 12 | let df = ctx.read_json(file_path, NdJsonReadOptions::default()).await?; 13 | // let df = df.select_columns(&selected_columns)?.limit(None, Some(10))?; 14 | let df = df.select(vec![col("a"), col("c"), col("a").add(col("c")).alias("d")])?; 15 | // let result = df.collect().await?; 16 | // let pretty_results = datafusion::arrow::util::pretty::pretty_format_batches(&result)?; 17 | // println!("{:?}", pretty_results.to_string()); 18 | df.show().await?; 19 | Ok(()) 20 | } 21 | 22 | // use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; 23 | // let schema = Arc::new(Schema::new(vec![ 24 | // Field::new("asin", DataType::Utf8, false), 25 | // Field::new("vote", DataType::Int32, true), 26 | // Field::new("verified", DataType::Boolean, false), 27 | // Field::new("unixReviewTime", 
DataType::Timestamp(TimeUnit::Millisecond, None), false), 28 | // Field::new("reviewText", DataType::UInt8, true), 29 | // ])); 30 | 31 | 32 | // fn get_date_time(){ 33 | // let timestamp:i64 = 1381017600; 34 | // let naive = NaiveDateTime::from_timestamp(timestamp, 0); 35 | // info!("Data read successfully!"); 36 | // info!("Year: {}", naive.year()); 37 | // info!("Month: {}", naive.month()); 38 | // info!("Day: {}", naive.day()); 39 | // // println!("{}", naive); 40 | // } -------------------------------------------------------------------------------- /amazon_review_pipeline/poetry.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "py4j" 3 | version = "0.10.9.5" 4 | description = "Enables Python programs to dynamically access arbitrary Java objects" 5 | category = "main" 6 | optional = false 7 | python-versions = "*" 8 | 9 | [[package]] 10 | name = "pyspark" 11 | version = "3.3.0" 12 | description = "Apache Spark Python API" 13 | category = "main" 14 | optional = false 15 | python-versions = ">=3.7" 16 | 17 | [package.dependencies] 18 | py4j = "0.10.9.5" 19 | 20 | [package.extras] 21 | ml = ["numpy (>=1.15)"] 22 | mllib = ["numpy (>=1.15)"] 23 | pandas_on_spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] 24 | sql = ["pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] 25 | 26 | [[package]] 27 | name = "python-dotenv" 28 | version = "0.20.0" 29 | description = "Read key-value pairs from a .env file and set them as environment variables" 30 | category = "main" 31 | optional = false 32 | python-versions = ">=3.5" 33 | 34 | [package.extras] 35 | cli = ["click (>=5.0)"] 36 | 37 | [metadata] 38 | lock-version = "1.1" 39 | python-versions = "^3.8" 40 | content-hash = "5826a617369fe2ae5b274fc3a0a22974421593d74199e233b27e4b124416c0c7" 41 | 42 | [metadata.files] 43 | py4j = [ 44 | {file = "py4j-0.10.9.5-py2.py3-none-any.whl", hash = 
"sha256:52d171a6a2b031d8a5d1de6efe451cf4f5baff1a2819aabc3741c8406539ba04"}, 45 | {file = "py4j-0.10.9.5.tar.gz", hash = "sha256:276a4a3c5a2154df1860ef3303a927460e02e97b047dc0a47c1c3fb8cce34db6"}, 46 | ] 47 | pyspark = [ 48 | {file = "pyspark-3.3.0.tar.gz", hash = "sha256:7ebe8e9505647b4d124d5a82fca60dfd3891021cf8ad6c5ec88777eeece92cf7"}, 49 | ] 50 | python-dotenv = [] 51 | -------------------------------------------------------------------------------- /amazon_review_pipeline/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "amazon_review_pipeline" 3 | version = "0.1.0" 4 | description = "amazon review data pipeline with pyspark" 5 | authors = ["DataPsycho "] 6 | license = "MIT" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.8" 10 | pyspark = "^3.3.0" 11 | python-dotenv = "^0.20.0" 12 | 13 | [tool.poetry.dev-dependencies] 14 | 15 | [build-system] 16 | requires = ["poetry-core>=1.0.0"] 17 | build-backend = "poetry.core.masonry.api" 18 | -------------------------------------------------------------------------------- /amazon_review_pipeline/pysrc/main.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | import logging 4 | import time 5 | from pyspark.sql import SparkSession, DataFrame 6 | import pyspark.sql.functions as fn 7 | from pyspark.sql.functions import col 8 | from dotenv import load_dotenv 9 | 10 | logging.basicConfig(format='[%(asctime)s %(levelname)s amazon_review_pipeline] %(message)s', level=logging.INFO) 11 | 12 | def create_spark_session(): 13 | """Create a Spark Session""" 14 | _ = load_dotenv() 15 | return ( 16 | SparkSession 17 | .builder 18 | .appName("MovieReview") 19 | .getOrCreate() 20 | ) 21 | 22 | 23 | def read_data(path: str) -> DataFrame: 24 | spark = create_spark_session() 25 | spark.conf.set("spark.sql.caseSensitive", "true") 26 | column_list = [ 27 | 
"asin", 28 | "vote", 29 | "verified", 30 | "unixReviewTime", 31 | "reviewTime", 32 | "reviewText", 33 | ] 34 | df = spark.read.json(path) 35 | df = df.select(*column_list) 36 | 37 | df = df.select( 38 | col("asin"), 39 | fn.coalesce(col("vote"), fn.lit("0")).alias("vote"), 40 | fn.to_timestamp(col("unixReviewTime")).alias("reviewed_at"), 41 | fn.coalesce(col("reviewText"), fn.lit("")).alias("review_text"), 42 | ) 43 | df = df.withColumn("review_text_len", fn.length(col("review_text"))) 44 | df = df.withColumn("reviewed_year", fn.year(col("reviewed_at"))) 45 | df = df.withColumn("reviewed_month", fn.month(col("reviewed_at"))) 46 | logging.info("Data loading plan created successfully!") 47 | return df 48 | 49 | 50 | def add_processed_columns(df: DataFrame) -> DataFrame: 51 | df = df.withColumn( 52 | "review_text_ctg", 53 | fn.when(col("review_text_len") >= 200, "long") 54 | .when((col("review_text_len") > 10) & (col("review_text_len") < 200), "medium") 55 | .when((col("review_text_len") > 1) & (col("review_text_len") <= 10), "short") 56 | .otherwise("invalid") 57 | ) 58 | logging.info("Plan for processed layer created successfully!"); 59 | return df.repartition(20) 60 | 61 | def prepare_aggregated_insights(df: DataFrame) -> DataFrame: 62 | df = ( 63 | df.filter(col("review_text_len") > 0) 64 | .groupBy(col("asin"), col("reviewed_year"), col("reviewed_month")) 65 | .agg( 66 | fn.count(col("asin")).alias("total_review") 67 | ) 68 | .sort(col("reviewed_year"), col("reviewed_month")) 69 | ) 70 | logging.info("Plan for Aggregate Layer created successfully!"); 71 | return df.repartition(12, col("reviewed_year")) 72 | 73 | 74 | def main(): 75 | DATA_PATH = 'datalayers/landing/Toys_and_Games_5.json' 76 | df = read_data(DATA_PATH) 77 | processed_df = add_processed_columns(df) 78 | insights_df = prepare_aggregated_insights(processed_df) 79 | processed_df.write.mode("overwrite").csv("datalayers/analytics/toys_n_game") 80 | logging.info("Data Written successfully in analytics 
layer!") 81 | insights_df.write.mode("overwrite").csv("datalayers/insights/toys_n_game") 82 | logging.info("Data Written successfully in insights layer!") 83 | # insights_df.show(3) 84 | 85 | if __name__ == "__main__": 86 | st = time.time() 87 | main() 88 | et = time.time() 89 | res = et - st 90 | logging.info("Pipeline executed successfully!") 91 | logging.info(f'Pipeline Execution time: {res}s.') 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /amazon_review_pipeline/src/main.rs: -------------------------------------------------------------------------------- 1 | use datafusion::logical_plan::{to_timestamp_seconds, when}; 2 | use datafusion::prelude::date_part; 3 | use datafusion::prelude::*; 4 | use env_logger; 5 | use log::info; 6 | use std::sync::Arc; 7 | use std::time::Instant; 8 | 9 | #[tokio::main] 10 | async fn main() -> datafusion::error::Result<()> { 11 | env_logger::init(); 12 | let start = Instant::now(); 13 | let file_path = "datalayers/landing/Toys_and_Games_5.json"; 14 | // let file_path = "datalayers/landing/test_file.json"; 15 | let df = read_data(file_path.to_string()).await?; 16 | let df_added_ymrl = add_year_month_review_len(&df).await?; 17 | let processed_df = add_processed_columns(&df_added_ymrl).await?; 18 | let insights_df = prepare_aggregated_insights(&df_added_ymrl).await?; 19 | processed_df 20 | .write_csv("datalayers/analytics/toys_n_game") 21 | .await?; 22 | info!("Data Written successfully in analytics layer!"); 23 | insights_df 24 | .write_csv("datalayers/insights/toys_n_game") 25 | .await?; 26 | info!("Data Written successfully in analytics layer!"); 27 | // processed_df.limit(None, Some(4))?.show().await?; 28 | // insights_df.limit(None, Some(4))?.show().await?; 29 | let duration = start.elapsed(); 30 | info!{"Pipeline executed successfully!"} 31 | info!("Pipeline Execution time: {:?}", duration); 32 | Ok(()) 33 | } 34 | 35 | async fn read_data(path: String) -> 
datafusion::error::Result> { 36 | let mut ctx = SessionContext::new(); 37 | let selected_columns = vec![ 38 | "asin", 39 | "vote", 40 | "verified", 41 | "unixReviewTime", 42 | "reviewTime", 43 | "reviewText", 44 | ]; 45 | let df_ = ctx.read_json(path, NdJsonReadOptions::default()).await?; 46 | let df_ = df_.select_columns(&selected_columns)?; 47 | info!("Data loading plan created successfully!"); 48 | Ok(df_) 49 | } 50 | 51 | async fn add_year_month_review_len(df: &Arc) -> datafusion::error::Result> { 52 | let processed_columns = vec![ 53 | col("asin"), 54 | coalesce(vec![col("vote"), lit("0")]).alias("vote"), 55 | col("verified"), 56 | to_timestamp_seconds(col("unixReviewTime")).alias("reviewed_at"), 57 | coalesce(vec![col("reviewText"), lit("")]).alias("review_text"), 58 | length(coalesce(vec![col("reviewText"), lit("")])).alias("review_text_len"), 59 | date_part( 60 | lit("year"), 61 | to_timestamp_seconds(col("unixReviewTime")) 62 | ).alias("reviewed_year"), 63 | date_part( 64 | lit("month"), 65 | to_timestamp_seconds(col("unixReviewTime")) 66 | ).alias("reviewed_month"), 67 | ]; 68 | let df_ = df.select(processed_columns); 69 | info!("Year month lenght column added plan created successfully!"); 70 | df_ 71 | } 72 | 73 | async fn add_processed_columns(df: &Arc) -> datafusion::error::Result> { 74 | let text_len_category = when(col("review_text_len").gt_eq(lit(200)), lit("long")) 75 | .when( 76 | col("review_text_len") 77 | .gt(lit(10)) 78 | .and(col("review_text_len").lt(lit(200))), 79 | lit("medium"), 80 | ) 81 | .when( 82 | col("review_text_len") 83 | .gt(lit(1)) 84 | .and(col("review_text_len").lt_eq(lit(10))), 85 | lit("short"), 86 | ) 87 | .otherwise(lit("invalid"))?; 88 | 89 | // let is_voted = when(col("vote").eq(lit(0)), lit("no")).otherwise(lit("yes"))?; 90 | 91 | let selected_col = vec![ 92 | col("asin"), 93 | // col("vote"), 94 | // is_voted.alias("is_voted"), 95 | col("verified"), 96 | col("reviewed_at"), 97 | col("review_text"), 98 | 
col("review_text_len"), 99 | text_len_category.alias("review_text_ctg"), 100 | col("reviewed_year"), 101 | col("reviewed_month"), 102 | ]; 103 | let df_ = df.select(selected_col)?; 104 | info!("Plan for processed layer created successfully!"); 105 | df_.repartition(Partitioning::RoundRobinBatch(20)) 106 | } 107 | /// Counts reviews with non-empty text per (asin, year, month), sorts by year then month ascending, and hash-repartitions on `reviewed_year` into 12 partitions. 108 | async fn prepare_aggregated_insights( 109 | df: &Arc<DataFrame>, 110 | ) -> datafusion::error::Result<Arc<DataFrame>> { 111 | let df_ = df 112 | .filter(col("review_text_len").gt(lit(0)))? 113 | .aggregate( 114 | vec![col("asin"), col("reviewed_year"), col("reviewed_month")], 115 | vec![ 116 | count(col("asin")).alias("total_review"), 117 | // sum(col("vote")).alias("total_vote"), 118 | ], 119 | )? 120 | .sort(vec![ 121 | col("reviewed_year").sort(true, false), 122 | col("reviewed_month").sort(true, false), 123 | ])?; 124 | info!("Plan for Aggregate Layer created successfully!"); 125 | df_.repartition(Partitioning::Hash(vec![col("reviewed_year")], 12)) 126 | } 127 | -------------------------------------------------------------------------------- /amazon_review_pipeline_polars/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .vscode 3 | /datalayers/**/*.json 4 | /datalayers/**/*.csv 5 | /datalayers/**/.part* 6 | datalayers/**/part* 7 | datalayers/**/_SUCCESS 8 | datalayers/**/._SUCCESS* 9 | datalayers/**/*.gz -------------------------------------------------------------------------------- /amazon_review_pipeline_polars/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "ahash" 7 | version = "0.7.6" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" 10 | dependencies = [ 11 | "getrandom", 12 | "once_cell", 13 | "version_check", 14 | ] 15 | 16 | [[package]] 17 | name = "ahash" 18 | version = "0.8.3" 19 | source = "registry+https://github.com/rust-lang/crates.io-index" 20 | checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" 21 | dependencies = [ 22 | "cfg-if", 23 | "once_cell", 24 | "version_check", 25 | ] 26 | 27 | [[package]] 28 | name = "aho-corasick" 29 | version = "0.7.20" 30 | source = "registry+https://github.com/rust-lang/crates.io-index" 31 | checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" 32 | dependencies = [ 33 | "memchr", 34 | ] 35 | 36 | [[package]] 37 | name = "amazon_review_pipeline_polars" 38 | version = "0.1.0" 39 | dependencies = [ 40 | "env_logger", 41 | "log", 42 | "polars", 43 | "tokio", 44 | ] 45 | 46 | [[package]] 47 | name = "android_system_properties" 48 | version = "0.1.5" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" 51 | dependencies = [ 52 | "libc", 53 | ] 54 | 55 | [[package]] 56 | name = "anyhow" 57 | version = "1.0.69" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" 60 | 61 | [[package]] 62 | name = "arrow2" 63 | version = "0.14.2" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "ee6f62e41078c967a4c063fcbdfd3801a2a9632276402c045311c4d73d0845f3" 66 | dependencies = [ 67 | "ahash 0.7.6", 68 | "bytemuck", 69 | "chrono", 70 | "dyn-clone", 71 | "either", 72 | "ethnum", 73 | "fallible-streaming-iterator", 74 | "foreign_vec", 75 | "hash_hasher", 76 | "indexmap", 
77 | "json-deserializer", 78 | "lexical-core", 79 | "multiversion", 80 | "num-traits", 81 | "simdutf8", 82 | "streaming-iterator", 83 | "strength_reduce", 84 | ] 85 | 86 | [[package]] 87 | name = "atty" 88 | version = "0.2.14" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 91 | dependencies = [ 92 | "hermit-abi 0.1.19", 93 | "libc", 94 | "winapi", 95 | ] 96 | 97 | [[package]] 98 | name = "autocfg" 99 | version = "1.1.0" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 102 | 103 | [[package]] 104 | name = "bitflags" 105 | version = "1.3.2" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 108 | 109 | [[package]] 110 | name = "bumpalo" 111 | version = "3.12.0" 112 | source = "registry+https://github.com/rust-lang/crates.io-index" 113 | checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" 114 | 115 | [[package]] 116 | name = "bytemuck" 117 | version = "1.13.1" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" 120 | dependencies = [ 121 | "bytemuck_derive", 122 | ] 123 | 124 | [[package]] 125 | name = "bytemuck_derive" 126 | version = "1.4.0" 127 | source = "registry+https://github.com/rust-lang/crates.io-index" 128 | checksum = "1aca418a974d83d40a0c1f0c5cba6ff4bc28d8df099109ca459a2118d40b6322" 129 | dependencies = [ 130 | "proc-macro2", 131 | "quote", 132 | "syn", 133 | ] 134 | 135 | [[package]] 136 | name = "byteorder" 137 | version = "1.4.3" 138 | source = "registry+https://github.com/rust-lang/crates.io-index" 139 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" 140 | 141 | [[package]] 142 | name 
= "bytes" 143 | version = "1.4.0" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" 146 | 147 | [[package]] 148 | name = "cc" 149 | version = "1.0.79" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" 152 | 153 | [[package]] 154 | name = "cfg-if" 155 | version = "1.0.0" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 158 | 159 | [[package]] 160 | name = "chrono" 161 | version = "0.4.23" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" 164 | dependencies = [ 165 | "iana-time-zone", 166 | "js-sys", 167 | "num-integer", 168 | "num-traits", 169 | "time", 170 | "wasm-bindgen", 171 | "winapi", 172 | ] 173 | 174 | [[package]] 175 | name = "codespan-reporting" 176 | version = "0.11.1" 177 | source = "registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" 179 | dependencies = [ 180 | "termcolor", 181 | "unicode-width", 182 | ] 183 | 184 | [[package]] 185 | name = "comfy-table" 186 | version = "5.0.1" 187 | source = "registry+https://github.com/rust-lang/crates.io-index" 188 | checksum = "b103d85ca6e209388771bfb7aa6b68a7aeec4afbf6f0a0264bfbf50360e5212e" 189 | dependencies = [ 190 | "crossterm", 191 | "strum", 192 | "strum_macros", 193 | "unicode-width", 194 | ] 195 | 196 | [[package]] 197 | name = "core-foundation-sys" 198 | version = "0.8.3" 199 | source = "registry+https://github.com/rust-lang/crates.io-index" 200 | checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" 201 | 202 | [[package]] 203 | name = "crossbeam-channel" 204 | version = "0.5.7" 205 
| source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "cf2b3e8478797446514c91ef04bafcb59faba183e621ad488df88983cc14128c" 207 | dependencies = [ 208 | "cfg-if", 209 | "crossbeam-utils", 210 | ] 211 | 212 | [[package]] 213 | name = "crossbeam-deque" 214 | version = "0.8.3" 215 | source = "registry+https://github.com/rust-lang/crates.io-index" 216 | checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" 217 | dependencies = [ 218 | "cfg-if", 219 | "crossbeam-epoch", 220 | "crossbeam-utils", 221 | ] 222 | 223 | [[package]] 224 | name = "crossbeam-epoch" 225 | version = "0.9.14" 226 | source = "registry+https://github.com/rust-lang/crates.io-index" 227 | checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" 228 | dependencies = [ 229 | "autocfg", 230 | "cfg-if", 231 | "crossbeam-utils", 232 | "memoffset", 233 | "scopeguard", 234 | ] 235 | 236 | [[package]] 237 | name = "crossbeam-utils" 238 | version = "0.8.15" 239 | source = "registry+https://github.com/rust-lang/crates.io-index" 240 | checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" 241 | dependencies = [ 242 | "cfg-if", 243 | ] 244 | 245 | [[package]] 246 | name = "crossterm" 247 | version = "0.23.2" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "a2102ea4f781910f8a5b98dd061f4c2023f479ce7bb1236330099ceb5a93cf17" 250 | dependencies = [ 251 | "bitflags", 252 | "crossterm_winapi", 253 | "libc", 254 | "mio", 255 | "parking_lot", 256 | "signal-hook", 257 | "signal-hook-mio", 258 | "winapi", 259 | ] 260 | 261 | [[package]] 262 | name = "crossterm_winapi" 263 | version = "0.9.0" 264 | source = "registry+https://github.com/rust-lang/crates.io-index" 265 | checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c" 266 | dependencies = [ 267 | "winapi", 268 | ] 269 | 270 | [[package]] 271 | name = "csv-core" 272 | version = "0.1.10" 273 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 274 | checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" 275 | dependencies = [ 276 | "memchr", 277 | ] 278 | 279 | [[package]] 280 | name = "cxx" 281 | version = "1.0.91" 282 | source = "registry+https://github.com/rust-lang/crates.io-index" 283 | checksum = "86d3488e7665a7a483b57e25bdd90d0aeb2bc7608c8d0346acf2ad3f1caf1d62" 284 | dependencies = [ 285 | "cc", 286 | "cxxbridge-flags", 287 | "cxxbridge-macro", 288 | "link-cplusplus", 289 | ] 290 | 291 | [[package]] 292 | name = "cxx-build" 293 | version = "1.0.91" 294 | source = "registry+https://github.com/rust-lang/crates.io-index" 295 | checksum = "48fcaf066a053a41a81dfb14d57d99738b767febb8b735c3016e469fac5da690" 296 | dependencies = [ 297 | "cc", 298 | "codespan-reporting", 299 | "once_cell", 300 | "proc-macro2", 301 | "quote", 302 | "scratch", 303 | "syn", 304 | ] 305 | 306 | [[package]] 307 | name = "cxxbridge-flags" 308 | version = "1.0.91" 309 | source = "registry+https://github.com/rust-lang/crates.io-index" 310 | checksum = "a2ef98b8b717a829ca5603af80e1f9e2e48013ab227b68ef37872ef84ee479bf" 311 | 312 | [[package]] 313 | name = "cxxbridge-macro" 314 | version = "1.0.91" 315 | source = "registry+https://github.com/rust-lang/crates.io-index" 316 | checksum = "086c685979a698443656e5cf7856c95c642295a38599f12fb1ff76fb28d19892" 317 | dependencies = [ 318 | "proc-macro2", 319 | "quote", 320 | "syn", 321 | ] 322 | 323 | [[package]] 324 | name = "dirs" 325 | version = "4.0.0" 326 | source = "registry+https://github.com/rust-lang/crates.io-index" 327 | checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" 328 | dependencies = [ 329 | "dirs-sys", 330 | ] 331 | 332 | [[package]] 333 | name = "dirs-sys" 334 | version = "0.3.7" 335 | source = "registry+https://github.com/rust-lang/crates.io-index" 336 | checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" 337 | dependencies = [ 338 | "libc", 
339 | "redox_users", 340 | "winapi", 341 | ] 342 | 343 | [[package]] 344 | name = "dyn-clone" 345 | version = "1.0.11" 346 | source = "registry+https://github.com/rust-lang/crates.io-index" 347 | checksum = "68b0cf012f1230e43cd00ebb729c6bb58707ecfa8ad08b52ef3a4ccd2697fc30" 348 | 349 | [[package]] 350 | name = "either" 351 | version = "1.8.1" 352 | source = "registry+https://github.com/rust-lang/crates.io-index" 353 | checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" 354 | 355 | [[package]] 356 | name = "env_logger" 357 | version = "0.9.3" 358 | source = "registry+https://github.com/rust-lang/crates.io-index" 359 | checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" 360 | dependencies = [ 361 | "atty", 362 | "humantime", 363 | "log", 364 | "regex", 365 | "termcolor", 366 | ] 367 | 368 | [[package]] 369 | name = "ethnum" 370 | version = "1.3.2" 371 | source = "registry+https://github.com/rust-lang/crates.io-index" 372 | checksum = "0198b9d0078e0f30dedc7acbb21c974e838fc8fae3ee170128658a98cb2c1c04" 373 | 374 | [[package]] 375 | name = "fallible-streaming-iterator" 376 | version = "0.1.9" 377 | source = "registry+https://github.com/rust-lang/crates.io-index" 378 | checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" 379 | 380 | [[package]] 381 | name = "float-cmp" 382 | version = "0.9.0" 383 | source = "registry+https://github.com/rust-lang/crates.io-index" 384 | checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" 385 | dependencies = [ 386 | "num-traits", 387 | ] 388 | 389 | [[package]] 390 | name = "foreign_vec" 391 | version = "0.1.0" 392 | source = "registry+https://github.com/rust-lang/crates.io-index" 393 | checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" 394 | 395 | [[package]] 396 | name = "fxhash" 397 | version = "0.2.1" 398 | source = "registry+https://github.com/rust-lang/crates.io-index" 399 | checksum = 
"c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" 400 | dependencies = [ 401 | "byteorder", 402 | ] 403 | 404 | [[package]] 405 | name = "getrandom" 406 | version = "0.2.8" 407 | source = "registry+https://github.com/rust-lang/crates.io-index" 408 | checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" 409 | dependencies = [ 410 | "cfg-if", 411 | "libc", 412 | "wasi 0.11.0+wasi-snapshot-preview1", 413 | ] 414 | 415 | [[package]] 416 | name = "glob" 417 | version = "0.3.1" 418 | source = "registry+https://github.com/rust-lang/crates.io-index" 419 | checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" 420 | 421 | [[package]] 422 | name = "halfbrown" 423 | version = "0.1.18" 424 | source = "registry+https://github.com/rust-lang/crates.io-index" 425 | checksum = "9e2a3c70a9c00cc1ee87b54e89f9505f73bb17d63f1b25c9a462ba8ef885444f" 426 | dependencies = [ 427 | "fxhash", 428 | "hashbrown 0.13.2", 429 | "serde", 430 | ] 431 | 432 | [[package]] 433 | name = "hash_hasher" 434 | version = "2.0.3" 435 | source = "registry+https://github.com/rust-lang/crates.io-index" 436 | checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" 437 | 438 | [[package]] 439 | name = "hashbrown" 440 | version = "0.12.3" 441 | source = "registry+https://github.com/rust-lang/crates.io-index" 442 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 443 | dependencies = [ 444 | "ahash 0.7.6", 445 | "rayon", 446 | ] 447 | 448 | [[package]] 449 | name = "hashbrown" 450 | version = "0.13.2" 451 | source = "registry+https://github.com/rust-lang/crates.io-index" 452 | checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" 453 | dependencies = [ 454 | "ahash 0.8.3", 455 | ] 456 | 457 | [[package]] 458 | name = "heck" 459 | version = "0.3.3" 460 | source = "registry+https://github.com/rust-lang/crates.io-index" 461 | checksum = 
"6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" 462 | dependencies = [ 463 | "unicode-segmentation", 464 | ] 465 | 466 | [[package]] 467 | name = "hermit-abi" 468 | version = "0.1.19" 469 | source = "registry+https://github.com/rust-lang/crates.io-index" 470 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 471 | dependencies = [ 472 | "libc", 473 | ] 474 | 475 | [[package]] 476 | name = "hermit-abi" 477 | version = "0.2.6" 478 | source = "registry+https://github.com/rust-lang/crates.io-index" 479 | checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" 480 | dependencies = [ 481 | "libc", 482 | ] 483 | 484 | [[package]] 485 | name = "humantime" 486 | version = "2.1.0" 487 | source = "registry+https://github.com/rust-lang/crates.io-index" 488 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 489 | 490 | [[package]] 491 | name = "iana-time-zone" 492 | version = "0.1.53" 493 | source = "registry+https://github.com/rust-lang/crates.io-index" 494 | checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" 495 | dependencies = [ 496 | "android_system_properties", 497 | "core-foundation-sys", 498 | "iana-time-zone-haiku", 499 | "js-sys", 500 | "wasm-bindgen", 501 | "winapi", 502 | ] 503 | 504 | [[package]] 505 | name = "iana-time-zone-haiku" 506 | version = "0.1.1" 507 | source = "registry+https://github.com/rust-lang/crates.io-index" 508 | checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" 509 | dependencies = [ 510 | "cxx", 511 | "cxx-build", 512 | ] 513 | 514 | [[package]] 515 | name = "indexmap" 516 | version = "1.9.2" 517 | source = "registry+https://github.com/rust-lang/crates.io-index" 518 | checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" 519 | dependencies = [ 520 | "autocfg", 521 | "hashbrown 0.12.3", 522 | ] 523 | 524 | [[package]] 525 | name = "itoa" 526 | version = "1.0.6" 527 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 528 | checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" 529 | 530 | [[package]] 531 | name = "js-sys" 532 | version = "0.3.61" 533 | source = "registry+https://github.com/rust-lang/crates.io-index" 534 | checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" 535 | dependencies = [ 536 | "wasm-bindgen", 537 | ] 538 | 539 | [[package]] 540 | name = "json-deserializer" 541 | version = "0.4.4" 542 | source = "registry+https://github.com/rust-lang/crates.io-index" 543 | checksum = "5f63b421e16eb4100beb677af56f0b4f3a4f08bab74ef2af079ce5bb92c2683f" 544 | dependencies = [ 545 | "indexmap", 546 | ] 547 | 548 | [[package]] 549 | name = "lexical" 550 | version = "6.1.1" 551 | source = "registry+https://github.com/rust-lang/crates.io-index" 552 | checksum = "c7aefb36fd43fef7003334742cbf77b243fcd36418a1d1bdd480d613a67968f6" 553 | dependencies = [ 554 | "lexical-core", 555 | ] 556 | 557 | [[package]] 558 | name = "lexical-core" 559 | version = "0.8.5" 560 | source = "registry+https://github.com/rust-lang/crates.io-index" 561 | checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" 562 | dependencies = [ 563 | "lexical-parse-float", 564 | "lexical-parse-integer", 565 | "lexical-util", 566 | "lexical-write-float", 567 | "lexical-write-integer", 568 | ] 569 | 570 | [[package]] 571 | name = "lexical-parse-float" 572 | version = "0.8.5" 573 | source = "registry+https://github.com/rust-lang/crates.io-index" 574 | checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" 575 | dependencies = [ 576 | "lexical-parse-integer", 577 | "lexical-util", 578 | "static_assertions", 579 | ] 580 | 581 | [[package]] 582 | name = "lexical-parse-integer" 583 | version = "0.8.6" 584 | source = "registry+https://github.com/rust-lang/crates.io-index" 585 | checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" 586 | 
dependencies = [ 587 | "lexical-util", 588 | "static_assertions", 589 | ] 590 | 591 | [[package]] 592 | name = "lexical-util" 593 | version = "0.8.5" 594 | source = "registry+https://github.com/rust-lang/crates.io-index" 595 | checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" 596 | dependencies = [ 597 | "static_assertions", 598 | ] 599 | 600 | [[package]] 601 | name = "lexical-write-float" 602 | version = "0.8.5" 603 | source = "registry+https://github.com/rust-lang/crates.io-index" 604 | checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" 605 | dependencies = [ 606 | "lexical-util", 607 | "lexical-write-integer", 608 | "static_assertions", 609 | ] 610 | 611 | [[package]] 612 | name = "lexical-write-integer" 613 | version = "0.8.5" 614 | source = "registry+https://github.com/rust-lang/crates.io-index" 615 | checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" 616 | dependencies = [ 617 | "lexical-util", 618 | "static_assertions", 619 | ] 620 | 621 | [[package]] 622 | name = "libc" 623 | version = "0.2.139" 624 | source = "registry+https://github.com/rust-lang/crates.io-index" 625 | checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" 626 | 627 | [[package]] 628 | name = "libm" 629 | version = "0.2.6" 630 | source = "registry+https://github.com/rust-lang/crates.io-index" 631 | checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" 632 | 633 | [[package]] 634 | name = "link-cplusplus" 635 | version = "1.0.8" 636 | source = "registry+https://github.com/rust-lang/crates.io-index" 637 | checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" 638 | dependencies = [ 639 | "cc", 640 | ] 641 | 642 | [[package]] 643 | name = "lock_api" 644 | version = "0.4.9" 645 | source = "registry+https://github.com/rust-lang/crates.io-index" 646 | checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" 647 | 
dependencies = [ 648 | "autocfg", 649 | "scopeguard", 650 | ] 651 | 652 | [[package]] 653 | name = "log" 654 | version = "0.4.17" 655 | source = "registry+https://github.com/rust-lang/crates.io-index" 656 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 657 | dependencies = [ 658 | "cfg-if", 659 | ] 660 | 661 | [[package]] 662 | name = "memchr" 663 | version = "2.5.0" 664 | source = "registry+https://github.com/rust-lang/crates.io-index" 665 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 666 | 667 | [[package]] 668 | name = "memmap2" 669 | version = "0.5.10" 670 | source = "registry+https://github.com/rust-lang/crates.io-index" 671 | checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" 672 | dependencies = [ 673 | "libc", 674 | ] 675 | 676 | [[package]] 677 | name = "memoffset" 678 | version = "0.8.0" 679 | source = "registry+https://github.com/rust-lang/crates.io-index" 680 | checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" 681 | dependencies = [ 682 | "autocfg", 683 | ] 684 | 685 | [[package]] 686 | name = "mio" 687 | version = "0.8.6" 688 | source = "registry+https://github.com/rust-lang/crates.io-index" 689 | checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" 690 | dependencies = [ 691 | "libc", 692 | "log", 693 | "wasi 0.11.0+wasi-snapshot-preview1", 694 | "windows-sys", 695 | ] 696 | 697 | [[package]] 698 | name = "multiversion" 699 | version = "0.6.1" 700 | source = "registry+https://github.com/rust-lang/crates.io-index" 701 | checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" 702 | dependencies = [ 703 | "multiversion-macros", 704 | ] 705 | 706 | [[package]] 707 | name = "multiversion-macros" 708 | version = "0.6.1" 709 | source = "registry+https://github.com/rust-lang/crates.io-index" 710 | checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" 711 | dependencies = 
[ 712 | "proc-macro2", 713 | "quote", 714 | "syn", 715 | ] 716 | 717 | [[package]] 718 | name = "num" 719 | version = "0.4.0" 720 | source = "registry+https://github.com/rust-lang/crates.io-index" 721 | checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" 722 | dependencies = [ 723 | "num-bigint", 724 | "num-complex", 725 | "num-integer", 726 | "num-iter", 727 | "num-rational", 728 | "num-traits", 729 | ] 730 | 731 | [[package]] 732 | name = "num-bigint" 733 | version = "0.4.3" 734 | source = "registry+https://github.com/rust-lang/crates.io-index" 735 | checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" 736 | dependencies = [ 737 | "autocfg", 738 | "num-integer", 739 | "num-traits", 740 | ] 741 | 742 | [[package]] 743 | name = "num-complex" 744 | version = "0.4.3" 745 | source = "registry+https://github.com/rust-lang/crates.io-index" 746 | checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" 747 | dependencies = [ 748 | "num-traits", 749 | ] 750 | 751 | [[package]] 752 | name = "num-integer" 753 | version = "0.1.45" 754 | source = "registry+https://github.com/rust-lang/crates.io-index" 755 | checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" 756 | dependencies = [ 757 | "autocfg", 758 | "num-traits", 759 | ] 760 | 761 | [[package]] 762 | name = "num-iter" 763 | version = "0.1.43" 764 | source = "registry+https://github.com/rust-lang/crates.io-index" 765 | checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" 766 | dependencies = [ 767 | "autocfg", 768 | "num-integer", 769 | "num-traits", 770 | ] 771 | 772 | [[package]] 773 | name = "num-rational" 774 | version = "0.4.1" 775 | source = "registry+https://github.com/rust-lang/crates.io-index" 776 | checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" 777 | dependencies = [ 778 | "autocfg", 779 | "num-bigint", 780 | "num-integer", 781 | "num-traits", 782 | ] 783 | 784 
| [[package]] 785 | name = "num-traits" 786 | version = "0.2.15" 787 | source = "registry+https://github.com/rust-lang/crates.io-index" 788 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 789 | dependencies = [ 790 | "autocfg", 791 | "libm", 792 | ] 793 | 794 | [[package]] 795 | name = "num_cpus" 796 | version = "1.15.0" 797 | source = "registry+https://github.com/rust-lang/crates.io-index" 798 | checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" 799 | dependencies = [ 800 | "hermit-abi 0.2.6", 801 | "libc", 802 | ] 803 | 804 | [[package]] 805 | name = "once_cell" 806 | version = "1.17.1" 807 | source = "registry+https://github.com/rust-lang/crates.io-index" 808 | checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" 809 | 810 | [[package]] 811 | name = "parking_lot" 812 | version = "0.12.1" 813 | source = "registry+https://github.com/rust-lang/crates.io-index" 814 | checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" 815 | dependencies = [ 816 | "lock_api", 817 | "parking_lot_core", 818 | ] 819 | 820 | [[package]] 821 | name = "parking_lot_core" 822 | version = "0.9.7" 823 | source = "registry+https://github.com/rust-lang/crates.io-index" 824 | checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" 825 | dependencies = [ 826 | "cfg-if", 827 | "libc", 828 | "redox_syscall", 829 | "smallvec", 830 | "windows-sys", 831 | ] 832 | 833 | [[package]] 834 | name = "pin-project-lite" 835 | version = "0.2.9" 836 | source = "registry+https://github.com/rust-lang/crates.io-index" 837 | checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" 838 | 839 | [[package]] 840 | name = "polars" 841 | version = "0.24.3" 842 | source = "registry+https://github.com/rust-lang/crates.io-index" 843 | checksum = "dcc28d562d296e586634c7efa9468f5fe43e81c20c572f8d1d4542f273db9e27" 844 | dependencies = [ 845 | "polars-core", 846 | "polars-io", 
847 | "polars-lazy", 848 | "polars-ops", 849 | "polars-time", 850 | ] 851 | 852 | [[package]] 853 | name = "polars-arrow" 854 | version = "0.24.4" 855 | source = "registry+https://github.com/rust-lang/crates.io-index" 856 | checksum = "932e2474040e28459b9790d86730ea5f92165a4f5630c9d36366b9d3a6128eb1" 857 | dependencies = [ 858 | "arrow2", 859 | "hashbrown 0.12.3", 860 | "num", 861 | "thiserror", 862 | ] 863 | 864 | [[package]] 865 | name = "polars-core" 866 | version = "0.24.3" 867 | source = "registry+https://github.com/rust-lang/crates.io-index" 868 | checksum = "d72cc715f79e808a18c46bd28b3e03b63f710a03acd2f03dbb5c22e2c7d192a5" 869 | dependencies = [ 870 | "ahash 0.7.6", 871 | "anyhow", 872 | "arrow2", 873 | "bitflags", 874 | "chrono", 875 | "comfy-table", 876 | "hashbrown 0.12.3", 877 | "indexmap", 878 | "num", 879 | "once_cell", 880 | "polars-arrow", 881 | "polars-utils", 882 | "rand", 883 | "rand_distr", 884 | "rayon", 885 | "regex", 886 | "thiserror", 887 | ] 888 | 889 | [[package]] 890 | name = "polars-io" 891 | version = "0.24.3" 892 | source = "registry+https://github.com/rust-lang/crates.io-index" 893 | checksum = "a1ef64ab407a16876b6cbd26c3ac50901b7f848970e709d7aef11fe3fcfa70dc" 894 | dependencies = [ 895 | "ahash 0.7.6", 896 | "anyhow", 897 | "arrow2", 898 | "csv-core", 899 | "dirs", 900 | "lexical", 901 | "lexical-core", 902 | "memchr", 903 | "memmap2", 904 | "num", 905 | "once_cell", 906 | "polars-arrow", 907 | "polars-core", 908 | "polars-time", 909 | "polars-utils", 910 | "rayon", 911 | "regex", 912 | "simd-json", 913 | "simdutf8", 914 | ] 915 | 916 | [[package]] 917 | name = "polars-lazy" 918 | version = "0.24.3" 919 | source = "registry+https://github.com/rust-lang/crates.io-index" 920 | checksum = "c96ff67f24c8e0ae9155d9edf9d6980b51b414b73066238a702d31f0d9a26288" 921 | dependencies = [ 922 | "ahash 0.7.6", 923 | "bitflags", 924 | "glob", 925 | "polars-arrow", 926 | "polars-core", 927 | "polars-io", 928 | "polars-ops", 929 | "polars-time", 930 | 
"polars-utils", 931 | "rayon", 932 | ] 933 | 934 | [[package]] 935 | name = "polars-ops" 936 | version = "0.24.3" 937 | source = "registry+https://github.com/rust-lang/crates.io-index" 938 | checksum = "96f6d31360dffec97537b31a6f32ac189d43095c29ac1956ac20a88d82f57946" 939 | dependencies = [ 940 | "polars-arrow", 941 | "polars-core", 942 | ] 943 | 944 | [[package]] 945 | name = "polars-time" 946 | version = "0.24.3" 947 | source = "registry+https://github.com/rust-lang/crates.io-index" 948 | checksum = "e2f38a9cad199a992d04fd8e68d49a45f2996928dd769966b14f9da3aa71ba7e" 949 | dependencies = [ 950 | "chrono", 951 | "lexical", 952 | "polars-arrow", 953 | "polars-core", 954 | "polars-utils", 955 | ] 956 | 957 | [[package]] 958 | name = "polars-utils" 959 | version = "0.24.4" 960 | source = "registry+https://github.com/rust-lang/crates.io-index" 961 | checksum = "c5eca881853f14d7130406652f19c0f1281349647aea4ec2473fc6ff4303cff1" 962 | dependencies = [ 963 | "rayon", 964 | ] 965 | 966 | [[package]] 967 | name = "ppv-lite86" 968 | version = "0.2.17" 969 | source = "registry+https://github.com/rust-lang/crates.io-index" 970 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 971 | 972 | [[package]] 973 | name = "proc-macro2" 974 | version = "1.0.51" 975 | source = "registry+https://github.com/rust-lang/crates.io-index" 976 | checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" 977 | dependencies = [ 978 | "unicode-ident", 979 | ] 980 | 981 | [[package]] 982 | name = "quote" 983 | version = "1.0.23" 984 | source = "registry+https://github.com/rust-lang/crates.io-index" 985 | checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" 986 | dependencies = [ 987 | "proc-macro2", 988 | ] 989 | 990 | [[package]] 991 | name = "rand" 992 | version = "0.8.5" 993 | source = "registry+https://github.com/rust-lang/crates.io-index" 994 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 995 
| dependencies = [ 996 | "libc", 997 | "rand_chacha", 998 | "rand_core", 999 | ] 1000 | 1001 | [[package]] 1002 | name = "rand_chacha" 1003 | version = "0.3.1" 1004 | source = "registry+https://github.com/rust-lang/crates.io-index" 1005 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 1006 | dependencies = [ 1007 | "ppv-lite86", 1008 | "rand_core", 1009 | ] 1010 | 1011 | [[package]] 1012 | name = "rand_core" 1013 | version = "0.6.4" 1014 | source = "registry+https://github.com/rust-lang/crates.io-index" 1015 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 1016 | dependencies = [ 1017 | "getrandom", 1018 | ] 1019 | 1020 | [[package]] 1021 | name = "rand_distr" 1022 | version = "0.4.3" 1023 | source = "registry+https://github.com/rust-lang/crates.io-index" 1024 | checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" 1025 | dependencies = [ 1026 | "num-traits", 1027 | "rand", 1028 | ] 1029 | 1030 | [[package]] 1031 | name = "rayon" 1032 | version = "1.7.0" 1033 | source = "registry+https://github.com/rust-lang/crates.io-index" 1034 | checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" 1035 | dependencies = [ 1036 | "either", 1037 | "rayon-core", 1038 | ] 1039 | 1040 | [[package]] 1041 | name = "rayon-core" 1042 | version = "1.11.0" 1043 | source = "registry+https://github.com/rust-lang/crates.io-index" 1044 | checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" 1045 | dependencies = [ 1046 | "crossbeam-channel", 1047 | "crossbeam-deque", 1048 | "crossbeam-utils", 1049 | "num_cpus", 1050 | ] 1051 | 1052 | [[package]] 1053 | name = "redox_syscall" 1054 | version = "0.2.16" 1055 | source = "registry+https://github.com/rust-lang/crates.io-index" 1056 | checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" 1057 | dependencies = [ 1058 | "bitflags", 1059 | ] 1060 | 1061 | [[package]] 1062 | name = "redox_users" 
1063 | version = "0.4.3" 1064 | source = "registry+https://github.com/rust-lang/crates.io-index" 1065 | checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" 1066 | dependencies = [ 1067 | "getrandom", 1068 | "redox_syscall", 1069 | "thiserror", 1070 | ] 1071 | 1072 | [[package]] 1073 | name = "regex" 1074 | version = "1.7.1" 1075 | source = "registry+https://github.com/rust-lang/crates.io-index" 1076 | checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" 1077 | dependencies = [ 1078 | "aho-corasick", 1079 | "memchr", 1080 | "regex-syntax", 1081 | ] 1082 | 1083 | [[package]] 1084 | name = "regex-syntax" 1085 | version = "0.6.28" 1086 | source = "registry+https://github.com/rust-lang/crates.io-index" 1087 | checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" 1088 | 1089 | [[package]] 1090 | name = "rustversion" 1091 | version = "1.0.11" 1092 | source = "registry+https://github.com/rust-lang/crates.io-index" 1093 | checksum = "5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" 1094 | 1095 | [[package]] 1096 | name = "ryu" 1097 | version = "1.0.13" 1098 | source = "registry+https://github.com/rust-lang/crates.io-index" 1099 | checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" 1100 | 1101 | [[package]] 1102 | name = "scopeguard" 1103 | version = "1.1.0" 1104 | source = "registry+https://github.com/rust-lang/crates.io-index" 1105 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 1106 | 1107 | [[package]] 1108 | name = "scratch" 1109 | version = "1.0.4" 1110 | source = "registry+https://github.com/rust-lang/crates.io-index" 1111 | checksum = "5d5e082f6ea090deaf0e6dd04b68360fd5cddb152af6ce8927c9d25db299f98c" 1112 | 1113 | [[package]] 1114 | name = "serde" 1115 | version = "1.0.152" 1116 | source = "registry+https://github.com/rust-lang/crates.io-index" 1117 | checksum = 
"bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" 1118 | dependencies = [ 1119 | "serde_derive", 1120 | ] 1121 | 1122 | [[package]] 1123 | name = "serde_derive" 1124 | version = "1.0.152" 1125 | source = "registry+https://github.com/rust-lang/crates.io-index" 1126 | checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" 1127 | dependencies = [ 1128 | "proc-macro2", 1129 | "quote", 1130 | "syn", 1131 | ] 1132 | 1133 | [[package]] 1134 | name = "serde_json" 1135 | version = "1.0.93" 1136 | source = "registry+https://github.com/rust-lang/crates.io-index" 1137 | checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" 1138 | dependencies = [ 1139 | "itoa", 1140 | "ryu", 1141 | "serde", 1142 | ] 1143 | 1144 | [[package]] 1145 | name = "signal-hook" 1146 | version = "0.3.15" 1147 | source = "registry+https://github.com/rust-lang/crates.io-index" 1148 | checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" 1149 | dependencies = [ 1150 | "libc", 1151 | "signal-hook-registry", 1152 | ] 1153 | 1154 | [[package]] 1155 | name = "signal-hook-mio" 1156 | version = "0.2.3" 1157 | source = "registry+https://github.com/rust-lang/crates.io-index" 1158 | checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" 1159 | dependencies = [ 1160 | "libc", 1161 | "mio", 1162 | "signal-hook", 1163 | ] 1164 | 1165 | [[package]] 1166 | name = "signal-hook-registry" 1167 | version = "1.4.1" 1168 | source = "registry+https://github.com/rust-lang/crates.io-index" 1169 | checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" 1170 | dependencies = [ 1171 | "libc", 1172 | ] 1173 | 1174 | [[package]] 1175 | name = "simd-json" 1176 | version = "0.6.0" 1177 | source = "registry+https://github.com/rust-lang/crates.io-index" 1178 | checksum = "9bd78b840b9de64fa3f7d72909b76343849f68e8c3d32608db8d38e4e5481f84" 1179 | dependencies = [ 1180 | "halfbrown", 1181 | "serde", 1182 | 
"serde_json", 1183 | "simdutf8", 1184 | "value-trait", 1185 | ] 1186 | 1187 | [[package]] 1188 | name = "simdutf8" 1189 | version = "0.1.4" 1190 | source = "registry+https://github.com/rust-lang/crates.io-index" 1191 | checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" 1192 | 1193 | [[package]] 1194 | name = "smallvec" 1195 | version = "1.10.0" 1196 | source = "registry+https://github.com/rust-lang/crates.io-index" 1197 | checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" 1198 | 1199 | [[package]] 1200 | name = "socket2" 1201 | version = "0.4.9" 1202 | source = "registry+https://github.com/rust-lang/crates.io-index" 1203 | checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" 1204 | dependencies = [ 1205 | "libc", 1206 | "winapi", 1207 | ] 1208 | 1209 | [[package]] 1210 | name = "static_assertions" 1211 | version = "1.1.0" 1212 | source = "registry+https://github.com/rust-lang/crates.io-index" 1213 | checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" 1214 | 1215 | [[package]] 1216 | name = "streaming-iterator" 1217 | version = "0.1.9" 1218 | source = "registry+https://github.com/rust-lang/crates.io-index" 1219 | checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" 1220 | 1221 | [[package]] 1222 | name = "strength_reduce" 1223 | version = "0.2.4" 1224 | source = "registry+https://github.com/rust-lang/crates.io-index" 1225 | checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" 1226 | 1227 | [[package]] 1228 | name = "strum" 1229 | version = "0.23.0" 1230 | source = "registry+https://github.com/rust-lang/crates.io-index" 1231 | checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" 1232 | 1233 | [[package]] 1234 | name = "strum_macros" 1235 | version = "0.23.1" 1236 | source = "registry+https://github.com/rust-lang/crates.io-index" 1237 | checksum = 
"5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" 1238 | dependencies = [ 1239 | "heck", 1240 | "proc-macro2", 1241 | "quote", 1242 | "rustversion", 1243 | "syn", 1244 | ] 1245 | 1246 | [[package]] 1247 | name = "syn" 1248 | version = "1.0.109" 1249 | source = "registry+https://github.com/rust-lang/crates.io-index" 1250 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1251 | dependencies = [ 1252 | "proc-macro2", 1253 | "quote", 1254 | "unicode-ident", 1255 | ] 1256 | 1257 | [[package]] 1258 | name = "termcolor" 1259 | version = "1.2.0" 1260 | source = "registry+https://github.com/rust-lang/crates.io-index" 1261 | checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" 1262 | dependencies = [ 1263 | "winapi-util", 1264 | ] 1265 | 1266 | [[package]] 1267 | name = "thiserror" 1268 | version = "1.0.38" 1269 | source = "registry+https://github.com/rust-lang/crates.io-index" 1270 | checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" 1271 | dependencies = [ 1272 | "thiserror-impl", 1273 | ] 1274 | 1275 | [[package]] 1276 | name = "thiserror-impl" 1277 | version = "1.0.38" 1278 | source = "registry+https://github.com/rust-lang/crates.io-index" 1279 | checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" 1280 | dependencies = [ 1281 | "proc-macro2", 1282 | "quote", 1283 | "syn", 1284 | ] 1285 | 1286 | [[package]] 1287 | name = "time" 1288 | version = "0.1.45" 1289 | source = "registry+https://github.com/rust-lang/crates.io-index" 1290 | checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" 1291 | dependencies = [ 1292 | "libc", 1293 | "wasi 0.10.0+wasi-snapshot-preview1", 1294 | "winapi", 1295 | ] 1296 | 1297 | [[package]] 1298 | name = "tokio" 1299 | version = "1.26.0" 1300 | source = "registry+https://github.com/rust-lang/crates.io-index" 1301 | checksum = "03201d01c3c27a29c8a5cee5b55a93ddae1ccf6f08f65365c2c918f8c1b76f64" 1302 | 
dependencies = [ 1303 | "autocfg", 1304 | "bytes", 1305 | "libc", 1306 | "memchr", 1307 | "mio", 1308 | "num_cpus", 1309 | "parking_lot", 1310 | "pin-project-lite", 1311 | "signal-hook-registry", 1312 | "socket2", 1313 | "tokio-macros", 1314 | "windows-sys", 1315 | ] 1316 | 1317 | [[package]] 1318 | name = "tokio-macros" 1319 | version = "1.8.2" 1320 | source = "registry+https://github.com/rust-lang/crates.io-index" 1321 | checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" 1322 | dependencies = [ 1323 | "proc-macro2", 1324 | "quote", 1325 | "syn", 1326 | ] 1327 | 1328 | [[package]] 1329 | name = "unicode-ident" 1330 | version = "1.0.7" 1331 | source = "registry+https://github.com/rust-lang/crates.io-index" 1332 | checksum = "775c11906edafc97bc378816b94585fbd9a054eabaf86fdd0ced94af449efab7" 1333 | 1334 | [[package]] 1335 | name = "unicode-segmentation" 1336 | version = "1.10.1" 1337 | source = "registry+https://github.com/rust-lang/crates.io-index" 1338 | checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" 1339 | 1340 | [[package]] 1341 | name = "unicode-width" 1342 | version = "0.1.10" 1343 | source = "registry+https://github.com/rust-lang/crates.io-index" 1344 | checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" 1345 | 1346 | [[package]] 1347 | name = "value-trait" 1348 | version = "0.4.0" 1349 | source = "registry+https://github.com/rust-lang/crates.io-index" 1350 | checksum = "c0a635407649b66e125e4d2ffd208153210179f8c7c8b71c030aa2ad3eeb4c8f" 1351 | dependencies = [ 1352 | "float-cmp", 1353 | "halfbrown", 1354 | "itoa", 1355 | "ryu", 1356 | ] 1357 | 1358 | [[package]] 1359 | name = "version_check" 1360 | version = "0.9.4" 1361 | source = "registry+https://github.com/rust-lang/crates.io-index" 1362 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 1363 | 1364 | [[package]] 1365 | name = "wasi" 1366 | version = "0.10.0+wasi-snapshot-preview1" 1367 | source 
= "registry+https://github.com/rust-lang/crates.io-index" 1368 | checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" 1369 | 1370 | [[package]] 1371 | name = "wasi" 1372 | version = "0.11.0+wasi-snapshot-preview1" 1373 | source = "registry+https://github.com/rust-lang/crates.io-index" 1374 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1375 | 1376 | [[package]] 1377 | name = "wasm-bindgen" 1378 | version = "0.2.84" 1379 | source = "registry+https://github.com/rust-lang/crates.io-index" 1380 | checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" 1381 | dependencies = [ 1382 | "cfg-if", 1383 | "wasm-bindgen-macro", 1384 | ] 1385 | 1386 | [[package]] 1387 | name = "wasm-bindgen-backend" 1388 | version = "0.2.84" 1389 | source = "registry+https://github.com/rust-lang/crates.io-index" 1390 | checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" 1391 | dependencies = [ 1392 | "bumpalo", 1393 | "log", 1394 | "once_cell", 1395 | "proc-macro2", 1396 | "quote", 1397 | "syn", 1398 | "wasm-bindgen-shared", 1399 | ] 1400 | 1401 | [[package]] 1402 | name = "wasm-bindgen-macro" 1403 | version = "0.2.84" 1404 | source = "registry+https://github.com/rust-lang/crates.io-index" 1405 | checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" 1406 | dependencies = [ 1407 | "quote", 1408 | "wasm-bindgen-macro-support", 1409 | ] 1410 | 1411 | [[package]] 1412 | name = "wasm-bindgen-macro-support" 1413 | version = "0.2.84" 1414 | source = "registry+https://github.com/rust-lang/crates.io-index" 1415 | checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" 1416 | dependencies = [ 1417 | "proc-macro2", 1418 | "quote", 1419 | "syn", 1420 | "wasm-bindgen-backend", 1421 | "wasm-bindgen-shared", 1422 | ] 1423 | 1424 | [[package]] 1425 | name = "wasm-bindgen-shared" 1426 | version = "0.2.84" 1427 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1428 | checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" 1429 | 1430 | [[package]] 1431 | name = "winapi" 1432 | version = "0.3.9" 1433 | source = "registry+https://github.com/rust-lang/crates.io-index" 1434 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1435 | dependencies = [ 1436 | "winapi-i686-pc-windows-gnu", 1437 | "winapi-x86_64-pc-windows-gnu", 1438 | ] 1439 | 1440 | [[package]] 1441 | name = "winapi-i686-pc-windows-gnu" 1442 | version = "0.4.0" 1443 | source = "registry+https://github.com/rust-lang/crates.io-index" 1444 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1445 | 1446 | [[package]] 1447 | name = "winapi-util" 1448 | version = "0.1.5" 1449 | source = "registry+https://github.com/rust-lang/crates.io-index" 1450 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 1451 | dependencies = [ 1452 | "winapi", 1453 | ] 1454 | 1455 | [[package]] 1456 | name = "winapi-x86_64-pc-windows-gnu" 1457 | version = "0.4.0" 1458 | source = "registry+https://github.com/rust-lang/crates.io-index" 1459 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1460 | 1461 | [[package]] 1462 | name = "windows-sys" 1463 | version = "0.45.0" 1464 | source = "registry+https://github.com/rust-lang/crates.io-index" 1465 | checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" 1466 | dependencies = [ 1467 | "windows-targets", 1468 | ] 1469 | 1470 | [[package]] 1471 | name = "windows-targets" 1472 | version = "0.42.1" 1473 | source = "registry+https://github.com/rust-lang/crates.io-index" 1474 | checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" 1475 | dependencies = [ 1476 | "windows_aarch64_gnullvm", 1477 | "windows_aarch64_msvc", 1478 | "windows_i686_gnu", 1479 | "windows_i686_msvc", 1480 | "windows_x86_64_gnu", 1481 | 
"windows_x86_64_gnullvm", 1482 | "windows_x86_64_msvc", 1483 | ] 1484 | 1485 | [[package]] 1486 | name = "windows_aarch64_gnullvm" 1487 | version = "0.42.1" 1488 | source = "registry+https://github.com/rust-lang/crates.io-index" 1489 | checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" 1490 | 1491 | [[package]] 1492 | name = "windows_aarch64_msvc" 1493 | version = "0.42.1" 1494 | source = "registry+https://github.com/rust-lang/crates.io-index" 1495 | checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" 1496 | 1497 | [[package]] 1498 | name = "windows_i686_gnu" 1499 | version = "0.42.1" 1500 | source = "registry+https://github.com/rust-lang/crates.io-index" 1501 | checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" 1502 | 1503 | [[package]] 1504 | name = "windows_i686_msvc" 1505 | version = "0.42.1" 1506 | source = "registry+https://github.com/rust-lang/crates.io-index" 1507 | checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" 1508 | 1509 | [[package]] 1510 | name = "windows_x86_64_gnu" 1511 | version = "0.42.1" 1512 | source = "registry+https://github.com/rust-lang/crates.io-index" 1513 | checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" 1514 | 1515 | [[package]] 1516 | name = "windows_x86_64_gnullvm" 1517 | version = "0.42.1" 1518 | source = "registry+https://github.com/rust-lang/crates.io-index" 1519 | checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" 1520 | 1521 | [[package]] 1522 | name = "windows_x86_64_msvc" 1523 | version = "0.42.1" 1524 | source = "registry+https://github.com/rust-lang/crates.io-index" 1525 | checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" 1526 | -------------------------------------------------------------------------------- /amazon_review_pipeline_polars/Cargo.toml: -------------------------------------------------------------------------------- 1 | 
# Developer shortcuts for the amazon_review_pipeline_polars crate.
.PHONY: run_rust_dev run_rust_release clean_dirs run_pyspark_release

# Build with the dev profile and run the pipeline with info-level logging.
run_rust_dev:
	RUST_LOG=info cargo run

# Run the release binary with info-level logging. Cargo names the binary after
# the package (`amazon_review_pipeline_polars`); it must already have been
# built with `cargo build --release`.
run_rust_release:
	RUST_LOG=info ./target/release/amazon_review_pipeline_polars

# Remove the generated toys_n_game outputs from the insights and analytics layers.
clean_dirs:
	rm -rf datalayers/insights/toys_n_game/ && rm -rf datalayers/analytics/toys_n_game/

# Run the Python (PySpark) entry point.
run_pyspark_release:
	python pysrc/main.py
def create_spark_session():
    """Return the Spark session used by the review pipeline.

    ``load_dotenv()`` is invoked first (its return value is ignored), then a
    session named ``"Review"`` is obtained through ``SparkSession.builder``
    via ``getOrCreate()``.
    """
    load_dotenv()
    session_builder = SparkSession.builder.appName("Review")
    session = session_builder.getOrCreate()
    return session
df.withColumn("review_text_len", fn.length(col("review_text"))) 45 | # df = df.withColumn("reviewed_year", fn.year(col("reviewed_at"))) 46 | # df = df.withColumn("reviewed_month", fn.month(col("reviewed_at"))) 47 | logging.info("Data loading plan created successfully!") 48 | return df 49 | 50 | 51 | def main(): 52 | DATA_PATH = 'datalayers/landing/Toys_and_Games_5.json' 53 | df = read_data(DATA_PATH) 54 | schema_info = ", ".join(df.columns) 55 | logging.info(f"Column List: {schema_info}") 56 | print(df.show(n=5)) 57 | 58 | 59 | if __name__ == "__main__": 60 | st = time.time() 61 | main() 62 | et = time.time() 63 | res = et - st 64 | logging.info("Pipeline executed successfully!") 65 | logging.info(f'Pipeline Execution time: {res}s.') -------------------------------------------------------------------------------- /amazon_review_pipeline_polars/src/main.rs: -------------------------------------------------------------------------------- 1 | use polars::prelude::*; 2 | 3 | 4 | fn main() -> PolarsResult<()> { 5 | let schema = Schema::from(vec![ 6 | Field::new("asin", DataType::Utf8), 7 | Field::new("image", DataType::Utf8), 8 | Field::new("overall", DataType::Float64), 9 | Field::new("reviewText", DataType::Utf8), 10 | Field::new("reviewTime", DataType::Utf8), 11 | Field::new("reviwerID", DataType::Utf8), 12 | Field::new("reviewerName", DataType::Utf8), 13 | Field::new("style", DataType::Utf8), 14 | Field::new("summary", DataType::Utf8), 15 | Field::new("unixReviewTime", DataType::Int64), 16 | Field::new("verified", DataType::Boolean), 17 | Field::new("vote", DataType::Float64) 18 | ]); 19 | let file_path = "datalayers/landing/Toys_and_Games_5.json"; 20 | let df = match LazyJsonLineReader::new(file_path.into()) 21 | .with_schema(schema) 22 | .finish() { 23 | Ok(lf) => lf, 24 | Err(e) => panic!("Error: {}", e), 25 | } 26 | .limit(5) 27 | .collect(); 28 | println!("{:?}", df); 29 | Ok(()) 30 | } 31 | 
-------------------------------------------------------------------------------- /diabetes_ml_pipeline/.dvc/.gitignore: -------------------------------------------------------------------------------- 1 | /config.local 2 | /tmp 3 | /cache 4 | -------------------------------------------------------------------------------- /diabetes_ml_pipeline/.dvc/config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrDataPsycho/data-pipelines-in-rust/06319272f26745c5de61bab73ff4d5d133771568/diabetes_ml_pipeline/.dvc/config -------------------------------------------------------------------------------- /diabetes_ml_pipeline/.dvcignore: -------------------------------------------------------------------------------- 1 | # Add patterns of files dvc should ignore, which could improve 2 | # the performance. Learn more at 3 | # https://dvc.org/doc/user-guide/dvcignore 4 | -------------------------------------------------------------------------------- /diabetes_ml_pipeline/.gitignore: -------------------------------------------------------------------------------- 1 | target -------------------------------------------------------------------------------- /diabetes_ml_pipeline/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "diabetes_ml_pipeline" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | # Data Engineering Packages 10 | polars = {version = "0.23.2", features = ["lazy", "csv-file", "strings", "temporal", "dtype-duration", "dtype-categorical", "concat_str", "list", "list_eval", "describe"]} 11 | log = "0.4.17" 12 | env_logger = "0.9.0" 13 | # ML Packages for Model Training 14 | linfa = "0.6.1" 15 | linfa-logistic = "0.6.1" 16 | csv = "1.2.0" 17 | ndarray = "0.15.6" 18 | ciborium = "0.2.0" 19 | # AWS Lambda Packages 20 | 
lambda_runtime = "0.7.3" 21 | serde = "1.0.136" 22 | tokio = { version = "1", features = ["macros"] } 23 | tracing = { version = "0.1", features = ["log"] } 24 | tracing-subscriber = { version = "0.3", default-features = false, features = ["fmt"] } 25 | 26 | 27 | [[bin]] 28 | name = "preprocess" 29 | path = "src/bin/stages/preprocess.rs" 30 | 31 | [[bin]] 32 | name = "train" 33 | path = "src/bin/stages/train.rs" 34 | 35 | [[bin]] 36 | name = "test" 37 | path = "src/bin/stages/test.rs" 38 | 39 | [[bin]] 40 | name = "serve" 41 | path = "src/bin/stages/serve.rs" 42 | 43 | -------------------------------------------------------------------------------- /diabetes_ml_pipeline/README.md: -------------------------------------------------------------------------------- 1 | # WIP: A Full ML Training Pipeline in Rust -------------------------------------------------------------------------------- /diabetes_ml_pipeline/data/interim/.gitignore: -------------------------------------------------------------------------------- 1 | /diabetes.csv 2 | -------------------------------------------------------------------------------- /diabetes_ml_pipeline/data/processed/.gitignore: -------------------------------------------------------------------------------- 1 | /diabetes.csv 2 | -------------------------------------------------------------------------------- /diabetes_ml_pipeline/data/raw/.gitignore: -------------------------------------------------------------------------------- 1 | /diabetes.csv 2 | -------------------------------------------------------------------------------- /diabetes_ml_pipeline/data/raw/diabetes.csv.dvc: -------------------------------------------------------------------------------- 1 | outs: 2 | - md5: b43dd020fa775d93fced49f959c227ed 3 | size: 23873 4 | path: diabetes.csv 5 | -------------------------------------------------------------------------------- /diabetes_ml_pipeline/dvc.lock: 
-------------------------------------------------------------------------------- 1 | schema: '2.0' 2 | stages: 3 | build_preprocessing_bin: 4 | cmd: cargo build --release --bin preprocess 5 | deps: 6 | - path: Cargo.toml 7 | md5: 2c0be3076e4f03af7ff0e3e75e54043d 8 | size: 651 9 | - path: src/bin/stages/preprocess.rs 10 | md5: 81db02dd6d36fc9c416eaf826c9afa1d 11 | size: 3028 12 | outs: 13 | - path: target/release/preprocess 14 | md5: 3059d334e9da5b613ed7760aef0b56ab 15 | size: 20527232 16 | isexec: true 17 | preprocess: 18 | cmd: RUST_LOG=info target/release/preprocess 19 | deps: 20 | - path: data/raw/diabetes.csv 21 | md5: b43dd020fa775d93fced49f959c227ed 22 | size: 23873 23 | - path: target/release/preprocess 24 | md5: 3059d334e9da5b613ed7760aef0b56ab 25 | size: 20527232 26 | outs: 27 | - path: data/interim/diabetes.csv 28 | md5: 8841b536e5bdb83ef37e82f847baf6af 29 | size: 35778 30 | -------------------------------------------------------------------------------- /diabetes_ml_pipeline/dvc.yaml: -------------------------------------------------------------------------------- 1 | stages: 2 | build_preprocessing_bin: 3 | cmd: cargo build --release --bin preprocess 4 | deps: 5 | - src/bin/stages/preprocess.rs 6 | - Cargo.toml 7 | outs: 8 | - target/release/preprocess 9 | preprocess: 10 | cmd: RUST_LOG=info target/release/preprocess 11 | deps: 12 | - data/raw/diabetes.csv 13 | - target/release/preprocess 14 | outs: 15 | - data/interim/diabetes.csv -------------------------------------------------------------------------------- /diabetes_ml_pipeline/model/model.cbor: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrDataPsycho/data-pipelines-in-rust/06319272f26745c5de61bab73ff4d5d133771568/diabetes_ml_pipeline/model/model.cbor -------------------------------------------------------------------------------- /diabetes_ml_pipeline/params.yaml: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrDataPsycho/data-pipelines-in-rust/06319272f26745c5de61bab73ff4d5d133771568/diabetes_ml_pipeline/params.yaml -------------------------------------------------------------------------------- /diabetes_ml_pipeline/src/bin/stages/preprocess.rs: -------------------------------------------------------------------------------- 1 | use env_logger; 2 | use log::info; 3 | use polars::prelude::*; 4 | 5 | fn main() { 6 | env_logger::init(); 7 | let df = read_diabetes_dataset(); 8 | run_pipeline(df); 9 | } 10 | 11 | fn read_diabetes_dataset() -> DataFrame { 12 | let path = "data/raw/diabetes.csv"; 13 | let df = LazyCsvReader::new(path.to_string()) 14 | .finish() 15 | .expect("Can not rea the Data."); 16 | info!("Here is a glimps of the data!"); 17 | println!("{:?}", df.clone().limit(3).collect()); 18 | df.collect().unwrap() 19 | } 20 | 21 | fn filter_zero_values(df: DataFrame) -> DataFrame { 22 | let result = df 23 | .clone() 24 | .lazy() 25 | .filter(col("Glucose").gt(0)) 26 | .filter(col("BloodPressure").gt(0)) 27 | .filter(col("SkinThickness").gt(0)) 28 | .filter(col("Insulin").gt(0)) 29 | .filter(col("BMI").gt(0)) 30 | .filter(col("Age").gt(0)) 31 | .filter(col("*").is_not_null()); 32 | info!("Filters zero applied to lazy frame!"); 33 | result.collect().unwrap() 34 | } 35 | 36 | fn select_relevant_columns(df: DataFrame) -> DataFrame { 37 | let col_list = [ 38 | "Pregnancies", 39 | "Glucose", 40 | "BloodPressure", 41 | "SkinThickness", 42 | "Insulin", 43 | "BMI", 44 | "Age", 45 | "Outcome", 46 | ]; 47 | info!("Only relevant columns are selected!"); 48 | df.select(col_list).unwrap() 49 | } 50 | 51 | fn impute_zero_with_mean(df: DataFrame, col_name: &str) -> DataFrame { 52 | let musk = df.column(col_name).unwrap().gt(0).unwrap(); 53 | let col_mean = df 54 | .column(col_name) 55 | .unwrap() 56 | .filter(&musk) 57 | .unwrap() 58 | .mean() 59 | .unwrap(); 60 | 61 | 
let predicate = when(col(col_name).lt_eq(0.0)) 62 | .then(lit(col_mean)) 63 | .otherwise(col(col_name)) 64 | .alias(col_name); 65 | let result = df.lazy().with_column(predicate); 66 | info!("Imputed zero value for column {}", col_name); 67 | result.collect().unwrap() 68 | } 69 | 70 | fn apply_imputation(df: DataFrame) -> DataFrame { 71 | let df = impute_zero_with_mean(df, "Glucose"); 72 | let df = impute_zero_with_mean(df, "BloodPressure"); 73 | let df = impute_zero_with_mean(df, "SkinThickness"); 74 | let df = impute_zero_with_mean(df, "Insulin"); 75 | let df = impute_zero_with_mean(df, "BMI"); 76 | let df = impute_zero_with_mean(df, "Age"); 77 | info!("Imputation applied for all columns"); 78 | df 79 | } 80 | 81 | fn run_pipeline(df: DataFrame) { 82 | let write_path = "data/interim/diabetes.csv"; 83 | let mut file = std::fs::File::create(write_path).unwrap(); 84 | info!("Row count before processing. {:?}", df.shape()); 85 | let df = select_relevant_columns(df); 86 | let df = apply_imputation(df); 87 | // let df = apply_filter(df); 88 | // info!("Here is a glimps of the data!"); 89 | // println!("{:?}", df.clone().lazy().limit(3).collect()); 90 | let mut df = filter_zero_values(df); 91 | // let df2: DataFrame = df.describe(None); 92 | // println!("{:?}", df2); 93 | info!("Row count after processing. 
{:?}", df.shape()); 94 | info!("Column schema changed to {:?}", df.get_column_names()); 95 | CsvWriter::new(&mut file).finish(&mut df).unwrap(); 96 | info!("File written successfully into {}", write_path); 97 | } 98 | -------------------------------------------------------------------------------- /diabetes_ml_pipeline/src/bin/stages/serve.rs: -------------------------------------------------------------------------------- 1 | use lambda_runtime::{service_fn, Error, LambdaEvent}; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | 5 | #[derive(Deserialize)] 6 | struct Request { 7 | command: String, 8 | } 9 | 10 | #[derive(Serialize)] 11 | struct Response { 12 | req_id: String, 13 | msg: String, 14 | } 15 | 16 | #[tokio::main] 17 | async fn main() -> Result<(), Error> { 18 | tracing_subscriber::fmt() 19 | .with_max_level(tracing::Level::INFO) 20 | // disabling time is handy because CloudWatch will add the ingestion time. 21 | .without_time() 22 | .init(); 23 | 24 | let func = service_fn(my_handler); 25 | lambda_runtime::run(func).await?; 26 | Ok(()) 27 | } 28 | 29 | pub(crate) async fn my_handler(event: LambdaEvent) -> Result { 30 | // extract some useful info from the request 31 | let command = event.payload.command; 32 | 33 | // prepare the response 34 | let resp = Response { 35 | req_id: event.context.request_id, 36 | msg: format!("Command {} executed.", command), 37 | }; 38 | 39 | // return `Response` (it will be serialized to JSON automatically by the runtime) 40 | Ok(resp) 41 | } -------------------------------------------------------------------------------- /diabetes_ml_pipeline/src/bin/stages/test.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | println!("Hello World!"); 3 | } 4 | -------------------------------------------------------------------------------- /diabetes_ml_pipeline/src/bin/stages/train.rs: -------------------------------------------------------------------------------- 1 | use 
ciborium::{cbor, value};
use csv::Reader;
use env_logger;
use linfa::prelude::*;
use linfa::Dataset;
use linfa_logistic::FittedLogisticRegression;
use linfa_logistic::LogisticRegression;
use log::info;
use ndarray::{Array, Array1, Array2};
use std::io::Read;
use std::path::Path;
use std::{fs, fs::File};

// NOTE(review): the generic arguments in these signatures were missing
// (`Dataset>`, `Vec>`, `Array1`, `collect::>()`). They are restored here as
// `f32` features and `i32` targets; `i32` follows from the `as i32` cast in
// `get_targets`, the float width is an assumption — confirm.

/// Entry point of the `train` stage: fit and save the model, then reload it
/// and run a prediction as a smoke test.
fn main() {
    env_logger::init();
    train();
    load_model();
}

/// Builds a linfa `Dataset` from `data/interim/diabetes.csv`.
///
/// The last CSV column is used as the target; all preceding columns are the
/// features, and their header names become the dataset's feature names.
///
/// # Panics
/// Panics if the file cannot be read, has no header columns, or contains a
/// field that does not parse as a number.
fn get_dataset() -> Dataset<f32, i32, ndarray::Ix1> {
    let file_path = "data/interim/diabetes.csv";
    let mut reader = Reader::from_path(file_path).unwrap();
    let headers = get_header(&mut reader);
    let data = get_data(&mut reader);
    // Guard the subtraction: an empty header row would otherwise underflow.
    let target_index = headers
        .len()
        .checked_sub(1)
        .expect("CSV header row is empty");
    let features = headers[0..target_index].to_vec();
    let records = get_records(&data, target_index);
    let targets = get_targets(&data, target_index);
    Dataset::new(records, targets).with_feature_names(features)
}

/// Returns the CSV header names as owned strings.
///
/// # Panics
/// Panics if the header row cannot be read.
fn get_header(reader: &mut Reader<File>) -> Vec<String> {
    let result: Vec<String> = reader
        .headers()
        .unwrap()
        .iter()
        .map(|r| r.to_owned())
        .collect();
    info!("Header collected successfully {:?}", result);
    result
}

/// Extracts column `target_index` of every row as the target vector.
///
/// # Panics
/// Panics if any row has `target_index` or fewer fields.
fn get_targets(data: &[Vec<f32>], target_index: usize) -> Array1<i32> {
    let targets = data
        .iter()
        // Float-to-int `as` cast: the fractional part is discarded.
        .map(|r| r[target_index] as i32)
        .collect::<Vec<i32>>();
    info!(
        "Step: Target collected successfully with length {:?}",
        targets.len()
    );
    Array::from(targets)
}

/// Packs the first `target_index` fields of every row into a
/// `data.len() x target_index` matrix (row-major).
///
/// # Panics
/// Panics if any row has fewer than `target_index` fields.
fn get_records(data: &[Vec<f32>], target_index: usize) -> Array2<f32> {
    let records: Vec<f32> = data
        .iter()
        .flat_map(|record| record[0..target_index].iter().copied())
        .collect();

    let result = Array::from(records)
        .into_shape((data.len(), target_index))
        .unwrap();
    let record_shape = result.shape();
    info!(
        "Step: Records collected successfully with shape {:?} x {:?}",
        record_shape[0], record_shape[1]
    );
    result
}
| 72 | fn get_data(reader: &mut Reader) -> Vec> { 73 | let result = reader 74 | .records() 75 | .map(|r| { 76 | r.unwrap() 77 | .iter() 78 | .map(|field| field.parse::().unwrap()) 79 | .collect::>() 80 | }) 81 | .collect::>>(); 82 | info!( 83 | "Step: Data collected successfully with record length {:?}", 84 | result.len() 85 | ); 86 | result 87 | } 88 | 89 | fn train() { 90 | let dataset = get_dataset(); 91 | info!("Step: Start Training the model."); 92 | let model = LogisticRegression::default() 93 | .max_iterations(500) 94 | .gradient_tolerance(0.0001) 95 | .fit(&dataset) 96 | .expect("Can not train the model"); 97 | let value_model = cbor!(model).unwrap(); 98 | let mut vec_model = Vec::new(); 99 | let _result = ciborium::ser::into_writer(&value_model, &mut vec_model).unwrap(); 100 | println!("{:?}", _result); 101 | // let prediction = model.predict(&dataset.records); 102 | // println!("{:?}", prediction); 103 | let write_path = Path::new("model").join("model.cbor"); 104 | fs::write(write_path.clone(), vec_model).unwrap(); 105 | info!("Model saved at {:?}", write_path.as_path()); 106 | } 107 | 108 | fn load_model() { 109 | let dataset = get_dataset(); 110 | let mut data: Vec = Vec::new(); 111 | let path = Path::new("model").join("model.cbor"); 112 | let mut file = File::open(&path).unwrap(); 113 | file.read_to_end(&mut data).unwrap(); 114 | let model_value = ciborium::de::from_reader::(&data[..]).unwrap(); 115 | let model: FittedLogisticRegression = model_value.deserialized().unwrap(); 116 | info!("Model loading was also successful!"); 117 | let _ = model.predict(dataset.records); 118 | info!("Step Prediction test with the model was successful!") 119 | } 120 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "py4j" 3 | version = "0.10.9.5" 4 | description = "Enables Python programs to dynamically access 
arbitrary Java objects" 5 | category = "main" 6 | optional = false 7 | python-versions = "*" 8 | 9 | [[package]] 10 | name = "pyspark" 11 | version = "3.3.0" 12 | description = "Apache Spark Python API" 13 | category = "main" 14 | optional = false 15 | python-versions = ">=3.7" 16 | 17 | [package.dependencies] 18 | py4j = "0.10.9.5" 19 | 20 | [package.extras] 21 | ml = ["numpy (>=1.15)"] 22 | mllib = ["numpy (>=1.15)"] 23 | pandas_on_spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] 24 | sql = ["pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] 25 | 26 | [[package]] 27 | name = "python-dotenv" 28 | version = "0.20.0" 29 | description = "Read key-value pairs from a .env file and set them as environment variables" 30 | category = "main" 31 | optional = false 32 | python-versions = ">=3.5" 33 | 34 | [package.extras] 35 | cli = ["click (>=5.0)"] 36 | 37 | [metadata] 38 | lock-version = "1.1" 39 | python-versions = "^3.9" 40 | content-hash = "731a5d7aefa56befbc6f19ca47290ce824d69c3fe4fafefe76fa9ca15269a654" 41 | 42 | [metadata.files] 43 | py4j = [ 44 | {file = "py4j-0.10.9.5-py2.py3-none-any.whl", hash = "sha256:52d171a6a2b031d8a5d1de6efe451cf4f5baff1a2819aabc3741c8406539ba04"}, 45 | {file = "py4j-0.10.9.5.tar.gz", hash = "sha256:276a4a3c5a2154df1860ef3303a927460e02e97b047dc0a47c1c3fb8cce34db6"}, 46 | ] 47 | pyspark = [ 48 | {file = "pyspark-3.3.0.tar.gz", hash = "sha256:7ebe8e9505647b4d124d5a82fca60dfd3891021cf8ad6c5ec88777eeece92cf7"}, 49 | ] 50 | python-dotenv = [ 51 | {file = "python-dotenv-0.20.0.tar.gz", hash = "sha256:b7e3b04a59693c42c36f9ab1cc2acc46fa5df8c78e178fc33a8d4cd05c8d498f"}, 52 | {file = "python_dotenv-0.20.0-py3-none-any.whl", hash = "sha256:d92a187be61fe482e4fd675b6d52200e7be63a12b724abbf931a40ce4fa92938"}, 53 | ] 54 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = 
"data_pipeline_in_rust" 3 | version = "0.1.0" 4 | description = "Data Pipeline" 5 | authors = ["DataPsycho"] 6 | license = "MIT" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.9" 10 | pyspark = "^3.3.0" 11 | python-dotenv = "^0.20.0" 12 | 13 | [tool.poetry.dev-dependencies] 14 | 15 | [build-system] 16 | requires = ["poetry-core>=1.0.0"] 17 | build-backend = "poetry.core.masonry.api" 18 | -------------------------------------------------------------------------------- /wine_pipeline/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .vscode -------------------------------------------------------------------------------- /wine_pipeline/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "ahash" 7 | version = "0.7.6" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" 10 | dependencies = [ 11 | "getrandom", 12 | "once_cell", 13 | "version_check", 14 | ] 15 | 16 | [[package]] 17 | name = "aho-corasick" 18 | version = "0.7.18" 19 | source = "registry+https://github.com/rust-lang/crates.io-index" 20 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" 21 | dependencies = [ 22 | "memchr", 23 | ] 24 | 25 | [[package]] 26 | name = "anyhow" 27 | version = "1.0.58" 28 | source = "registry+https://github.com/rust-lang/crates.io-index" 29 | checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704" 30 | 31 | [[package]] 32 | name = "arrow2" 33 | version = "0.12.0" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "5feafd6df4e3f577529e6aa2b9b7cdb3c9fe8e8f66ebc8dc29abbe71a7e968f0" 36 | dependencies = [ 37 | "bytemuck", 38 | "chrono", 39 | "either", 40 | 
"hash_hasher", 41 | "lexical-core", 42 | "multiversion", 43 | "num-traits", 44 | "simdutf8", 45 | "strength_reduce", 46 | ] 47 | 48 | [[package]] 49 | name = "atty" 50 | version = "0.2.14" 51 | source = "registry+https://github.com/rust-lang/crates.io-index" 52 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 53 | dependencies = [ 54 | "hermit-abi", 55 | "libc", 56 | "winapi", 57 | ] 58 | 59 | [[package]] 60 | name = "autocfg" 61 | version = "1.1.0" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 64 | 65 | [[package]] 66 | name = "bitflags" 67 | version = "1.3.2" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 70 | 71 | [[package]] 72 | name = "bytemuck" 73 | version = "1.9.1" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc" 76 | dependencies = [ 77 | "bytemuck_derive", 78 | ] 79 | 80 | [[package]] 81 | name = "bytemuck_derive" 82 | version = "1.1.0" 83 | source = "registry+https://github.com/rust-lang/crates.io-index" 84 | checksum = "562e382481975bc61d11275ac5e62a19abd00b0547d99516a415336f183dcd0e" 85 | dependencies = [ 86 | "proc-macro2", 87 | "quote", 88 | "syn", 89 | ] 90 | 91 | [[package]] 92 | name = "cfg-if" 93 | version = "1.0.0" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 96 | 97 | [[package]] 98 | name = "chrono" 99 | version = "0.4.19" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" 102 | dependencies = [ 103 | "libc", 104 | "num-integer", 105 | "num-traits", 106 | "time", 107 | "winapi", 108 | ] 109 | 
110 | [[package]] 111 | name = "comfy-table" 112 | version = "5.0.1" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "b103d85ca6e209388771bfb7aa6b68a7aeec4afbf6f0a0264bfbf50360e5212e" 115 | dependencies = [ 116 | "crossterm", 117 | "strum", 118 | "strum_macros", 119 | "unicode-width", 120 | ] 121 | 122 | [[package]] 123 | name = "crossbeam-channel" 124 | version = "0.5.5" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c" 127 | dependencies = [ 128 | "cfg-if", 129 | "crossbeam-utils", 130 | ] 131 | 132 | [[package]] 133 | name = "crossbeam-deque" 134 | version = "0.8.1" 135 | source = "registry+https://github.com/rust-lang/crates.io-index" 136 | checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" 137 | dependencies = [ 138 | "cfg-if", 139 | "crossbeam-epoch", 140 | "crossbeam-utils", 141 | ] 142 | 143 | [[package]] 144 | name = "crossbeam-epoch" 145 | version = "0.9.9" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d" 148 | dependencies = [ 149 | "autocfg", 150 | "cfg-if", 151 | "crossbeam-utils", 152 | "memoffset", 153 | "once_cell", 154 | "scopeguard", 155 | ] 156 | 157 | [[package]] 158 | name = "crossbeam-utils" 159 | version = "0.8.9" 160 | source = "registry+https://github.com/rust-lang/crates.io-index" 161 | checksum = "8ff1f980957787286a554052d03c7aee98d99cc32e09f6d45f0a814133c87978" 162 | dependencies = [ 163 | "cfg-if", 164 | "once_cell", 165 | ] 166 | 167 | [[package]] 168 | name = "crossterm" 169 | version = "0.23.2" 170 | source = "registry+https://github.com/rust-lang/crates.io-index" 171 | checksum = "a2102ea4f781910f8a5b98dd061f4c2023f479ce7bb1236330099ceb5a93cf17" 172 | dependencies = [ 173 | "bitflags", 174 | "crossterm_winapi", 175 | "libc", 176 | "mio", 177 | "parking_lot", 
178 | "signal-hook", 179 | "signal-hook-mio", 180 | "winapi", 181 | ] 182 | 183 | [[package]] 184 | name = "crossterm_winapi" 185 | version = "0.9.0" 186 | source = "registry+https://github.com/rust-lang/crates.io-index" 187 | checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c" 188 | dependencies = [ 189 | "winapi", 190 | ] 191 | 192 | [[package]] 193 | name = "csv-core" 194 | version = "0.1.10" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" 197 | dependencies = [ 198 | "memchr", 199 | ] 200 | 201 | [[package]] 202 | name = "dirs" 203 | version = "4.0.0" 204 | source = "registry+https://github.com/rust-lang/crates.io-index" 205 | checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" 206 | dependencies = [ 207 | "dirs-sys", 208 | ] 209 | 210 | [[package]] 211 | name = "dirs-sys" 212 | version = "0.3.7" 213 | source = "registry+https://github.com/rust-lang/crates.io-index" 214 | checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" 215 | dependencies = [ 216 | "libc", 217 | "redox_users", 218 | "winapi", 219 | ] 220 | 221 | [[package]] 222 | name = "either" 223 | version = "1.6.1" 224 | source = "registry+https://github.com/rust-lang/crates.io-index" 225 | checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" 226 | 227 | [[package]] 228 | name = "env_logger" 229 | version = "0.9.0" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" 232 | dependencies = [ 233 | "atty", 234 | "humantime", 235 | "log", 236 | "regex", 237 | "termcolor", 238 | ] 239 | 240 | [[package]] 241 | name = "getrandom" 242 | version = "0.2.7" 243 | source = "registry+https://github.com/rust-lang/crates.io-index" 244 | checksum = 
"4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" 245 | dependencies = [ 246 | "cfg-if", 247 | "libc", 248 | "wasi 0.11.0+wasi-snapshot-preview1", 249 | ] 250 | 251 | [[package]] 252 | name = "glob" 253 | version = "0.3.0" 254 | source = "registry+https://github.com/rust-lang/crates.io-index" 255 | checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" 256 | 257 | [[package]] 258 | name = "hash_hasher" 259 | version = "2.0.3" 260 | source = "registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" 262 | 263 | [[package]] 264 | name = "hashbrown" 265 | version = "0.12.1" 266 | source = "registry+https://github.com/rust-lang/crates.io-index" 267 | checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" 268 | dependencies = [ 269 | "ahash", 270 | "rayon", 271 | ] 272 | 273 | [[package]] 274 | name = "heck" 275 | version = "0.3.3" 276 | source = "registry+https://github.com/rust-lang/crates.io-index" 277 | checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" 278 | dependencies = [ 279 | "unicode-segmentation", 280 | ] 281 | 282 | [[package]] 283 | name = "hermit-abi" 284 | version = "0.1.19" 285 | source = "registry+https://github.com/rust-lang/crates.io-index" 286 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 287 | dependencies = [ 288 | "libc", 289 | ] 290 | 291 | [[package]] 292 | name = "humantime" 293 | version = "2.1.0" 294 | source = "registry+https://github.com/rust-lang/crates.io-index" 295 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 296 | 297 | [[package]] 298 | name = "indexmap" 299 | version = "1.9.1" 300 | source = "registry+https://github.com/rust-lang/crates.io-index" 301 | checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" 302 | dependencies = [ 303 | "autocfg", 304 | "hashbrown", 305 | ] 306 
| 307 | [[package]] 308 | name = "lexical" 309 | version = "6.1.1" 310 | source = "registry+https://github.com/rust-lang/crates.io-index" 311 | checksum = "c7aefb36fd43fef7003334742cbf77b243fcd36418a1d1bdd480d613a67968f6" 312 | dependencies = [ 313 | "lexical-core", 314 | ] 315 | 316 | [[package]] 317 | name = "lexical-core" 318 | version = "0.8.5" 319 | source = "registry+https://github.com/rust-lang/crates.io-index" 320 | checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" 321 | dependencies = [ 322 | "lexical-parse-float", 323 | "lexical-parse-integer", 324 | "lexical-util", 325 | "lexical-write-float", 326 | "lexical-write-integer", 327 | ] 328 | 329 | [[package]] 330 | name = "lexical-parse-float" 331 | version = "0.8.5" 332 | source = "registry+https://github.com/rust-lang/crates.io-index" 333 | checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" 334 | dependencies = [ 335 | "lexical-parse-integer", 336 | "lexical-util", 337 | "static_assertions", 338 | ] 339 | 340 | [[package]] 341 | name = "lexical-parse-integer" 342 | version = "0.8.6" 343 | source = "registry+https://github.com/rust-lang/crates.io-index" 344 | checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" 345 | dependencies = [ 346 | "lexical-util", 347 | "static_assertions", 348 | ] 349 | 350 | [[package]] 351 | name = "lexical-util" 352 | version = "0.8.5" 353 | source = "registry+https://github.com/rust-lang/crates.io-index" 354 | checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" 355 | dependencies = [ 356 | "static_assertions", 357 | ] 358 | 359 | [[package]] 360 | name = "lexical-write-float" 361 | version = "0.8.5" 362 | source = "registry+https://github.com/rust-lang/crates.io-index" 363 | checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" 364 | dependencies = [ 365 | "lexical-util", 366 | "lexical-write-integer", 367 | "static_assertions", 368 | ] 369 | 370 | 
[[package]] 371 | name = "lexical-write-integer" 372 | version = "0.8.5" 373 | source = "registry+https://github.com/rust-lang/crates.io-index" 374 | checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" 375 | dependencies = [ 376 | "lexical-util", 377 | "static_assertions", 378 | ] 379 | 380 | [[package]] 381 | name = "libc" 382 | version = "0.2.126" 383 | source = "registry+https://github.com/rust-lang/crates.io-index" 384 | checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" 385 | 386 | [[package]] 387 | name = "libm" 388 | version = "0.2.2" 389 | source = "registry+https://github.com/rust-lang/crates.io-index" 390 | checksum = "33a33a362ce288760ec6a508b94caaec573ae7d3bbbd91b87aa0bad4456839db" 391 | 392 | [[package]] 393 | name = "lock_api" 394 | version = "0.4.7" 395 | source = "registry+https://github.com/rust-lang/crates.io-index" 396 | checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" 397 | dependencies = [ 398 | "autocfg", 399 | "scopeguard", 400 | ] 401 | 402 | [[package]] 403 | name = "log" 404 | version = "0.4.17" 405 | source = "registry+https://github.com/rust-lang/crates.io-index" 406 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 407 | dependencies = [ 408 | "cfg-if", 409 | ] 410 | 411 | [[package]] 412 | name = "memchr" 413 | version = "2.5.0" 414 | source = "registry+https://github.com/rust-lang/crates.io-index" 415 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 416 | 417 | [[package]] 418 | name = "memmap2" 419 | version = "0.5.4" 420 | source = "registry+https://github.com/rust-lang/crates.io-index" 421 | checksum = "d5172b50c23043ff43dd53e51392f36519d9b35a8f3a410d30ece5d1aedd58ae" 422 | dependencies = [ 423 | "libc", 424 | ] 425 | 426 | [[package]] 427 | name = "memoffset" 428 | version = "0.6.5" 429 | source = "registry+https://github.com/rust-lang/crates.io-index" 430 | checksum = 
"5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" 431 | dependencies = [ 432 | "autocfg", 433 | ] 434 | 435 | [[package]] 436 | name = "mio" 437 | version = "0.8.4" 438 | source = "registry+https://github.com/rust-lang/crates.io-index" 439 | checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" 440 | dependencies = [ 441 | "libc", 442 | "log", 443 | "wasi 0.11.0+wasi-snapshot-preview1", 444 | "windows-sys", 445 | ] 446 | 447 | [[package]] 448 | name = "multiversion" 449 | version = "0.6.1" 450 | source = "registry+https://github.com/rust-lang/crates.io-index" 451 | checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" 452 | dependencies = [ 453 | "multiversion-macros", 454 | ] 455 | 456 | [[package]] 457 | name = "multiversion-macros" 458 | version = "0.6.1" 459 | source = "registry+https://github.com/rust-lang/crates.io-index" 460 | checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" 461 | dependencies = [ 462 | "proc-macro2", 463 | "quote", 464 | "syn", 465 | ] 466 | 467 | [[package]] 468 | name = "num" 469 | version = "0.4.0" 470 | source = "registry+https://github.com/rust-lang/crates.io-index" 471 | checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" 472 | dependencies = [ 473 | "num-bigint", 474 | "num-complex", 475 | "num-integer", 476 | "num-iter", 477 | "num-rational", 478 | "num-traits", 479 | ] 480 | 481 | [[package]] 482 | name = "num-bigint" 483 | version = "0.4.3" 484 | source = "registry+https://github.com/rust-lang/crates.io-index" 485 | checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" 486 | dependencies = [ 487 | "autocfg", 488 | "num-integer", 489 | "num-traits", 490 | ] 491 | 492 | [[package]] 493 | name = "num-complex" 494 | version = "0.4.2" 495 | source = "registry+https://github.com/rust-lang/crates.io-index" 496 | checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19" 497 | 
dependencies = [ 498 | "num-traits", 499 | ] 500 | 501 | [[package]] 502 | name = "num-integer" 503 | version = "0.1.45" 504 | source = "registry+https://github.com/rust-lang/crates.io-index" 505 | checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" 506 | dependencies = [ 507 | "autocfg", 508 | "num-traits", 509 | ] 510 | 511 | [[package]] 512 | name = "num-iter" 513 | version = "0.1.43" 514 | source = "registry+https://github.com/rust-lang/crates.io-index" 515 | checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" 516 | dependencies = [ 517 | "autocfg", 518 | "num-integer", 519 | "num-traits", 520 | ] 521 | 522 | [[package]] 523 | name = "num-rational" 524 | version = "0.4.0" 525 | source = "registry+https://github.com/rust-lang/crates.io-index" 526 | checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" 527 | dependencies = [ 528 | "autocfg", 529 | "num-bigint", 530 | "num-integer", 531 | "num-traits", 532 | ] 533 | 534 | [[package]] 535 | name = "num-traits" 536 | version = "0.2.15" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 539 | dependencies = [ 540 | "autocfg", 541 | "libm", 542 | ] 543 | 544 | [[package]] 545 | name = "num_cpus" 546 | version = "1.13.1" 547 | source = "registry+https://github.com/rust-lang/crates.io-index" 548 | checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" 549 | dependencies = [ 550 | "hermit-abi", 551 | "libc", 552 | ] 553 | 554 | [[package]] 555 | name = "once_cell" 556 | version = "1.12.0" 557 | source = "registry+https://github.com/rust-lang/crates.io-index" 558 | checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" 559 | 560 | [[package]] 561 | name = "parking_lot" 562 | version = "0.12.1" 563 | source = "registry+https://github.com/rust-lang/crates.io-index" 564 | checksum = 
"3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" 565 | dependencies = [ 566 | "lock_api", 567 | "parking_lot_core", 568 | ] 569 | 570 | [[package]] 571 | name = "parking_lot_core" 572 | version = "0.9.3" 573 | source = "registry+https://github.com/rust-lang/crates.io-index" 574 | checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" 575 | dependencies = [ 576 | "cfg-if", 577 | "libc", 578 | "redox_syscall", 579 | "smallvec", 580 | "windows-sys", 581 | ] 582 | 583 | [[package]] 584 | name = "polars" 585 | version = "0.22.8" 586 | source = "registry+https://github.com/rust-lang/crates.io-index" 587 | checksum = "3d175c67e80ceaef7219258cfc3a8686531d9510875b0cefa25404e5b80a7933" 588 | dependencies = [ 589 | "polars-core", 590 | "polars-io", 591 | "polars-lazy", 592 | "polars-ops", 593 | "polars-time", 594 | ] 595 | 596 | [[package]] 597 | name = "polars-arrow" 598 | version = "0.22.7" 599 | source = "registry+https://github.com/rust-lang/crates.io-index" 600 | checksum = "f66c7d3da2c10a09131294dbe7802fac792f570be639dc6ebf207bfc3e144287" 601 | dependencies = [ 602 | "arrow2", 603 | "hashbrown", 604 | "num", 605 | "thiserror", 606 | ] 607 | 608 | [[package]] 609 | name = "polars-core" 610 | version = "0.22.7" 611 | source = "registry+https://github.com/rust-lang/crates.io-index" 612 | checksum = "f7f15f443a90d5367c4fbbb151e203f03b5b96055c8b928c6bc30655a3644f13" 613 | dependencies = [ 614 | "ahash", 615 | "anyhow", 616 | "arrow2", 617 | "chrono", 618 | "comfy-table", 619 | "hashbrown", 620 | "indexmap", 621 | "num", 622 | "once_cell", 623 | "polars-arrow", 624 | "polars-utils", 625 | "rand", 626 | "rand_distr", 627 | "rayon", 628 | "regex", 629 | "thiserror", 630 | ] 631 | 632 | [[package]] 633 | name = "polars-io" 634 | version = "0.22.7" 635 | source = "registry+https://github.com/rust-lang/crates.io-index" 636 | checksum = "058d0a847ce5009b974c69ec878ed416e306436f21b626543019f738cee12315" 637 | dependencies = [ 638 | "ahash", 
639 | "anyhow", 640 | "arrow2", 641 | "csv-core", 642 | "dirs", 643 | "lexical", 644 | "lexical-core", 645 | "memchr", 646 | "memmap2", 647 | "num", 648 | "once_cell", 649 | "polars-arrow", 650 | "polars-core", 651 | "polars-time", 652 | "polars-utils", 653 | "rayon", 654 | "regex", 655 | "simdutf8", 656 | ] 657 | 658 | [[package]] 659 | name = "polars-lazy" 660 | version = "0.22.7" 661 | source = "registry+https://github.com/rust-lang/crates.io-index" 662 | checksum = "dad86a4ce7e32540ff12089bce6f77270fd133a5b263328a92be61defdd6b151" 663 | dependencies = [ 664 | "ahash", 665 | "glob", 666 | "parking_lot", 667 | "polars-arrow", 668 | "polars-core", 669 | "polars-io", 670 | "polars-ops", 671 | "polars-time", 672 | "polars-utils", 673 | "rayon", 674 | ] 675 | 676 | [[package]] 677 | name = "polars-ops" 678 | version = "0.22.7" 679 | source = "registry+https://github.com/rust-lang/crates.io-index" 680 | checksum = "030ecd473be113cd0264f1bc19de39a844fa12fa565db9dc52c859cbc292cf04" 681 | dependencies = [ 682 | "polars-arrow", 683 | "polars-core", 684 | ] 685 | 686 | [[package]] 687 | name = "polars-time" 688 | version = "0.22.7" 689 | source = "registry+https://github.com/rust-lang/crates.io-index" 690 | checksum = "94047b20d2da3bcc55c421be187a0c6f316cf1eea7fe7ed7347c1160a32d017c" 691 | dependencies = [ 692 | "chrono", 693 | "lexical", 694 | "polars-arrow", 695 | "polars-core", 696 | "polars-utils", 697 | ] 698 | 699 | [[package]] 700 | name = "polars-utils" 701 | version = "0.22.7" 702 | source = "registry+https://github.com/rust-lang/crates.io-index" 703 | checksum = "fcd3d0238462d5d9f7fbeaaea46e73ed4d58f6fae8b70d53cbe51d7538cc43f5" 704 | dependencies = [ 705 | "parking_lot", 706 | "rayon", 707 | ] 708 | 709 | [[package]] 710 | name = "ppv-lite86" 711 | version = "0.2.16" 712 | source = "registry+https://github.com/rust-lang/crates.io-index" 713 | checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" 714 | 715 | [[package]] 716 | name = 
"proc-macro2" 717 | version = "1.0.40" 718 | source = "registry+https://github.com/rust-lang/crates.io-index" 719 | checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" 720 | dependencies = [ 721 | "unicode-ident", 722 | ] 723 | 724 | [[package]] 725 | name = "quote" 726 | version = "1.0.20" 727 | source = "registry+https://github.com/rust-lang/crates.io-index" 728 | checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" 729 | dependencies = [ 730 | "proc-macro2", 731 | ] 732 | 733 | [[package]] 734 | name = "rand" 735 | version = "0.8.5" 736 | source = "registry+https://github.com/rust-lang/crates.io-index" 737 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 738 | dependencies = [ 739 | "libc", 740 | "rand_chacha", 741 | "rand_core", 742 | ] 743 | 744 | [[package]] 745 | name = "rand_chacha" 746 | version = "0.3.1" 747 | source = "registry+https://github.com/rust-lang/crates.io-index" 748 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 749 | dependencies = [ 750 | "ppv-lite86", 751 | "rand_core", 752 | ] 753 | 754 | [[package]] 755 | name = "rand_core" 756 | version = "0.6.3" 757 | source = "registry+https://github.com/rust-lang/crates.io-index" 758 | checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" 759 | dependencies = [ 760 | "getrandom", 761 | ] 762 | 763 | [[package]] 764 | name = "rand_distr" 765 | version = "0.4.3" 766 | source = "registry+https://github.com/rust-lang/crates.io-index" 767 | checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" 768 | dependencies = [ 769 | "num-traits", 770 | "rand", 771 | ] 772 | 773 | [[package]] 774 | name = "rayon" 775 | version = "1.5.3" 776 | source = "registry+https://github.com/rust-lang/crates.io-index" 777 | checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" 778 | dependencies = [ 779 | "autocfg", 780 | "crossbeam-deque", 781 | 
"either", 782 | "rayon-core", 783 | ] 784 | 785 | [[package]] 786 | name = "rayon-core" 787 | version = "1.9.3" 788 | source = "registry+https://github.com/rust-lang/crates.io-index" 789 | checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" 790 | dependencies = [ 791 | "crossbeam-channel", 792 | "crossbeam-deque", 793 | "crossbeam-utils", 794 | "num_cpus", 795 | ] 796 | 797 | [[package]] 798 | name = "redox_syscall" 799 | version = "0.2.13" 800 | source = "registry+https://github.com/rust-lang/crates.io-index" 801 | checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" 802 | dependencies = [ 803 | "bitflags", 804 | ] 805 | 806 | [[package]] 807 | name = "redox_users" 808 | version = "0.4.3" 809 | source = "registry+https://github.com/rust-lang/crates.io-index" 810 | checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" 811 | dependencies = [ 812 | "getrandom", 813 | "redox_syscall", 814 | "thiserror", 815 | ] 816 | 817 | [[package]] 818 | name = "regex" 819 | version = "1.5.6" 820 | source = "registry+https://github.com/rust-lang/crates.io-index" 821 | checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" 822 | dependencies = [ 823 | "aho-corasick", 824 | "memchr", 825 | "regex-syntax", 826 | ] 827 | 828 | [[package]] 829 | name = "regex-syntax" 830 | version = "0.6.26" 831 | source = "registry+https://github.com/rust-lang/crates.io-index" 832 | checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" 833 | 834 | [[package]] 835 | name = "rustversion" 836 | version = "1.0.7" 837 | source = "registry+https://github.com/rust-lang/crates.io-index" 838 | checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf" 839 | 840 | [[package]] 841 | name = "scopeguard" 842 | version = "1.1.0" 843 | source = "registry+https://github.com/rust-lang/crates.io-index" 844 | checksum = 
"d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 845 | 846 | [[package]] 847 | name = "signal-hook" 848 | version = "0.3.14" 849 | source = "registry+https://github.com/rust-lang/crates.io-index" 850 | checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" 851 | dependencies = [ 852 | "libc", 853 | "signal-hook-registry", 854 | ] 855 | 856 | [[package]] 857 | name = "signal-hook-mio" 858 | version = "0.2.3" 859 | source = "registry+https://github.com/rust-lang/crates.io-index" 860 | checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af" 861 | dependencies = [ 862 | "libc", 863 | "mio", 864 | "signal-hook", 865 | ] 866 | 867 | [[package]] 868 | name = "signal-hook-registry" 869 | version = "1.4.0" 870 | source = "registry+https://github.com/rust-lang/crates.io-index" 871 | checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" 872 | dependencies = [ 873 | "libc", 874 | ] 875 | 876 | [[package]] 877 | name = "simdutf8" 878 | version = "0.1.4" 879 | source = "registry+https://github.com/rust-lang/crates.io-index" 880 | checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" 881 | 882 | [[package]] 883 | name = "smallvec" 884 | version = "1.8.0" 885 | source = "registry+https://github.com/rust-lang/crates.io-index" 886 | checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" 887 | 888 | [[package]] 889 | name = "static_assertions" 890 | version = "1.1.0" 891 | source = "registry+https://github.com/rust-lang/crates.io-index" 892 | checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" 893 | 894 | [[package]] 895 | name = "strength_reduce" 896 | version = "0.2.3" 897 | source = "registry+https://github.com/rust-lang/crates.io-index" 898 | checksum = "a3ff2f71c82567c565ba4b3009a9350a96a7269eaa4001ebedae926230bc2254" 899 | 900 | [[package]] 901 | name = "strum" 902 | version = "0.23.0" 903 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 904 | checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" 905 | 906 | [[package]] 907 | name = "strum_macros" 908 | version = "0.23.1" 909 | source = "registry+https://github.com/rust-lang/crates.io-index" 910 | checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" 911 | dependencies = [ 912 | "heck", 913 | "proc-macro2", 914 | "quote", 915 | "rustversion", 916 | "syn", 917 | ] 918 | 919 | [[package]] 920 | name = "syn" 921 | version = "1.0.98" 922 | source = "registry+https://github.com/rust-lang/crates.io-index" 923 | checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" 924 | dependencies = [ 925 | "proc-macro2", 926 | "quote", 927 | "unicode-ident", 928 | ] 929 | 930 | [[package]] 931 | name = "termcolor" 932 | version = "1.1.3" 933 | source = "registry+https://github.com/rust-lang/crates.io-index" 934 | checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" 935 | dependencies = [ 936 | "winapi-util", 937 | ] 938 | 939 | [[package]] 940 | name = "thiserror" 941 | version = "1.0.31" 942 | source = "registry+https://github.com/rust-lang/crates.io-index" 943 | checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" 944 | dependencies = [ 945 | "thiserror-impl", 946 | ] 947 | 948 | [[package]] 949 | name = "thiserror-impl" 950 | version = "1.0.31" 951 | source = "registry+https://github.com/rust-lang/crates.io-index" 952 | checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" 953 | dependencies = [ 954 | "proc-macro2", 955 | "quote", 956 | "syn", 957 | ] 958 | 959 | [[package]] 960 | name = "time" 961 | version = "0.1.44" 962 | source = "registry+https://github.com/rust-lang/crates.io-index" 963 | checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" 964 | dependencies = [ 965 | "libc", 966 | "wasi 0.10.0+wasi-snapshot-preview1", 967 | "winapi", 
968 | ] 969 | 970 | [[package]] 971 | name = "unicode-ident" 972 | version = "1.0.1" 973 | source = "registry+https://github.com/rust-lang/crates.io-index" 974 | checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" 975 | 976 | [[package]] 977 | name = "unicode-segmentation" 978 | version = "1.9.0" 979 | source = "registry+https://github.com/rust-lang/crates.io-index" 980 | checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" 981 | 982 | [[package]] 983 | name = "unicode-width" 984 | version = "0.1.9" 985 | source = "registry+https://github.com/rust-lang/crates.io-index" 986 | checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" 987 | 988 | [[package]] 989 | name = "version_check" 990 | version = "0.9.4" 991 | source = "registry+https://github.com/rust-lang/crates.io-index" 992 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 993 | 994 | [[package]] 995 | name = "wasi" 996 | version = "0.10.0+wasi-snapshot-preview1" 997 | source = "registry+https://github.com/rust-lang/crates.io-index" 998 | checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" 999 | 1000 | [[package]] 1001 | name = "wasi" 1002 | version = "0.11.0+wasi-snapshot-preview1" 1003 | source = "registry+https://github.com/rust-lang/crates.io-index" 1004 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1005 | 1006 | [[package]] 1007 | name = "winapi" 1008 | version = "0.3.9" 1009 | source = "registry+https://github.com/rust-lang/crates.io-index" 1010 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1011 | dependencies = [ 1012 | "winapi-i686-pc-windows-gnu", 1013 | "winapi-x86_64-pc-windows-gnu", 1014 | ] 1015 | 1016 | [[package]] 1017 | name = "winapi-i686-pc-windows-gnu" 1018 | version = "0.4.0" 1019 | source = "registry+https://github.com/rust-lang/crates.io-index" 1020 | checksum = 
"ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1021 | 1022 | [[package]] 1023 | name = "winapi-util" 1024 | version = "0.1.5" 1025 | source = "registry+https://github.com/rust-lang/crates.io-index" 1026 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 1027 | dependencies = [ 1028 | "winapi", 1029 | ] 1030 | 1031 | [[package]] 1032 | name = "winapi-x86_64-pc-windows-gnu" 1033 | version = "0.4.0" 1034 | source = "registry+https://github.com/rust-lang/crates.io-index" 1035 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1036 | 1037 | [[package]] 1038 | name = "windows-sys" 1039 | version = "0.36.1" 1040 | source = "registry+https://github.com/rust-lang/crates.io-index" 1041 | checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" 1042 | dependencies = [ 1043 | "windows_aarch64_msvc", 1044 | "windows_i686_gnu", 1045 | "windows_i686_msvc", 1046 | "windows_x86_64_gnu", 1047 | "windows_x86_64_msvc", 1048 | ] 1049 | 1050 | [[package]] 1051 | name = "windows_aarch64_msvc" 1052 | version = "0.36.1" 1053 | source = "registry+https://github.com/rust-lang/crates.io-index" 1054 | checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" 1055 | 1056 | [[package]] 1057 | name = "windows_i686_gnu" 1058 | version = "0.36.1" 1059 | source = "registry+https://github.com/rust-lang/crates.io-index" 1060 | checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" 1061 | 1062 | [[package]] 1063 | name = "windows_i686_msvc" 1064 | version = "0.36.1" 1065 | source = "registry+https://github.com/rust-lang/crates.io-index" 1066 | checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" 1067 | 1068 | [[package]] 1069 | name = "windows_x86_64_gnu" 1070 | version = "0.36.1" 1071 | source = "registry+https://github.com/rust-lang/crates.io-index" 1072 | checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" 
1073 | 1074 | [[package]] 1075 | name = "windows_x86_64_msvc" 1076 | version = "0.36.1" 1077 | source = "registry+https://github.com/rust-lang/crates.io-index" 1078 | checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" 1079 | 1080 | [[package]] 1081 | name = "wine_pipeline" 1082 | version = "0.1.0" 1083 | dependencies = [ 1084 | "env_logger", 1085 | "log", 1086 | "polars", 1087 | ] 1088 | -------------------------------------------------------------------------------- /wine_pipeline/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "wine_pipeline" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | polars = {version = "0.22.8", features = ["describe"]} 10 | log = "0.4.17" 11 | env_logger = "0.9.0" -------------------------------------------------------------------------------- /wine_pipeline/Makefile: -------------------------------------------------------------------------------- 1 | run_dev_rust: 2 | RUST_LOG=info cargo run 3 | run_python_release: 4 | poetry run python pysrc/main.py 5 | run_rust_release: 6 | RUST_LOG=info ./target/release/wine_pipeline 7 | -------------------------------------------------------------------------------- /wine_pipeline/README.md: -------------------------------------------------------------------------------- 1 | # Wine Pipeline 2 | More details about the data set can be found in the `datastore` directory. 
3 | 4 | The following transformations are demonstrated in the data pipeline: 5 | - Reading data with Polars CsvReader 6 | - Show summary of the Dataset 7 | - Groupby and Aggregation 8 | - Create Calculated Column 9 | - Upsample the data 10 | - Aggregation over upsampled data to measure execution time 11 | 12 | 13 | ## Performance Logs: 14 | Logs for Polars: 15 | ``` 16 | datapsycho@dataops:~/.../wine_pipeline$ make run_rust_release 17 | RUST_LOG=info ./target/release/wine_pipeline 18 | [2022-07-15T23:23:45Z INFO wine_pipeline] Data read successfully! 19 | [2022-07-15T23:23:45Z INFO wine_pipeline] Basic Statistics calculated. 20 | [2022-07-15T23:23:45Z INFO wine_pipeline] Mean Max Distribution of Proline calculated. 21 | [2022-07-15T23:23:45Z INFO wine_pipeline] Ration data frame is Ceated. 22 | [2022-07-15T23:23:46Z INFO wine_pipeline] Random sample created with size 50000000! 23 | [2022-07-15T23:23:48Z INFO wine_pipeline] Aggregated result calculated. 24 | [2022-07-15T23:23:48Z INFO wine_pipeline] Pipeline executed successfully! 25 | [2022-07-15T23:23:48Z INFO wine_pipeline] CPU Execution time: 3.18567858s 26 | ``` 27 | 28 | Logs for Pandas: 29 | ``` 30 | datapsycho@dataops:~/.../wine_pipeline$ poetry run python pysrc/main.py 31 | [2022-07-16 01:30:58,307 root INFO] Data read successfully! 32 | [2022-07-16 01:30:58,318 root INFO] Basic Statistics calculated. 33 | [2022-07-16 01:30:58,325 root INFO] Mean Max Distribution of Proline calculated. 34 | [2022-07-16 01:30:58,326 root INFO] Ration data frame is Ceated. 35 | [2022-07-16 01:30:58,331 root INFO] Aggregated result calculated. 36 | [2022-07-16 01:31:00,105 root INFO] Random sample created with size 50000000! 37 | [2022-07-16 01:31:04,155 root INFO] Aggregated result calculated. 38 | [2022-07-16 01:31:04,165 root INFO] Pipeline executed successfully! 39 | [2022-07-16 01:31:04,165 root INFO] CPU Execution time: 5.838280569s. 
40 | ``` 41 | 42 | This shows that using Polars in Rust reduced the CPU time from about 5.84 s to about 3.19 s, a decrease of roughly 45%. 43 | 44 | ## Setup Configuration: 45 | The pipelines were run on an Intel Core i7 (8th Gen) CPU with 16 GB RAM on Linux Mint OS. 46 | -------------------------------------------------------------------------------- /wine_pipeline/config.yaml: -------------------------------------------------------------------------------- 1 | column_schema: 2 | - class_label 3 | - alcohol 4 | - malic_acid 5 | - ash 6 | - alcalinity_of_ash 7 | - magnesium 8 | - total_phenols 9 | - flavanoids 10 | - nonflavanoid_phenols 11 | - proanthocyanins 12 | - color_intensity 13 | - hue 14 | - od280/od315_of_diluted_wines 15 | - proline 16 | 17 | 18 | -------------------------------------------------------------------------------- /wine_pipeline/datastore/wine.data: -------------------------------------------------------------------------------- 1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065 2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050 3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185 4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480 5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735 6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450 7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290 8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295 9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045 10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045 11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510 12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280 13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320 14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150 15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547 16 | 
1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310 17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280 18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130 19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680 20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845 21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780 22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770 23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035 24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015 25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845 26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830 27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195 28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285 29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915 30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035 31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285 32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515 33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990 34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235 35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095 36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920 37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880 38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105 39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020 40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760 41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795 42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035 43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095 44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680 45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885 46 
| 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080 47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065 48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985 49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060 50 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260 51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150 52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265 53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190 54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375 55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060 56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120 57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970 58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270 59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285 60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520 61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680 62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450 63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630 64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420 65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355 66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678 67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502 68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510 69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750 70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718 71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870 72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410 73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472 74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985 75 | 
2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886 76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428 77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392 78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500 79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750 80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463 81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278 82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714 83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630 84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515 85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520 86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450 87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495 88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562 89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680 90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625 91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480 92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450 93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495 94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290 95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345 96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937 97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625 98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428 99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660 100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406 101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710 102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562 103 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438 104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415 105 | 
2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672 106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315 107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510 108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488 109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312 110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680 111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562 112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325 113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607 114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434 115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385 116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407 117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495 118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345 119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372 120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564 121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625 122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465 123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365 124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380 125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380 126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378 127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352 128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466 129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342 130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580 131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630 132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530 133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560 134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600 135 | 
3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650 136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695 137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720 138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515 139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580 140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590 141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600 142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780 143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520 144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550 145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855 146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830 147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415 148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625 149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650 150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550 151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500 152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480 153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425 154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675 155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640 156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725 157 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480 158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880 159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660 160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620 161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520 162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680 163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570 164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675 165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615 
166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520 167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695 168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685 169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750 170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630 171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510 172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470 173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660 174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740 175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750 176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835 177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840 178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560 179 | -------------------------------------------------------------------------------- /wine_pipeline/datastore/wine.names: -------------------------------------------------------------------------------- 1 | 1. Title of Database: Wine recognition data 2 | Updated Sept 21, 1998 by C.Blake : Added attribute information 3 | 4 | 2. Sources: 5 | (a) Forina, M. et al, PARVUS - An Extendible Package for Data 6 | Exploration, Classification and Correlation. Institute of Pharmaceutical 7 | and Food Analysis and Technologies, Via Brigata Salerno, 8 | 16147 Genoa, Italy. 9 | 10 | (b) Stefan Aeberhard, email: stefan@coral.cs.jcu.edu.au 11 | (c) July 1991 12 | 3. Past Usage: 13 | 14 | (1) 15 | S. Aeberhard, D. Coomans and O. de Vel, 16 | Comparison of Classifiers in High Dimensional Settings, 17 | Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of 18 | Mathematics and Statistics, James Cook University of North Queensland. 19 | (Also submitted to Technometrics). 20 | 21 | The data was used with many others for comparing various 22 | classifiers. 
The classes are separable, though only RDA 23 | has achieved 100% correct classification. 24 | (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) 25 | (All results using the leave-one-out technique) 26 | 27 | In a classification context, this is a well posed problem 28 | with "well behaved" class structures. A good data set 29 | for first testing of a new classifier, but not very 30 | challenging. 31 | 32 | (2) 33 | S. Aeberhard, D. Coomans and O. de Vel, 34 | "THE CLASSIFICATION PERFORMANCE OF RDA" 35 | Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of 36 | Mathematics and Statistics, James Cook University of North Queensland. 37 | (Also submitted to Journal of Chemometrics). 38 | 39 | Here, the data was used to illustrate the superior performance of 40 | the use of a new appreciation function with RDA. 41 | 42 | 4. Relevant Information: 43 | 44 | -- These data are the results of a chemical analysis of 45 | wines grown in the same region in Italy but derived from three 46 | different cultivars. 47 | The analysis determined the quantities of 13 constituents 48 | found in each of the three types of wines. 49 | 50 | -- I think that the initial data set had around 30 variables, but 51 | for some reason I only have the 13 dimensional version. 52 | I had a list of what the 30 or so variables were, but a.) 53 | I lost it, and b.), I would not know which 13 variables 54 | are included in the set. 55 | 56 | -- The attributes are (dontated by Riccardo Leardi, 57 | riclea@anchem.unige.it ) 58 | 1) Alcohol 59 | 2) Malic acid 60 | 3) Ash 61 | 4) Alcalinity of ash 62 | 5) Magnesium 63 | 6) Total phenols 64 | 7) Flavanoids 65 | 8) Nonflavanoid phenols 66 | 9) Proanthocyanins 67 | 10)Color intensity 68 | 11)Hue 69 | 12)OD280/OD315 of diluted wines 70 | 13)Proline 71 | 72 | 5. Number of Instances 73 | 74 | class 1 59 75 | class 2 71 76 | class 3 48 77 | 78 | 6. Number of Attributes 79 | 80 | 13 81 | 82 | 7. 
For Each Attribute: 83 | 84 | All attributes are continuous 85 | 86 | No statistics available, but suggest to standardise 87 | variables for certain uses (e.g. for use with classifiers 88 | which are NOT scale invariant) 89 | 90 | NOTE: 1st attribute is class identifier (1-3) 91 | 92 | 8. Missing Attribute Values: 93 | 94 | None 95 | 96 | 9. Class Distribution: number of instances per class 97 | 98 | class 1 59 99 | class 2 71 100 | class 3 48 101 | -------------------------------------------------------------------------------- /wine_pipeline/datastore/wine_download.sh: -------------------------------------------------------------------------------- 1 | wget https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data 2 | -------------------------------------------------------------------------------- /wine_pipeline/poetry.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "numpy" 3 | version = "1.22.4" 4 | description = "NumPy is the fundamental package for array computing with Python."
5 | category = "main" 6 | optional = false 7 | python-versions = ">=3.8" 8 | 9 | [[package]] 10 | name = "pandas" 11 | version = "1.4.2" 12 | description = "Powerful data structures for data analysis, time series, and statistics" 13 | category = "main" 14 | optional = false 15 | python-versions = ">=3.8" 16 | 17 | [package.dependencies] 18 | numpy = [ 19 | {version = ">=1.18.5", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, 20 | {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, 21 | {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, 22 | ] 23 | python-dateutil = ">=2.8.1" 24 | pytz = ">=2020.1" 25 | 26 | [package.extras] 27 | test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] 28 | 29 | [[package]] 30 | name = "python-dateutil" 31 | version = "2.8.2" 32 | description = "Extensions to the standard Python datetime module" 33 | category = "main" 34 | optional = false 35 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 36 | 37 | [package.dependencies] 38 | six = ">=1.5" 39 | 40 | [[package]] 41 | name = "pytz" 42 | version = "2022.1" 43 | description = "World timezone definitions, modern and historical" 44 | category = "main" 45 | optional = false 46 | python-versions = "*" 47 | 48 | [[package]] 49 | name = "six" 50 | version = "1.16.0" 51 | description = "Python 2 and 3 compatibility utilities" 52 | category = "main" 53 | optional = false 54 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 55 | 56 | [metadata] 57 | lock-version = "1.1" 58 | python-versions = "3.8.12" 59 | content-hash = "8522ff21dde8932d32f2ba2a73fea11a908162695388f67c204c66e43ec75a3d" 60 | 61 | [metadata.files] 62 | numpy = [ 63 | {file = "numpy-1.22.4-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:ba9ead61dfb5d971d77b6c131a9dbee62294a932bf6a356e48c75ae684e635b3"}, 64 | {file = 
"numpy-1.22.4-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:1ce7ab2053e36c0a71e7a13a7475bd3b1f54750b4b433adc96313e127b870887"}, 65 | {file = "numpy-1.22.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7228ad13744f63575b3a972d7ee4fd61815b2879998e70930d4ccf9ec721dce0"}, 66 | {file = "numpy-1.22.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43a8ca7391b626b4c4fe20aefe79fec683279e31e7c79716863b4b25021e0e74"}, 67 | {file = "numpy-1.22.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a911e317e8c826ea632205e63ed8507e0dc877dcdc49744584dfc363df9ca08c"}, 68 | {file = "numpy-1.22.4-cp310-cp310-win32.whl", hash = "sha256:9ce7df0abeabe7fbd8ccbf343dc0db72f68549856b863ae3dd580255d009648e"}, 69 | {file = "numpy-1.22.4-cp310-cp310-win_amd64.whl", hash = "sha256:3e1ffa4748168e1cc8d3cde93f006fe92b5421396221a02f2274aab6ac83b077"}, 70 | {file = "numpy-1.22.4-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:59d55e634968b8f77d3fd674a3cf0b96e85147cd6556ec64ade018f27e9479e1"}, 71 | {file = "numpy-1.22.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c1d937820db6e43bec43e8d016b9b3165dcb42892ea9f106c70fb13d430ffe72"}, 72 | {file = "numpy-1.22.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4c5d5eb2ec8da0b4f50c9a843393971f31f1d60be87e0fb0917a49133d257d6"}, 73 | {file = "numpy-1.22.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64f56fc53a2d18b1924abd15745e30d82a5782b2cab3429aceecc6875bd5add0"}, 74 | {file = "numpy-1.22.4-cp38-cp38-win32.whl", hash = "sha256:fb7a980c81dd932381f8228a426df8aeb70d59bbcda2af075b627bbc50207cba"}, 75 | {file = "numpy-1.22.4-cp38-cp38-win_amd64.whl", hash = "sha256:e96d7f3096a36c8754207ab89d4b3282ba7b49ea140e4973591852c77d09eb76"}, 76 | {file = "numpy-1.22.4-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:4c6036521f11a731ce0648f10c18ae66d7143865f19f7299943c985cdc95afb5"}, 77 | {file = 
"numpy-1.22.4-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b89bf9b94b3d624e7bb480344e91f68c1c6c75f026ed6755955117de00917a7c"}, 78 | {file = "numpy-1.22.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2d487e06ecbf1dc2f18e7efce82ded4f705f4bd0cd02677ffccfb39e5c284c7e"}, 79 | {file = "numpy-1.22.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3eb268dbd5cfaffd9448113539e44e2dd1c5ca9ce25576f7c04a5453edc26fa"}, 80 | {file = "numpy-1.22.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37431a77ceb9307c28382c9773da9f306435135fae6b80b62a11c53cfedd8802"}, 81 | {file = "numpy-1.22.4-cp39-cp39-win32.whl", hash = "sha256:cc7f00008eb7d3f2489fca6f334ec19ca63e31371be28fd5dad955b16ec285bd"}, 82 | {file = "numpy-1.22.4-cp39-cp39-win_amd64.whl", hash = "sha256:f0725df166cf4785c0bc4cbfb320203182b1ecd30fee6e541c8752a92df6aa32"}, 83 | {file = "numpy-1.22.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207"}, 84 | {file = "numpy-1.22.4.zip", hash = "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af"}, 85 | ] 86 | pandas = [ 87 | {file = "pandas-1.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be67c782c4f1b1f24c2f16a157e12c2693fd510f8df18e3287c77f33d124ed07"}, 88 | {file = "pandas-1.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5a206afa84ed20e07603f50d22b5f0db3fb556486d8c2462d8bc364831a4b417"}, 89 | {file = "pandas-1.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0010771bd9223f7afe5f051eb47c4a49534345dfa144f2f5470b27189a4dd3b5"}, 90 | {file = "pandas-1.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3228198333dd13c90b6434ddf61aa6d57deaca98cf7b654f4ad68a2db84f8cfe"}, 91 | {file = "pandas-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b79af3a69e5175c6fa7b4e046b21a646c8b74e92c6581a9d825687d92071b51"}, 92 | {file 
= "pandas-1.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:5586cc95692564b441f4747c47c8a9746792e87b40a4680a2feb7794defb1ce3"}, 93 | {file = "pandas-1.4.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:061609334a8182ab500a90fe66d46f6f387de62d3a9cb9aa7e62e3146c712167"}, 94 | {file = "pandas-1.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b8134651258bce418cb79c71adeff0a44090c98d955f6953168ba16cc285d9f7"}, 95 | {file = "pandas-1.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:df82739e00bb6daf4bba4479a40f38c718b598a84654cbd8bb498fd6b0aa8c16"}, 96 | {file = "pandas-1.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:385c52e85aaa8ea6a4c600a9b2821181a51f8be0aee3af6f2dcb41dafc4fc1d0"}, 97 | {file = "pandas-1.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:295872bf1a09758aba199992c3ecde455f01caf32266d50abc1a073e828a7b9d"}, 98 | {file = "pandas-1.4.2-cp38-cp38-win32.whl", hash = "sha256:95c1e422ced0199cf4a34385ff124b69412c4bc912011ce895582bee620dfcaa"}, 99 | {file = "pandas-1.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:5c54ea4ef3823108cd4ec7fb27ccba4c3a775e0f83e39c5e17f5094cb17748bc"}, 100 | {file = "pandas-1.4.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c072c7f06b9242c855ed8021ff970c0e8f8b10b35e2640c657d2a541c5950f59"}, 101 | {file = "pandas-1.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f549097993744ff8c41b5e8f2f0d3cbfaabe89b4ae32c8c08ead6cc535b80139"}, 102 | {file = "pandas-1.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ff08a14ef21d94cdf18eef7c569d66f2e24e0bc89350bcd7d243dd804e3b5eb2"}, 103 | {file = "pandas-1.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c5bf555b6b0075294b73965adaafb39cf71c312e38c5935c93d78f41c19828a"}, 104 | {file = "pandas-1.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51649ef604a945f781105a6d2ecf88db7da0f4868ac5d45c51cb66081c4d9c73"}, 105 | {file = 
"pandas-1.4.2-cp39-cp39-win32.whl", hash = "sha256:d0d4f13e4be7ce89d7057a786023c461dd9370040bdb5efa0a7fe76b556867a0"}, 106 | {file = "pandas-1.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:09d8be7dd9e1c4c98224c4dfe8abd60d145d934e9fc1f5f411266308ae683e6a"}, 107 | {file = "pandas-1.4.2.tar.gz", hash = "sha256:92bc1fc585f1463ca827b45535957815b7deb218c549b7c18402c322c7549a12"}, 108 | ] 109 | python-dateutil = [ 110 | {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, 111 | {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, 112 | ] 113 | pytz = [ 114 | {file = "pytz-2022.1-py2.py3-none-any.whl", hash = "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"}, 115 | {file = "pytz-2022.1.tar.gz", hash = "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7"}, 116 | ] 117 | six = [ 118 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 119 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 120 | ] 121 | -------------------------------------------------------------------------------- /wine_pipeline/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "wine_pipeline" 3 | version = "0.1.0" 4 | description = "Wine data pipeline" 5 | authors = ["DataPsycho"] 6 | license = "mit" 7 | 8 | [tool.poetry.dependencies] 9 | python = "3.8.12" 10 | pandas = "^1.4.2" 11 | 12 | [tool.poetry.dev-dependencies] 13 | 14 | [build-system] 15 | requires = ["poetry-core>=1.0.0"] 16 | build-backend = "poetry.core.masonry.api" 17 | -------------------------------------------------------------------------------- /wine_pipeline/pysrc/main.py: 
"""
Wine data pipeline implemented with pandas.

Loads ``wine.data`` from the ``datastore`` directory below the current working
directory, runs several descriptive and aggregation steps on it, and logs the
CPU time consumed by the whole run.
"""
from pathlib import Path
import pandas as pd
import logging
import time


pd.set_option('display.max_rows', 500)
logging.basicConfig(format='[%(asctime)s %(name)s %(levelname)s] %(message)s', level=logging.INFO)

# Resolved against the current working directory, so the script has to be
# started from the directory that contains ``datastore``.
DATASTORE = Path.cwd().joinpath("datastore")


def read_csv_into_df(path: Path, filename: str) -> pd.DataFrame:
    """
    Read the header-less wine data file and attach snake_case column names.

    param: path: Path of the main data directory
    param: filename: Name of the wine data file to read
    return: Data frame with 14 columns (class label followed by 13 attributes)
    """
    full_path = path.joinpath(filename)
    _df = pd.read_csv(full_path, header=None)
    columns = [
        'Class label', 'Alcohol', 'Malic acid', 'Ash',
        'Alcalinity of ash', 'Magnesium', 'Total phenols',
        'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
        'Color intensity', 'Hue', 'OD280/OD315 of diluted wines',
        'Proline'
    ]
    # "Malic acid" -> "malic_acid"; the slash in the OD280/OD315 name is kept.
    _df.columns = [item.lower().replace(" ", "_") for item in columns]
    logging.info("Data read successfully!")
    return _df


def describe_top_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute summary statistics for a fixed selection of columns.

    param: df: Wine data frame as produced by ``read_csv_into_df``
    return: Result of ``DataFrame.describe()`` on the selected columns
    """
    top_feature_list = [
        "class_label",
        "proline",
        "flavanoids",
        "color_intensity",
        "od280/od315_of_diluted_wines",
        "alcohol"
    ]
    result = df[top_feature_list].describe()
    logging.info("Basic Statistics calculated.")
    return result


def get_proline_agg_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate ``proline`` per ``class_label`` into max, median and mean.

    param: df: Data frame with ``class_label`` and ``proline`` columns
    return: One row per class with ``max_proline``, ``median_proline`` and
        ``mean_proline`` columns, indexed by ``class_label``
    """
    group_colname = "class_label"
    features = ["class_label", "proline"]

    _df = df[features].groupby(group_colname).agg(
        max_proline=("proline", "max"),
        median_proline=("proline", "median"),
        # The label matches the aggregation: this column holds the mean.
        mean_proline=("proline", "mean"),
    )
    logging.info("Mean Max Distribution of Proline calculated.")
    return _df


def create_arbitary_ration_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build a data frame of three element-wise column ratios plus the class label.

    param: df: Data frame with the ``class_label``, ``proline``, ``alcohol``,
        ``flavanoids``, ``color_intensity``, ``od280/od315_of_diluted_wines``
        and ``hue`` columns
    return: Data frame with ``class_label`` and the three ratio columns
    """
    data = dict(
        class_label=df["class_label"],
        proline_alcohol_ratio=df["proline"] / df["alcohol"],
        flavanoids_color_ration=df["flavanoids"] / df["color_intensity"],
        od_hue_ration=df["od280/od315_of_diluted_wines"] / df["hue"],
    )
    _df = pd.DataFrame(data=data)
    logging.info("Ration data frame is Ceated.")
    return _df


def get_up_sampled_df(df: pd.DataFrame, size=100) -> pd.DataFrame:
    """
    Draw ``size`` rows from ``df`` with replacement, using a fixed seed.

    param: df: Data frame to sample from
    param: size: Number of rows to draw (may exceed ``len(df)``)
    return: The sampled data frame
    """
    _df = df.sample(size, replace=True, random_state=1)
    logging.info(f"Random sample created with size {len(_df)}!")
    return _df


def aggregate_features_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute mean and median of proline, hue and flavanoids per class label.

    param: df: Data frame with ``class_label``, ``proline``, ``hue`` and
        ``flavanoids`` columns
    return: One row per class, sorted by ``class_label``, with ``class_label``
        as a regular column
    """
    groups = ["class_label"]
    agg_map = dict(
        mean_proline=("proline", "mean"),
        median_proline=("proline", "median"),
        mean_hue=("hue", "mean"),
        median_hue=("hue", "median"),
        mean_flavanoids=("flavanoids", "mean"),
        median_flavanoids=("flavanoids", "median"),
    )
    _df = df.groupby(groups).agg(**agg_map).reset_index().sort_values(groups)
    logging.info("Aggregated result calculated.")
    return _df


def main(up_sample_size: int = 50_000_000) -> None:
    """
    Run every pipeline step once on the wine data.

    param: up_sample_size: Number of rows drawn for the up-sampled aggregation
    """
    wine_df = read_csv_into_df(DATASTORE, "wine.data")
    describe_top_features(wine_df)
    get_proline_agg_df(wine_df)
    create_arbitary_ration_df(wine_df)
    aggregate_features_df(wine_df)
    wine_up_sampled_df = get_up_sampled_df(wine_df, up_sample_size)
    aggregate_features_df(wine_up_sampled_df)


if __name__ == "__main__":
    # process_time() counts CPU time of this process, not wall-clock time.
    st = time.process_time()
    main()
    et = time.process_time()
    res = et - st
    logging.info("Pipeline executed successfully!")
    logging.info(f'CPU Execution time: {res}s.')
std::path::{Path, PathBuf}; 4 | use std::time::Instant; 5 | use log::info; 6 | 7 | fn read_csv_into_df(path: PathBuf) -> Result { 8 | let schema = Schema::from(vec![ 9 | Field::new("class_label", Int64), 10 | Field::new("alcohol", Float64), 11 | Field::new("malic_acid", Float64), 12 | Field::new("ash", Float64), 13 | Field::new("alcalinity_of_ash", Float64), 14 | Field::new("magnesium", Float64), 15 | Field::new("total_phenols", Float64), 16 | Field::new("flavanoids", Float64), 17 | Field::new("nonflavanoid_phenols", Float64), 18 | Field::new("color_intensity", Float64), 19 | Field::new("hue", Float64), 20 | Field::new("od280/od315_of_diluted_wines", Float64), 21 | Field::new("proline", Float64), 22 | ]); 23 | info!("Data read successfully!"); 24 | CsvReader::from_path(path)?.has_header(false).with_schema(&schema).finish() 25 | } 26 | 27 | 28 | fn describe_top_features(df: &DataFrame){ 29 | let top_feature_vec = vec![ 30 | "class_label", 31 | "proline", 32 | "flavanoids", 33 | "color_intensity", 34 | "od280/od315_of_diluted_wines", 35 | "alcohol" 36 | ]; 37 | let _df = df.select(top_feature_vec).unwrap(); 38 | info!("Basic Statistics calculated."); 39 | // println!("{}", &_df.describe(None)); 40 | } 41 | 42 | fn get_proline_agg_df(df: &DataFrame){ 43 | let group_colname = ["class_label"]; 44 | let _df = df 45 | .groupby(group_colname) 46 | .unwrap() 47 | .agg(&[("proline", &["mean", "median", "max"])]) 48 | .unwrap(); 49 | info!("Mean Max Distribution of Proline calculated."); 50 | // println!("{}", _df) 51 | } 52 | 53 | fn create_ration(col1: &str, col2: &str, df: &DataFrame, new_series_name: &str) -> Series{ 54 | let series1 = df.column(col1).unwrap().clone(); 55 | let series2 = df.column(col2).unwrap().clone(); 56 | Series::new(new_series_name, &series1/&series2) 57 | } 58 | 59 | fn create_arbitary_ration_df(df: &DataFrame){ 60 | let par = create_ration("proline", "alcohol", df, "proline_alcohol_ration"); 61 | let fcr = create_ration("flavanoids", 
"color_intensity", df, "flavanoids_color_ration"); 62 | let ohr = create_ration("od280/od315_of_diluted_wines", "hue", df, "od_hue_ration"); 63 | let class_label = df.column("class_label").unwrap().clone(); 64 | let _df = DataFrame::new(vec![class_label, par, fcr, ohr]).unwrap(); 65 | // let mut _df = _df.with_column(par).unwrap(); 66 | info!("Ration data frame is Ceated."); 67 | // println!("{}", _df.head(Some(10))) 68 | } 69 | 70 | fn get_up_sampled_df(df: &DataFrame, size: usize) -> DataFrame { 71 | let _df = df.sample_n(size, true, false, Some(1)).unwrap(); 72 | info!("Random sample created with size {:?}!", _df.height()); 73 | _df 74 | } 75 | 76 | fn aggregate_features_df(df: &DataFrame) -> DataFrame{ 77 | let _df = df.groupby(["class_label"]) 78 | .unwrap() 79 | .agg(&[ 80 | ("proline", &["mean"]), 81 | ("proline", &["median"]), 82 | ("hue", &["mean"]), 83 | ("hue", &["median"]), 84 | ("flavanoids", &["mean"]), 85 | ("flavanoids", &["median"]), 86 | ]) 87 | .unwrap(); 88 | info!("Aggregated result calculated."); 89 | // println!("{}", _df); 90 | _df 91 | } 92 | 93 | fn main() { 94 | env_logger::init(); 95 | let start = Instant::now(); 96 | let curr_path = Path::new("main.rs").parent(); 97 | let file_path = curr_path.unwrap().join("datastore").join("wine.data"); 98 | let result = read_csv_into_df(file_path); 99 | match result { 100 | Ok(wine_df) => { 101 | describe_top_features(&wine_df); 102 | get_proline_agg_df(&wine_df); 103 | create_arbitary_ration_df(&wine_df); 104 | let wine_up_sampled_df = get_up_sampled_df(&wine_df, 50000000); 105 | aggregate_features_df(&wine_up_sampled_df); 106 | }, 107 | Err(error) => panic!("Problem reading file: {:?}", error), 108 | } 109 | let duration = start.elapsed(); 110 | info!{"Pipeline executed successfully!"} 111 | info!("CPU Execution time: {:?}", duration); 112 | } 113 | --------------------------------------------------------------------------------