├── .github └── workflows │ └── main.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE.txt ├── README.md ├── benches └── benchmark.rs ├── images ├── bar.PNG ├── elusion.png ├── interactivedash3.gif ├── platformcom.png └── report.PNG └── src ├── elusion.rs ├── lib.rs ├── main.rs └── prelude.rs /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Platform Compatibility 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest, macos-latest, windows-latest] 15 | rust: [stable] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Setup Rust 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | profile: minimal 23 | toolchain: ${{ matrix.rust }} 24 | override: true 25 | 26 | - name: Install ODBC (Ubuntu) 27 | if: matrix.os == 'ubuntu-latest' 28 | run: | 29 | sudo apt-get update 30 | sudo apt-get install -y unixodbc-dev 31 | 32 | - name: Install ODBC (macOS) 33 | if: matrix.os == 'macos-latest' 34 | run: | 35 | brew install unixodbc 36 | 37 | - name: Build 38 | run: cargo build --verbose 39 | 40 | - name: Run tests 41 | run: cargo test --verbose 42 | 43 | security-audit: 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v2 47 | - name: Run security audit 48 | run: | 49 | cargo install cargo-audit 50 | cargo audit --deny warnings --ignore RUSTSEC-2024-0384 --ignore RUSTSEC-2025-0004 --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2023-0055 --ignore RUSTSEC-2023-0086 51 | 52 | tls-check: 53 | runs-on: ubuntu-latest 54 | steps: 55 | - uses: actions/checkout@v2 56 | - name: Check HTTPS usage 57 | run: | 58 | ! grep -r "http://" src/ || exit 1 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [3.11.0] - 2025-05-17 2 | ### Added 3 | - EXCEL reader/loader 4 | 5 | ## [3.10.0] - 2025-05-17 6 | ### Added 7 | - EXCEL Feature. 
write_to_excel() function 8 | 9 | ## [3.9.0] - 2025-05-08 10 | ### Added 11 | - MYSQL Database connectivity 12 | ### Fixed 13 | - Filter lowercasing column name 14 | - AS case insensitivity for .json() and .json_array() 15 | 16 | ## [3.8.1] - 2025-05-07 17 | ### Fixed 18 | - Client import clash from API (reqwest::Client) and POSTGRES (tokio_postgres::Client) features 19 | ### Updated 20 | - tokio from `1.41.1` to `1.45.0` 21 | 22 | ## [3.8.0] - 2025-05-07 23 | ### Added 24 | - POSTGRES Database connectivity 25 | 26 | ## [3.7.5] - 2025-05-06 27 | ### Fixed 28 | - Fixed features behind feature flags 29 | 30 | ## [3.7.4] - 2025-05-06 31 | ### Added 32 | - json_array() function for parsing json values from DataFrame columns 33 | 34 | ## [3.7.3] - 2025-05-06 35 | ### BREAKING CHANGE 36 | - Removed ODBC features 37 | ### Added 38 | - json() function for parsing json values from DataFrame columns 39 | - Moved Azure Blob Storage behind feature flag 40 | 41 | 42 | ## [3.7.2] - 2025-05-04 43 | ### Added 44 | - Moved Dashboards/Reporting and APIs behind feature flags 45 | ### Dependency update 46 | - Tokio `1.42.0` to `1.42.1` 47 | 48 | ## [3.7.1] - 2025-03-09 49 | ### Added 50 | - extract_value_from_df() that extracts a Value from a DataFrame based on column name and row index 51 | - extract_row_from_df() that extracts a Row from a DataFrame as a HashMap based on row index 52 | 53 | ## [3.7.0] - 2025-03-09 54 | ### Added 55 | - write_to_json() for writing to local JSON files 56 | - write_json_to_azure_with_sas() for writing JSON to Azure Blob Storage with a SAS token 57 | 58 | ## [3.5.1] - 2025-03-08 59 | ### Added 60 | - create_formatted_date_range_table() function that creates a Date Table with custom Formats (ISO, European, HumanReadable...) and week, month, quarter and year ranges (start - end) 61 | 62 | ## [3.5.0] - 2025-03-02 63 | ### Added 64 | - empty() function that creates an empty DataFrame that can be populated 65 | - create_date_range_table() function that creates a Date Table 66 | 67 | ## [3.4.1] - 2025-03-02 68 | ### Renamed 69 | - Renamed function from_materialized_view() to from_view() 70 | 71 | ## [3.4.0] - 2025-03-02 72 | ### Added 73 | - Caching and Materialized Views 74 | 75 | ## [3.3.1] - 2025-02-26 76 | ### Fixed build 77 | - Updates to the arrow and arrow-arith crates made the build conflict with the chrono crate.
Downgraded datafusion to `43.0.0` 78 | arrow fixed to `=53.2.0`, chrono fixed to `=0.4.38` 79 | 80 | ## [3.3.0] - 2025-02-21 81 | ### Fixed 82 | - Select() AS no longer gets _ between the column name and AS 83 | 84 | ## [3.2.0] - 2025-02-20 85 | ### Fixed 86 | - Flexible query ordering for select, string, agg, filter and datetime 87 | 88 | ## [3.1.0] - 2025-02-17 89 | ### Fixed 90 | - filter() condition case sensitivity 91 | 92 | ## [3.0.0] - 2025-02-17 93 | ### BREAKING CHANGE 94 | - ALL Query Expressions, DataFrame Columns and Functions are normalized with LOWERCASE(), TRIM(), REPLACE(" ", "_") 95 | 96 | ## [2.8.0] - 2025-02-16 97 | ### Added 98 | - DateTime Functions within datetime_functions() 99 | ### FeatureFlag 100 | - Moved ODBC behind feature flag 101 | 102 | ## [2.7.0] - 2025-02-11 103 | ### Added 104 | - Interactive tables for Reporting 105 | 106 | ## [2.5.1] - 2025-02-09 107 | ### Added 108 | - Interactive Dashboarding 109 | 110 | ## [2.5.0] - 2025-02-09 111 | ### BREAKING CHANGE 112 | - The write_parquet_to_azure_with_sas() function now needs a specified writing mode: overwrite or append 113 | ### Fixed 114 | - CSV writer 115 | ### Added 116 | - Dependency tempfile `3.16.0` 117 | 118 | ## [2.4.3] - 2025-02-09 119 | ### Fixed 120 | - Appending data for Parquet Writer 121 | 122 | ## [2.4.2] - 2025-02-08 123 | ### Added 124 | - Custom ERROR handling for UNION, APPEND... 125 | 126 | ## [2.4.1] - 2025-02-07 127 | ### Added 128 | - Custom ERROR handling 129 | 130 | ## [2.4.0] - 2025-02-06 131 | ### Fixed 132 | - Fixed UNION, UNION_ALL, EXCEPT, INTERSECT so they now return proper results 133 | ### BREAKING CHANGE 134 | - UNION, UNION_ALL, EXCEPT, INTERSECT are now async and also need to be evaluated with elusion() - check readme.md for examples 135 | ### Added 136 | - Fixed APPEND, APPEND_ALL 137 | 138 | ## [2.3.0] - 2025-02-04 139 | ### Fixed 140 | - Fixed case sensitivity within statistical functions 141 | 142 | ## [2.2.0] - 2025-02-03 143 | ### Fixed 144 | - Improved parsing for single DataFrames, for all functions, to avoid using aliases on single DataFrames 145 | 146 | ## [2.1.0] - 2025-02-02 147 | ### BREAKING CHANGE 148 | - REST API now must use file path + JSON file name as argument, e.g.: "C:\\Borivoj\\RUST\\Elusion\\sales_jan_2025.json" 149 | 150 | ## [2.0.0] - 2025-01-31 151 | ### Added 152 | - REST API to JSON files 153 | ### BREAKING CHANGES 154 | - REST API is now detached from CustomDataFrame (check readme for examples) 155 | 156 | ## [1.7.2] - 2025-01-31 157 | ### Added 158 | - Improved JSON file reading performance by 50% 159 | 160 | ## [1.7.1] - 2025-01-29 161 | ### Added 162 | - Writing Parquet to Azure Blob Storage 163 | 164 | ## [1.7.0] - 2025-01-28 165 | ### Removed 166 | - REST API (until I fix it) 167 | 168 | ## [1.5.1] - 2025-01-28 169 | ### Added 170 | - URL Encoding for REST API params and headers 171 | ### Added 172 | - Dependencies: `urlencoding` `2.1.3` 173 | 174 | ## [1.5.0] - 2025-01-27 175 | ### Added 176 | - Reading Data From API into CustomDataFrame 177 | ### Added 178 | - Dependencies: `reqwest` `0.12` 179 | 180 | ## [1.4.0] - 2025-01-26 181 | ### Added 182 | - Pipeline Scheduler 183 | ### Added 184 | - Dependencies: `tokio-cron-scheduler` `0.13.0` 185 | 186 | ## [1.3.0] - 2025-01-25 187 | ### Added 188 | - Azure Blob Connection.
You can connect and download .json or .csv files with the from_azure_with_sas_token() function 189 | ### Added 190 | - Dependencies: `azure_storage_blobs` `0.21.0`, `azure_storage` `0.21.0`, `csv` `1.1` 191 | 192 | ## [1.2.0] - 2025-01-24 193 | ### Added 194 | - ODBC Database connections for MySQL and PostgreSQL 195 | ### Added 196 | - Dependencies: `lazy_static` `1.5.0`, `arrow-odbc` `14.1.0` 197 | 198 | ## [1.1.1] - 2025-01-21 199 | ### Added 200 | - Statistical Functions: display_stats(), display_null_analysis(), display_correlation_matrix() 201 | 202 | ## [1.1.0] - 2025-01-21 203 | ### Added 204 | - Dependencies: `plotly` `0.12.1` with Plots: Line, TimeSeries, Bar, Pie, Donut, Histogram, Box 205 | 206 | ## [1.0.1] - 2025-01-20 207 | ### Updated 208 | - Platform Compatibility (MacOS, Linux, Microsoft) and Code/Dependencies Audit 209 | 210 | ## [1.0.0] - 2025-01-19 211 | ### BREAKING CHANGE 212 | - JOIN and JOIN_MANY functions can now receive multiple arguments 213 | ### Updated 214 | - Handling conditions within String Functions and Aggregate functions 215 | ### MAJOR RELEASE 216 | - Library fully tested and ready for production 217 | 218 | ## [0.5.8] - 2025-01-18 219 | ### Added 220 | - PIVOT and UNPIVOT functions 221 | ### Updated 222 | - Dependencies: `datafusion` to `44.0.0` 223 | 224 | ## [0.5.7] - 2025-01-12 225 | ### Fixed 226 | - Window function to properly parse multiple arguments within aggregation, analytics and ranking 227 | 228 | ## [0.5.5] - 2025-01-12 229 | ### Added 230 | - except() and intersect() 231 | 232 | ## [0.5.4] - 2025-01-12 233 | ### Added 234 | - union() and union_all() 235 | 236 | ## [0.5.3] - 2025-01-09 237 | ### Fixed 238 | - Multiple nested functions in SELECT() 239 | ### Added 240 | - group_by_all() function that takes all non-aggregated columns from SELECT 241 | 242 | ## [0.5.2] - 2025-01-10 243 | ### Added 244 | - `String Functions` that can be applied on string columns 245 | 246 | ## [0.5.1] - 2025-01-09 247 | ### Fixed 248 | - Scalar and Aggregation function parsing, for single and nested functions 249 | 250 | ## [0.5.0] - 2025-01-07 251 | ### BREAKING CHANGE 252 | - Removed AggregationBuilder; agg() is now used for aggregations 253 | - Removed SQL Support, as the DataFrame API has developed considerably and there is not much need for raw SQL moving forward. If there is demand for raw SQL, I will bring it back in v1.0.0 254 | 255 | ## [0.4.0] - 2025-01-06 256 | ### BREAKING CHANGE 257 | - No more use of vec![] in DataFrame API Query Functions 258 | 259 | ## [0.3.0] - 2025-01-05 260 | ### Added 261 | - DELTA table Writer and Reader 262 | 263 | ## [0.2.5] - 2025-01-02 264 | ### Added 265 | - PARQUET reader 266 | - Removed manual SCHEMA declaration; CustomDataFrame::new() now only needs a file path and alias 267 | 268 | ## [0.2.4] - 2025-01-01 269 | ### Fixed 270 | - JOIN for multiple DataFrames 271 | - HAVING and FILTER functions fixed 272 | 273 | ## [0.2.3] - 2024-12-29 274 | ### Added 275 | - CSV writer 276 | 277 | ## [0.2.2] - 2024-12-28 278 | ### Added 279 | - Dependencies: `serde` `1.0.216`, `serde_json` `1.0.134` 280 | - Support for JSON files: Reading and Loading to CustomDataFrame 281 | ### Fixed 282 | - Improved display() function for better formatting.
283 | 284 | ## [0.2.0] - 2024-12-24 285 | ### Added 286 | - Full Raw SQL Querying support 287 | 288 | ## [0.1.3] - 2024-12-23 289 | ### Added 290 | - Aliasing column names directly in select() function 291 | 292 | ## [0.1.1] - 2024-12-21 293 | ### Added 294 | - Added support for `prelude` to simplify imports for users. 295 | 296 | ### Fixed 297 | - Improved error handling and clarified documentation. 298 | 299 | ### Updated 300 | - Dependencies: `chrono` to `0.4.39` , `tokio` to `1.42.0` 301 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "elusion" 3 | version = "3.11.0" 4 | edition = "2021" 5 | authors = ["Borivoj Grujicic "] 6 | description = "Elusion is a modern DataFrame / Data Engineering / Data Analysis library that combines the familiarity of DataFrame operations (like those in PySpark, Pandas, and Polars) with the power of SQL query building. It provides flexible query construction without enforcing strict operation ordering, enabling developers to write intuitive and maintainable data transformations." 7 | license = "MIT" 8 | repository = "https://github.com/DataBora/elusion" 9 | homepage = "https://github.com/DataBora/elusion" 10 | documentation = "https://docs.rs/elusion" 11 | readme = "README.md" 12 | keywords = ["data", "engineering", "dataframe", "analysis"] 13 | categories = ["data-structures"] 14 | 15 | [dependencies] 16 | datafusion = "43.0.0" 17 | deltalake = "0.23.0" 18 | arrow = "=53.2.0" 19 | chrono = "=0.4.38" #arrow-arith has same Quarter function 20 | tokio = { version = "1.45.0", features = ["rt-multi-thread"] } 21 | futures = "0.3.31" 22 | regex = "1.11.1" 23 | encoding_rs = "0.8.35" 24 | serde = { version = "1.0.216", features = ["derive"] } 25 | serde_json = "1.0.134" 26 | object_store = "0.11.2" 27 | plotly = { version = "0.12.1", optional = true } 28 | lazy_static = "1.5.0" 29 | azure_storage_blobs = { version = "0.21.0", optional = true } 30 | azure_storage = { version = "0.21.0", optional = true } 31 | base64 = "0.22.1" 32 | csv = "1.1" 33 | tokio-cron-scheduler = "0.13.0" 34 | bytes = "1.10.0" 35 | num_cpus = "1.0" 36 | reqwest = { version = "0.12", features = ["json", "rustls-tls"], default-features = false, optional = true} 37 | urlencoding = {version = "2.1.3", optional = true} 38 | tempfile = "3.16.0" 39 | ring = ">=0.17.12" 40 | tokio-postgres = { version = "0.7.13", optional = true } 41 | rust_decimal = { version = "1.32", features = ["db-postgres"], optional = true } 42 | mysql_async = { version = "0.32.2", default-features = false, features = ["minimal"], optional = true} 43 | rust_xlsxwriter = {version = "0.87.0", optional = true} 44 | calamine = "0.23.1" 45 | 46 | [features] 47 | default = [] 48 | dashboard = ["dep:plotly"] 49 | api = ["dep:reqwest", "dep:urlencoding"] 50 | azure = ["dep:azure_storage_blobs", "dep:azure_storage"] 51 | postgres = ["dep:tokio-postgres", "dep:rust_decimal"] 52 | mysql = ["dep:mysql_async"] 53 | excel = ["dep:rust_xlsxwriter"] 54 | all = ["dashboard", "api", "azure", "postgres", "mysql","excel"] 55 | 56 | [dev-dependencies] 57 | criterion = { version = "0.5.1", features = ["html_reports"] } 58 | 59 | [[bench]] 60 | name = "benchmark" 61 | harness = false 62 | 63 | [lib] 64 | name = "elusion" 65 | path = "src/elusion.rs" 66 | 67 | [target.'cfg(not(target_os = "windows"))'.dependencies] 68 | openssl = "=0.10.72" #security fix 
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright 2024 Borivoj Grujicic 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elusion 🦀 DataFrame / Data Engineering / Data Analysis Library for Everybody! 2 | 3 | 4 | ![Elusion Logo](images/elusion.png) 5 | 6 | ## Best Way to learn Elusion: 7 | Udemy Course - [Click to start learning on Udemy!](https://www.udemy.com/course/rust-data-engineering-analytics-elusion/) 8 | 9 | 10 | Elusion is a high-performance DataFrame / Data Engineering / Data Analysis library designed for in-memory data formats such as CSV, EXCEL, JSON, PARQUET, DELTA, as well as for Azure Blob Storage Connections, Postgres Database Connection, MySql Database Connection, and REST API's for creating JSON files which can be forwarded to DataFrame. 11 | Additionally you can easily create Reports and Dashboard by passing DataFrame results. 12 | 13 | All of the DataFrame operations can be placed in PipelineScheduler for automated Data Engineering Pipelines. 14 | 15 | Tailored for Data Engineers and Data Analysts seeking a powerful abstraction over data transformations. Elusion streamlines complex operations like filtering, joining, aggregating, and more with its intuitive, chainable DataFrame API, and provides a robust interface for managing and querying data efficiently, as well as Integrated Plotting and Interactive Dashboard features. 16 | 17 | ## Core Philosophy 18 | Elusion wants you to be you! 19 | 20 | Elusion offers flexibility in constructing queries without enforcing specific patterns or chaining orders, unlike SQL, PySpark, Polars, or Pandas. You can build your queries in ANY SEQUENCE THAT BEST FITS YOUR LOGIC, writing functions in ANY ORDER or a manner that makes sense to you. Regardless of the order of function calls, Elusion ensures consistent results. 21 | ### ALL DataFrame OPERATIONS AND EXAMPLES, that you will need, are bellow. 22 | 23 | ## Platform Compatibility 24 | Tested for MacOS, Linux and Windows 25 | ![Platform comp](images/platformcom.png) 26 | 27 | ## Security 28 | Codebase has Undergone Rigorous Auditing and Security Testing, ensuring that it is fully prepared for Production. 29 | 30 | ## Key Features 31 | 32 | ### 🔄 Job Scheduling (PipelineScheduler) 33 | Flexible Intervals: From 1 minute to 30 days scheduling intervals. 34 | Graceful Shutdown: Built-in Ctrl+C signal handling for clean termination. 35 | Async Support: Built on tokio for non-blocking operations. 36 | 37 | ### 🌐 External Data Sources Integration 38 | - Azure Blob Storage: Direct integration with Azure Blob Storage for Reading and Writing data files. 39 | - REST API's: Create JSON files from REST API endpoints with Customizable Headers, Params, Date Ranges, Pagination... 
40 | 41 | ### 🚀 High-Performance DataFrame Query Operations 42 | Seamless Data Loading: Easily load and process data from CSV, EXCEL, PARQUET, JSON, and DELTA table files. 43 | SQL-Like Transformations: Execute transformations such as SELECT, AGG, STRING FUNCTIONS, JOIN, FILTER, HAVING, GROUP BY, ORDER BY, DATETIME and WINDOW with ease. 44 | 45 | ### 🚀 Caching and Views 46 | The caching and views functionality offers several significant advantages over regular querying: 47 | #### Reduced Computation Time, Memory Management, Query Optimization, Interactive Analysis, Multiple visualizations for Dashboards and Reports, Resource Utilization, Concurrency 48 | 49 | ### 📉 Aggregations and Analytics 50 | Comprehensive Aggregations: Utilize built-in functions like SUM, AVG, MEAN, MEDIAN, MIN, COUNT, MAX, and more. 51 | Advanced Scalar Math: Perform calculations using functions such as ABS, FLOOR, CEIL, SQRT, ISNAN, ISZERO, PI, POWER, and others. 52 | 53 | ### 🔗 Flexible Joins 54 | Diverse Join Types: Perform joins using INNER, LEFT, RIGHT, FULL, and other join types. 55 | Intuitive Syntax: Easily specify join conditions and aliases for clarity and simplicity. 56 | 57 | ### 🪟 Window Functions 58 | Analytical Capabilities: Implement window functions like RANK, DENSE_RANK, ROW_NUMBER, and custom partition-based calculations to perform advanced analytics. 59 | 60 | ### 🔄 Pivot and Unpivot Functions 61 | Data Reshaping: Transform your data structure using PIVOT and UNPIVOT functions to suit your analytical needs. 62 | 63 | ### 📊 Create REPORTS 64 | Create HTML files with Interactive Dashboards with multiple interactive Plots and Tables. 65 | Plots Available: TimeSeries, Bar, Pie, Donut, Histogram, Scatter, Box... 66 | Tables can Paginate, Filter, Resize and Reorder columns... 67 | Export Table data to EXCEL and CSV 68 | 69 | ### 🧹 Clean Query Construction 70 | Readable Queries: Construct SQL queries that are both readable and reusable. 71 | Advanced Query Support: Utilize operations such as APPEND, UNION, UNION ALL, INTERSECT, and EXCEPT. For multiple DataFrame operations: APPEND_MANY, UNION_MANY, UNION_ALL_MANY. 72 | 73 | ### 🛠️ Easy-to-Use API 74 | Chainable Interface: Build queries using a chainable and intuitive API for streamlined development. 75 | Debugging Support: Access readable debug outputs of the generated SQL for easy verification and troubleshooting. 76 | **Data Preview**: Quickly preview your data by displaying a subset of rows in the terminal. 77 | **Composable Queries**: Seamlessly chain transformations to create reusable and testable workflows. 78 | 79 | --- 80 | ## Installation 81 | 82 | To add **Elusion** to your Rust project, include the following lines in your `Cargo.toml` under `[dependencies]`: 83 | 84 | ```toml 85 | elusion = "3.11.0" 86 | tokio = { version = "1.45.0", features = ["rt-multi-thread"] } 87 | ``` 88 | ## Rust version needed 89 | ```toml 90 | >= 1.81 91 | ``` 92 | --- 93 | ## Feature Flags 94 | Elusion uses Cargo feature flags to keep the library lightweight and modular. 95 | You can enable only the features you need, which helps reduce dependencies and compile time. 96 | 97 | ## Available Features 98 | ```rust 99 | ["postgres"] 100 | ``` 101 | Enables Postgres Database connectivity. 102 | ```rust 103 | ["mysql"] 104 | ``` 105 | Enables MySQL Database connectivity. 106 | ```rust 107 | ["azure"] 108 | ``` 109 | Enables Azure BLOB storage connectivity. 110 | ```rust 111 | ["api"] 112 | ``` 113 | Enables HTTP API integration for fetching data from web services.
114 | ```rust 115 | ["dashboard"] 116 | ``` 117 | Enables data visualization and dashboard creation capabilities. 118 | ```rust 119 | ["excel"] 120 | ``` 121 | Enables writing a DataFrame to an Excel file. 122 | ```rust 123 | ["all"] 124 | ``` 125 | Enables all available features. 126 | 127 | Usage: 128 | - In your Cargo.toml, specify which features you want to enable: 129 | 130 | - Add the POSTGRES feature when specifying the dependency: 131 | ```toml 132 | [dependencies] 133 | elusion = { version = "3.11.0", features = ["postgres"] } 134 | ``` 135 | 136 | - Using NO features (minimal dependencies): 137 | ```toml 138 | [dependencies] 139 | elusion = "3.11.0" 140 | ``` 141 | 142 | - Using multiple specific features: 143 | ```toml 144 | [dependencies] 145 | elusion = { version = "3.11.0", features = ["dashboard", "api", "mysql"] } 146 | ``` 147 | 148 | - Using all features: 149 | ```toml 150 | [dependencies] 151 | elusion = { version = "3.11.0", features = ["all"] } 152 | ``` 153 | 154 | ### Feature Implications 155 | #### When a feature is not enabled, you'll receive an error: 156 | #### Error: ***Warning***: API feature not enabled. Add feature under [dependencies] 157 | --- 158 | ## NORMALIZATION 159 | #### DataFrame (your files) Column Names will be normalized to LOWERCASE(), TRIM() and REPLACE(" ","_") 160 | #### All DataFrame query expressions, functions, aliases and column names will be normalized to LOWERCASE(), TRIM() and REPLACE(" ","_") 161 | --- 162 | ## Schema 163 | #### SCHEMA IS DYNAMICALLY INFERRED 164 | --- 165 | # Usage examples: 166 | 167 | ### MAIN function 168 | 169 | ```rust 170 | // Import everything needed 171 | use elusion::prelude::*; 172 | 173 | #[tokio::main] 174 | async fn main() -> ElusionResult<()> { 175 | 176 | Ok(()) 177 | } 178 | ``` 179 | --- 180 | # CREATING DATA FRAMES 181 | --- 182 | ### - Loading data into CustomDataFrame can be from: 183 | #### - Empty() DataFrames 184 | #### - In-Memory data formats: CSV, EXCEL, JSON, PARQUET, DELTA 185 | #### - Azure Blob Storage endpoints (BLOB, DFS) 186 | #### - Postgres Database SQL Queries 187 | #### - MySQL Database Queries 188 | #### - REST API -> json -> DataFrame 189 | 190 | #### -> NEXT are examples for reading data from local files; 191 | #### at the end are examples for Azure Blob Storage, Postgres and MySQL Databases 192 | --- 193 | ### LOADING data from Files into CustomDataFrame (in-memory data formats) 194 | #### - File extensions are automatically recognized 195 | #### - All you have to do is provide the path to your file 196 | 197 | ## Creating CustomDataFrame 198 | #### 2 arguments needed: **Path**, **Table Alias** 199 | 200 | ### LOADING data from CSV into CustomDataFrame 201 | ```rust 202 | let csv_path = "C:\\Borivoj\\RUST\\Elusion\\csv_data.csv"; 203 | let df = CustomDataFrame::new(csv_path, "csv_data").await?; 204 | ``` 205 | ### LOADING data from EXCEL into CustomDataFrame 206 | ```rust 207 | let excel_path = "C:\\Borivoj\\RUST\\Elusion\\excel_data.xlsx"; 208 | let df = CustomDataFrame::new(excel_path, "xlsx_data").await?; 209 | ``` 210 | ### LOADING data from PARQUET into CustomDataFrame 211 | ```rust 212 | let parquet_path = "C:\\Borivoj\\RUST\\Elusion\\prod_data.parquet"; 213 | let df = CustomDataFrame::new(parquet_path, "parq_data").await?; 214 | ``` 215 | ### LOADING data from JSON into CustomDataFrame 216 | ```rust 217 | let json_path = "C:\\Borivoj\\RUST\\Elusion\\mongo_data.json"; 218 | let df = CustomDataFrame::new(json_path, "json_data").await?; 219 | ``` 220 | ### LOADING data from DELTA
table into CustomDataFrame 221 | ```rust 222 | let delta_path = "C:\\Borivoj\\RUST\\Elusion\\agg_sales"; // for DELTA you just specify folder name without extension 223 | let df = CustomDataFrame::new(delta_path, "delta_data").await?; 224 | ``` 225 | ### LOADING data from Azure BLOB Storage into CustomDataFrame (**scroll till the end for FULL example**) 226 | ```rust 227 | let df = CustomDataFrame::from_azure_with_sas_token( 228 | blob_url, 229 | sas_token, 230 | Some("folder-name/file-name"), // FILTERING is optional. Can be None if you want to take everything from Container 231 | "data" // alias for registering table 232 | ).await?; 233 | ``` 234 | ### LOADING data from POSTGRES into CustomDataFrame (**scroll till the end for FULL example with config, conn and query**) 235 | ```rust 236 | let df = CustomDataFrame::from_postgres(&conn, query, "df_alias").await?; 237 | ``` 238 | ### LOADING data from MySQL into CustomDataFrame (**scroll till the end for FULL example with config, conn and query**) 239 | ```rust 240 | let df = CustomDataFrame::from_mysql(&conn, query, "df_alias").await?; 241 | ``` 242 | --- 243 | ## CREATE EMPTY DATA FRAME 244 | #### Create empty() DataFrame and populate it with data 245 | ```rust 246 | let temp_df = CustomDataFrame::empty().await?; 247 | 248 | let date_table = temp_df 249 | .datetime_functions([ 250 | "CURRENT_DATE() as current_date", 251 | "DATE_TRUNC('week', CURRENT_DATE()) AS week_start", 252 | "DATE_TRUNC('week', CURRENT_DATE()) + INTERVAL '1 week' AS next_week_start", 253 | "DATE_PART('year', CURRENT_DATE()) AS current_year", 254 | "DATE_PART('week', CURRENT_DATE()) AS current_week_num", 255 | ]) 256 | .elusion("date_table").await?; 257 | 258 | date_table.display().await?; 259 | 260 | RESULT: 261 | +--------------+---------------------+---------------------+--------------+------------------+ 262 | | current_date | week_start | next_week_start | current_year | current_week_num | 263 | +--------------+---------------------+---------------------+--------------+------------------+ 264 | | 2025-03-07 | 2025-03-03T00:00:00 | 2025-03-10T00:00:00 | 2025.0 | 10.0 | 265 | +--------------+---------------------+---------------------+--------------+------------------+ 266 | ``` 267 | --- 268 | ## CREATE DATE TABLE 269 | #### Create Date Table from Range of Dates 270 | ```rust 271 | let date_table = CustomDataFrame::create_date_range_table( 272 | "2025-01-01", // start date 273 | "2025-12-31", // end date 274 | "calendar_2025" // table alias 275 | ).await?; 276 | 277 | date_table.display().await?; 278 | 279 | RESULT: 280 | +------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+---------------+------------+------------+ 281 | | date | year | month | day | quarter | week_num | day_of_week | day_of_week_name | day_of_year | week_start | month_start | quarter_start | year_start | is_weekend | 282 | +------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+---------------+------------+------------+ 283 | | 2025-01-01 | 2025 | 1 | 1 | 1 | 1 | 3 | Wednesday | 1 | 2024-12-29 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 284 | | 2025-01-02 | 2025 | 1 | 2 | 1 | 1 | 4 | Thursday | 2 | 2024-12-29 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 285 | | 2025-01-03 | 2025 | 1 | 3 | 1 | 1 | 5 | Friday | 3 | 2024-12-29 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 286 | | 2025-01-04 | 2025 | 1 | 4 | 1 | 1 | 6 | Saturday | 4 | 
2024-12-29 | 2025-01-01 | 2025-01-01 | 2025-01-01 | true | 287 | | 2025-01-05 | 2025 | 1 | 5 | 1 | 1 | 0 | Sunday | 5 | 2025-01-05 | 2025-01-01 | 2025-01-01 | 2025-01-01 | true | 288 | | 2025-01-06 | 2025 | 1 | 6 | 1 | 2 | 1 | Monday | 6 | 2025-01-05 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 289 | | 2025-01-07 | 2025 | 1 | 7 | 1 | 2 | 2 | Tuesday | 7 | 2025-01-05 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 290 | | 2025-01-08 | 2025 | 1 | 8 | 1 | 2 | 3 | Wednesday | 8 | 2025-01-05 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 291 | | 2025-01-09 | 2025 | 1 | 9 | 1 | 2 | 4 | Thursday | 9 | 2025-01-05 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 292 | | .......... | .... | . | . | . | . | . | ................ | .......... | .......... | .......... | ............. | ...........| .......... | 293 | +------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+---------------+------------+------------+ 294 | ``` 295 | --- 296 | ## CREATE DATE TABLE WITH CUSTOM FORMATS 297 | #### You can create Date Table with Custom formats (ISO, Compact, Human Readable...) and week, month, quarter, year Ranges (start-end) 298 | ```rust 299 | let date_table = CustomDataFrame::create_formatted_date_range_table( 300 | "2025-01-01", // date start 301 | "2025-12-31", // date end 302 | "calendar_2025", // table alias 303 | "date".to_string(), // first column name 304 | DateFormat::HumanReadable, // 1 Jan 2025 305 | true, // Include period ranges (start - end) 306 | Weekday::Mon // Week starts on Monday 307 | ).await?; 308 | 309 | date_table.display().await?; 310 | 311 | RESULT: 312 | +-------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+-------------+-------------+-------------+---------------+-------------+-------------+-------------+ 313 | | date | year | month | day | quarter | week_num | day_of_week | day_of_week_name | day_of_year | is_weekend | week_start | week_end | month_start | month_end | quarter_start | quarter_end | year_start | year_end | 314 | +-------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+-------------+-------------+-------------+---------------+-------------+-------------+-------------+ 315 | | 1 Jan 2025 | 2025 | 1 | 1 | 1 | 1 | 2 | Wednesday | 1 | false | 30 Dec 2024 | 5 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 316 | | 2 Jan 2025 | 2025 | 1 | 2 | 1 | 1 | 3 | Thursday | 2 | false | 30 Dec 2024 | 5 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 317 | | 3 Jan 2025 | 2025 | 1 | 3 | 1 | 1 | 4 | Friday | 3 | false | 30 Dec 2024 | 5 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 318 | | 4 Jan 2025 | 2025 | 1 | 4 | 1 | 1 | 5 | Saturday | 4 | true | 30 Dec 2024 | 5 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 319 | | 5 Jan 2025 | 2025 | 1 | 5 | 1 | 1 | 6 | Sunday | 5 | true | 30 Dec 2024 | 5 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 320 | | 6 Jan 2025 | 2025 | 1 | 6 | 1 | 2 | 0 | Monday | 6 | false | 6 Jan 2025 | 12 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 321 | | 7 Jan 2025 | 2025 | 1 | 7 | 1 | 2 | 1 | Tuesday | 7 | false | 6 Jan 2025 | 12 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 
Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 322 | | 8 Jan 2025 | 2025 | 1 | 8 | 1 | 2 | 2 | Wednesday | 8 | false | 6 Jan 2025 | 12 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 323 | | 9 Jan 2025 | 2025 | 1 | 9 | 1 | 2 | 3 | Thursday | 9 | false | 6 Jan 2025 | 12 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 324 | | ........... | .... | .. | .. | . | . | . | ......... | ... | ..... | ........... | .......... | .......... | ........... | .......... | ........... | .......... | ........... | 325 | +-------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+-------------+-------------+-------------+---------------+-------------+-------------+-------------+ 326 | ``` 327 | ### ALL AVAILABLE DATE FORMATS 328 | ```rust 329 | IsoDate, // YYYY-MM-DD 330 | IsoDateTime, // YYYY-MM-DD HH:MM:SS 331 | UsDate, // MM/DD/YYYY 332 | EuropeanDate, // DD.MM.YYYY 333 | EuropeanDateDash, // DD-MM-YYYY 334 | BritishDate, // DD/MM/YYYY 335 | HumanReadable, // 1 Jan 2025 336 | HumanReadableTime, // 1 Jan 2025 00:00 337 | SlashYMD, // YYYY/MM/DD 338 | DotYMD, // YYYY.MM.DD 339 | CompactDate, // YYYYMMDD 340 | YearMonth, // YYYY-MM 341 | MonthYear, // MM-YYYY 342 | MonthNameYear, // January 2025 343 | Custom(String) // Custom format string 344 | 345 | For Custom Date formats some of the common format specifiers: 346 | %Y - Full year (2025) 347 | %y - Short year (25) 348 | %m - Month as number (01-12) 349 | %b - Abbreviated month name (Jan) 350 | %B - Full month name (January) 351 | %d - Day of month (01-31) 352 | %e - Day of month, space-padded ( 1-31) 353 | %a - Abbreviated weekday name (Mon) 354 | %A - Full weekday name (Monday) 355 | %H - Hour (00-23) 356 | %I - Hour (01-12) 357 | %M - Minute (00-59) 358 | %S - Second (00-59) 359 | %p - AM/PM 360 | 361 | EXAMPLES: 362 | DateFormat::Custom("%d %b %Y %H:%M".to_string()), // "01 Jan 2025 00:00" 363 | // ISO 8601 with T separator and timezone 364 | DateFormat::Custom("%Y-%m-%dT%H:%M:%S%z".to_string()) 365 | // US date with 12-hour time 366 | DateFormat::Custom("%m/%d/%Y %I:%M %p".to_string()) 367 | // Custom format with weekday 368 | DateFormat::Custom("%A, %B %e, %Y".to_string()) // "Monday, January 1, 2025" 369 | ``` 370 | --- 371 | ## CREATE VIEWS and CACHING 372 | ### Materialized Views: 373 | For long-term storage of complex query results. When results need to be referenced by name. For data that changes infrequently. Example: Monthly sales summaries, customer metrics, product analytics 374 | ### Query Caching: 375 | For transparent performance optimization. When the same query might be run multiple times in a session. For interactive analysis scenarios. Example: Dashboard queries, repeated data exploration. 
376 | ```rust 377 | let sales = "C:\\Borivoj\\RUST\\Elusion\\SalesData2022.csv"; 378 | let products = "C:\\Borivoj\\RUST\\Elusion\\Products.csv"; 379 | let customers = "C:\\Borivoj\\RUST\\Elusion\\Customers.csv"; 380 | 381 | let sales_df = CustomDataFrame::new(sales, "s").await?; 382 | let customers_df = CustomDataFrame::new(customers, "c").await?; 383 | let products_df = CustomDataFrame::new(products, "p").await?; 384 | 385 | // Example 1: Using materialized view for customer count 386 | // The TTL parameter (3600) specifies how long the view remains valid in seconds (1 hour) 387 | customers_df 388 | .select(["COUNT(*) as count"]) 389 | .limit(10) 390 | .create_view("customer_count_view", Some(3600)) 391 | .await?; 392 | 393 | // Access the view by name - no recomputation needed 394 | let customer_count = CustomDataFrame::from_view("customer_count_view").await?; 395 | 396 | // Example 2: Using query caching with complex joins and aggregations 397 | // First execution computes and stores the result 398 | let join_result = sales_df 399 | .join_many([ 400 | (customers_df.clone(), ["s.CustomerKey = c.CustomerKey"], "INNER"), 401 | (products_df.clone(), ["s.ProductKey = p.ProductKey"], "INNER"), 402 | ]) 403 | .select(["c.CustomerKey", "c.FirstName", "c.LastName", "p.ProductName"]) 404 | .agg([ 405 | "SUM(s.OrderQuantity) AS total_quantity", 406 | "AVG(s.OrderQuantity) AS avg_quantity" 407 | ]) 408 | .group_by(["c.CustomerKey", "c.FirstName", "c.LastName", "p.ProductName"]) 409 | .having_many([ 410 | ("total_quantity > 10"), 411 | ("avg_quantity < 100") 412 | ]) 413 | .order_by_many([ 414 | ("total_quantity", true), 415 | ("p.ProductName", false) 416 | ]) 417 | .elusion_with_cache("sales_join") // caching query with DataFrame alias 418 | .await?; 419 | 420 | join_result.display().await?; 421 | 422 | // Other useful cache/view management functions: 423 | CustomDataFrame::invalidate_cache(&["table_name".to_string()]); // Clear cache for specific tables 424 | CustomDataFrame::clear_cache(); // Clear entire cache 425 | CustomDataFrame::refresh_view("view_name").await?; // Refresh a materialized view 426 | CustomDataFrame::drop_view("view_name").await?; // Remove a materialized view 427 | CustomDataFrame::list_views().await; // Get info about all views 428 | ``` 429 | --- 430 | # DATAFRAME WRANGLING 431 | --- 432 | ## SELECT 433 | ### ALIAS column names in SELECT() function (AS is case insensitive) 434 | ```rust 435 | let df_AS = select_df 436 | .select(["CustomerKey AS customerkey_alias", "FirstName as first_name", "LastName", "EmailAddress"]); 437 | 438 | let df_select_all = select_df.select(["*"]); 439 | 440 | let df_count_all = select_df.select(["COUNT(*)"]); 441 | 442 | let df_distinct = select_df.select(["DISTINCT(column_name) as distinct_values"]); 443 | ``` 444 | --- 445 | ## Where to use which Functions: 446 | ### Scalar and Operators -> in SELECT() function 447 | ### Aggregation Functions -> in AGG() function 448 | ### String Column Functions -> in STRING_FUNCTIONS() function 449 | ### DateTime Functions -> in DATETIME_FUNCTIONS() function 450 | --- 451 | ### Numerical Operators (supported +, -, * , / , %) 452 | ```rust 453 | let num_ops_sales = sales_order_df 454 | .select([ 455 | "customer_name", 456 | "order_date", 457 | "billable_value", 458 | "billable_value * 2 AS double_billable_value", // Multiplication 459 | "billable_value / 100 AS percentage_billable" // Division 460 | ]) 461 | .filter("billable_value > 100.0") 462 | .order_by(["order_date"], [true]) 463 | .limit(10); 464 | 465 
| let num_ops_res = num_ops_sales.elusion("scalar_df").await?; 466 | num_ops_res.display().await?; 467 | ``` 468 | ### FILTER (evaluated before aggregations) 469 | ```rust 470 | let filter_df = sales_order_df 471 | .select(["customer_name", "order_date", "billable_value"]) 472 | .filter_many([("order_date > '2021-07-04'"), ("billable_value > 100.0")]) 473 | .order_by(["order_date"], [true]) 474 | .limit(10); 475 | 476 | let filtered = filter_df.elusion("result_sales").await?; 477 | filtered.display().await?; 478 | 479 | // example 2 480 | const FILTER_CUSTOMER: &str = "customer_name == 'Customer IRRVL'"; 481 | 482 | let filter_query = sales_order_df 483 | .select([ 484 | "customer_name", 485 | "order_date", 486 | "ABS(billable_value) AS abs_billable_value", 487 | "ROUND(SQRT(billable_value), 2) AS SQRT_billable_value", 488 | "billable_value * 2 AS double_billable_value", // Multiplication 489 | "billable_value / 100 AS percentage_billable" // Division 490 | ]) 491 | .agg([ 492 | "ROUND(AVG(ABS(billable_value)), 2) AS avg_abs_billable", 493 | "SUM(billable_value) AS total_billable", 494 | "MAX(ABS(billable_value)) AS max_abs_billable", 495 | "SUM(billable_value) * 2 AS double_total_billable", // Operator-based aggregation 496 | "SUM(billable_value) / 100 AS percentage_total_billable" // Operator-based aggregation 497 | ]) 498 | .filter(FILTER_CUSTOMER) 499 | .group_by_all() 500 | .order_by_many([ 501 | ("total_billable", false), // Order by total_billable descending 502 | ("max_abs_billable", true), // Then by max_abs_billable ascending 503 | ]); 504 | ``` 505 | ### HAVING (evaluated after aggregations) 506 | ```rust 507 | // Example 1 with aggregated column names 508 | let example1 = sales_df 509 | .join_many([ 510 | (customers_df, ["s.CustomerKey = c.CustomerKey"], "INNER"), 511 | (products_df, ["s.ProductKey = p.ProductKey"], "INNER"), 512 | ]) 513 | .select(["c.CustomerKey", "c.FirstName", "c.LastName", "p.ProductName"]) 514 | .agg([ 515 | "SUM(s.OrderQuantity) AS total_quantity", 516 | "AVG(s.OrderQuantity) AS avg_quantity" 517 | ]) 518 | .group_by(["c.CustomerKey", "c.FirstName", "c.LastName", "p.ProductName"]) 519 | .having_many([ 520 | ("total_quantity > 10"), 521 | ("avg_quantity < 100") 522 | ]) 523 | .order_by_many([ 524 | ("total_quantity", true), 525 | ("p.ProductName", false) 526 | ]); 527 | 528 | let result = example1.elusion("sales_res").await?; 529 | result.display().await?; 530 | 531 | // Example 2 with aggregation in HAVING 532 | let df_having = sales_df 533 | .join(customers_df, ["s.CustomerKey = c.CustomerKey"], 534 | "INNER" 535 | ) 536 | .select(["c.CustomerKey", "c.FirstName", "c.LastName"]) 537 | .agg([ 538 | "SUM(s.OrderQuantity) AS total_quantity", 539 | "AVG(s.OrderQuantity) AS avg_quantity" 540 | ]) 541 | .group_by(["c.CustomerKey", "c.FirstName", "c.LastName"]) 542 | .having_many([ 543 | ("SUM(s.OrderQuantity) > 10"), 544 | ("AVG(s.OrderQuantity) < 100") 545 | ]) 546 | .order_by(["total_quantity"], [true]) 547 | .limit(5); 548 | 549 | let result = df_having.elusion("sales_res").await?; 550 | result.display().await?; 551 | ``` 552 | ### SCALAR functions 553 | ```rust 554 | let scalar_df = sales_order_df 555 | .select([ 556 | "customer_name", 557 | "order_date", 558 | "ABS(billable_value) AS abs_billable_value", 559 | "ROUND(SQRT(billable_value), 2) AS SQRT_billable_value"]) 560 | .filter("billable_value > 100.0") 561 | .order_by(["order_date"], [true]) 562 | .limit(10); 563 | 564 | let scalar_res = scalar_df.elusion("scalar_df").await?; 565
scalar_res.display().await?; 566 | ``` 567 | ### AGGREGATE functions with nested Scalar functions 568 | ```rust 569 | let scalar_df = sales_order_df 570 | .select([ 571 | "customer_name", 572 | "order_date" 573 | ]) 574 | .agg([ 575 | "ROUND(AVG(ABS(billable_value)), 2) AS avg_abs_billable", 576 | "SUM(billable_value) AS total_billable", 577 | "MAX(ABS(billable_value)) AS max_abs_billable", 578 | "SUM(billable_value) * 2 AS double_total_billable", // Operator-based aggregation 579 | "SUM(billable_value) / 100 AS percentage_total_billable" // Operator-based aggregation 580 | ]) 581 | .group_by(["customer_name", "order_date"]) 582 | .filter("billable_value > 100.0") 583 | .order_by(["order_date"], [true]) 584 | .limit(10); 585 | 586 | let scalar_res = scalar_df.elusion("scalar_df").await?; 587 | scalar_res.display().await?; 588 | ``` 589 | ### STRING functions 590 | ```rust 591 | let df = sales_df 592 | .select(["FirstName", "LastName"]) 593 | .string_functions([ 594 | "'New' AS new_old_customer", 595 | "TRIM(c.EmailAddress) AS trimmed_email", 596 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 597 | ]); 598 | 599 | let result_df = df.elusion("df").await?; 600 | result_df.display().await?; 601 | ``` 602 | ### Numerical Operators, Scalar Functions, Aggregated Functions... 603 | ```rust 604 | let mix_query = sales_order_df 605 | .select([ 606 | "customer_name", 607 | "order_date", 608 | "ABS(billable_value) AS abs_billable_value", 609 | "ROUND(SQRT(billable_value), 2) AS SQRT_billable_value", 610 | "billable_value * 2 AS double_billable_value", // Multiplication 611 | "billable_value / 100 AS percentage_billable" // Division 612 | ]) 613 | .agg([ 614 | "ROUND(AVG(ABS(billable_value)), 2) AS avg_abs_billable", 615 | "SUM(billable_value) AS total_billable", 616 | "MAX(ABS(billable_value)) AS max_abs_billable", 617 | "SUM(billable_value) * 2 AS double_total_billable", // Operator-based aggregation 618 | "SUM(billable_value) / 100 AS percentage_total_billable" // Operator-based aggregation 619 | ]) 620 | .filter("billable_value > 50.0") 621 | .group_by_all() 622 | .order_by_many([ 623 | ("total_billable", false), // Order by total_billable descending 624 | ("max_abs_billable", true), // Then by max_abs_billable ascending 625 | ]); 626 | 627 | let mix_res = mix_query.elusion("scalar_df").await?; 628 | mix_res.display().await?; 629 | ``` 630 | --- 631 | ### Supported Aggregation functions 632 | ```rust 633 | SUM, AVG, MEAN, MEDIAN, MIN, COUNT, MAX, 634 | LAST_VALUE, FIRST_VALUE, 635 | GROUPING, STRING_AGG, ARRAY_AGG, VAR, VAR_POP, 636 | VAR_POPULATION, VAR_SAMP, VAR_SAMPLE, 637 | BIT_AND, BIT_OR, BIT_XOR, BOOL_AND, BOOL_OR 638 | ``` 639 | ### Supported Scalar Math Functions 640 | ```rust 641 | ABS, FLOOR, CEIL, SQRT, ISNAN, ISZERO, 642 | PI, POW, POWER, RADIANS, RANDOM, ROUND, 643 | FACTORIAL, ACOS, ACOSH, ASIN, ASINH, 644 | COS, COSH, COT, DEGREES, EXP, 645 | SIN, SINH, TAN, TANH, TRUNC, CBRT, 646 | ATAN, ATAN2, ATANH, GCD, LCM, LN, 647 | LOG, LOG10, LOG2, NANVL, SIGNUM 648 | ``` 649 | --- 650 | ## JOIN 651 | #### JOIN examples with single condition and 2 dataframes, AGGREGATION, GROUP BY 652 | ```rust 653 | let single_join = df_sales 654 | .join(df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER") 655 | .select(["s.OrderDate","c.FirstName", "c.LastName"]) 656 | .agg([ 657 | "SUM(s.OrderQuantity) AS total_quantity", 658 | "AVG(s.OrderQuantity) AS avg_quantity", 659 | ]) 660 | .group_by(["s.OrderDate","c.FirstName","c.LastName"]) 661 | .having("total_quantity > 10") 662 | 
.order_by(["total_quantity"], [false]) // true is ascending, false is descending 663 | .limit(10); 664 | 665 | let join_df1 = single_join.elusion("result_query").await?; 666 | join_df1.display().await?; 667 | ``` 668 | ### JOIN with single conditions and 3 dataframes, AGGREGATION, GROUP BY, HAVING, SELECT, ORDER BY 669 | ```rust 670 | let many_joins = df_sales 671 | .join_many([ 672 | (df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER"), 673 | (df_products, ["s.ProductKey = p.ProductKey"], "INNER"), 674 | ]) 675 | .select([ 676 | "c.CustomerKey","c.FirstName","c.LastName","p.ProductName", 677 | ]) 678 | .agg([ 679 | "SUM(s.OrderQuantity) AS total_quantity", 680 | "AVG(s.OrderQuantity) AS avg_quantity", 681 | ]) 682 | .group_by(["c.CustomerKey", "c.FirstName", "c.LastName", "p.ProductName"]) 683 | .having_many([("total_quantity > 10"), ("avg_quantity < 100")]) 684 | .order_by_many([ 685 | ("total_quantity", true), // true is ascending 686 | ("p.ProductName", false) // false is descending 687 | ]) 688 | .limit(10); 689 | 690 | let join_df3 = many_joins.elusion("df_joins").await?; 691 | join_df3.display().await?; 692 | ``` 693 | ### JOIN with multiple conditions and 2 data frames 694 | ```rust 695 | let result_join = orders_df 696 | .join( 697 | customers_df, 698 | ["o.CustomerID = c.CustomerID" , "o.RegionID = c.RegionID"], 699 | "INNER" 700 | ) 701 | .select([ 702 | "o.OrderID", 703 | "c.Name", 704 | "o.OrderDate" 705 | ]) 706 | .string_functions([ 707 | "CONCAT(TRIM(c.Name), ' (', c.Email, ')') AS customer_info", 708 | "UPPER(c.Status) AS customer_status", 709 | "LEFT(c.Email, POSITION('@' IN c.Email) - 1) AS username" 710 | ]) 711 | .agg([ 712 | "SUM(o.Amount) AS total_amount", 713 | "AVG(o.Quantity) AS avg_quantity", 714 | "COUNT(DISTINCT o.OrderID) AS order_count", 715 | "MAX(o.Amount) AS max_amount", 716 | "MIN(o.Amount) AS min_amount" 717 | ]) 718 | .group_by([ 719 | "o.OrderID", 720 | "c.Name", 721 | "o.OrderDate", 722 | "c.Email", 723 | "c.Status" 724 | ]); 725 | 726 | let res_joins = result_join.elusion("one_join").await?; 727 | res_joins.display().await?; 728 | ``` 729 | ### JOIN_MANY with multiple conditions and 3 data frames 730 | ```rust 731 | let result_join_many = order_join_df 732 | .join_many([ 733 | (customer_join_df, 734 | ["o.CustomerID = c.CustomerID" , "o.RegionID = c.RegionID"], 735 | "INNER" 736 | ), 737 | (regions_join_df, 738 | ["c.RegionID = r.RegionID" , "r.IsActive = true"], 739 | "INNER" 740 | ) 741 | ]) 742 | .select(["o.OrderID","c.Name","r.RegionName", "r.CountryID"]) 743 | .string_functions([ 744 | "CONCAT(r.RegionName, ' (', r.CountryID, ')') AS region_info", 745 | 746 | "CASE c.CreditLimit 747 | WHEN 1000 THEN 'Basic' 748 | WHEN 2000 THEN 'Premium' 749 | ELSE 'Standard' 750 | END AS credit_tier", 751 | 752 | "CASE 753 | WHEN c.CreditLimit > 2000 THEN 'High' 754 | WHEN c.CreditLimit > 1000 THEN 'Medium' 755 | ELSE 'Low' 756 | END AS credit_status", 757 | 758 | "CASE 759 | WHEN o.Amount > 1000 AND c.Status = 'active' THEN 'Priority' 760 | WHEN o.Amount > 500 THEN 'Regular' 761 | ELSE 'Standard' 762 | END AS order_priority", 763 | 764 | "CASE r.RegionName 765 | WHEN 'East Coast' THEN 'Eastern' 766 | WHEN 'West Coast' THEN 'Western' 767 | ELSE 'Other' 768 | END AS region_category", 769 | 770 | "CASE 771 | WHEN EXTRACT(DOW FROM o.OrderDate) IN (0, 6) THEN 'Weekend' 772 | ELSE 'Weekday' 773 | END AS order_day_type" 774 | ]) 775 | .agg([ 776 | "SUM(o.Amount) AS total_amount", 777 | "COUNT(*) AS row_count", 778 | "SUM(o.Amount * (1 - o.Discount/100)) AS 
net_amount", 779 | "ROUND(SUM(o.Amount) / COUNT(*), 2) AS avg_order_value", 780 | "SUM(o.Amount * r.TaxRate) AS total_tax" 781 | ]) 782 | .group_by_all() 783 | .having("total_amount > 200") 784 | .order_by(["total_amount"], [false]); 785 | 786 | let res_joins_many = result_join_many.elusion("many_join").await?; 787 | res_joins_many.display().await?; 788 | ``` 789 | ### JOIN_MANY with single condition and 3 dataframes, STRING FUNCTIONS, AGGREGATION, GROUP BY, HAVING_MANY, ORDER BY 790 | ```rust 791 | 792 | let str_func_joins = df_sales 793 | .join_many([ 794 | (df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER"), 795 | (df_products, ["s.ProductKey = p.ProductKey"], "INNER"), 796 | ]) 797 | .select([ 798 | "c.CustomerKey", 799 | "c.FirstName", 800 | "c.LastName", 801 | "c.EmailAddress", 802 | "p.ProductName", 803 | ]) 804 | .string_functions([ 805 | "TRIM(c.EmailAddress) AS trimmed_email_address", 806 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 807 | "LEFT(p.ProductName, 15) AS short_product_name", 808 | "RIGHT(p.ProductName, 5) AS end_product_name", 809 | ]) 810 | .agg([ 811 | "COUNT(p.ProductKey) AS product_count", 812 | "SUM(s.OrderQuantity) AS total_order_quantity", 813 | ]) 814 | .group_by_all() 815 | .having_many([("total_order_quantity > 10"), ("product_count >= 1")]) 816 | .order_by_many([ 817 | ("total_order_quantity", true), 818 | ("p.ProductName", false) 819 | ]); 820 | 821 | let join_str_df3 = str_func_joins.elusion("df_joins").await?; 822 | join_str_df3.display().await?; 823 | ``` 824 | #### Currently implemented join types 825 | ```rust 826 | "INNER", "LEFT", "RIGHT", "FULL", 827 | "LEFT SEMI", "RIGHT SEMI", 828 | "LEFT ANTI", "RIGHT ANTI", "LEFT MARK" 829 | ``` 830 | --- 831 | ### STRING FUNCTIONS 832 | ```rust 833 | let string_functions_df = df_sales 834 | .join_many([ 835 | (df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER"), 836 | (df_products, ["s.ProductKey = p.ProductKey"], "INNER"), 837 | ]) 838 | .select([ 839 | "c.CustomerKey", 840 | "c.FirstName", 841 | "c.LastName", 842 | "c.EmailAddress", 843 | "p.ProductName" 844 | ]) 845 | .string_functions([ 846 | // Basic String Functions 847 | "TRIM(c.EmailAddress) AS trimmed_email", 848 | "LTRIM(c.EmailAddress) AS left_trimmed_email", 849 | "RTRIM(c.EmailAddress) AS right_trimmed_email", 850 | "UPPER(c.FirstName) AS upper_first_name", 851 | "LOWER(c.LastName) AS lower_last_name", 852 | "LENGTH(c.EmailAddress) AS email_length", 853 | "LEFT(p.ProductName, 10) AS product_start", 854 | "RIGHT(p.ProductName, 10) AS product_end", 855 | "SUBSTRING(p.ProductName, 1, 5) AS product_substr", 856 | // Concatenation 857 | "CONCAT(c.FirstName, ' ', c.LastName) AS full_name", 858 | "CONCAT_WS(' ', c.FirstName, c.LastName, c.EmailAddress) AS all_info", 859 | // Position and Search 860 | "POSITION('@' IN c.EmailAddress) AS at_symbol_pos", 861 | "STRPOS(c.EmailAddress, '@') AS email_at_pos", 862 | // Replacement and Modification 863 | "REPLACE(c.EmailAddress, '@adventure-works.com', '@newdomain.com') AS new_email", 864 | "TRANSLATE(c.FirstName, 'AEIOU', '12345') AS vowels_replaced", 865 | "REPEAT('*', 5) AS stars", 866 | "REVERSE(c.FirstName) AS reversed_name", 867 | // Padding 868 | "LPAD(c.CustomerKey::TEXT, 10, '0') AS padded_customer_id", 869 | "RPAD(c.FirstName, 20, '.') AS padded_name", 870 | // Case Formatting 871 | "INITCAP(LOWER(c.FirstName)) AS proper_case_name", 872 | // String Extraction 873 | "SPLIT_PART(c.EmailAddress, '@', 1) AS email_username", 874 | // Type Conversion 875 | 
"TO_CHAR(s.OrderDate, 'YYYY-MM-DD') AS formatted_date" 876 | ]) 877 | .agg([ 878 | "COUNT(*) AS total_records", 879 | "STRING_AGG(p.ProductName, ', ') AS all_products" 880 | ]) 881 | .filter("c.emailaddress IS NOT NULL") 882 | .group_by_all() 883 | .having("COUNT(*) > 1") 884 | .order_by(["c.CustomerKey"], [true]); 885 | 886 | let str_df = string_functions_df.elusion("df_joins").await?; 887 | str_df.display().await?; 888 | ``` 889 | #### Currently Available String functions 890 | ```rust 891 | 1.Basic String Functions: 892 | TRIM() - Remove leading/trailing spaces 893 | LTRIM() - Remove leading spaces 894 | RTRIM() - Remove trailing spaces 895 | UPPER() - Convert to uppercase 896 | LOWER() - Convert to lowercase 897 | LENGTH() or LEN() - Get string length 898 | LEFT() - Extract leftmost characters 899 | RIGHT() - Extract rightmost characters 900 | SUBSTRING() - Extract part of string 901 | 2. String concatenation: 902 | CONCAT() - Concatenate strings 903 | CONCAT_WS() - Concatenate with separator 904 | 3. String Position and Search: 905 | POSITION() - Find position of substring 906 | STRPOS() - Find position of substring 907 | INSTR() - Find position of substring 908 | LOCATE() - Find position of substring 909 | 4. String Replacement and Modification: 910 | REPLACE() - Replace all occurrences of substring 911 | TRANSLATE() - Replace characters 912 | OVERLAY() - Replace portion of string 913 | REPEAT() - Repeat string 914 | REVERSE() - Reverse string characters 915 | 5. String Pattern Matching: 916 | LIKE() - Pattern matching with wildcards 917 | REGEXP() or RLIKE() - Pattern matching with regular expressions 918 | 6. String Padding: 919 | LPAD() - Pad string on left 920 | RPAD() - Pad string on right 921 | SPACE() - Generate spaces 922 | 7. String Case Formatting: 923 | INITCAP() - Capitalize first letter of each word 924 | 8. String Extraction: 925 | SPLIT_PART() - Split string and get nth part 926 | SUBSTR() - Get substring 927 | 9. String Type Conversion: 928 | TO_CHAR() - Convert to string 929 | CAST() - Type conversion 930 | CONVERT() - Type conversion 931 | 10. 
Control Flow: 932 | CASE() 933 | ``` 934 | --- 935 | ### DATETIME FUNCTIONS 936 | #### Work best with YYYY-MM-DD format 937 | ```rust 938 | let dt_query = sales_order_df 939 | .select([ 940 | "customer_name", 941 | "order_date", 942 | "delivery_date" 943 | ]) 944 | .datetime_functions([ 945 | // Current date/time comparisons 946 | "CURRENT_DATE() AS today", 947 | "CURRENT_TIME() AS current_time", 948 | "CURRENT_TIMESTAMP() AS now", 949 | "NOW() AS now_timestamp", 950 | "TODAY() AS today_timestamp", 951 | 952 | // Date binning (for time-series analysis) 953 | "DATE_BIN('1 week', order_date, MAKE_DATE(2020, 1, 1)) AS weekly_bin", 954 | "DATE_BIN('1 month', order_date, MAKE_DATE(2020, 1, 1)) AS monthly_bin", 955 | 956 | // Date formatting 957 | "DATE_FORMAT(order_date, '%Y-%m-%d') AS formatted_date", 958 | "DATE_FORMAT(order_date, '%Y/%m/%d') AS formatted_date_alt", 959 | 960 | // Basic date components 961 | "DATE_PART('year', order_date) AS year", 962 | "DATE_PART('month', order_date) AS month", 963 | "DATE_PART('day', order_date) AS day", 964 | 965 | // Quarters and weeks 966 | "DATE_PART('quarter', order_date) AS order_quarter", 967 | "DATE_PART('week', order_date) AS order_week", 968 | 969 | // Day of week/year 970 | "DATE_PART('dow', order_date) AS day_of_week", 971 | "DATE_PART('doy', order_date) AS day_of_year", 972 | 973 | // Analysis 974 | "DATE_PART('day', delivery_date - order_date) AS delivery_days", 975 | "DATE_PART('day', CURRENT_DATE() - order_date) AS days_since_order", 976 | 977 | // Date truncation (alternative syntax) 978 | "DATE_TRUNC('week', order_date) AS week_start", 979 | "DATE_TRUNC('quarter', order_date) AS quarter_start", 980 | "DATE_TRUNC('month', order_date) AS month_start", 981 | "DATE_TRUNC('year', order_date) AS year_start", 982 | 983 | // Complex date calculations 984 | "CASE 985 | WHEN DATE_PART('month', order_date) <= 3 THEN 'Q1' 986 | WHEN DATE_PART('month', order_date) <= 6 THEN 'Q2' 987 | WHEN DATE_PART('month', order_date) <= 9 THEN 'Q3' 988 | ELSE 'Q4' 989 | END AS fiscal_quarter", 990 | 991 | // Date comparisons with current date 992 | "CASE 993 | WHEN order_date = CURRENT_DATE() THEN 'Today' 994 | WHEN DATE_PART('day', CURRENT_DATE() - order_date) <= 7 THEN 'Last Week' 995 | WHEN DATE_PART('day', CURRENT_DATE() - order_date) <= 30 THEN 'Last Month' 996 | ELSE 'Older' 997 | END AS order_recency", 998 | 999 | // Time windows 1000 | "CASE 1001 | WHEN DATE_BIN('1 week', order_date, CURRENT_DATE()) = DATE_BIN('1 week', CURRENT_DATE(), CURRENT_DATE()) 1002 | THEN 'This Week' 1003 | ELSE 'Previous Weeks' 1004 | END AS week_window", 1005 | 1006 | // Fiscal year calculations 1007 | "CASE 1008 | WHEN DATE_PART('month', order_date) >= 7 1009 | THEN DATE_PART('year', order_date) + 1 1010 | ELSE DATE_PART('year', order_date) 1011 | END AS fiscal_year", 1012 | 1013 | // Complex date logic - modified to work with Date32 1014 | "CASE 1015 | WHEN order_date < MAKE_DATE(2024, 1, 1) THEN 'Past' 1016 | ELSE 'Present' 1017 | END AS temporal_status", 1018 | 1019 | "CASE 1020 | WHEN DATE_PART('hour', CURRENT_TIMESTAMP()) < 12 THEN 'Morning' 1021 | ELSE 'Afternoon' 1022 | END AS time_of_day" 1023 | ]) 1024 | .order_by(["order_date"], [false]) 1025 | 1026 | let dt_res = dt_query.elusion("datetime_df").await?; 1027 | dt_res.display().await?; 1028 | ``` 1029 | #### Currently Available DateTime Functions 1030 | ```rust 1031 | CURRENT_DATE() 1032 | CURRENT_TIME() 1033 | CURRENT_TIMESTAMP() 1034 | NOW() 1035 | TODAY() 1036 | DATE_PART() 1037 | DATE_TRUNC() 1038 | DATE_BIN() 1039 | 
MAKE_DATE() 1040 | DATE_FORMAT() 1041 | ``` 1042 | --- 1043 | ### WINDOW functions 1044 | #### Aggregate, Ranking and Analytical functions 1045 | ```rust 1046 | let window_query = df_sales 1047 | .join(df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER") 1048 | .select(["s.OrderDate","c.FirstName","c.LastName","s.OrderQuantity"]) 1049 | //aggregated window functions 1050 | .window("SUM(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) as running_total") 1051 | .window("AVG(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS running_avg") 1052 | .window("MIN(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS running_min") 1053 | .window("MAX(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS running_max") 1054 | .window("COUNT(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS running_count") 1055 | //ranking window functions 1056 | .window("ROW_NUMBER() OVER (ORDER BY c.CustomerKey) AS customer_index") 1057 | .window("ROW_NUMBER() OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) as row_num") 1058 | .window("DENSE_RANK() OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS dense_rnk") 1059 | .window("PERCENT_RANK() OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS pct_rank") 1060 | .window("CUME_DIST() OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS cume_dist") 1061 | .window("NTILE(4) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS quartile") 1062 | // analytical window functions 1063 | .window("FIRST_VALUE(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS first_qty") 1064 | .window("LAST_VALUE(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS last_qty") 1065 | .window("LAG(s.OrderQuantity, 1, 0) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS prev_qty") 1066 | .window("LEAD(s.OrderQuantity, 1, 0) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS next_qty") 1067 | .window("NTH_VALUE(s.OrderQuantity, 3) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS third_qty"); 1068 | 1069 | let window_df = window_query.elusion("result_window").await?; 1070 | window_df.display().await?; 1071 | ``` 1072 | #### Rolling Window Functions 1073 | ```rust 1074 | let rollin_query = df_sales 1075 | .join(df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER") 1076 | .select(["s.OrderDate", "c.FirstName", "c.LastName", "s.OrderQuantity"]) 1077 | //aggregated rolling windows 1078 | .window("SUM(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate 1079 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total") 1080 | .window("AVG(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate 1081 | ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS full_partition_avg"); 1082 | 1083 | let rollin_df = rollin_query.elusion("rollin_result").await?; 1084 | rollin_df.display().await?; 1085 | ``` 1086 | --- 1087 | ## JSON functions 1088 | ### .json() 1089 | #### function works with Columns that only have simple JSON values 1090 | #### example json structure: [{"Key1":"Value1","Key2":"Value2","Key3":"Value3"}] 1091 | #### example usage 1092 | ```rust 1093 | let path = "C:\\Borivoj\\RUST\\Elusion\\jsonFile.csv"; 1094 | let json_df = CustomDataFrame::new(path, "j").await?; 1095 | 1096 | let df_extracted = json_df.json([ 1097 | "ColumnName.'$Key1' AS column_name_1", 1098 | "ColumnName.'$Key2' AS column_name_2", 1099 | "ColumnName.'$Key3' AS 
column_name_3" 1100 | ]) 1101 | .select(["some_column1", "some_column2"]) 1102 | .elusion("json_extract").await?; 1103 | 1104 | df_extracted.display().await?; 1105 | ``` 1106 | ```rust 1107 | RESULT: 1108 | +---------------+---------------+---------------+---------------+---------------+ 1109 | | column_name_1 | column_name_2 | column_name_3 | some_column1 | some_column2 | 1110 | +---------------+---------------+---------------+---------------+---------------+ 1111 | | registrations | 2022-09-15 | CustomerCode | 779-0009E3370 | 646443D134762 | 1112 | | registrations | 2023-09-11 | CustomerCode | 770-00009ED61 | 463497C334762 | 1113 | | registrations | 2017-10-01 | CustomerCode | 889-000049C9E | 634697C134762 | 1114 | | registrations | 2019-03-26 | CustomerCode | 000-00006C4D5 | 446397D134762 | 1115 | | registrations | 2021-08-31 | CustomerCode | 779-0009E3370 | 463643D134762 | 1116 | | registrations | 2019-05-09 | CustomerCode | 770-00009ED61 | 634697C934762 | 1117 | | registrations | 2005-10-24 | CustomerCode | 889-000049C9E | 123397C334762 | 1118 | | registrations | 2023-02-14 | CustomerCode | 000-00006C4D5 | 932393D134762 | 1119 | | registrations | 2021-01-20 | CustomerCode | 779-0009E3370 | 323297C334762 | 1120 | | registrations | 2018-07-17 | CustomerCode | 000-00006C4D5 | 322097C921462 | 1121 | +---------------+---------------+---------------+---------------+---------------+ 1122 | ``` 1123 | ### .json_array() 1124 | #### function works with Columns that has Array of objects with pathern "column.'$ValueField:IdField=IdValue' AS column_alias" 1125 | The function parameters: 1126 | column: The column containing the JSON array 1127 | ValueField: The field to extract from matching objects 1128 | IdField: The field to use as identifier 1129 | IdValue: The value to match on the identifier field 1130 | column_alias: The output column name 1131 | 1132 | #### example json structure 1133 | ```rust 1134 | [ 1135 | {"Id":"Date","Value":"2022-09-15","ValueKind":"Date"}, 1136 | {"Id":"MadeBy","Value":"Borivoj Grujicic","ValueKind":"Text"}, 1137 | {"Id":"Timeline","Value":1.0,"ValueKind":"Number"}, 1138 | {"Id":"ETR_1","Value":1.0,"ValueKind":"Number"} 1139 | ] 1140 | ``` 1141 | #### example usage 1142 | ```rust 1143 | let multiple_values = df_json.json_array([ 1144 | "Value.'$Value:Id=Date' AS date", 1145 | "Value.'$Value:Id=MadeBy' AS made_by", 1146 | "Value.'$Value:Id=Timeline' AS timeline", 1147 | "Value.'$Value:Id=ETR_1' AS etr_1", 1148 | "Value.'$Value:Id=ETR_2' AS etr_2", 1149 | "Value.'$Value:Id=ETR_3' AS etr_3" 1150 | ]) 1151 | .select(["Id"]) 1152 | .elusion("multiple_values") 1153 | .await?; 1154 | 1155 | multiple_values.display().await?; 1156 | 1157 | RESULT: 1158 | +-----------------+-------------------+----------+-------+-------+-------+--------+ 1159 | | date | made_by | timeline | etr_1 | etr_2 | etr_3 | id | 1160 | +-----------------+-------------------+----------+-------+-------+-------+--------+ 1161 | | 2022-09-15 | Borivoj Grujicic | 1.0 | 1.0 | 1.0 | 1.0 | 77E10C | 1162 | | 2023-09-11 | | 5.0 | | | | 770C24 | 1163 | | 2017-10-01 | | | | | | 7795FA | 1164 | | 2019-03-26 | | 1.0 | | | | 77F2E6 | 1165 | | 2021-08-31 | | 5.0 | | | | 77926E | 1166 | | 2019-05-09 | | | | | | 77CC0F | 1167 | | 2005-10-24 | | | | | | 7728BA | 1168 | | 2023-02-14 | | | | | | 77F7F8 | 1169 | | 2021-01-20 | | | | | | 7731F6 | 1170 | | 2018-07-17 | | 3.0 | | | | 77FB18 | 1171 | +-----------------+-------------------+----------+-------+-------+-------+--------+ 1172 | ``` 1173 | --- 1174 | ## APPEND, 
APPEND_MANY
1175 | #### APPEND: Combines rows from two dataframes, keeping all rows
1176 | #### APPEND_MANY: Combines rows from many dataframes, keeping all rows
1177 | ```rust
1178 | let df1 = "C:\\Borivoj\\RUST\\Elusion\\API\\df1.json";
1179 | let df2 = "C:\\Borivoj\\RUST\\Elusion\\API\\df2.json";
1180 | let df3 = "C:\\Borivoj\\RUST\\Elusion\\API\\df3.json";
1181 | let df4 = "C:\\Borivoj\\RUST\\Elusion\\API\\df4.json";
1182 | let df5 = "C:\\Borivoj\\RUST\\Elusion\\API\\df5.json";
1183 | 
1184 | let df1 = CustomDataFrame::new(df1, "msales1").await?;
1185 | let df2 = CustomDataFrame::new(df2, "msales2").await?;
1186 | let df3 = CustomDataFrame::new(df3, "msales3").await?;
1187 | let df4 = CustomDataFrame::new(df4, "msales4").await?;
1188 | let df5 = CustomDataFrame::new(df5, "msales5").await?;
1189 | 
1190 | let res_df1 = df1.select(["Month", "TotalSales"]).string_functions(["'site1' AS Restaurant"]);
1191 | let result_df1 = res_df1.elusion("el1").await?;
1192 | 
1193 | let res_df2 = df2.select(["Month", "TotalSales"]).string_functions(["'site2' AS Restaurant"]);
1194 | let result_df2 = res_df2.elusion("el2").await?;
1195 | 
1196 | let res_df3 = df3.select(["Month", "TotalSales"]).string_functions(["'site3' AS Restaurant"]);
1197 | let result_df3 = res_df3.elusion("el3").await?;
1198 | 
1199 | let res_df4 = df4.select(["Month", "TotalSales"]).string_functions(["'site4' AS Restaurant"]);
1200 | let result_df4 = res_df4.elusion("el4").await?;
1201 | 
1202 | let res_df5 = df5.select(["Month", "TotalSales"]).string_functions(["'site5' AS Restaurant"]);
1203 | let result_df5 = res_df5.elusion("el5").await?;
1204 | 
1205 | // APPEND
1206 | let append_df = result_df1.append(result_df2).await?;
1207 | // APPEND_MANY
1208 | let append_many_df = result_df1.append_many([result_df2, result_df3, result_df4, result_df5]).await?;
1209 | ```
1210 | ---
1211 | ## UNION, UNION ALL, EXCEPT, INTERSECT
1212 | #### UNION: Combines rows from both dataframes, removing duplicates
1213 | #### UNION ALL: Combines rows from both dataframes, keeping duplicates
1214 | #### EXCEPT: Difference of two sets (only rows in the left dataframe that are not in the right).
1215 | #### INTERSECT: Intersection of two sets (only rows present in both). 
1216 | ```rust 1217 | //UNION 1218 | let df1 = sales_df.clone() 1219 | .join( 1220 | customers_df.clone(), ["s.CustomerKey = c.CustomerKey"], "INNER", 1221 | ) 1222 | .select(["c.FirstName", "c.LastName"]) 1223 | .string_functions([ 1224 | "TRIM(c.EmailAddress) AS trimmed_email", 1225 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 1226 | ]); 1227 | 1228 | let df2 = sales_df.clone() 1229 | .join( 1230 | customers_df.clone(), ["s.CustomerKey = c.CustomerKey"], "INNER", 1231 | ) 1232 | .select(["c.FirstName", "c.LastName"]) 1233 | .string_functions([ 1234 | "TRIM(c.EmailAddress) AS trimmed_email", 1235 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 1236 | ]); 1237 | 1238 | let result_df1 = df1.elusion("df1").await?; 1239 | let result_df2 = df2.elusion("df2").await?; 1240 | 1241 | let union_df = result_df1.union(result_df2).await?; 1242 | 1243 | let union_df_final = union_df.limit(100).elusion("union_df").await?; 1244 | union_df_final.display().await?; 1245 | 1246 | //UNION ALL 1247 | let union_all_df = result_df1.union_all(result_df2).await?; 1248 | //EXCEPT 1249 | let except_df = result_df1.except(result_df2).await?; 1250 | //INTERSECT 1251 | let intersect_df = result_df1.intersect(result_df2).await?; 1252 | 1253 | ``` 1254 | ## UNION_MANY, UNION_ALL_MANY 1255 | #### UNION_MANY: Combines rows from many dataframes, removing duplicates 1256 | #### UNION_ALL_MANY: Combines rows from many dataframes, keeping duplicates 1257 | ```rust 1258 | let df1 = "C:\\Borivoj\\RUST\\Elusion\\API\\df1.json"; 1259 | let df2 = "C:\\Borivoj\\RUST\\Elusion\\API\\df2.json"; 1260 | let df3 = "C:\\Borivoj\\RUST\\Elusion\\API\\df3.json"; 1261 | let df4 = "C:\\Borivoj\\RUST\\Elusion\\API\\df4.json"; 1262 | let df5 = "C:\\Borivoj\\RUST\\Elusion\\API\\df5.json"; 1263 | 1264 | let df1 = CustomDataFrame::new(df1, "msales").await?; 1265 | let df2 = CustomDataFrame::new(df2, "msales").await?; 1266 | let df3 = CustomDataFrame::new(df3, "msales").await?; 1267 | let df4 = CustomDataFrame::new(df4, "msales").await?; 1268 | let df5 = CustomDataFrame::new(df5, "msales").await?; 1269 | 1270 | let res_df1 = df1.select(["Month", "TotalSales"]).string_functions(["'df1' AS Sitename"]); 1271 | let result_df1 = res_df1.elusion("el1").await?; 1272 | 1273 | let res_df2 = df2.select(["Month", "TotalSales"]).string_functions(["'df2' AS Sitename"]); 1274 | let result_df2 = res_df2.elusion("el2").await?; 1275 | 1276 | let res_df3 = df3.select(["Month", "TotalSales"]).string_functions(["'df3' AS Sitename"]); 1277 | let result_df3 = res_df3.elusion("el3").await?; 1278 | 1279 | let res_df4 = df4.select(["Month", "TotalSales"]).string_functions(["'df4' AS Sitename"]); 1280 | let result_df4 = res_df4.elusion("el4").await?; 1281 | 1282 | let res_df5 = df5.select(["Month", "TotalSales"]).string_functions(["'df5' AS Sitename"]); 1283 | let resuld_df5 = res_df5.elusion("el5").await?; 1284 | 1285 | //UNION_MANY 1286 | let union_all_df = result_df1.union_many([result_df2, result_df3, result_df4, resuld_df5]).await?; 1287 | //UNION_ALL_MANY 1288 | let union_all_many_df = result_df1.union_all_many([result_df2, result_df3, result_df4, resuld_df5]).await?; 1289 | ``` 1290 | --- 1291 | ## PIVOT and UNPIVOT 1292 | #### Pivot and Unpivot functions are ASYNC function 1293 | #### They should be used separately from other functions: 1. directly on initial CustomDataFrame, 2. after .elusion() evaluation. 1294 | #### Future needs to be in final state so .await? 
must be used 1295 | ```rust 1296 | // PIVOT 1297 | // directly on initial CustomDataFrame 1298 | let sales_p = "C:\\Borivoj\\RUST\\Elusion\\SalesData2022.csv"; 1299 | let df_sales = CustomDataFrame::new(sales_p, "s").await?; 1300 | 1301 | let pivoted = df_sales 1302 | .pivot( 1303 | ["StockDate"], // Row identifiers 1304 | "TerritoryKey", // Column to pivot 1305 | "OrderQuantity", // Value to aggregate 1306 | "SUM" // Aggregation function 1307 | ).await?; 1308 | 1309 | let result_pivot = pivoted.elusion("pivoted_df").await?; 1310 | result_pivot.display().await?; 1311 | 1312 | // after .elusion() evaluation 1313 | let sales_path = "C:\\Borivoj\\RUST\\Elusion\\sales_order_report.csv"; 1314 | let sales_order_df = CustomDataFrame::new(sales_path, "sales").await?; 1315 | 1316 | let scalar_df = sales_order_df 1317 | .select([ 1318 | "customer_name", 1319 | "order_date", 1320 | "ABS(billable_value) AS abs_billable_value", 1321 | "ROUND(SQRT(billable_value), 2) AS SQRT_billable_value"]) 1322 | .filter("billable_value > 100.0") 1323 | .order_by(["order_date"], [true]) 1324 | .limit(10); 1325 | // elusion evaluation 1326 | let scalar_res = scalar_df.elusion("scalar_df").await?; 1327 | 1328 | let pivoted_scalar = scalar_res 1329 | .pivot( 1330 | ["customer_name"], // Row identifiers 1331 | "order_date", // Column to pivot 1332 | "abs_billable_value", // Value to aggregate 1333 | "SUM" // Aggregation function 1334 | ).await?; 1335 | 1336 | let pitvoted_scalar = pivoted_scalar.elusion("pivoted_df").await?; 1337 | pitvoted_scalar.display().await?; 1338 | 1339 | // UNPIVOT 1340 | let unpivoted = result_pivot 1341 | .unpivot( 1342 | ["StockDate"], // ID columns 1343 | ["TerritoryKey_1", "TerritoryKey_2"], // Value columns to unpivot 1344 | "Territory", // New name column 1345 | "Quantity" // New value column 1346 | ).await?; 1347 | 1348 | let result_unpivot = unpivoted.elusion("unpivoted_df").await?; 1349 | result_unpivot.display().await?; 1350 | 1351 | // example 2 1352 | let unpivot_scalar = scalar_res 1353 | .unpivot( 1354 | ["customer_name", "order_date"], // Keep these as identifiers 1355 | ["abs_billable_value", "sqrt_billable_value"], // Columns to unpivot 1356 | "measure_name", // Name for the measure column 1357 | "measure_value" // Name for the value column 1358 | ).await?; 1359 | 1360 | let result_unpivot_scalar = unpivot_scalar.elusion("unpivoted_df2").await?; 1361 | result_unpivot_scalar.display().await?; 1362 | ``` 1363 | --- 1364 | ## Statistical Functions 1365 | #### These Functions can give you quick statistical overview of your DataFrame columns and correlations 1366 | #### Currently available: display_stats(), display_null_analysis(), display_correlation_matrix() 1367 | ```rust 1368 | df.display_stats(&[ 1369 | "abs_billable_value", 1370 | "sqrt_billable_value", 1371 | "double_billable_value", 1372 | "percentage_billable" 1373 | ]).await?; 1374 | 1375 | === Column Statistics === 1376 | -------------------------------------------------------------------------------- 1377 | Column: abs_billable_value 1378 | ------------------------------------------------------------------------------ 1379 | | Metric | Value | Min | Max | 1380 | ------------------------------------------------------------------------------ 1381 | | Records | 10 | - | - | 1382 | | Non-null Records | 10 | - | - | 1383 | | Mean | 1025.71 | - | - | 1384 | | Standard Dev | 761.34 | - | - | 1385 | | Value Range | - | 67.4 | 2505.23 | 1386 | ------------------------------------------------------------------------------ 1387 | 
1388 | Column: sqrt_billable_value 1389 | ------------------------------------------------------------------------------ 1390 | | Metric | Value | Min | Max | 1391 | ------------------------------------------------------------------------------ 1392 | | Records | 10 | - | - | 1393 | | Non-null Records | 10 | - | - | 1394 | | Mean | 29.48 | - | - | 1395 | | Standard Dev | 13.20 | - | - | 1396 | | Value Range | - | 8.21 | 50.05 | 1397 | ------------------------------------------------------------------------------ 1398 | 1399 | // Display null analysis 1400 | // Keep None if you want all columns to be analized 1401 | df.display_null_analysis(None).await?; 1402 | 1403 | ---------------------------------------------------------------------------------------- 1404 | | Column | Total Rows | Null Count | Null Percentage | 1405 | ---------------------------------------------------------------------------------------- 1406 | | total_billable | 10 | 0 | 0.00% | 1407 | | order_count | 10 | 0 | 0.00% | 1408 | | customer_name | 10 | 0 | 0.00% | 1409 | | order_date | 10 | 0 | 0.00% | 1410 | | abs_billable_value | 10 | 0 | 0.00% | 1411 | ---------------------------------------------------------------------------------------- 1412 | 1413 | // Display correlation matrix 1414 | df.display_correlation_matrix(&[ 1415 | "abs_billable_value", 1416 | "sqrt_billable_value", 1417 | "double_billable_value", 1418 | "percentage_billable" 1419 | ]).await?; 1420 | 1421 | === Correlation Matrix === 1422 | ------------------------------------------------------------------------------------------- 1423 | | | abs_billable_va | sqrt_billable_v | double_billable | percentage_bill | 1424 | ------------------------------------------------------------------------------------------- 1425 | | abs_billable_va | 1.00 | 0.98 | 1.00 | 1.00 | 1426 | | sqrt_billable_v | 0.98 | 1.00 | 0.98 | 0.98 | 1427 | | double_billable | 1.00 | 0.98 | 1.00 | 1.00 | 1428 | | percentage_bill | 1.00 | 0.98 | 1.00 | 1.00 | 1429 | ------------------------------------------------------------------------------------------- 1430 | ``` 1431 | --- 1432 | ## EXTRACTING VALUES: extract_value_from_df() 1433 | #### Example how you can extract values from DataFrame and use it within REST API 1434 | ```rust 1435 | //create calendar dataframe 1436 | let date_calendar = CustomDataFrame::create_formatted_date_range_table( 1437 | "2025-01-01", 1438 | "2025-12-31", 1439 | "dt", 1440 | "date".to_string(), 1441 | DateFormat::HumanReadableTime, 1442 | true, 1443 | Weekday::Mon 1444 | ).await?; 1445 | 1446 | // take columns from Calendar 1447 | let week_range_2025 = date_calendar 1448 | .select(["DISTINCT(week_start)","week_end", "week_num"]) 1449 | .order_by(["week_num"], [true]) 1450 | .elusion("wr") 1451 | .await?; 1452 | 1453 | // create empty dataframe 1454 | let temp_df = CustomDataFrame::empty().await?; 1455 | 1456 | //populate empty dataframe with current week number 1457 | let current_week = temp_df 1458 | .datetime_functions([ 1459 | "CAST(DATE_PART('week', CURRENT_DATE()) as INT) AS current_week_num", 1460 | ]) 1461 | .elusion("cd").await?; 1462 | 1463 | // join data frames to get range for current week 1464 | let week_for_api = week_range_2025 1465 | .join(current_week,["wr.week_num == cd.current_week_num"], "INNER") 1466 | .select(["TRIM(wr.week_start) AS datefrom", "TRIM(wr.week_end) AS dateto"]) 1467 | .elusion("api_week") 1468 | .await?; 1469 | 1470 | // Extract Date Value from DataFrame based on column name and Row Index 1471 | let date_from = 
extract_value_from_df(&week_for_api, "datefrom", 0).await?; 1472 | let date_to = extract_value_from_df(&week_for_api, "dateto", 0).await?; 1473 | 1474 | //PRINT results for preview 1475 | week_for_api.display().await?; 1476 | 1477 | println!("Date from: {}", date_from); 1478 | println!("Date to: {}", date_to); 1479 | 1480 | RESULT: 1481 | +------------------+------------------+ 1482 | | datefrom | dateto | 1483 | +------------------+------------------+ 1484 | | 3 Mar 2025 00:00 | 9 Mar 2025 00:00 | 1485 | +------------------+------------------+ 1486 | 1487 | Date from: 3 Mar 2025 00:00 1488 | Date to: 9 Mar 2025 00:00 1489 | 1490 | NOW WE CAN USE THESE EXTRACTED VALUES: 1491 | 1492 | let post_df = ElusionApi::new(); 1493 | post_df.from_api_with_dates( 1494 | "https://jsonplaceholder.typicode.com/posts", // url 1495 | &date_from, // date from 1496 | &date_to, // date to 1497 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\rest_api_data.json", // path where json will be stored 1498 | ).await?; 1499 | ``` 1500 | ## EXTRACTING ROWS: extract_row_from_df() 1501 | #### Example how you can extract Row from DataFrame and use it within REST API. 1502 | ```rust 1503 | //create calendar dataframe 1504 | let date_calendar = CustomDataFrame::create_formatted_date_range_table( 1505 | "2025-01-01", 1506 | "2025-12-31", 1507 | "dt", 1508 | "date".to_string(), 1509 | DateFormat::IsoDate, 1510 | true, 1511 | Weekday::Mon 1512 | ).await?; 1513 | //take columns from calendar 1514 | let week_range_2025 = date_calendar 1515 | .select(["DISTINCT(week_start)","week_end", "week_num"]) 1516 | .order_by(["week_num"], [true]) 1517 | .elusion("wr") 1518 | .await?; 1519 | 1520 | // create empty dataframe 1521 | let temp_df = CustomDataFrame::empty().await?; 1522 | 1523 | //populate empty dataframe with current week number 1524 | let current_week = temp_df 1525 | .datetime_functions([ 1526 | "CAST(DATE_PART('week', CURRENT_DATE()) as INT) AS current_week_num", 1527 | ]) 1528 | .elusion("cd").await?; 1529 | 1530 | // join data frames to ge range for current week 1531 | let week_for_api = week_range_2025 1532 | .join(current_week,["wr.week_num == cd.current_week_num"], "INNER") 1533 | .select(["TRIM(wr.week_start) AS datefrom", "TRIM(wr.week_end) AS dateto"]) 1534 | .elusion("api_week") 1535 | .await?; 1536 | 1537 | // Extract Row Values from DataFrame based on Row Index 1538 | let row_values = extract_row_from_df(&week_for_api, 0).await?; 1539 | 1540 | // PRINT row for preview 1541 | println!("DataFrame row: {:?}", row_values); 1542 | 1543 | RESULT: 1544 | DataFrame row: {"datefrom": "2025-03-03", "dateto": "2025-03-09"} 1545 | 1546 | NOW WE CAN USE THESE EXTRACTED ROW: 1547 | 1548 | let post_df = ElusionApi::new(); 1549 | post_df.from_api_with_dates( 1550 | "https://jsonplaceholder.typicode.com/posts", // url 1551 | row_values.get("datefrom").unwrap_or(&String::new()), // date from 1552 | row_values.get("dateto").unwrap_or(&String::new()), // date to 1553 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\extraction_df2.json", // path where json will be stored 1554 | ).await?; 1555 | ``` 1556 | --- 1557 | # Postgres Database Connector 1558 | ### Create Config, Conn and Query, and pass it to from_postgres() function. 
1559 | ```rust
1560 | let pg_config = PostgresConfig {
1561 |     host: "localhost".to_string(),
1562 |     port: 5433,
1563 |     user: "borivoj".to_string(),
1564 |     password: "pass123".to_string(),
1565 |     database: "db_test".to_string(),
1566 |     pool_size: Some(5),
1567 | };
1568 | 
1569 | let conn = PostgresConnection::new(pg_config).await?;
1570 | 
1571 | // Option 2: you can use map_err() to wrap the connection error in a custom message
1572 | let conn = PostgresConnection::new(pg_config).await
1573 |     .map_err(|e| ElusionError::Custom(format!("PostgreSQL connection error: {}", e)))?;
1574 | 
1575 | let query = "
1576 |     SELECT
1577 |         c.id,
1578 |         c.name,
1579 |         s.product_name,
1580 |         SUM(s.quantity * s.price) as total_revenue
1581 |     FROM customers c
1582 |     LEFT JOIN sales s ON c.id = s.customer_id
1583 |     GROUP BY c.id, c.name, s.product_name
1584 |     ORDER BY total_revenue DESC
1585 | ";
1586 | 
1587 | let sales_by_customer_df = CustomDataFrame::from_postgres(&conn, query, "postgres_df").await?;
1588 | 
1589 | sales_by_customer_df.display().await?;
1590 | ```
1591 | # MySQL Database Connector
1592 | ### Create Config, Conn and Query, and pass it to from_mysql() function.
1593 | ```rust
1594 | let mysql_config = MySqlConfig {
1595 |     host: "localhost".to_string(),
1596 |     port: 3306,
1597 |     user: "borivoj".to_string(),
1598 |     password: "pass123".to_string(),
1599 |     database: "brewery".to_string(),
1600 |     pool_size: Some(5),
1601 | };
1602 | 
1603 | let conn = MySqlConnection::new(mysql_config).await?;
1604 | 
1605 | let mysql_query = "
1606 |     WITH ranked_sales AS (
1607 |         SELECT
1608 |             c.color AS brew_color,
1609 |             bd.beer_style,
1610 |             bd.location,
1611 |             SUM(bd.total_sales) AS total_sales
1612 |         FROM
1613 |             brewery_data bd
1614 |         JOIN
1615 |             colors c ON bd.Color = c.color_number
1616 |         WHERE
1617 |             bd.brew_date >= '2020-01-01' AND bd.brew_date <= '2020-03-01'
1618 |         GROUP BY
1619 |             c.color, bd.beer_style, bd.location
1620 |     )
1621 |     SELECT
1622 |         brew_color,
1623 |         beer_style,
1624 |         location,
1625 |         total_sales,
1626 |         ROW_NUMBER() OVER (PARTITION BY brew_color ORDER BY total_sales DESC) AS ranked
1627 |     FROM
1628 |         ranked_sales
1629 |     ORDER BY
1630 |         brew_color, total_sales DESC";
1631 | 
1632 | let df = CustomDataFrame::from_mysql(&conn, mysql_query, "mysql_df").await?;
1633 | 
1634 | df.display().await?;
1635 | ```
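Since the connectors return a regular CustomDataFrame, the queried data can be passed straight to any of the writers described later. Below is a minimal sketch (the output path is hypothetical) that persists the `df` returned by from_mysql() above as a local Parquet file:
```rust
// Persist the MySQL query result locally as Parquet.
// Arguments: write mode, target path (hypothetical), optional write options (kept as None).
df.write_to_parquet(
    "overwrite",
    "C:\\Borivoj\\RUST\\Elusion\\mysql_sales.parquet",
    None
).await?;
```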
1636 | ---
1637 | # AZURE Blob Storage Connector
1638 | ## Storage connector available for BLOB and DFS URL endpoints, with a provided SAS token
1639 | ### Currently supported file types: .JSON and .CSV
1640 | #### The DFS endpoint is "Data Lake Storage Gen2" and behaves more like a real file system, which makes reading operations more efficient, especially at large scale.
1641 | 
1642 | ### BLOB endpoint example
1643 | ```rust
1644 | let blob_url = "https://your_storage_account_name.blob.core.windows.net/your-container-name";
1645 | let sas_token = "your_sas_token";
1646 | 
1647 | let df = CustomDataFrame::from_azure_with_sas_token(
1648 |     blob_url,
1649 |     sas_token,
1650 |     Some("folder-name/file-name"), // FILTERING is optional. Can be None if you want to take everything from the container
1651 |     "data" // alias for registering table
1652 | ).await?;
1653 | 
1654 | let data_df = df.select(["*"]);
1655 | 
1656 | let test_data = data_df.elusion("data_df").await?;
1657 | test_data.display().await?;
1658 | ```
1659 | ### DFS endpoint example
1660 | 
1661 | ```rust
1662 | let dfs_url = "https://your_storage_account_name.dfs.core.windows.net/your-container-name";
1663 | let sas_token = "your_sas_token";
1664 | 
1665 | let df = CustomDataFrame::from_azure_with_sas_token(
1666 |     dfs_url,
1667 |     sas_token,
1668 |     Some("folder-name/file-name"), // FILTERING is optional. Can be None if you want to take everything from the container
1669 |     "data" // alias for registering table
1670 | ).await?;
1671 | 
1672 | let data_df = df.select(["*"]);
1673 | 
1674 | let test_data = data_df.elusion("data_df").await?;
1675 | test_data.display().await?;
1676 | ```
1677 | ---
1678 | # Pipeline Scheduler
1679 | ### Time is set according to UTC
1680 | 
1681 | #### Currently available job frequencies
1682 | ```rust
1683 | "1min", "2min", "5min", "10min", "15min", "30min",
1684 | "1h", "2h", "3h", "4h", "5h", "6h", "7h", "8h", "9h", "10h", "11h", "12h", "24h",
1685 | "2days", "3days", "4days", "5days", "6days", "7days", "14days", "30days"
1686 | ```
1687 | ### PipelineScheduler Example (parsing data from Azure BLOB Storage, DataFrame operation and writing to Parquet)
1688 | ```rust
1689 | use elusion::prelude::*;
1690 | 
1691 | #[tokio::main]
1692 | async fn main() -> ElusionResult<()> {
1693 | 
1694 |     // Create Pipeline Scheduler
1695 |     let scheduler = PipelineScheduler::new("5min", || async {
1696 | 
1697 |         let dfs_url = "https://your_storage_account_name.dfs.core.windows.net/your-container-name";
1698 |         let sas_token = "your_sas_token";
1699 |         // Read from Azure
1700 |         let header_df = CustomDataFrame::from_azure_with_sas_token(
1701 |             dfs_url,
1702 |             sas_token,
1703 |             Some("folder_name/"), // Optional: FILTERING can match any part of the string: file path, file name...
1704 |             "head"
1705 |         ).await?;
1706 | 
1707 |         // DataFrame operation
1708 |         let headers_payments = header_df
1709 |             .select(["Brand", "Id", "Name", "Item", "Bill", "Tax",
1710 |                 "ServCharge", "Percentage", "Discount", "Date"])
1711 |             .agg([
1712 |                 "SUM(Bill) AS total_bill",
1713 |                 "SUM(Tax) AS total_tax",
1714 |                 "SUM(ServCharge) AS total_service",
1715 |                 "AVG(Percentage) AS avg_percentage",
1716 |                 "COUNT(*) AS transaction_count",
1717 |                 "SUM(ServCharge) / SUM(Bill) * 100 AS service_ratio"
1718 |             ])
1719 |             .group_by(["Brand", "Date"])
1720 |             .filter("Bill > 0")
1721 |             .order_by(["total_bill"], [true]);
1722 | 
1723 |         let headers_data = headers_payments.elusion("headers_df").await?;
1724 | 
1725 |         // Write output
1726 |         headers_data
1727 |             .write_to_parquet(
1728 |                 "overwrite",
1729 |                 "C:\\Borivoj\\RUST\\Elusion\\Scheduler\\sales_data.parquet",
1730 |                 None
1731 |             )
1732 |             .await?;
1733 | 
1734 |         Ok(())
1735 | 
1736 |     }).await?;
1737 | 
1738 |     scheduler.shutdown().await?;
1739 | 
1740 |     Ok(())
1741 | }
1742 | 
1743 | ```
1744 | ---
1745 | # JSON files
1746 | ### Currently supported files can include: Fields, Arrays, Objects.
1747 | #### Best performance with flat JSON ("key":"value")
1748 | #### For JSON, all field types are inferred as VARCHAR/TEXT/STRING
1749 | ```rust
1750 | // example json structure with key:value pairs
1751 | {
1752 |     "name": "Adeel Solangi",
1753 |     "language": "Sindhi",
1754 |     "id": "V59OF92YF627HFY0",
1755 |     "bio": "Donec lobortis eleifend condimentum. 
Cras dictum dolor lacinia lectus vehicula rutrum.", 1756 | "version": 6.1 1757 | } 1758 | 1759 | let json_path = "C:\\Borivoj\\RUST\\Elusion\\test.json"; 1760 | let json_df = CustomDataFrame::new(json_path, "test").await?; 1761 | 1762 | let df = json_df.select(["*"]).limit(10); 1763 | 1764 | let result = df.elusion("df").await?; 1765 | result.display().await?; 1766 | 1767 | // example json structure with Fields and Arrays 1768 | [ 1769 | { 1770 | "id": "1", 1771 | "name": "Form 1", 1772 | "fields": [ 1773 | {"key": "first_name", "type": "text", "required": true}, 1774 | {"key": "age", "type": "number", "required": false}, 1775 | {"key": "email", "type": "email", "required": true} 1776 | ] 1777 | }, 1778 | { 1779 | "id": "2", 1780 | "name": "Form 2", 1781 | "fields": [ 1782 | {"key": "address", "type": "text", "required": false}, 1783 | {"key": "phone", "type": "tel", "required": true} 1784 | ] 1785 | }, 1786 | { 1787 | "id": "3", 1788 | "name": "Form 3", 1789 | "fields": [ 1790 | {"key": "notes", "type": "textarea", "required": false}, 1791 | {"key": "date", "type": "date", "required": true}, 1792 | {"key": "status", "type": "select", "required": true} 1793 | ] 1794 | } 1795 | ] 1796 | 1797 | let json_path = "C:\\Borivoj\\RUST\\Elusion\\test2.json"; 1798 | let json_df = CustomDataFrame::new(json_path, "test2").await?; 1799 | ``` 1800 | --- 1801 | # REST API 1802 | ### Creating JSON files from REST API's 1803 | #### Customizable Headers, Params, Pagination, Date Ranges... 1804 | ### FROM API 1805 | ```rust 1806 | // example 1 1807 | let posts_df = ElusionApi::new(); 1808 | posts_df 1809 | .from_api( 1810 | "https://jsonplaceholder.typicode.com/posts", // url 1811 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\posts_data.json" // path where json will be stored 1812 | ).await?; 1813 | 1814 | // example 2 1815 | let users_df = ElusionApi::new(); 1816 | users_df.from_api( 1817 | "https://jsonplaceholder.typicode.com/users", 1818 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\users_data.json", 1819 | ).await?; 1820 | 1821 | // example 3 1822 | let ceo = ElusionApi::new(); 1823 | ceo.from_api( 1824 | "https://dog.ceo/api/breeds/image/random/3", 1825 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\ceo_data.json" 1826 | ).await?; 1827 | ``` 1828 | ### FROM API WITH HEADERS 1829 | ```rust 1830 | // example 1 1831 | let mut headers = HashMap::new(); 1832 | headers.insert("Custom-Header".to_string(), "test-value".to_string()); 1833 | 1834 | let bin_df = ElusionApi::new(); 1835 | bin_df.from_api_with_headers( 1836 | "https://httpbin.org/headers", // url 1837 | headers, // headers 1838 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\bin_data.json", // path where json will be stored 1839 | ).await?; 1840 | 1841 | // example 2 1842 | let mut headers = HashMap::new(); 1843 | headers.insert("Accept".to_string(), "application/vnd.github.v3+json".to_string()); 1844 | headers.insert("User-Agent".to_string(), "elusion-dataframe-test".to_string()); 1845 | 1846 | let git_hub = ElusionApi::new(); 1847 | git_hub.from_api_with_headers( 1848 | "https://api.github.com/search/repositories?q=rust+language:rust&sort=stars&order=desc", 1849 | headers, 1850 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\git_hub_data.json" 1851 | ).await?; 1852 | 1853 | // example 3 1854 | let mut headers = HashMap::new(); 1855 | headers.insert("Accept".to_string(), "application/json".to_string()); 1856 | headers.insert("X-Version".to_string(), "1".to_string()); 1857 | 1858 | let pokemon_df = ElusionApi::new(); 1859 | pokemon_df.from_api_with_headers( 1860 | 
"https://pokeapi.co/api/v2/pokemon", 1861 | headers, 1862 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\pokemon_data.json" 1863 | ).await?; 1864 | ``` 1865 | ### FROM API WITH PARAMS 1866 | ```rust 1867 | // Using OpenLibrary API with params 1868 | let mut params = HashMap::new(); 1869 | params.insert("q", "rust programming"); 1870 | params.insert("limit", "10"); 1871 | 1872 | let open_lib = ElusionApi::new(); 1873 | open_lib.from_api_with_params( 1874 | "https://openlibrary.org/search.json", // url 1875 | params, // params 1876 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\open_lib_data.json", // path where json will be stored 1877 | ).await?; 1878 | 1879 | // Random User Generator API with params 1880 | let mut params = HashMap::new(); 1881 | params.insert("results", "10"); 1882 | params.insert("nat", "us,gb"); 1883 | 1884 | let generator = ElusionApi::new(); 1885 | generator.from_api_with_params( 1886 | "https://randomuser.me/api", 1887 | params, 1888 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\generator_data.json" 1889 | ).await?; 1890 | 1891 | // JSON Placeholder with multiple endpoints 1892 | let mut params = HashMap::new(); 1893 | params.insert("userId", "1"); 1894 | params.insert("_limit", "5"); 1895 | 1896 | let multi = ElusionApi::new(); 1897 | multi.from_api_with_params( 1898 | "https://jsonplaceholder.typicode.com/posts", 1899 | params, 1900 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\multi_data.json" 1901 | ).await?; 1902 | 1903 | // NASA Astronomy Picture of the Day 1904 | let mut params = HashMap::new(); 1905 | params.insert("count", "5"); 1906 | params.insert("thumbs", "true"); 1907 | 1908 | let nasa = ElusionApi::new(); 1909 | nasa.from_api_with_params( 1910 | "https://api.nasa.gov/planetary/apod", 1911 | params, 1912 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\nasa_pics_data.json" 1913 | ).await?; 1914 | 1915 | // example 5 1916 | let mut params = HashMap::new(); 1917 | params.insert("brand", "elusion"); 1918 | params.insert("password", "some_password"); 1919 | params.insert("siteid", "993"); 1920 | params.insert("Datefrom", "01 jan 2025 06:00"); 1921 | params.insert("Dateto", "31 jan 2025 06:00"); 1922 | params.insert("user", "borivoj"); 1923 | 1924 | let api = ElusionApi::new(); 1925 | api.from_api_with_params( 1926 | "https://salesapi.net.co.rs/SSPAPI/api/data", 1927 | params, 1928 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\sales_jan_2025.json" 1929 | ).await?; 1930 | ``` 1931 | ### FROM API WITH PARAMS AND HEADERS 1932 | ```rust 1933 | let mut params = HashMap::new(); 1934 | params.insert("since", "2024-01-01T00:00:00Z"); 1935 | params.insert("until", "2024-01-07T23:59:59Z"); 1936 | 1937 | let mut headers = HashMap::new(); 1938 | headers.insert("Accept".to_string(), "application/vnd.github.v3+json".to_string()); 1939 | headers.insert("User-Agent".to_string(), "elusion-dataframe-test".to_string()); 1940 | 1941 | let commits_df = ElusionApi::new(); 1942 | commits_df.from_api_with_params_and_headers( 1943 | "https://api.github.com/repos/rust-lang/rust/commits", // url 1944 | params, // params 1945 | headers, // headers 1946 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\commits_data.json", // path where json will be stored 1947 | ).await?; 1948 | ``` 1949 | ### FROM API WITH DATES 1950 | ```rust 1951 | // example 1 1952 | let post_df = ElusionApi::new(); 1953 | post_df.from_api_with_dates( 1954 | "https://jsonplaceholder.typicode.com/posts", // url 1955 | "2024-01-01", // date from 1956 | "2024-01-07", // date to 1957 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\post_data.json", // path where json will be stored 1958 | ).await?; 
1959 | 1960 | // Example 2: COVID-19 historical data 1961 | let covid_df = ElusionApi::new(); 1962 | covid_df.from_api_with_dates( 1963 | "https://disease.sh/v3/covid-19/historical/all", 1964 | "2024-01-01", 1965 | "2024-01-07", 1966 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\covid_data.json" 1967 | ).await?; 1968 | ``` 1969 | ### FROM API WITH PAGINATION 1970 | ```rust 1971 | // example 1 1972 | let reqres = ElusionApi::new(); 1973 | reqres.from_api_with_pagination( 1974 | "https://reqres.in/api/users", 1975 | 1, // page 1976 | 10, // per_page 1977 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\reqres_data.json" 1978 | ).await?; 1979 | ``` 1980 | ### FROM API WITH SORT 1981 | ```rust 1982 | let movie_db = ElusionApi::new(); 1983 | movie_db.from_api_with_sort( 1984 | "https://api.themoviedb.org/3/discover/movie", // base url 1985 | "popularity", // sort field 1986 | "desc", // order 1987 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\popular_movies.json" 1988 | ).await?; 1989 | ``` 1990 | ### FROM API WITH HEADERS AND SORT 1991 | ```rust 1992 | let mut headers = HashMap::new(); 1993 | headers.insert("Authorization".to_string(), "Bearer YOUR_TMDB_API_KEY".to_string()); 1994 | headers.insert("accept".to_string(), "application/json".to_string()); 1995 | 1996 | let movie_db = ElusionApi::new(); 1997 | movie_db.from_api_with_headers_and_sort( 1998 | "https://api.themoviedb.org/3/discover/movie", // base url 1999 | headers, // headers 2000 | "popularity", // sort field 2001 | "desc", // order 2002 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\popular_movies1.json" 2003 | ).await?; 2004 | ``` 2005 | --- 2006 | # WRITERS 2007 | 2008 | ## Writing to EXCEL File ***needs excel feature enabled 2009 | 2010 | #### EXCEL writer can only write or overwrite, so only 2 arguments needed 2011 | #### 1. Path, 2. Optional Sheet name. (default is Sheet1) 2012 | ```rust 2013 | df.write_to_excel( 2014 | "C:\\Borivoj\\RUST\\Elusion\\Excel\\sales2.xlsx", //path 2015 | Some("string_interop") // Optional sheet name. Can be None 2016 | ).await?; 2017 | ``` 2018 | ## Writing to Parquet File 2019 | #### We have 2 writing modes: **Overwrite** and **Append** 2020 | ```rust 2021 | // overwrite existing file 2022 | df.write_to_parquet( 2023 | "overwrite", 2024 | "C:\\Path\\To\\Your\\test.parquet", 2025 | None // I've set WriteOptions to default for writing Parquet files, so keep it None 2026 | ) 2027 | .await?; 2028 | 2029 | // append to exisiting file 2030 | df.write_to_parquet( 2031 | "append", 2032 | "C:\\Path\\To\\Your\\test.parquet", 2033 | None // I've set WriteOptions to default for writing Parquet files, so keep it None 2034 | ) 2035 | .await?; 2036 | ``` 2037 | ## Writing to CSV File 2038 | 2039 | #### CSV Writing options are **mandatory** 2040 | ##### has_headers: TRUE is dynamically set for Overwrite mode, and FALSE for Append mode. 
2041 | ```rust
2042 | let custom_csv_options = CsvWriteOptions {
2043 |     delimiter: b',',
2044 |     escape: b'\\',
2045 |     quote: b'"',
2046 |     double_quote: false,
2047 |     null_value: "NULL".to_string(),
2048 | };
2049 | ```
2050 | #### We have 2 writing modes: Overwrite and Append
2051 | ```rust
2052 | // overwrite existing file
2053 | df.write_to_csv(
2054 |     "overwrite",
2055 |     "C:\\Borivoj\\RUST\\Elusion\\agg_sales.csv",
2056 |     custom_csv_options
2057 | )
2058 | .await?;
2059 | 
2060 | // append to existing file
2061 | df.write_to_csv(
2062 |     "append",
2063 |     "C:\\Borivoj\\RUST\\Elusion\\agg_sales.csv",
2064 |     custom_csv_options
2065 | )
2066 | .await?;
2067 | 
2068 | ```
2069 | ## Writing to JSON File
2070 | 
2071 | #### JSON writer can only overwrite, so only 2 arguments are needed:
2072 | #### 1. Path, 2. Whether you want pretty-printed JSON or not (true or false)
2073 | ```rust
2074 | df.write_to_json(
2075 |     "C:\\Borivoj\\RUST\\Elusion\\date_table.json", // path
2076 |     true // pretty-printed JSON, false for compact JSON
2077 | ).await?;
2078 | ```
2079 | ## Writing to DELTA table / lake
2080 | #### We can write to delta in 2 modes: **Overwrite** and **Append**
2081 | #### The partitioning column is OPTIONAL; if you decide to partition by a column, make sure you don't need it afterwards, as you won't be able to read it back into the dataframe
2082 | #### Once a delta table is written with a partitioning column, any APPEND to it must use the same partitioning column
2083 | ```rust
2084 | // Overwrite
2085 | df.write_to_delta_table(
2086 |     "overwrite",
2087 |     "C:\\Borivoj\\RUST\\Elusion\\agg_sales",
2088 |     Some(vec!["order_date".into()]),
2089 | )
2090 | .await
2091 | .expect("Failed to overwrite Delta table");
2092 | // Append
2093 | df.write_to_delta_table(
2094 |     "append",
2095 |     "C:\\Borivoj\\RUST\\Elusion\\agg_sales",
2096 |     Some(vec!["order_date".into()]),
2097 | )
2098 | .await
2099 | .expect("Failed to append to Delta table");
2100 | ```
2101 | ## Writing Parquet to Azure BLOB Storage
2102 | #### We have 2 writing options: "overwrite" and "append"
2103 | #### Writing is set to Default, Compression: SNAPPY and Parquet 2.0
2104 | #### Threshold file size is 1GB
2105 | ```rust
2106 | let df = CustomDataFrame::new(csv_data, "sales").await?;
2107 | 
2108 | let query = df.select(["*"]);
2109 | 
2110 | let data = query.elusion("df_sales").await?;
2111 | 
2112 | let url_to_folder = "https://your_storage_account_name.dfs.core.windows.net/your-container-name/folder/sales.parquet";
2113 | let sas_write_token = "your_sas_token"; // make sure the SAS token has write permissions
2114 | 
2115 | data.write_parquet_to_azure_with_sas(
2116 |     "overwrite",
2117 |     url_to_folder,
2118 |     sas_write_token
2119 | ).await?;
2120 | 
2121 | // append version
2122 | data.write_parquet_to_azure_with_sas(
2123 |     "append",
2124 |     url_to_folder,
2125 |     sas_write_token
2126 | ).await?;
2127 | ```
2128 | ## Writing JSON to Azure BLOB Storage
2129 | #### Can only create a new file or overwrite an existing one
2130 | #### Threshold file size is 1GB
2131 | ```rust
2132 | let df = CustomDataFrame::new(csv_data, "sales").await?;
2133 | 
2134 | let query = df.select(["*"]);
2135 | 
2136 | let data = query.elusion("df_sales").await?;
2137 | 
2138 | let url_to_folder = "https://your_storage_account_name.dfs.core.windows.net/your-container-name/folder/data.json";
2139 | let sas_write_token = "your_sas_token"; // make sure the SAS token has write permissions
2140 | 
2141 | data.write_json_to_azure_with_sas(
2142 | 
url_to_folder, 2143 | sas_write_token, 2144 | true // Set to true for pretty-printed JSON, false for compact JSON 2145 | ).await?; 2146 | ``` 2147 | --- 2148 | # REPORTING 2149 | ### CREATING REPORT with Interactive Plots/Visuals and Tables 2150 | ### Export Table data to EXCEL and CSV 2151 | #### Currently available Interactive Plots: TimeSeries, Box, Bar, Histogram, Pie, Donut, Scatter... 2152 | #### Interactive Tables can: Paginate pages, Filter, Reorder, Resize columns... 2153 | ```rust 2154 | let ord = "C:\\Borivoj\\RUST\\Elusion\\sales_order_report.csv"; 2155 | let sales_order_df = CustomDataFrame::new(ord, "ord").await?; 2156 | 2157 | let mix_query = sales_order_df.clone() 2158 | .select([ 2159 | "customer_name", 2160 | "order_date", 2161 | "ABS(billable_value) AS abs_billable_value", 2162 | "ROUND(SQRT(billable_value), 2) AS SQRT_billable_value", 2163 | "billable_value * 2 AS double_billable_value", // Multiplication 2164 | "billable_value / 100 AS percentage_billable" // Division 2165 | ]) 2166 | .agg([ 2167 | "ROUND(AVG(ABS(billable_value)), 2) AS avg_abs_billable", 2168 | "SUM(billable_value) AS total_billable", 2169 | "MAX(ABS(billable_value)) AS max_abs_billable", 2170 | "SUM(billable_value) * 2 AS double_total_billable", // Operator-based aggregation 2171 | "SUM(billable_value) / 100 AS percentage_total_billable" // Operator-based aggregation 2172 | ]) 2173 | .filter("billable_value > 50.0") 2174 | .group_by_all() 2175 | .order_by_many([ 2176 | ("total_billable", false), // Order by total_billable descending 2177 | ("max_abs_billable", true), // Then by max_abs_billable ascending 2178 | ]); 2179 | 2180 | let mix_res = mix_query.elusion("scalar_df").await?; 2181 | 2182 | //INTERACTIVE PLOTS 2183 | // Line plot showing sales over time 2184 | let line = mix_res.plot_line( 2185 | "order_date", // - x_col: column name for x-axis (can be date or numeric) 2186 | "double_billable_value", // - y_col: column name for y-axis 2187 | true, // - show_markers: true to show points, false for line only 2188 | Some("Sales over time") // - title: optional custom title (can be None) 2189 | ).await?; 2190 | 2191 | // Bar plot showing aggregated values 2192 | let bars = mix_res 2193 | .plot_bar( 2194 | "customer_name", // X-axis: Customer names 2195 | "total_billable", // Y-axis: Total billable amount 2196 | Some("Customer Total Sales") // Title of the plot 2197 | ).await?; 2198 | 2199 | // Time series showing sales trend 2200 | let time_series = mix_res 2201 | .plot_time_series( 2202 | "order_date", // X-axis: Date column (must be Date32 type) 2203 | "total_billable", // Y-axis: Total billable amount 2204 | true, // Show markers on the line 2205 | Some("Sales Trend Over Time") // Title of the plot 2206 | ).await?; 2207 | 2208 | // Histogram showing distribution of abs billable values 2209 | let histogram = mix_res 2210 | .plot_histogram( 2211 | "abs_billable_value", // Data column for distribution analysis 2212 | Some("Distribution of Sale Values") // Title of the plot 2213 | ).await?; 2214 | 2215 | // Box plot showing abs billable value distribution 2216 | let box_plot = mix_res 2217 | .plot_box( 2218 | "abs_billable_value", // Value column for box plot 2219 | Some("customer_name"), // Optional grouping column 2220 | Some("Sales Distribution by Customer") // Title of the plot 2221 | ).await?; 2222 | 2223 | // Scatter plot showing relationship between original and doubled values 2224 | let scatter = mix_res 2225 | .plot_scatter( 2226 | "abs_billable_value", // X-axis: Original values 2227 | 
"double_billable_value", // Y-axis: Doubled values 2228 | Some(8) // Optional marker size 2229 | ).await?; 2230 | 2231 | // Pie chart showing sales distribution 2232 | let pie = mix_res 2233 | .plot_pie( 2234 | "customer_name", // Labels for pie segments 2235 | "total_billable", // Values for pie segments 2236 | Some("Sales Share by Customer") // Title of the plot 2237 | ).await?; 2238 | 2239 | // Donut chart alternative view 2240 | let donut = mix_res 2241 | .plot_donut( 2242 | "customer_name", // Labels for donut segments 2243 | "percentage_total_billable", // Values as percentages 2244 | Some("Percentage Distribution") // Title of the plot 2245 | ).await?; 2246 | 2247 | // Create Tables to add to report 2248 | let summary_table = mix_res.clone() //Clone for multiple usages 2249 | .select([ 2250 | "customer_name", 2251 | "total_billable", 2252 | "avg_abs_billable", 2253 | "max_abs_billable", 2254 | "percentage_total_billable" 2255 | ]) 2256 | .order_by_many([ 2257 | ("total_billable", false) 2258 | ]) 2259 | .elusion("summary") 2260 | .await?; 2261 | 2262 | let transactions_table = mix_res 2263 | .select([ 2264 | "customer_name", 2265 | "order_date", 2266 | "abs_billable_value", 2267 | "double_billable_value", 2268 | "percentage_billable" 2269 | ]) 2270 | .order_by_many([ 2271 | ("order_date", false), 2272 | ("abs_billable_value", false) 2273 | ]) 2274 | .elusion("transactions") 2275 | .await?; 2276 | 2277 | // Create comprehensive dashboard with all plots 2278 | let plots = [ 2279 | (&line, "Sales Line"), // Line based analysis 2280 | (&time_series, "Sales Timeline"), // Time-based analysis 2281 | (&bars, "Customer Sales"), // Customer comparison 2282 | (&histogram, "Sales Distribution"), // Value distribution 2283 | (&scatter, "Value Comparison"), // Value relationships 2284 | (&box_plot, "Customer Distributions"), // Statistical distribution 2285 | (&pie, "Sales Share"), // Share analysis 2286 | (&donut, "Percentage View"), // Percentage breakdown 2287 | ]; 2288 | 2289 | // Add tables array 2290 | let tables = [ 2291 | (&summary_table, "Customer Summary"), 2292 | (&transactions_table, "Transaction Details") 2293 | ]; 2294 | 2295 | let layout = ReportLayout { 2296 | grid_columns: 2, // Arrange plots in 2 columns 2297 | grid_gap: 30, // 30px gap between plots 2298 | max_width: 1600, // Maximum width of 1600px 2299 | plot_height: 450, // Each plot 450px high 2300 | table_height: 500, // Height for tables 2301 | }; 2302 | 2303 | let table_options = TableOptions { 2304 | pagination: true, // Enable pagination for tables 2305 | page_size: 15, // Show 15 rows per page 2306 | enable_sorting: true, // Allow column sorting 2307 | enable_filtering: true, // Allow column filtering 2308 | enable_column_menu: true, // Show column menu (sort/filter/hide options) 2309 | theme: "ag-theme-alpine".to_string(), // Use Alpine theme for modern look 2310 | }; 2311 | 2312 | // Generate the enhanced interactive report with all plots and tables 2313 | CustomDataFrame::create_report( 2314 | Some(&plots), // plots (Optional) 2315 | Some(&tables), // tables (Optional) 2316 | "Interactive Sales Analysis Dashboard", // report_title 2317 | "C:\\Borivoj\\RUST\\Elusion\\Plots\\interactive_aggrid_dashboard.html", // filename 2318 | Some(layout), // layout_config (Optional) 2319 | Some(table_options) // table_options (Optional) 2320 | ).await?; 2321 | ``` 2322 | ### Dashboard Demo 2323 | ![Dash](./images/interactivedash3.gif) 2324 | --- 2325 | ### License 2326 | Elusion is distributed under the [MIT 
License](https://opensource.org/licenses/MIT). 2327 | However, since it builds upon [DataFusion](https://datafusion.apache.org/), which is distributed under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), some parts of this project are subject to the terms of the Apache License 2.0. 2328 | For full details, see the [LICENSE.txt file](LICENSE.txt). 2329 | 2330 | ### Acknowledgments 2331 | This library leverages the power of Rust's type system and libraries like [DataFusion](https://datafusion.apache.org/), 2332 | Apache Arrow, Tokio Cron Scheduler, and Tokio, among others, for efficient query processing. Special thanks to the open-source community for making this project possible. 2333 | 2334 | ## Where you can find me: 2335 | borivoj.grujicic@gmail.com 2336 | -------------------------------------------------------------------------------- /benches/benchmark.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use elusion::prelude::*; 3 | 4 | // Helper function to set up test DataFrames 5 | async fn setup_test_dataframes() -> ElusionResult<(CustomDataFrame, CustomDataFrame, CustomDataFrame, CustomDataFrame)> { 6 | let sales_path = "C:\\Borivoj\\RUST\\Elusion\\SalesData2022.csv"; 7 | let customer_path = "C:\\Borivoj\\RUST\\Elusion\\Customers.csv"; 8 | let products_path = "C:\\Borivoj\\RUST\\Elusion\\Products.csv"; 9 | let sales_order_path = "C:\\Borivoj\\RUST\\Elusion\\sales_order_report2.csv"; 10 | 11 | let sales_df = CustomDataFrame::new(sales_path, "se").await?; 12 | let customers_df = CustomDataFrame::new(customer_path, "c").await?; 13 | let products_df = CustomDataFrame::new(products_path, "p").await?; 14 | let order_df = CustomDataFrame::new(sales_order_path, "o").await?; 15 | 16 | Ok((sales_df, customers_df, products_df, order_df)) 17 | } 18 | 19 | fn benchmark_joins(c: &mut Criterion) { 20 | let rt = tokio::runtime::Runtime::new().unwrap(); 21 | let (sales_df, customers_df, products_df, _) = rt.block_on(setup_test_dataframes()).unwrap(); 22 | 23 | let mut group = c.benchmark_group("Joins"); 24 | 25 | // Single Join Benchmark 26 | group.bench_function("single_join", |b| b.iter(|| { 27 | rt.block_on(async { 28 | sales_df.clone() 29 | .join( 30 | customers_df.clone(), 31 | ["se.CustomerKey = c.CustomerKey"], 32 | "INNER" 33 | ) 34 | .select([ 35 | "se.OrderDate", 36 | "c.FirstName", 37 | "c.LastName", 38 | "se.OrderQuantity" 39 | ]) 40 | .elusion("bench_join") 41 | .await 42 | .unwrap() 43 | }) 44 | })); 45 | 46 | // Multiple Joins Benchmark 47 | group.bench_function("multiple_joins", |b| b.iter(|| { 48 | rt.block_on(async { 49 | sales_df.clone() 50 | .join_many([ 51 | (customers_df.clone(), ["se.CustomerKey = c.CustomerKey"], "INNER"), 52 | (products_df.clone(), ["se.ProductKey = p.ProductKey"], "INNER"), 53 | ]) 54 | .select([ 55 | "c.CustomerKey", 56 | "c.FirstName", 57 | "c.LastName", 58 | "p.ProductName", 59 | ]) 60 | .elusion("bench_many_joins") 61 | .await 62 | .unwrap() 63 | }) 64 | })); 65 | 66 | group.finish(); 67 | } 68 | 69 | fn benchmark_aggregations(c: &mut Criterion) { 70 | let rt = tokio::runtime::Runtime::new().unwrap(); 71 | let (sales_df, customers_df, _, _) = rt.block_on(setup_test_dataframes()).unwrap(); 72 | 73 | let mut group = c.benchmark_group("Aggregations"); 74 | 75 | group.bench_function("simple_agg", |b| b.iter(|| { 76 | rt.block_on(async { 77 | sales_df.clone() 78 | .agg([ 79 | "SUM(se.OrderQuantity) AS total_quantity", 80 | "AVG(se.OrderQuantity) AS 
avg_quantity", 81 | ]) 82 | .elusion("bench_agg") 83 | .await 84 | .unwrap(); 85 | }) 86 | })); 87 | 88 | group.bench_function("complex_agg_with_join", |b| b.iter(|| { 89 | rt.block_on(async { 90 | sales_df.clone() 91 | .join( 92 | customers_df.clone(), // Use the destructured customers_df 93 | ["se.CustomerKey = c.CustomerKey"], 94 | "INNER" 95 | ) 96 | .select([ 97 | "c.FirstName", 98 | "c.LastName" 99 | ]) 100 | .agg([ 101 | "SUM(se.OrderQuantity) AS total_quantity", 102 | "AVG(se.OrderQuantity) AS avg_quantity" 103 | ]) 104 | .group_by(["c.FirstName", "c.LastName"]) 105 | .elusion("bench_complex_agg") 106 | .await 107 | .unwrap(); 108 | }) 109 | })); 110 | 111 | group.finish(); 112 | } 113 | 114 | fn benchmark_multiple_groupings(c: &mut Criterion) { 115 | let rt = tokio::runtime::Runtime::new().unwrap(); 116 | let ( _, _, _,order_df) = rt.block_on(setup_test_dataframes()).unwrap(); 117 | 118 | let mut group = c.benchmark_group("Multiple_Groupings"); 119 | 120 | group.bench_function("agg_multiple_groupings", |b| b.iter(|| { 121 | rt.block_on(async { 122 | order_df.clone() 123 | .select(["customer_name"]) 124 | .agg([ 125 | "SUM(order_value) AS total_value" 126 | ]) 127 | .group_by(["customer_name"]) 128 | .elusion("agg_multiple_groupings") 129 | .await 130 | .unwrap(); 131 | }) 132 | })); 133 | 134 | group.finish(); 135 | } 136 | 137 | 138 | 139 | fn benchmark_window_functions(c: &mut Criterion) { 140 | let rt = tokio::runtime::Runtime::new().unwrap(); 141 | let (sales_df, customers_df, _, _) = rt.block_on(setup_test_dataframes()).unwrap(); 142 | 143 | let mut group = c.benchmark_group("Window_Functions"); 144 | 145 | group.bench_function("basic_window_functions", |b| b.iter(|| { 146 | rt.block_on(async { 147 | sales_df.clone() 148 | .join(customers_df.clone(), ["se.CustomerKey = c.CustomerKey"], "INNER") 149 | .select([ 150 | "se.OrderDate", 151 | "c.FirstName", 152 | "c.LastName", 153 | "se.OrderQuantity" 154 | ]) 155 | // Aggregated window functions 156 | .window("SUM(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS running_total") 157 | .window("AVG(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS running_avg") 158 | // Ranking window functions 159 | .window("ROW_NUMBER() OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS row_num") 160 | .window("DENSE_RANK() OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS dense_rnk") 161 | .limit(10) 162 | .elusion("bench_window_functions") 163 | .await 164 | .unwrap(); 165 | }) 166 | })); 167 | 168 | group.bench_function("advanced_window_functions", |b| b.iter(|| { 169 | rt.block_on(async { 170 | sales_df.clone() 171 | .join(customers_df.clone(), ["se.CustomerKey = c.CustomerKey"], "INNER") 172 | .select([ 173 | "se.OrderDate", 174 | "c.FirstName", 175 | "c.LastName", 176 | "se.OrderQuantity" 177 | ]) 178 | // Analytical window functions 179 | .window("FIRST_VALUE(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS first_qty") 180 | .window("LAST_VALUE(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS last_qty") 181 | .window("LAG(se.OrderQuantity, 1, 0) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS prev_qty") 182 | .window("LEAD(se.OrderQuantity, 1, 0) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS next_qty") 183 | .limit(10) 184 | .elusion("bench_advanced_window_functions") 185 | .await 186 | .unwrap(); 187 | }) 188 | })); 189 | 190 | group.finish(); 191 | } 192 | 193 | fn 
benchmark_window_functions_with_frames(c: &mut Criterion) { 194 | let rt = tokio::runtime::Runtime::new().unwrap(); 195 | let (sales_df, customers_df, _, _) = rt.block_on(setup_test_dataframes()).unwrap(); 196 | 197 | let mut group = c.benchmark_group("Window_Functions_With_Frames"); 198 | 199 | group.bench_function("aggregated_rolling_windows", |b| b.iter(|| { 200 | rt.block_on(async { 201 | sales_df.clone() 202 | .join(customers_df.clone(), ["se.CustomerKey = c.CustomerKey"], "INNER") 203 | .select(["se.OrderDate", "c.FirstName", "c.LastName", "se.OrderQuantity"]) 204 | // Aggregated rolling windows 205 | .window("SUM(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total") 206 | .window("AVG(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS full_partition_avg") 207 | .limit(10) 208 | .elusion("bench_rolling_windows") 209 | .await 210 | .unwrap(); 211 | }) 212 | })); 213 | 214 | group.finish(); 215 | } 216 | 217 | fn benchmark_pivot(c: &mut Criterion) { 218 | let rt = tokio::runtime::Runtime::new().unwrap(); 219 | let (sales_df, _, _, _) = rt.block_on(setup_test_dataframes()).unwrap(); 220 | 221 | let mut group = c.benchmark_group("Pivot"); 222 | 223 | group.bench_function("pivot_operation", |b| b.iter(|| { 224 | rt.block_on(async { 225 | sales_df.clone() 226 | .pivot( 227 | ["StockDate"], // Row identifiers 228 | "TerritoryKey", // Column to pivot 229 | "OrderQuantity", // Value to aggregate 230 | "SUM" // Aggregation function 231 | ) 232 | .await 233 | .unwrap() 234 | .elusion("bench_pivot") 235 | .await 236 | .unwrap(); 237 | }) 238 | })); 239 | 240 | group.finish(); 241 | } 242 | 243 | fn benchmark_unpivot(c: &mut Criterion) { 244 | let rt = tokio::runtime::Runtime::new().unwrap(); 245 | let (pivoted_df, _) = rt.block_on(async { 246 | let (sales_df, _, _, _) = setup_test_dataframes().await.unwrap(); 247 | let pivoted = sales_df.clone() 248 | .pivot( 249 | ["StockDate"], 250 | "TerritoryKey", 251 | "OrderQuantity", 252 | "SUM" 253 | ) 254 | .await 255 | .unwrap() 256 | .elusion("pivoted_df") 257 | .await 258 | .unwrap(); 259 | (pivoted, ()) 260 | }); 261 | 262 | let mut group = c.benchmark_group("Unpivot"); 263 | 264 | group.bench_function("unpivot_operation", |b| b.iter(|| { 265 | rt.block_on(async { 266 | pivoted_df.clone() 267 | .unpivot( 268 | ["StockDate"], // ID columns 269 | ["TerritoryKey_1", "TerritoryKey_2"], // Value columns to unpivot 270 | "Territory", // New name column 271 | "Quantity" // New value column 272 | ) 273 | .await 274 | .unwrap() 275 | .elusion("bench_unpivot") 276 | .await 277 | .unwrap(); 278 | }) 279 | })); 280 | 281 | group.finish(); 282 | } 283 | 284 | fn benchmark_string_functions(c: &mut Criterion) { 285 | let rt = tokio::runtime::Runtime::new().unwrap(); 286 | let (sales_df, customers_df, products_df, _) = rt.block_on(setup_test_dataframes()).unwrap(); 287 | 288 | let mut group = c.benchmark_group("String_Functions"); 289 | 290 | group.bench_function("string_functions_query", |b| b.iter(|| { 291 | rt.block_on(async { 292 | sales_df.clone() 293 | .join_many([ 294 | (customers_df.clone(), ["se.CustomerKey = c.CustomerKey"], "INNER"), 295 | (products_df.clone(), ["se.ProductKey = p.ProductKey"], "INNER"), 296 | ]) 297 | .select([ 298 | "c.CustomerKey", 299 | "c.FirstName", 300 | "c.LastName", 301 | "c.EmailAddress", 302 | "p.ProductName" 303 | ]) 304 | .string_functions([ 305 | 
"TRIM(c.EmailAddress) AS trimmed_email", 306 | "LTRIM(c.EmailAddress) AS left_trimmed_email", 307 | "RTRIM(c.EmailAddress) AS right_trimmed_email", 308 | "UPPER(c.FirstName) AS upper_first_name", 309 | "LOWER(c.LastName) AS lower_last_name", 310 | "LENGTH(c.EmailAddress) AS email_length", 311 | "LEFT(p.ProductName, 10) AS product_start", 312 | "RIGHT(p.ProductName, 10) AS product_end", 313 | "SUBSTRING(p.ProductName, 1, 5) AS product_substr", 314 | // Concatenation 315 | "CONCAT(c.FirstName, ' ', c.LastName) AS full_name", 316 | "CONCAT_WS(' ', c.FirstName, c.LastName, c.EmailAddress) AS all_info", 317 | // Position and Search 318 | "POSITION('@' IN c.EmailAddress) AS at_symbol_pos", 319 | "STRPOS(c.EmailAddress, '@') AS email_at_pos", 320 | // Replacement and Modification 321 | "REPLACE(c.EmailAddress, '@adventure-works.com', '@newdomain.com') AS new_email", 322 | "TRANSLATE(c.FirstName, 'AEIOU', '12345') AS vowels_replaced", 323 | "REPEAT('*', 5) AS stars", 324 | "REVERSE(c.FirstName) AS reversed_name", 325 | // Padding 326 | "LPAD(c.CustomerKey::TEXT, 10, '0') AS padded_customer_id", 327 | "RPAD(c.FirstName, 20, '.') AS padded_name", 328 | // Case Formatting 329 | "INITCAP(LOWER(c.FirstName)) AS proper_case_name", 330 | // String Extraction 331 | "SPLIT_PART(c.EmailAddress, '@', 1) AS email_username", 332 | ]) 333 | .agg([ 334 | "COUNT(*) AS total_records", 335 | "STRING_AGG(p.ProductName, ', ') AS all_products" 336 | ]) 337 | .filter("c.emailaddress IS NOT NULL") 338 | .group_by_all() 339 | .having("COUNT(*) > 1") 340 | .order_by(["c.CustomerKey"], [true]) 341 | .limit(10) 342 | .elusion("bench_string_functions") 343 | .await 344 | .unwrap(); 345 | }) 346 | })); 347 | 348 | group.finish(); 349 | } 350 | 351 | pub fn benchmark_appending(c: &mut Criterion) { 352 | let rt = tokio::runtime::Runtime::new().unwrap(); 353 | let (sales_df, customers_df, _, _) = rt.block_on(setup_test_dataframes()).unwrap(); 354 | 355 | let mut group = c.benchmark_group("Union_Intersect_Operations"); 356 | group.sample_size(10); 357 | 358 | // Benchmark Union with String Functions 359 | group.bench_function("union_string_functions", |b| b.iter(|| { 360 | rt.block_on(async { 361 | // First DataFrame 362 | let df1 = sales_df.clone() 363 | .join( 364 | customers_df.clone(), 365 | ["se.CustomerKey = c.CustomerKey"], 366 | "INNER" 367 | ) 368 | .select(["c.FirstName", "c.LastName"]) 369 | .string_functions([ 370 | "TRIM(c.EmailAddress) AS trimmed_email", 371 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 372 | ]); 373 | 374 | // Second DataFrame 375 | let df2 = sales_df.clone() 376 | .join( 377 | customers_df.clone(), 378 | ["se.CustomerKey = c.CustomerKey"], 379 | "INNER" 380 | ) 381 | .select(["c.FirstName", "c.LastName"]) 382 | .string_functions([ 383 | "TRIM(c.EmailAddress) AS trimmed_email", 384 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 385 | ]); 386 | 387 | // Execute transformations and union 388 | let result_df1 = df1.elusion("df1").await.unwrap(); 389 | let result_df2 = df2.elusion("df2").await.unwrap(); 390 | 391 | let union_df = result_df1.union(result_df2).await.unwrap(); 392 | let _ = union_df.limit(100).elusion("union_re").await.unwrap(); 393 | }) 394 | })); 395 | 396 | // Benchmark Intersect with String Functions 397 | group.bench_function("intersect_string_functions", |b| b.iter(|| { 398 | rt.block_on(async { 399 | // First DataFrame 400 | let df1 = sales_df.clone() 401 | .join( 402 | customers_df.clone(), 403 | ["se.CustomerKey = c.CustomerKey"], 404 | 
"INNER" 405 | ) 406 | .select(["c.FirstName", "c.LastName"]) 407 | .string_functions([ 408 | "TRIM(c.EmailAddress) AS trimmed_email", 409 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 410 | ]); 411 | 412 | // Second DataFrame - same structure for intersection 413 | let df2 = sales_df.clone() 414 | .join( 415 | customers_df.clone(), 416 | ["se.CustomerKey = c.CustomerKey"], 417 | "INNER" 418 | ) 419 | .select(["c.FirstName", "c.LastName"]) 420 | .string_functions([ 421 | "TRIM(c.EmailAddress) AS trimmed_email", 422 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 423 | ]); 424 | 425 | // Execute transformations and intersect 426 | let result_df1 = df1.elusion("df1_intersect").await.unwrap(); 427 | let result_df2 = df2.elusion("df2_intersect").await.unwrap(); 428 | 429 | let intersect_df = result_df1.intersect(result_df2).await.unwrap(); 430 | let _ = intersect_df.limit(100).elusion("intersect_result").await.unwrap(); 431 | }) 432 | })); 433 | 434 | // Benchmark Union with Aggregations 435 | group.bench_function("union_with_aggregations", |b| b.iter(|| { 436 | rt.block_on(async { 437 | // First DataFrame with aggregations 438 | let df1 = sales_df.clone() 439 | .join( 440 | customers_df.clone(), 441 | ["se.CustomerKey = c.CustomerKey"], 442 | "INNER" 443 | ) 444 | .select(["c.FirstName", "c.LastName"]) 445 | .agg([ 446 | "SUM(s.SalesAmount) as total_sales", 447 | "COUNT(*) as transaction_count" 448 | ]) 449 | .group_by_all(); 450 | 451 | // Second DataFrame with aggregations 452 | let df2 = sales_df.clone() 453 | .join( 454 | customers_df.clone(), 455 | ["se.CustomerKey = c.CustomerKey"], 456 | "INNER" 457 | ) 458 | .select(["c.FirstName", "c.LastName"]) 459 | .agg([ 460 | "SUM(s.SalesAmount) as total_sales", 461 | "COUNT(*) as transaction_count" 462 | ]) 463 | .group_by_all(); 464 | 465 | // Execute transformations and union 466 | let result_df1 = df1.elusion("df1_agg").await.unwrap(); 467 | let result_df2 = df2.elusion("df2_agg").await.unwrap(); 468 | 469 | let union_df = result_df1.union(result_df2).await.unwrap(); 470 | let _ = union_df.limit(100).elusion("union_agg_result").await.unwrap(); 471 | }) 472 | })); 473 | 474 | // Benchmark Intersect with Aggregations 475 | group.bench_function("intersect_with_aggregations", |b| b.iter(|| { 476 | rt.block_on(async { 477 | // First DataFrame with aggregations 478 | let df1 = sales_df.clone() 479 | .join( 480 | customers_df.clone(), 481 | ["se.CustomerKey = c.CustomerKey"], 482 | "INNER" 483 | ) 484 | .select(["c.FirstName", "c.LastName"]) 485 | .agg([ 486 | "SUM(s.SalesAmount) as total_sales", 487 | "COUNT(*) as transaction_count" 488 | ]) 489 | .group_by_all(); 490 | 491 | // Second DataFrame with aggregations 492 | let df2 = sales_df.clone() 493 | .join( 494 | customers_df.clone(), 495 | ["se.CustomerKey = c.CustomerKey"], 496 | "INNER" 497 | ) 498 | .select(["c.FirstName", "c.LastName"]) 499 | .agg([ 500 | "SUM(s.SalesAmount) as total_sales", 501 | "COUNT(*) as transaction_count" 502 | ]) 503 | .group_by_all(); 504 | 505 | // Execute transformations and intersect 506 | let result_df1 = df1.elusion("df1_agg_intersect").await.unwrap(); 507 | let result_df2 = df2.elusion("df2_agg_intersect").await.unwrap(); 508 | 509 | let intersect_df = result_df1.intersect(result_df2).await.unwrap(); 510 | let _ = intersect_df.limit(100).elusion("intersect_agg_result").await.unwrap(); 511 | }) 512 | })); 513 | 514 | group.finish(); 515 | } 516 | 517 | fn benchmark_mysql_operations(c: &mut Criterion) { 518 | let rt = 
tokio::runtime::Runtime::new().unwrap(); 519 | 520 | let mut group = c.benchmark_group("MySQL_Operations"); 521 | group.sample_size(10); // Reduce sample size for database operations 522 | 523 | // Benchmark basic MySQL query 524 | group.bench_function("basic_mysql_query", |b| b.iter(|| { 525 | rt.block_on(async { 526 | let mysql_config = MySqlConfig { 527 | host: "localhost".to_string(), 528 | port: 3306, 529 | user: "databora".to_string(), 530 | password: "!Djavolak1".to_string(), 531 | database: "brewery".to_string(), 532 | pool_size: Some(5), 533 | }; 534 | 535 | let conn = MySqlConnection::new(mysql_config).await.unwrap(); 536 | 537 | // Simple query 538 | let query = "SELECT * FROM brewery_data LIMIT 10"; 539 | let df = CustomDataFrame::from_mysql(&conn, query, "basic_mysql_data").await.unwrap(); 540 | let _ = df.limit(10).elusion("basic_result").await.unwrap(); 541 | 542 | }) 543 | })); 544 | 545 | // Benchmark complex MySQL query with CTE, JOINS and window functions 546 | group.bench_function("complex_mysql_query", |b| b.iter(|| { 547 | rt.block_on(async { 548 | let mysql_config = MySqlConfig { 549 | host: "localhost".to_string(), 550 | port: 3306, 551 | user: "databora".to_string(), 552 | password: "!Djavolak1".to_string(), 553 | database: "brewery".to_string(), 554 | pool_size: Some(5), 555 | }; 556 | 557 | let conn = MySqlConnection::new(mysql_config).await.unwrap(); 558 | 559 | // Complex query with CTE, JOIN, and window functions 560 | let mysql_query = " 561 | WITH ranked_sales AS ( 562 | SELECT 563 | c.color AS brew_color, 564 | bd.beer_style, 565 | bd.location, 566 | SUM(bd.total_sales) AS total_sales 567 | FROM 568 | brewery_data bd 569 | JOIN 570 | colors c ON bd.Color = c.color_number 571 | WHERE 572 | bd.brew_date >= '2020-01-01' AND bd.brew_date <= '2020-03-01' 573 | GROUP BY 574 | c.color, bd.beer_style, bd.location 575 | ) 576 | SELECT 577 | brew_color, 578 | beer_style, 579 | location, 580 | total_sales, 581 | ROW_NUMBER() OVER (PARTITION BY brew_color ORDER BY total_sales DESC) AS ranked 582 | FROM 583 | ranked_sales 584 | ORDER BY 585 | brew_color, total_sales DESC"; 586 | 587 | let df = CustomDataFrame::from_mysql(&conn, mysql_query, "mysql_data").await.unwrap(); 588 | let _ = df.limit(100).elusion("complex_result").await.unwrap(); 589 | 590 | }) 591 | })); 592 | 593 | // Benchmark MySQL query with post-processing 594 | group.bench_function("mysql_with_processing", |b| b.iter(|| { 595 | rt.block_on(async { 596 | let mysql_config = MySqlConfig { 597 | host: "localhost".to_string(), 598 | port: 3306, 599 | user: "databora".to_string(), 600 | password: "!Djavolak1".to_string(), 601 | database: "brewery".to_string(), 602 | pool_size: Some(5), 603 | }; 604 | 605 | let conn = MySqlConnection::new(mysql_config).await.unwrap(); 606 | 607 | let query = "SELECT * FROM brewery_data"; 608 | let df = CustomDataFrame::from_mysql(&conn, query, "process_mysql_data").await.unwrap(); 609 | // Apply additional processing with Elusion 610 | let _ = df 611 | .select([ 612 | "brew_date", 613 | "beer_style", 614 | "location", 615 | "total_sales" 616 | ]) 617 | .filter("total_sales > 1000") 618 | .agg([ 619 | "SUM(total_sales) AS total_revenue", 620 | "AVG(total_sales) AS avg_revenue", 621 | "COUNT(*) AS sale_count" 622 | ]) 623 | .group_by([ 624 | "beer_style", 625 | "location" 626 | ]) 627 | .order_by(["total_revenue"], [false]) // DESC order 628 | .limit(20) 629 | .elusion("processed_result") 630 | .await 631 | .unwrap(); 632 | 633 | }) 634 | })); 635 | 636 | group.finish(); 637 | } 
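// NOTE (editorial sketch, not part of the original source): the benchmark groups
// above are registered with Criterion below, so the whole suite runs with
// `cargo bench`, and a single group can be selected by name, e.g.
// `cargo bench -- MySQL_Operations`.
//
// The MySQL benchmarks assume a reachable local `brewery` database. Rather than
// hard-coding credentials as above, the config could be built from environment
// variables; the helper name and variable names here are illustrative only:
//
// fn mysql_config_from_env() -> MySqlConfig {
//     MySqlConfig {
//         host: std::env::var("MYSQL_HOST").unwrap_or_else(|_| "localhost".to_string()),
//         port: 3306,
//         user: std::env::var("MYSQL_USER").expect("MYSQL_USER not set"),
//         password: std::env::var("MYSQL_PASSWORD").expect("MYSQL_PASSWORD not set"),
//         database: std::env::var("MYSQL_DATABASE").unwrap_or_else(|_| "brewery".to_string()),
//         pool_size: Some(5),
//     }
// }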
638 | 639 | criterion_group!( 640 | benches, 641 | benchmark_joins, 642 | benchmark_multiple_groupings, 643 | benchmark_aggregations, 644 | benchmark_window_functions, 645 | benchmark_window_functions_with_frames, 646 | benchmark_pivot, 647 | benchmark_unpivot, 648 | benchmark_string_functions, 649 | benchmark_appending, 650 | benchmark_mysql_operations 651 | ); 652 | criterion_main!(benches); 653 | -------------------------------------------------------------------------------- /images/bar.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataBora/elusion/eeebe26b0637e66835581d4b8b34d133954f61cd/images/bar.PNG -------------------------------------------------------------------------------- /images/elusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataBora/elusion/eeebe26b0637e66835581d4b8b34d133954f61cd/images/elusion.png -------------------------------------------------------------------------------- /images/interactivedash3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataBora/elusion/eeebe26b0637e66835581d4b8b34d133954f61cd/images/interactivedash3.gif -------------------------------------------------------------------------------- /images/platformcom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataBora/elusion/eeebe26b0637e66835581d4b8b34d133954f61cd/images/platformcom.png -------------------------------------------------------------------------------- /images/report.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataBora/elusion/eeebe26b0637e66835581d4b8b34d133954f61cd/images/report.PNG -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod prelude; 2 | pub mod error; 3 | 4 | pub use prelude::*; 5 | 6 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use elusion::prelude::*; 2 | 3 | #[tokio::main] 4 | async fn main() -> ElusionResult<()> { 5 | 6 | println!("Hello, Elusion!"); 7 | 8 | Ok(()) 9 | } 10 | 11 | -------------------------------------------------------------------------------- /src/prelude.rs: -------------------------------------------------------------------------------- 1 | pub use crate::PipelineScheduler; 2 | 3 | pub use crate::{CustomDataFrame, AliasedDataFrame, CsvWriteOptions}; 4 | pub use crate::{ElusionError, ElusionResult}; 5 | //====== postgres 6 | pub use crate::{PostgresConfig, PostgresConnection}; 7 | //========= mysql 8 | pub use crate::{MySqlConfig, MySqlConnection}; 9 | //====== dashboard 10 | pub use crate::{ReportLayout, TableOptions}; 11 | 12 | pub use crate::DateFormat; 13 | pub use crate::{extract_row_from_df, extract_value_from_df}; 14 | 15 | pub use regex::Regex; 16 | pub use datafusion::prelude::*; 17 | pub use datafusion::error::DataFusionError; 18 | pub use futures::future::BoxFuture; 19 | pub use datafusion::datasource::MemTable; 20 | pub use std::sync::Arc; 21 | pub use arrow::datatypes::{Field, DataType as ArrowDataType, Schema, SchemaRef}; 22 | pub use chrono::NaiveDate; 23 | pub use arrow::array::{StringBuilder, 
ArrayRef, ArrayBuilder, Float64Builder, Int64Builder, UInt64Builder}; 24 | 25 | pub use arrow::record_batch::RecordBatch; 26 | pub use ArrowDataType::*; 27 | pub use arrow::csv::writer::WriterBuilder; 28 | 29 | // ========= CSV 30 | pub use std::fs::{self, File, OpenOptions}; 31 | pub use std::io::{self, Read, Write, BufWriter}; 32 | 33 | //============ WRITERS 34 | pub use datafusion::prelude::SessionContext; 35 | pub use datafusion::dataframe::{DataFrame,DataFrameWriteOptions}; 36 | 37 | // ========= JSON 38 | pub use serde_json::{json, Map, Value}; 39 | pub use serde::{Deserialize, Serialize}; 40 | pub use std::collections::{HashMap, HashSet}; 41 | pub use arrow::error::Result as ArrowResult; 42 | pub use datafusion::arrow::datatypes::TimeUnit; 43 | //---json writer 44 | pub use arrow::array::{ListArray,TimestampMicrosecondArray,TimestampMillisecondArray,TimestampSecondArray,LargeBinaryArray,BinaryArray,LargeStringArray,Float32Array,UInt64Array,UInt32Array,BooleanArray}; 45 | 46 | //delta 47 | pub use std::result::Result; 48 | pub use std::path::{Path as LocalPath, PathBuf}; 49 | pub use deltalake::operations::DeltaOps; 50 | pub use deltalake::writer::{RecordBatchWriter, WriteMode, DeltaWriter}; 51 | pub use deltalake::{open_table, DeltaTableBuilder, DeltaTableError, ObjectStore, Path as DeltaPath}; 52 | pub use deltalake::protocol::SaveMode; 53 | pub use deltalake::kernel::{DataType as DeltaType, Metadata, Protocol, StructType}; 54 | pub use deltalake::kernel::StructField; 55 | pub use futures::StreamExt; 56 | pub use deltalake::storage::object_store::local::LocalFileSystem; 57 | // use object_store::path::Path as ObjectStorePath; 58 | 59 | // =========== ERROR 60 | 61 | pub use std::fmt::{self, Debug}; 62 | pub use std::error::Error; 63 | 64 | // PIVOT 65 | pub use arrow::compute; 66 | pub use arrow::array::StringArray; 67 | 68 | //PLOTTING 69 | #[cfg(feature = "dashboard")] 70 | pub use plotly::{Plot, Scatter, Bar, Histogram, BoxPlot, Pie}; 71 | #[cfg(feature = "dashboard")] 72 | pub use plotly::common::{Mode, Line, Marker, Orientation}; 73 | #[cfg(feature = "dashboard")] 74 | pub use plotly::layout::{Axis, Layout}; 75 | #[cfg(feature = "dashboard")] 76 | pub use plotly::color::Rgb; 77 | #[cfg(feature = "dashboard")] 78 | pub use plotly::layout::update_menu::{Button,UpdateMenu,UpdateMenuDirection}; 79 | #[cfg(feature = "dashboard")] 80 | pub use plotly::layout::{DragMode, RangeSlider}; 81 | 82 | pub use arrow::array::{Array, Float64Array,Int64Array,Int32Array,TimestampNanosecondArray, Date64Array,Date32Array}; 83 | #[cfg(feature = "dashboard")] 84 | pub use std::cmp::Ordering; 85 | 86 | #[cfg(not(feature = "dashboard"))] 87 | pub struct Plot; 88 | #[cfg(not(feature = "dashboard"))] 89 | pub struct Scatter; 90 | #[cfg(not(feature = "dashboard"))] 91 | pub struct Bar; 92 | #[cfg(not(feature = "dashboard"))] 93 | pub struct Histogram; 94 | #[cfg(not(feature = "dashboard"))] 95 | pub struct BoxPlot; 96 | #[cfg(not(feature = "dashboard"))] 97 | pub struct Pie; 98 | #[cfg(not(feature = "dashboard"))] 99 | pub struct Mode; 100 | #[cfg(not(feature = "dashboard"))] 101 | pub struct Line; 102 | #[cfg(not(feature = "dashboard"))] 103 | pub struct Marker; 104 | #[cfg(not(feature = "dashboard"))] 105 | pub struct Orientation; 106 | #[cfg(not(feature = "dashboard"))] 107 | pub struct Axis; 108 | #[cfg(not(feature = "dashboard"))] 109 | pub struct Layout; 110 | #[cfg(not(feature = "dashboard"))] 111 | pub struct Rgb; 112 | #[cfg(not(feature = "dashboard"))] 113 | pub struct Button; 114 | 
#[cfg(not(feature = "dashboard"))] 115 | pub struct UpdateMenu; 116 | #[cfg(not(feature = "dashboard"))] 117 | pub struct UpdateMenuDirection; 118 | #[cfg(not(feature = "dashboard"))] 119 | pub struct DragMode; 120 | #[cfg(not(feature = "dashboard"))] 121 | pub struct RangeSlider; 122 | 123 | // STATISTICS 124 | pub use datafusion::common::ScalarValue; 125 | 126 | // ========== AZURE 127 | #[cfg(feature = "azure")] 128 | pub use azure_storage_blobs::prelude::*; 129 | #[cfg(feature = "azure")] 130 | pub use azure_storage::StorageCredentials; 131 | #[cfg(feature = "azure")] 132 | pub use azure_storage::CloudLocation; 133 | pub use futures::stream; 134 | pub use std::io::BufReader; 135 | pub use futures::pin_mut; 136 | pub use csv::ReaderBuilder; 137 | pub use csv::Trim::All; 138 | pub use serde_json::Deserializer; 139 | // ==== writing 140 | #[cfg(feature = "azure")] 141 | pub use azure_storage_blobs::blob::{BlockList, BlobBlockType}; 142 | pub use bytes::Bytes; 143 | pub use datafusion::parquet::basic::Compression; 144 | pub use datafusion::parquet::file::properties::{WriterProperties, WriterVersion}; 145 | pub use datafusion::parquet::arrow::ArrowWriter; 146 | pub use base64::engine::general_purpose::STANDARD; 147 | pub use base64::Engine; 148 | pub use futures::TryStreamExt; 149 | pub use tempfile::Builder; 150 | 151 | // ======== Scheduler 152 | pub use std::future::Future; 153 | pub use tokio_cron_scheduler::{JobScheduler, Job}; 154 | 155 | // ======== From API 156 | #[cfg(feature = "api")] 157 | pub use reqwest::Client; 158 | #[cfg(feature = "api")] 159 | pub use urlencoding::encode; 160 | 161 | pub use crate::ElusionApi; 162 | 163 | #[cfg(not(feature = "api"))] 164 | pub struct Client; 165 | 166 | 167 | // ========= VIEWS and Cache 168 | pub use std::hash::{Hash, Hasher}; 169 | pub use std::collections::hash_map::DefaultHasher; 170 | pub use chrono::{DateTime, Utc}; 171 | pub use std::sync::Mutex; 172 | pub use lazy_static::lazy_static; 173 | 174 | // =========== DATE TABLE BUILDER 175 | pub use arrow::array::Int32Builder; 176 | pub use arrow::array::BooleanBuilder; 177 | pub use chrono::{Datelike, Weekday, Duration, NaiveDateTime, NaiveTime}; 178 | 179 | // ========= EXCEL 180 | #[cfg(feature = "excel")] 181 | pub use rust_xlsxwriter::{Format, Workbook, ExcelDateTime}; 182 | #[cfg(feature = "excel")] 183 | pub use arrow::array::{Int8Array, Int16Array,UInt8Array, UInt16Array}; 184 | 185 | pub use calamine::DataType as CalamineDataType; 186 | pub use calamine::{Reader, Xlsx, open_workbook}; --------------------------------------------------------------------------------