├── .github └── workflows │ └── main.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE.txt ├── README.md ├── benches └── benchmark.rs ├── images ├── bar.PNG ├── elusion.png ├── interactivedash3.gif ├── platformcom.png └── report.PNG └── src ├── elusion.rs ├── lib.rs ├── main.rs └── prelude.rs /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Platform Compatibility 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest, macos-latest, windows-latest] 15 | rust: [stable] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Setup Rust 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | profile: minimal 23 | toolchain: ${{ matrix.rust }} 24 | override: true 25 | 26 | - name: Install ODBC (Ubuntu) 27 | if: matrix.os == 'ubuntu-latest' 28 | run: | 29 | sudo apt-get update 30 | sudo apt-get install -y unixodbc-dev 31 | 32 | - name: Install ODBC (macOS) 33 | if: matrix.os == 'macos-latest' 34 | run: | 35 | brew install unixodbc 36 | 37 | - name: Build 38 | run: cargo build --verbose 39 | 40 | - name: Run tests 41 | run: cargo test --verbose 42 | 43 | security-audit: 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v2 47 | - name: Run security audit 48 | run: | 49 | cargo install cargo-audit 50 | cargo audit --deny warnings --ignore RUSTSEC-2024-0384 --ignore RUSTSEC-2025-0004 --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2023-0055 --ignore RUSTSEC-2023-0086 51 | 52 | tls-check: 53 | runs-on: ubuntu-latest 54 | steps: 55 | - uses: actions/checkout@v2 56 | - name: Check HTTPS usage 57 | run: | 58 | ! grep -r "http://" src/ || exit 1 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [3.11.0] - 2025-05-17 2 | ### Added 3 | - EXCEL reader/loader 4 | 5 | ## [3.10.0] - 2025-05-17 6 | ### Added 7 | - EXCEL Feature. 
write_to_excel() function 8 | 9 | ## [3.9.0] - 2025-05-08 10 | ### Added 11 | - MYSQL Database connectivity 12 | ### Fixed 13 | - Filter lowercasing column name 14 | - AS case insensitivity for .json() and .json_array() 15 | 16 | ## [3.8.1] - 2025-05-07 17 | ### Fixed 18 | - Client import clash from API (reqwest::Client) and POSTGRES (tokio_postgres::Client) features 19 | ### Updated 20 | - tokio from `1.41.1` to `1.45.0` 21 | 22 | ## [3.8.0] - 2025-05-07 23 | ### Added 24 | - POSTGRES Database connectivity 25 | 26 | ## [3.7.5] - 2025-05-06 27 | ### Fixed 28 | - Fixed features behind feature flags 29 | 30 | ## [3.7.4] - 2025-05-06 31 | ### Added 32 | - json_array() function for parsing json values from DataFrame columns 33 | 34 | ## [3.7.3] - 2025-05-06 35 | ### BREAKING CHANGE 36 | - Removed ODBC features 37 | ### Added 38 | - json() function for parsing json values from DataFrame columns 39 | - Moved Azure Blob Storage behind feature flag 40 | 41 | 42 | ## [3.7.2] - 2025-05-04 43 | ### Added 44 | - Moved Dashboards/Reporting and APIs behind feature flags 45 | ### Dependency update 46 | - Tokio `1.42.0` to `1.42.1` 47 | 48 | ## [3.7.1] - 2025-03-09 49 | ### Added 50 | - extract_value_from_df() that extracts a Value from a DataFrame based on column name and row index 51 | - extract_row_from_df() that extracts a Row from a DataFrame as a HashMap based on row index 52 | 53 | ## [3.7.0] - 2025-03-09 54 | ### Added 55 | - write_to_json() for writing to local JSON files 56 | - write_json_to_azure_with_sas() for writing JSON to Azure Blob Storage with a SAS token 57 | 58 | ## [3.5.1] - 2025-03-08 59 | ### Added 60 | - create_formatted_date_range_table() function that creates a Date Table with custom Formats (ISO, European, HumanReadable...) and week, month, quarter and year ranges (start - end) 61 | 62 | ## [3.5.0] - 2025-03-02 63 | ### Added 64 | - empty() function that creates an empty DataFrame that can be populated 65 | - create_date_range_table() function that creates a Date Table 66 | 67 | ## [3.4.1] - 2025-03-02 68 | ### Renamed 69 | - Renamed function from_materialized_view() to from_view() 70 | 71 | ## [3.4.0] - 2025-03-02 72 | ### Added 73 | - Caching and Materialized Views 74 | 75 | ## [3.3.1] - 2025-02-26 76 | ### Fixed build 77 | - Updates to the arrow and arrow-arith crates made the build conflict with the chrono crate.
Downgraded datafusion to `43.0.0` 78 | arrow fixed to `=53.2.0`, chrono fixed to `=0.4.38` 79 | 80 | ## [3.3.0] - 2025-02-21 81 | ### Fixed 82 | - Select() AS no longer gets _ between the column name and AS 83 | 84 | ## [3.2.0] - 2025-02-20 85 | ### Fixed 86 | - Flexible query ordering for select, string, agg, filter and datetime 87 | 88 | ## [3.1.0] - 2025-02-17 89 | ### Fixed 90 | - filter() condition case sensitivity 91 | 92 | ## [3.0.0] - 2025-02-17 93 | ### BREAKING CHANGE 94 | - ALL Query Expressions, DataFrame Columns and Functions are normalized with LOWERCASE(), TRIM(), REPLACE(" ", "_") 95 | 96 | ## [2.8.0] - 2025-02-16 97 | ### Added 98 | - DateTime Functions within datetime_functions() 99 | ### FeatureFlag 100 | - Moved ODBC behind feature flag 101 | 102 | ## [2.7.0] - 2025-02-11 103 | ### Added 104 | - Interactive tables for Reporting 105 | 106 | ## [2.5.1] - 2025-02-09 107 | ### Added 108 | - Interactive Dashboarding 109 | 110 | ## [2.5.0] - 2025-02-09 111 | ### BREAKING CHANGE 112 | - The write_parquet_to_azure_with_sas() function now needs a specified writing mode: overwrite or append 113 | ### Fixed 114 | - CSV writer 115 | ### Added 116 | - Dependency tempfile `3.16.0` 117 | 118 | ## [2.4.3] - 2025-02-09 119 | ### Fixed 120 | - Appending data for Parquet Writer 121 | 122 | ## [2.4.2] - 2025-02-08 123 | ### Added 124 | - Custom ERROR handling for UNION, APPEND... 125 | 126 | ## [2.4.1] - 2025-02-07 127 | ### Added 128 | - Custom ERROR handling 129 | 130 | ## [2.4.0] - 2025-02-06 131 | ### Fixed 132 | - Fixed UNION, UNION_ALL, EXCEPT, INTERSECT so they now return proper results 133 | ### BREAKING CHANGE 134 | - UNION, UNION_ALL, EXCEPT, INTERSECT are now async and also need to be evaluated with elusion() - check readme.md for examples 135 | ### Added 136 | - Fixed APPEND, APPEND_ALL 137 | 138 | ## [2.3.0] - 2025-02-04 139 | ### Fixed 140 | - Fixed case sensitivity within statistical functions 141 | 142 | ## [2.2.0] - 2025-02-03 143 | ### Fixed 144 | - Improved parsing for single DataFrames, for all functions, to avoid using aliases on single DataFrames 145 | 146 | ## [2.1.0] - 2025-02-02 147 | ### BREAKING CHANGE 148 | - REST API now must use file path + JSON file name as argument, e.g.: "C:\\Borivoj\\RUST\\Elusion\\sales_jan_2025.json" 149 | 150 | ## [2.0.0] - 2025-01-31 151 | ### Added 152 | - REST API to JSON files 153 | ### BREAKING CHANGES 154 | - REST API is now detached from CustomDataFrame (check readme for examples) 155 | 156 | ## [1.7.2] - 2025-01-31 157 | ### Added 158 | - Improved JSON file reading performance by 50% 159 | 160 | ## [1.7.1] - 2025-01-29 161 | ### Added 162 | - Writing Parquet to Azure Blob Storage 163 | 164 | ## [1.7.0] - 2025-01-28 165 | ### Removed 166 | - REST API (until I fix it) 167 | 168 | ## [1.5.1] - 2025-01-28 169 | ### Added 170 | - URL Encoding for REST API params and headers 171 | ### Added 172 | - Dependencies: `urlencoding` `2.1.3` 173 | 174 | ## [1.5.0] - 2025-01-27 175 | ### Added 176 | - Reading Data From API into CustomDataFrame 177 | ### Added 178 | - Dependencies: `reqwest` `0.12` 179 | 180 | ## [1.4.0] - 2025-01-26 181 | ### Added 182 | - Pipeline Scheduler 183 | ### Added 184 | - Dependencies: `tokio-cron-scheduler` `0.13.0` 185 | 186 | ## [1.3.0] - 2025-01-25 187 | ### Added 188 | - Azure Blob Connection.
You can connect and download .json or .csv files with the from_azure_with_sas_token() function 189 | ### Added 190 | - Dependencies: `azure_storage_blobs` `0.21.0`, `azure_storage` `0.21.0`, `csv` `1.1` 191 | 192 | ## [1.2.0] - 2025-01-24 193 | ### Added 194 | - ODBC Database connections for MySQL and PostgreSQL 195 | ### Added 196 | - Dependencies: `lazy_static` `1.5.0`, `arrow-odbc` `14.1.0` 197 | 198 | ## [1.1.1] - 2025-01-21 199 | ### Added 200 | - Statistical Functions: display_stats(), display_null_analysis(), display_correlation_matrix() 201 | 202 | ## [1.1.0] - 2025-01-21 203 | ### Added 204 | - Dependencies: `plotly` `0.12.1` with Plots: Line, TimeSeries, Bar, Pie, Donut, Histogram, Box 205 | 206 | ## [1.0.1] - 2025-01-20 207 | ### Updated 208 | - Platform Compatibility (MacOS, Linux, Microsoft) and Code/Dependencies Audit 209 | 210 | ## [1.0.0] - 2025-01-19 211 | ### BREAKING CHANGE 212 | - JOIN and JOIN_MANY functions can now receive multiple arguments 213 | ### Updated 214 | - Handling conditions within String Functions and Aggregate functions 215 | ### MAJOR RELEASE 216 | - Library fully tested and ready for production 217 | 218 | ## [0.5.8] - 2025-01-18 219 | ### Added 220 | - PIVOT and UNPIVOT functions 221 | ### Updated 222 | - Dependencies: `datafusion` to `44.0.0` 223 | 224 | ## [0.5.7] - 2025-01-12 225 | ### Fixed 226 | - Window function to properly parse multiple arguments within aggregation, analytics and ranking 227 | 228 | ## [0.5.5] - 2025-01-12 229 | ### Added 230 | - except() and intersect() 231 | 232 | ## [0.5.4] - 2025-01-12 233 | ### Added 234 | - union() and union_all() 235 | 236 | ## [0.5.3] - 2025-01-09 237 | ### Fixed 238 | - Multiple nested functions in SELECT() 239 | ### Added 240 | - group_by_all() function that takes all non-aggregated columns from SELECT 241 | 242 | ## [0.5.2] - 2025-01-10 243 | ### Added 244 | - `String Functions` that can be applied on string columns 245 | 246 | ## [0.5.1] - 2025-01-09 247 | ### Fixed 248 | - Scalar and Aggregation function parsing, for single and nested functions 249 | 250 | ## [0.5.0] - 2025-01-07 251 | ### BREAKING CHANGE 252 | - Removed AggregationBuilder; agg() is now used for aggregations 253 | - Removed SQL Support, as the DataFrame API has developed considerably and there is not much need for raw SQL moving forward. If there is demand for raw SQL, I will bring it back in v1.0.0 254 | 255 | ## [0.4.0] - 2025-01-06 256 | ### BREAKING CHANGE 257 | - No more use of vec![] in DataFrame API Query Functions 258 | 259 | ## [0.3.0] - 2025-01-05 260 | ### Added 261 | - DELTA table Writer and Reader 262 | 263 | ## [0.2.5] - 2025-01-02 264 | ### Added 265 | - PARQUET reader 266 | - Removed manual SCHEMA declaration; CustomDataFrame::new() now only needs a file path and alias 267 | 268 | ## [0.2.4] - 2025-01-01 269 | ### Fixed 270 | - JOIN for multiple DataFrames 271 | - HAVING and FILTER functions fixed 272 | 273 | ## [0.2.3] - 2024-12-29 274 | ### Added 275 | - CSV writer 276 | 277 | ## [0.2.2] - 2024-12-28 278 | ### Added 279 | - Dependencies: `serde` `1.0.216`, `serde_json` `1.0.134` 280 | - Support for JSON files: Reading and Loading to CustomDataFrame 281 | ### Fixed 282 | - Improved display() function for better formatting.
283 | 284 | ## [0.2.0] - 2024-12-24 285 | ### Added 286 | - Full Raw SQL Querying support 287 | 288 | ## [0.1.3] - 2024-12-23 289 | ### Added 290 | - Aliasing column names directly in select() function 291 | 292 | ## [0.1.1] - 2024-12-21 293 | ### Added 294 | - Added support for `prelude` to simplify imports for users. 295 | 296 | ### Fixed 297 | - Improved error handling and clarified documentation. 298 | 299 | ### Updated 300 | - Dependencies: `chrono` to `0.4.39` , `tokio` to `1.42.0` 301 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "elusion" 3 | version = "3.11.0" 4 | edition = "2021" 5 | authors = ["Borivoj Grujicic "] 6 | description = "Elusion is a modern DataFrame / Data Engineering / Data Analysis library that combines the familiarity of DataFrame operations (like those in PySpark, Pandas, and Polars) with the power of SQL query building. It provides flexible query construction without enforcing strict operation ordering, enabling developers to write intuitive and maintainable data transformations." 7 | license = "MIT" 8 | repository = "https://github.com/DataBora/elusion" 9 | homepage = "https://github.com/DataBora/elusion" 10 | documentation = "https://docs.rs/elusion" 11 | readme = "README.md" 12 | keywords = ["data", "engineering", "dataframe", "analysis"] 13 | categories = ["data-structures"] 14 | 15 | [dependencies] 16 | datafusion = "43.0.0" 17 | deltalake = "0.23.0" 18 | arrow = "=53.2.0" 19 | chrono = "=0.4.38" #arrow-arith has same Quarter function 20 | tokio = { version = "1.45.0", features = ["rt-multi-thread"] } 21 | futures = "0.3.31" 22 | regex = "1.11.1" 23 | encoding_rs = "0.8.35" 24 | serde = { version = "1.0.216", features = ["derive"] } 25 | serde_json = "1.0.134" 26 | object_store = "0.11.2" 27 | plotly = { version = "0.12.1", optional = true } 28 | lazy_static = "1.5.0" 29 | azure_storage_blobs = { version = "0.21.0", optional = true } 30 | azure_storage = { version = "0.21.0", optional = true } 31 | base64 = "0.22.1" 32 | csv = "1.1" 33 | tokio-cron-scheduler = "0.13.0" 34 | bytes = "1.10.0" 35 | num_cpus = "1.0" 36 | reqwest = { version = "0.12", features = ["json", "rustls-tls"], default-features = false, optional = true} 37 | urlencoding = {version = "2.1.3", optional = true} 38 | tempfile = "3.16.0" 39 | ring = ">=0.17.12" 40 | tokio-postgres = { version = "0.7.13", optional = true } 41 | rust_decimal = { version = "1.32", features = ["db-postgres"], optional = true } 42 | mysql_async = { version = "0.32.2", default-features = false, features = ["minimal"], optional = true} 43 | rust_xlsxwriter = {version = "0.87.0", optional = true} 44 | calamine = "0.23.1" 45 | 46 | [features] 47 | default = [] 48 | dashboard = ["dep:plotly"] 49 | api = ["dep:reqwest", "dep:urlencoding"] 50 | azure = ["dep:azure_storage_blobs", "dep:azure_storage"] 51 | postgres = ["dep:tokio-postgres", "dep:rust_decimal"] 52 | mysql = ["dep:mysql_async"] 53 | excel = ["dep:rust_xlsxwriter"] 54 | all = ["dashboard", "api", "azure", "postgres", "mysql","excel"] 55 | 56 | [dev-dependencies] 57 | criterion = { version = "0.5.1", features = ["html_reports"] } 58 | 59 | [[bench]] 60 | name = "benchmark" 61 | harness = false 62 | 63 | [lib] 64 | name = "elusion" 65 | path = "src/elusion.rs" 66 | 67 | [target.'cfg(not(target_os = "windows"))'.dependencies] 68 | openssl = "=0.10.72" #security fix 
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright 2024 Borivoj Grujicic 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elusion 🦀 DataFrame / Data Engineering / Data Analysis Library for Everybody! 2 | 3 | 4 | ![Elusion Logo](images/elusion.png) 5 | 6 | ## Best Way to learn Elusion: 7 | Udemy Course - [Click to start learning on Udemy!](https://www.udemy.com/course/rust-data-engineering-analytics-elusion/) 8 | 9 | 10 | Elusion is a high-performance DataFrame / Data Engineering / Data Analysis library designed for in-memory data formats such as CSV, EXCEL, JSON, PARQUET, DELTA, as well as for Azure Blob Storage Connections, Postgres Database Connection, MySql Database Connection, and REST API's for creating JSON files which can be forwarded to DataFrame. 11 | Additionally you can easily create Reports and Dashboard by passing DataFrame results. 12 | 13 | All of the DataFrame operations can be placed in PipelineScheduler for automated Data Engineering Pipelines. 14 | 15 | Tailored for Data Engineers and Data Analysts seeking a powerful abstraction over data transformations. Elusion streamlines complex operations like filtering, joining, aggregating, and more with its intuitive, chainable DataFrame API, and provides a robust interface for managing and querying data efficiently, as well as Integrated Plotting and Interactive Dashboard features. 16 | 17 | ## Core Philosophy 18 | Elusion wants you to be you! 19 | 20 | Elusion offers flexibility in constructing queries without enforcing specific patterns or chaining orders, unlike SQL, PySpark, Polars, or Pandas. You can build your queries in ANY SEQUENCE THAT BEST FITS YOUR LOGIC, writing functions in ANY ORDER or a manner that makes sense to you. Regardless of the order of function calls, Elusion ensures consistent results. 21 | ### ALL DataFrame OPERATIONS AND EXAMPLES, that you will need, are bellow. 22 | 23 | ## Platform Compatibility 24 | Tested for MacOS, Linux and Windows 25 | ![Platform comp](images/platformcom.png) 26 | 27 | ## Security 28 | Codebase has Undergone Rigorous Auditing and Security Testing, ensuring that it is fully prepared for Production. 29 | 30 | ## Key Features 31 | 32 | ### 🔄 Job Scheduling (PipelineScheduler) 33 | Flexible Intervals: From 1 minute to 30 days scheduling intervals. 34 | Graceful Shutdown: Built-in Ctrl+C signal handling for clean termination. 35 | Async Support: Built on tokio for non-blocking operations. 36 | 37 | ### 🌐 External Data Sources Integration 38 | - Azure Blob Storage: Direct integration with Azure Blob Storage for Reading and Writing data files. 39 | - REST API's: Create JSON files from REST API endpoints with Customizable Headers, Params, Date Ranges, Pagination... 
40 | 41 | ### 🚀 High-Performance DataFrame Query Operations 42 | Seamless Data Loading: Easily load and process data from CSV, EXCEL, PARQUET, JSON, and DELTA table files. 43 | SQL-Like Transformations: Execute transformations such as SELECT, AGG, STRING FUNCTIONS, JOIN, FILTER, HAVING, GROUP BY, ORDER BY, DATETIME and WINDOW with ease. 44 | 45 | ### 🚀 Caching and Views 46 | The caching and views functionality offers several significant advantages over regular querying: 47 | #### Reduced Computation Time, Memory Management, Query Optimization, Interactive Analysis, Multiple visualizations for Dashboards and Reports, Resource Utilization, Concurrency 48 | 49 | ### 📉 Aggregations and Analytics 50 | Comprehensive Aggregations: Utilize built-in functions like SUM, AVG, MEAN, MEDIAN, MIN, COUNT, MAX, and more. 51 | Advanced Scalar Math: Perform calculations using functions such as ABS, FLOOR, CEIL, SQRT, ISNAN, ISZERO, PI, POWER, and others. 52 | 53 | ### 🔗 Flexible Joins 54 | Diverse Join Types: Perform joins using INNER, LEFT, RIGHT, FULL, and other join types. 55 | Intuitive Syntax: Easily specify join conditions and aliases for clarity and simplicity. 56 | 57 | ### 🪟 Window Functions 58 | Analytical Capabilities: Implement window functions like RANK, DENSE_RANK, ROW_NUMBER, and custom partition-based calculations to perform advanced analytics. 59 | 60 | ### 🔄 Pivot and Unpivot Functions 61 | Data Reshaping: Transform your data structure using PIVOT and UNPIVOT functions to suit your analytical needs. 62 | 63 | ### 📊 Create REPORTS 64 | Create HTML files with Interactive Dashboards with multiple interactive Plots and Tables. 65 | Plots Available: TimeSeries, Bar, Pie, Donut, Histogram, Scatter, Box... 66 | Tables can Paginate, Filter, Resize and Reorder columns... 67 | Export Table data to EXCEL and CSV 68 | 69 | ### 🧹 Clean Query Construction 70 | Readable Queries: Construct SQL queries that are both readable and reusable. 71 | Advanced Query Support: Utilize operations such as APPEND, UNION, UNION ALL, INTERSECT, and EXCEPT. For multiple DataFrame operations: APPEND_MANY, UNION_MANY, UNION_ALL_MANY. 72 | 73 | ### 🛠️ Easy-to-Use API 74 | Chainable Interface: Build queries using a chainable and intuitive API for streamlined development. 75 | Debugging Support: Access readable debug outputs of the generated SQL for easy verification and troubleshooting. 76 | **Data Preview**: Quickly preview your data by displaying a subset of rows in the terminal. 77 | **Composable Queries**: Seamlessly chain transformations to create reusable and testable workflows. 78 | 79 | --- 80 | ## Installation 81 | 82 | To add **Elusion** to your Rust project, include the following lines in your `Cargo.toml` under `[dependencies]`: 83 | 84 | ```toml 85 | elusion = "3.11.0" 86 | tokio = { version = "1.45.0", features = ["rt-multi-thread"] } 87 | ``` 88 | ## Rust version needed 89 | ```toml 90 | >= 1.81 91 | ``` 92 | --- 93 | ## Feature Flags 94 | Elusion uses Cargo feature flags to keep the library lightweight and modular. 95 | You can enable only the features you need, which helps reduce dependencies and compile time. 96 | 97 | ## Available Features 98 | ```rust 99 | ["postgres"] 100 | ``` 101 | Enables Postgres Database connectivity. 102 | ```rust 103 | ["mysql"] 104 | ``` 105 | Enables MySQL Database connectivity. 106 | ```rust 107 | ["azure"] 108 | ``` 109 | Enables Azure BLOB storage connectivity. 110 | ```rust 111 | ["api"] 112 | ``` 113 | Enables HTTP API integration for fetching data from web services.
114 | ```rust 115 | ["dashboard"] 116 | ``` 117 | Enables data visualization and dashboard creation capabilities. 118 | ```rust 119 | ["excel"] 120 | ``` 121 | Enables writing a DataFrame to an Excel file. 122 | ```rust 123 | ["all"] 124 | ``` 125 | Enables all available features. 126 | 127 | Usage: 128 | - In your Cargo.toml, specify which features you want to enable: 129 | 130 | - Add the POSTGRES feature when specifying the dependency: 131 | ```toml 132 | [dependencies] 133 | elusion = { version = "3.11.0", features = ["postgres"] } 134 | ``` 135 | 136 | - Using NO features (minimal dependencies): 137 | ```toml 138 | [dependencies] 139 | elusion = "3.11.0" 140 | ``` 141 | 142 | - Using multiple specific features: 143 | ```toml 144 | [dependencies] 145 | elusion = { version = "3.11.0", features = ["dashboard", "api", "mysql"] } 146 | ``` 147 | 148 | - Using all features: 149 | ```toml 150 | [dependencies] 151 | elusion = { version = "3.11.0", features = ["all"] } 152 | ``` 153 | 154 | ### Feature Implications 155 | #### When a feature is not enabled, you'll receive an error: 156 | #### Error: ***Warning***: API feature not enabled. Add feature under [dependencies] 157 | --- 158 | ## NORMALIZATION 159 | #### DataFrame (your files) Column Names will be normalized to LOWERCASE(), TRIM() and REPLACE(" ","_") 160 | #### All DataFrame query expressions, functions, aliases and column names will be normalized to LOWERCASE(), TRIM() and REPLACE(" ","_") 161 | --- 162 | ## Schema 163 | #### SCHEMA IS DYNAMICALLY INFERRED 164 | --- 165 | # Usage examples: 166 | 167 | ### MAIN function 168 | 169 | ```rust 170 | // Import everything needed 171 | use elusion::prelude::*; 172 | 173 | #[tokio::main] 174 | async fn main() -> ElusionResult<()> { 175 | 176 | Ok(()) 177 | } 178 | ``` 179 | --- 180 | # CREATING DATA FRAMES 181 | --- 182 | ### - Loading data into CustomDataFrame can be from: 183 | #### - Empty() DataFrames 184 | #### - In-Memory data formats: CSV, EXCEL, JSON, PARQUET, DELTA 185 | #### - Azure Blob Storage endpoints (BLOB, DFS) 186 | #### - Postgres Database SQL Queries 187 | #### - MySQL Database Queries 188 | #### - REST API -> json -> DataFrame 189 | 190 | #### -> NEXT are examples for reading data from local files; 191 | #### at the end are examples for Azure Blob Storage, Postgres and MySQL Databases 192 | --- 193 | ### LOADING data from Files into CustomDataFrame (in-memory data formats) 194 | #### - File extensions are automatically recognized 195 | #### - All you have to do is provide the path to your file 196 | 197 | ## Creating CustomDataFrame 198 | #### 2 arguments needed: **Path**, **Table Alias** 199 | 200 | ### LOADING data from CSV into CustomDataFrame 201 | ```rust 202 | let csv_path = "C:\\Borivoj\\RUST\\Elusion\\csv_data.csv"; 203 | let df = CustomDataFrame::new(csv_path, "csv_data").await?; 204 | ``` 205 | ### LOADING data from EXCEL into CustomDataFrame 206 | ```rust 207 | let excel_path = "C:\\Borivoj\\RUST\\Elusion\\excel_data.xlsx"; 208 | let df = CustomDataFrame::new(excel_path, "xlsx_data").await?; 209 | ``` 210 | ### LOADING data from PARQUET into CustomDataFrame 211 | ```rust 212 | let parquet_path = "C:\\Borivoj\\RUST\\Elusion\\prod_data.parquet"; 213 | let df = CustomDataFrame::new(parquet_path, "parq_data").await?; 214 | ``` 215 | ### LOADING data from JSON into CustomDataFrame 216 | ```rust 217 | let json_path = "C:\\Borivoj\\RUST\\Elusion\\mongo_data.json"; 218 | let df = CustomDataFrame::new(json_path, "json_data").await?; 219 | ``` 220 | ### LOADING data from DELTA
table into CustomDataFrame 221 | ```rust 222 | let delta_path = "C:\\Borivoj\\RUST\\Elusion\\agg_sales"; // for DELTA you just specify folder name without extension 223 | let df = CustomDataFrame::new(delta_path, "delta_data").await?; 224 | ``` 225 | ### LOADING data from Azure BLOB Storage into CustomDataFrame (**scroll till the end for FULL example**) 226 | ```rust 227 | let df = CustomDataFrame::from_azure_with_sas_token( 228 | blob_url, 229 | sas_token, 230 | Some("folder-name/file-name"), // FILTERING is optional. Can be None if you want to take everything from Container 231 | "data" // alias for registering table 232 | ).await?; 233 | ``` 234 | ### LOADING data from POSTGRES into CustomDataFrame (**scroll till the end for FULL example with config, conn and query**) 235 | ```rust 236 | let df = CustomDataFrame::from_postgres(&conn, query, "df_alias").await?; 237 | ``` 238 | ### LOADING data from MySQL into CustomDataFrame (**scroll till the end for FULL example with config, conn and query**) 239 | ```rust 240 | let df = CustomDataFrame::from_mysql(&conn, query, "df_alias").await?; 241 | ``` 242 | --- 243 | ## CREATE EMPTY DATA FRAME 244 | #### Create empty() DataFrame and populate it with data 245 | ```rust 246 | let temp_df = CustomDataFrame::empty().await?; 247 | 248 | let date_table = temp_df 249 | .datetime_functions([ 250 | "CURRENT_DATE() as current_date", 251 | "DATE_TRUNC('week', CURRENT_DATE()) AS week_start", 252 | "DATE_TRUNC('week', CURRENT_DATE()) + INTERVAL '1 week' AS next_week_start", 253 | "DATE_PART('year', CURRENT_DATE()) AS current_year", 254 | "DATE_PART('week', CURRENT_DATE()) AS current_week_num", 255 | ]) 256 | .elusion("date_table").await?; 257 | 258 | date_table.display().await?; 259 | 260 | RESULT: 261 | +--------------+---------------------+---------------------+--------------+------------------+ 262 | | current_date | week_start | next_week_start | current_year | current_week_num | 263 | +--------------+---------------------+---------------------+--------------+------------------+ 264 | | 2025-03-07 | 2025-03-03T00:00:00 | 2025-03-10T00:00:00 | 2025.0 | 10.0 | 265 | +--------------+---------------------+---------------------+--------------+------------------+ 266 | ``` 267 | --- 268 | ## CREATE DATE TABLE 269 | #### Create Date Table from Range of Dates 270 | ```rust 271 | let date_table = CustomDataFrame::create_date_range_table( 272 | "2025-01-01", // start date 273 | "2025-12-31", // end date 274 | "calendar_2025" // table alias 275 | ).await?; 276 | 277 | date_table.display().await?; 278 | 279 | RESULT: 280 | +------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+---------------+------------+------------+ 281 | | date | year | month | day | quarter | week_num | day_of_week | day_of_week_name | day_of_year | week_start | month_start | quarter_start | year_start | is_weekend | 282 | +------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+---------------+------------+------------+ 283 | | 2025-01-01 | 2025 | 1 | 1 | 1 | 1 | 3 | Wednesday | 1 | 2024-12-29 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 284 | | 2025-01-02 | 2025 | 1 | 2 | 1 | 1 | 4 | Thursday | 2 | 2024-12-29 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 285 | | 2025-01-03 | 2025 | 1 | 3 | 1 | 1 | 5 | Friday | 3 | 2024-12-29 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 286 | | 2025-01-04 | 2025 | 1 | 4 | 1 | 1 | 6 | Saturday | 4 | 
2024-12-29 | 2025-01-01 | 2025-01-01 | 2025-01-01 | true | 287 | | 2025-01-05 | 2025 | 1 | 5 | 1 | 1 | 0 | Sunday | 5 | 2025-01-05 | 2025-01-01 | 2025-01-01 | 2025-01-01 | true | 288 | | 2025-01-06 | 2025 | 1 | 6 | 1 | 2 | 1 | Monday | 6 | 2025-01-05 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 289 | | 2025-01-07 | 2025 | 1 | 7 | 1 | 2 | 2 | Tuesday | 7 | 2025-01-05 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 290 | | 2025-01-08 | 2025 | 1 | 8 | 1 | 2 | 3 | Wednesday | 8 | 2025-01-05 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 291 | | 2025-01-09 | 2025 | 1 | 9 | 1 | 2 | 4 | Thursday | 9 | 2025-01-05 | 2025-01-01 | 2025-01-01 | 2025-01-01 | false | 292 | | .......... | .... | . | . | . | . | . | ................ | .......... | .......... | .......... | ............. | ...........| .......... | 293 | +------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+---------------+------------+------------+ 294 | ``` 295 | --- 296 | ## CREATE DATE TABLE WITH CUSTOM FORMATS 297 | #### You can create Date Table with Custom formats (ISO, Compact, Human Readable...) and week, month, quarter, year Ranges (start-end) 298 | ```rust 299 | let date_table = CustomDataFrame::create_formatted_date_range_table( 300 | "2025-01-01", // date start 301 | "2025-12-31", // date end 302 | "calendar_2025", // table alias 303 | "date".to_string(), // first column name 304 | DateFormat::HumanReadable, // 1 Jan 2025 305 | true, // Include period ranges (start - end) 306 | Weekday::Mon // Week starts on Monday 307 | ).await?; 308 | 309 | date_table.display().await?; 310 | 311 | RESULT: 312 | +-------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+-------------+-------------+-------------+---------------+-------------+-------------+-------------+ 313 | | date | year | month | day | quarter | week_num | day_of_week | day_of_week_name | day_of_year | is_weekend | week_start | week_end | month_start | month_end | quarter_start | quarter_end | year_start | year_end | 314 | +-------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+-------------+-------------+-------------+---------------+-------------+-------------+-------------+ 315 | | 1 Jan 2025 | 2025 | 1 | 1 | 1 | 1 | 2 | Wednesday | 1 | false | 30 Dec 2024 | 5 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 316 | | 2 Jan 2025 | 2025 | 1 | 2 | 1 | 1 | 3 | Thursday | 2 | false | 30 Dec 2024 | 5 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 317 | | 3 Jan 2025 | 2025 | 1 | 3 | 1 | 1 | 4 | Friday | 3 | false | 30 Dec 2024 | 5 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 318 | | 4 Jan 2025 | 2025 | 1 | 4 | 1 | 1 | 5 | Saturday | 4 | true | 30 Dec 2024 | 5 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 319 | | 5 Jan 2025 | 2025 | 1 | 5 | 1 | 1 | 6 | Sunday | 5 | true | 30 Dec 2024 | 5 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 320 | | 6 Jan 2025 | 2025 | 1 | 6 | 1 | 2 | 0 | Monday | 6 | false | 6 Jan 2025 | 12 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 321 | | 7 Jan 2025 | 2025 | 1 | 7 | 1 | 2 | 1 | Tuesday | 7 | false | 6 Jan 2025 | 12 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 
Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 322 | | 8 Jan 2025 | 2025 | 1 | 8 | 1 | 2 | 2 | Wednesday | 8 | false | 6 Jan 2025 | 12 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 323 | | 9 Jan 2025 | 2025 | 1 | 9 | 1 | 2 | 3 | Thursday | 9 | false | 6 Jan 2025 | 12 Jan 2025 | 1 Jan 2025 | 31 Jan 2025 | 1 Jan 2025 | 31 Mar 2025 | 1 Jan 2025 | 31 Dec 2025 | 324 | | ........... | .... | .. | .. | . | . | . | ......... | ... | ..... | ........... | .......... | .......... | ........... | .......... | ........... | .......... | ........... | 325 | +-------------+------+-------+-----+---------+----------+-------------+------------------+-------------+------------+-------------+-------------+-------------+-------------+---------------+-------------+-------------+-------------+ 326 | ``` 327 | ### ALL AVAILABLE DATE FORMATS 328 | ```rust 329 | IsoDate, // YYYY-MM-DD 330 | IsoDateTime, // YYYY-MM-DD HH:MM:SS 331 | UsDate, // MM/DD/YYYY 332 | EuropeanDate, // DD.MM.YYYY 333 | EuropeanDateDash, // DD-MM-YYYY 334 | BritishDate, // DD/MM/YYYY 335 | HumanReadable, // 1 Jan 2025 336 | HumanReadableTime, // 1 Jan 2025 00:00 337 | SlashYMD, // YYYY/MM/DD 338 | DotYMD, // YYYY.MM.DD 339 | CompactDate, // YYYYMMDD 340 | YearMonth, // YYYY-MM 341 | MonthYear, // MM-YYYY 342 | MonthNameYear, // January 2025 343 | Custom(String) // Custom format string 344 | 345 | For Custom Date formats some of the common format specifiers: 346 | %Y - Full year (2025) 347 | %y - Short year (25) 348 | %m - Month as number (01-12) 349 | %b - Abbreviated month name (Jan) 350 | %B - Full month name (January) 351 | %d - Day of month (01-31) 352 | %e - Day of month, space-padded ( 1-31) 353 | %a - Abbreviated weekday name (Mon) 354 | %A - Full weekday name (Monday) 355 | %H - Hour (00-23) 356 | %I - Hour (01-12) 357 | %M - Minute (00-59) 358 | %S - Second (00-59) 359 | %p - AM/PM 360 | 361 | EXAMPLES: 362 | DateFormat::Custom("%d %b %Y %H:%M".to_string()), // "01 Jan 2025 00:00" 363 | // ISO 8601 with T separator and timezone 364 | DateFormat::Custom("%Y-%m-%dT%H:%M:%S%z".to_string()) 365 | // US date with 12-hour time 366 | DateFormat::Custom("%m/%d/%Y %I:%M %p".to_string()) 367 | // Custom format with weekday 368 | DateFormat::Custom("%A, %B %e, %Y".to_string()) // "Monday, January 1, 2025" 369 | ``` 370 | --- 371 | ## CREATE VIEWS and CACHING 372 | ### Materialized Views: 373 | For long-term storage of complex query results. When results need to be referenced by name. For data that changes infrequently. Example: Monthly sales summaries, customer metrics, product analytics 374 | ### Query Caching: 375 | For transparent performance optimization. When the same query might be run multiple times in a session. For interactive analysis scenarios. Example: Dashboard queries, repeated data exploration. 
376 | ```rust 377 | let sales = "C:\\Borivoj\\RUST\\Elusion\\SalesData2022.csv"; 378 | let products = "C:\\Borivoj\\RUST\\Elusion\\Products.csv"; 379 | let customers = "C:\\Borivoj\\RUST\\Elusion\\Customers.csv"; 380 | 381 | let sales_df = CustomDataFrame::new(sales, "s").await?; 382 | let customers_df = CustomDataFrame::new(customers, "c").await?; 383 | let products_df = CustomDataFrame::new(products, "p").await?; 384 | 385 | // Example 1: Using materialized view for customer count 386 | // The TTL parameter (3600) specifies how long the view remains valid in seconds (1 hour) 387 | customers_df 388 | .select(["COUNT(*) as count"]) 389 | .limit(10) 390 | .create_view("customer_count_view", Some(3600)) 391 | .await?; 392 | 393 | // Access the view by name - no recomputation needed 394 | let customer_count = CustomDataFrame::from_view("customer_count_view").await?; 395 | 396 | // Example 2: Using query caching with complex joins and aggregations 397 | // First execution computes and stores the result 398 | let join_result = sales_df 399 | .join_many([ 400 | (customers_df.clone(), ["s.CustomerKey = c.CustomerKey"], "INNER"), 401 | (products_df.clone(), ["s.ProductKey = p.ProductKey"], "INNER"), 402 | ]) 403 | .select(["c.CustomerKey", "c.FirstName", "c.LastName", "p.ProductName"]) 404 | .agg([ 405 | "SUM(s.OrderQuantity) AS total_quantity", 406 | "AVG(s.OrderQuantity) AS avg_quantity" 407 | ]) 408 | .group_by(["c.CustomerKey", "c.FirstName", "c.LastName", "p.ProductName"]) 409 | .having_many([ 410 | ("total_quantity > 10"), 411 | ("avg_quantity < 100") 412 | ]) 413 | .order_by_many([ 414 | ("total_quantity", true), 415 | ("p.ProductName", false) 416 | ]) 417 | .elusion_with_cache("sales_join") // caching query with DataFrame alias 418 | .await?; 419 | 420 | join_result.display().await?; 421 | 422 | // Other useful cache/view management functions: 423 | CustomDataFrame::invalidate_cache(&["table_name".to_string()]); // Clear cache for specific tables 424 | CustomDataFrame::clear_cache(); // Clear entire cache 425 | CustomDataFrame::refresh_view("view_name").await?; // Refresh a materialized view 426 | CustomDataFrame::drop_view("view_name").await?; // Remove a materialized view 427 | CustomDataFrame::list_views().await; // Get info about all views 428 | ``` 429 | --- 430 | # DATAFRAME WRANGLING 431 | --- 432 | ## SELECT 433 | ### ALIAS column names in SELECT() function (AS is case insensitive) 434 | ```rust 435 | let df_AS = select_df 436 | .select(["CustomerKey AS customerkey_alias", "FirstName as first_name", "LastName", "EmailAddress"]); 437 | 438 | let df_select_all = select_df.select(["*"]); 439 | 440 | let df_count_all = select_df.select(["COUNT(*)"]); 441 | 442 | let df_distinct = select_df.select(["DISTINCT(column_name) as distinct_values"]); 443 | ``` 444 | --- 445 | ## Where to use which Functions: 446 | ### Scalar and Operators -> in SELECT() function 447 | ### Aggregation Functions -> in AGG() function 448 | ### String Column Functions -> in STRING_FUNCTIONS() function 449 | ### DateTime Functions -> in DATETIME_FUNCTIONS() function 450 | --- 451 | ### Numerical Operators (supported +, -, * , / , %) 452 | ```rust 453 | let num_ops_sales = sales_order_df 454 | .select([ 455 | "customer_name", 456 | "order_date", 457 | "billable_value", 458 | "billable_value * 2 AS double_billable_value", // Multiplication 459 | "billable_value / 100 AS percentage_billable" // Division 460 | ]) 461 | .filter("billable_value > 100.0") 462 | .order_by(["order_date"], [true]) 463 | .limit(10); 464 | 465 
| let num_ops_res = num_ops_sales.elusion("scalar_df").await?; 466 | num_ops_res.display().await?; 467 | ``` 468 | ### FILTER (evaluated before aggregations) 469 | ```rust 470 | let filter_df = sales_order_df 471 | .select(["customer_name", "order_date", "billable_value"]) 472 | .filter_many([("order_date > '2021-07-04'"), ("billable_value > 100.0")]) 473 | .order_by(["order_date"], [true]) 474 | .limit(10); 475 | 476 | let filtered = filter_df.elusion("result_sales").await?; 477 | filtered.display().await?; 478 | 479 | // example 2 480 | const FILTER_CUSTOMER: &str = "customer_name == 'Customer IRRVL'"; 481 | 482 | let filter_query = sales_order_df 483 | .select([ 484 | "customer_name", 485 | "order_date", 486 | "ABS(billable_value) AS abs_billable_value", 487 | "ROUND(SQRT(billable_value), 2) AS SQRT_billable_value", 488 | "billable_value * 2 AS double_billable_value", // Multiplication 489 | "billable_value / 100 AS percentage_billable" // Division 490 | ]) 491 | .agg([ 492 | "ROUND(AVG(ABS(billable_value)), 2) AS avg_abs_billable", 493 | "SUM(billable_value) AS total_billable", 494 | "MAX(ABS(billable_value)) AS max_abs_billable", 495 | "SUM(billable_value) * 2 AS double_total_billable", // Operator-based aggregation 496 | "SUM(billable_value) / 100 AS percentage_total_billable" // Operator-based aggregation 497 | ]) 498 | .filter(FILTER_CUSTOMER) 499 | .group_by_all() 500 | .order_by_many([ 501 | ("total_billable", false), // Order by total_billable descending 502 | ("max_abs_billable", true), // Then by max_abs_billable ascending 503 | ]); 504 | ``` 505 | ### HAVING (evaluated after aggregations) 506 | ```rust 507 | // Example 1 with aggregated column names 508 | let example1 = sales_df 509 | .join_many([ 510 | (customers_df, ["s.CustomerKey = c.CustomerKey"], "INNER"), 511 | (products_df, ["s.ProductKey = p.ProductKey"], "INNER"), 512 | ]) 513 | .select(["c.CustomerKey", "c.FirstName", "c.LastName", "p.ProductName"]) 514 | .agg([ 515 | "SUM(s.OrderQuantity) AS total_quantity", 516 | "AVG(s.OrderQuantity) AS avg_quantity" 517 | ]) 518 | .group_by(["c.CustomerKey", "c.FirstName", "c.LastName", "p.ProductName"]) 519 | .having_many([ 520 | ("total_quantity > 10"), 521 | ("avg_quantity < 100") 522 | ]) 523 | .order_by_many([ 524 | ("total_quantity", true), 525 | ("p.ProductName", false) 526 | ]); 527 | 528 | let result = example1.elusion("sales_res").await?; 529 | result.display().await?; 530 | 531 | // Example 2 with aggregation in HAVING 532 | let df_having = sales_df 533 | .join(customers_df, ["s.CustomerKey = c.CustomerKey"], 534 | "INNER" 535 | ) 536 | .select(["c.CustomerKey", "c.FirstName", "c.LastName"]) 537 | .agg([ 538 | "SUM(s.OrderQuantity) AS total_quantity", 539 | "AVG(s.OrderQuantity) AS avg_quantity" 540 | ]) 541 | .group_by(["c.CustomerKey", "c.FirstName", "c.LastName"]) 542 | .having_many([ 543 | ("SUM(s.OrderQuantity) > 10"), 544 | ("AVG(s.OrderQuantity) < 100") 545 | ]) 546 | .order_by(["total_quantity"], [true]) 547 | .limit(5); 548 | 549 | let result = df_having.elusion("sales_res").await?; 550 | result.display().await?; 551 | ``` 552 | ### SCALAR functions 553 | ```rust 554 | let scalar_df = sales_order_df 555 | .select([ 556 | "customer_name", 557 | "order_date", 558 | "ABS(billable_value) AS abs_billable_value", 559 | "ROUND(SQRT(billable_value), 2) AS SQRT_billable_value"]) 560 | .filter("billable_value > 100.0") 561 | .order_by(["order_date"], [true]) 562 | .limit(10); 563 | 564 | let scalar_res = scalar_df.elusion("scalar_df").await?; 565
scalar_res.display().await?; 566 | ``` 567 | ### AGGREGATE functions with nested Scalar functions 568 | ```rust 569 | let scalar_df = sales_order_df 570 | .select([ 571 | "customer_name", 572 | "order_date" 573 | ]) 574 | .agg([ 575 | "ROUND(AVG(ABS(billable_value)), 2) AS avg_abs_billable", 576 | "SUM(billable_value) AS total_billable", 577 | "MAX(ABS(billable_value)) AS max_abs_billable", 578 | "SUM(billable_value) * 2 AS double_total_billable", // Operator-based aggregation 579 | "SUM(billable_value) / 100 AS percentage_total_billable" // Operator-based aggregation 580 | ]) 581 | .group_by(["customer_name", "order_date"]) 582 | .filter("billable_value > 100.0") 583 | .order_by(["order_date"], [true]) 584 | .limit(10); 585 | 586 | let scalar_res = scalar_df.elusion("scalar_df").await?; 587 | scalar_res.display().await?; 588 | ``` 589 | ### STRING functions 590 | ```rust 591 | let df = sales_df 592 | .select(["FirstName", "LastName"]) 593 | .string_functions([ 594 | "'New' AS new_old_customer", 595 | "TRIM(c.EmailAddress) AS trimmed_email", 596 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 597 | ]); 598 | 599 | let result_df = df.elusion("df").await?; 600 | result_df.display().await?; 601 | ``` 602 | ### Numerical Operators, Scalar Functions, Aggregated Functions... 603 | ```rust 604 | let mix_query = sales_order_df 605 | .select([ 606 | "customer_name", 607 | "order_date", 608 | "ABS(billable_value) AS abs_billable_value", 609 | "ROUND(SQRT(billable_value), 2) AS SQRT_billable_value", 610 | "billable_value * 2 AS double_billable_value", // Multiplication 611 | "billable_value / 100 AS percentage_billable" // Division 612 | ]) 613 | .agg([ 614 | "ROUND(AVG(ABS(billable_value)), 2) AS avg_abs_billable", 615 | "SUM(billable_value) AS total_billable", 616 | "MAX(ABS(billable_value)) AS max_abs_billable", 617 | "SUM(billable_value) * 2 AS double_total_billable", // Operator-based aggregation 618 | "SUM(billable_value) / 100 AS percentage_total_billable" // Operator-based aggregation 619 | ]) 620 | .filter("billable_value > 50.0") 621 | .group_by_all() 622 | .order_by_many([ 623 | ("total_billable", false), // Order by total_billable descending 624 | ("max_abs_billable", true), // Then by max_abs_billable ascending 625 | ]); 626 | 627 | let mix_res = mix_query.elusion("scalar_df").await?; 628 | mix_res.display().await?; 629 | ``` 630 | --- 631 | ### Supported Aggregation functions 632 | ```rust 633 | SUM, AVG, MEAN, MEDIAN, MIN, COUNT, MAX, 634 | LAST_VALUE, FIRST_VALUE, 635 | GROUPING, STRING_AGG, ARRAY_AGG, VAR, VAR_POP, 636 | VAR_POPULATION, VAR_SAMP, VAR_SAMPLE, 637 | BIT_AND, BIT_OR, BIT_XOR, BOOL_AND, BOOL_OR 638 | ``` 639 | ### Supported Scalar Math Functions 640 | ```rust 641 | ABS, FLOOR, CEIL, SQRT, ISNAN, ISZERO, 642 | PI, POW, POWER, RADIANS, RANDOM, ROUND, 643 | FACTORIAL, ACOS, ACOSH, ASIN, ASINH, 644 | COS, COSH, COT, DEGREES, EXP, 645 | SIN, SINH, TAN, TANH, TRUNC, CBRT, 646 | ATAN, ATAN2, ATANH, GCD, LCM, LN, 647 | LOG, LOG10, LOG2, NANVL, SIGNUM 648 | ``` 649 | --- 650 | ## JOIN 651 | #### JOIN examples with single condition and 2 dataframes, AGGREGATION, GROUP BY 652 | ```rust 653 | let single_join = df_sales 654 | .join(df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER") 655 | .select(["s.OrderDate","c.FirstName", "c.LastName"]) 656 | .agg([ 657 | "SUM(s.OrderQuantity) AS total_quantity", 658 | "AVG(s.OrderQuantity) AS avg_quantity", 659 | ]) 660 | .group_by(["s.OrderDate","c.FirstName","c.LastName"]) 661 | .having("total_quantity > 10") 662 | 
.order_by(["total_quantity"], [false]) // true is ascending, false is descending 663 | .limit(10); 664 | 665 | let join_df1 = single_join.elusion("result_query").await?; 666 | join_df1.display().await?; 667 | ``` 668 | ### JOIN with single conditions and 3 dataframes, AGGREGATION, GROUP BY, HAVING, SELECT, ORDER BY 669 | ```rust 670 | let many_joins = df_sales 671 | .join_many([ 672 | (df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER"), 673 | (df_products, ["s.ProductKey = p.ProductKey"], "INNER"), 674 | ]) 675 | .select([ 676 | "c.CustomerKey","c.FirstName","c.LastName","p.ProductName", 677 | ]) 678 | .agg([ 679 | "SUM(s.OrderQuantity) AS total_quantity", 680 | "AVG(s.OrderQuantity) AS avg_quantity", 681 | ]) 682 | .group_by(["c.CustomerKey", "c.FirstName", "c.LastName", "p.ProductName"]) 683 | .having_many([("total_quantity > 10"), ("avg_quantity < 100")]) 684 | .order_by_many([ 685 | ("total_quantity", true), // true is ascending 686 | ("p.ProductName", false) // false is descending 687 | ]) 688 | .limit(10); 689 | 690 | let join_df3 = many_joins.elusion("df_joins").await?; 691 | join_df3.display().await?; 692 | ``` 693 | ### JOIN with multiple conditions and 2 data frames 694 | ```rust 695 | let result_join = orders_df 696 | .join( 697 | customers_df, 698 | ["o.CustomerID = c.CustomerID" , "o.RegionID = c.RegionID"], 699 | "INNER" 700 | ) 701 | .select([ 702 | "o.OrderID", 703 | "c.Name", 704 | "o.OrderDate" 705 | ]) 706 | .string_functions([ 707 | "CONCAT(TRIM(c.Name), ' (', c.Email, ')') AS customer_info", 708 | "UPPER(c.Status) AS customer_status", 709 | "LEFT(c.Email, POSITION('@' IN c.Email) - 1) AS username" 710 | ]) 711 | .agg([ 712 | "SUM(o.Amount) AS total_amount", 713 | "AVG(o.Quantity) AS avg_quantity", 714 | "COUNT(DISTINCT o.OrderID) AS order_count", 715 | "MAX(o.Amount) AS max_amount", 716 | "MIN(o.Amount) AS min_amount" 717 | ]) 718 | .group_by([ 719 | "o.OrderID", 720 | "c.Name", 721 | "o.OrderDate", 722 | "c.Email", 723 | "c.Status" 724 | ]); 725 | 726 | let res_joins = result_join.elusion("one_join").await?; 727 | res_joins.display().await?; 728 | ``` 729 | ### JOIN_MANY with multiple conditions and 3 data frames 730 | ```rust 731 | let result_join_many = order_join_df 732 | .join_many([ 733 | (customer_join_df, 734 | ["o.CustomerID = c.CustomerID" , "o.RegionID = c.RegionID"], 735 | "INNER" 736 | ), 737 | (regions_join_df, 738 | ["c.RegionID = r.RegionID" , "r.IsActive = true"], 739 | "INNER" 740 | ) 741 | ]) 742 | .select(["o.OrderID","c.Name","r.RegionName", "r.CountryID"]) 743 | .string_functions([ 744 | "CONCAT(r.RegionName, ' (', r.CountryID, ')') AS region_info", 745 | 746 | "CASE c.CreditLimit 747 | WHEN 1000 THEN 'Basic' 748 | WHEN 2000 THEN 'Premium' 749 | ELSE 'Standard' 750 | END AS credit_tier", 751 | 752 | "CASE 753 | WHEN c.CreditLimit > 2000 THEN 'High' 754 | WHEN c.CreditLimit > 1000 THEN 'Medium' 755 | ELSE 'Low' 756 | END AS credit_status", 757 | 758 | "CASE 759 | WHEN o.Amount > 1000 AND c.Status = 'active' THEN 'Priority' 760 | WHEN o.Amount > 500 THEN 'Regular' 761 | ELSE 'Standard' 762 | END AS order_priority", 763 | 764 | "CASE r.RegionName 765 | WHEN 'East Coast' THEN 'Eastern' 766 | WHEN 'West Coast' THEN 'Western' 767 | ELSE 'Other' 768 | END AS region_category", 769 | 770 | "CASE 771 | WHEN EXTRACT(DOW FROM o.OrderDate) IN (0, 6) THEN 'Weekend' 772 | ELSE 'Weekday' 773 | END AS order_day_type" 774 | ]) 775 | .agg([ 776 | "SUM(o.Amount) AS total_amount", 777 | "COUNT(*) AS row_count", 778 | "SUM(o.Amount * (1 - o.Discount/100)) AS 
net_amount", 779 | "ROUND(SUM(o.Amount) / COUNT(*), 2) AS avg_order_value", 780 | "SUM(o.Amount * r.TaxRate) AS total_tax" 781 | ]) 782 | .group_by_all() 783 | .having("total_amount > 200") 784 | .order_by(["total_amount"], [false]); 785 | 786 | let res_joins_many = result_join_many.elusion("many_join").await?; 787 | res_joins_many.display().await?; 788 | ``` 789 | ### JOIN_MANY with single condition and 3 dataframes, STRING FUNCTIONS, AGGREGATION, GROUP BY, HAVING_MANY, ORDER BY 790 | ```rust 791 | 792 | let str_func_joins = df_sales 793 | .join_many([ 794 | (df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER"), 795 | (df_products, ["s.ProductKey = p.ProductKey"], "INNER"), 796 | ]) 797 | .select([ 798 | "c.CustomerKey", 799 | "c.FirstName", 800 | "c.LastName", 801 | "c.EmailAddress", 802 | "p.ProductName", 803 | ]) 804 | .string_functions([ 805 | "TRIM(c.EmailAddress) AS trimmed_email_address", 806 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 807 | "LEFT(p.ProductName, 15) AS short_product_name", 808 | "RIGHT(p.ProductName, 5) AS end_product_name", 809 | ]) 810 | .agg([ 811 | "COUNT(p.ProductKey) AS product_count", 812 | "SUM(s.OrderQuantity) AS total_order_quantity", 813 | ]) 814 | .group_by_all() 815 | .having_many([("total_order_quantity > 10"), ("product_count >= 1")]) 816 | .order_by_many([ 817 | ("total_order_quantity", true), 818 | ("p.ProductName", false) 819 | ]); 820 | 821 | let join_str_df3 = str_func_joins.elusion("df_joins").await?; 822 | join_str_df3.display().await?; 823 | ``` 824 | #### Currently implemented join types 825 | ```rust 826 | "INNER", "LEFT", "RIGHT", "FULL", 827 | "LEFT SEMI", "RIGHT SEMI", 828 | "LEFT ANTI", "RIGHT ANTI", "LEFT MARK" 829 | ``` 830 | --- 831 | ### STRING FUNCTIONS 832 | ```rust 833 | let string_functions_df = df_sales 834 | .join_many([ 835 | (df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER"), 836 | (df_products, ["s.ProductKey = p.ProductKey"], "INNER"), 837 | ]) 838 | .select([ 839 | "c.CustomerKey", 840 | "c.FirstName", 841 | "c.LastName", 842 | "c.EmailAddress", 843 | "p.ProductName" 844 | ]) 845 | .string_functions([ 846 | // Basic String Functions 847 | "TRIM(c.EmailAddress) AS trimmed_email", 848 | "LTRIM(c.EmailAddress) AS left_trimmed_email", 849 | "RTRIM(c.EmailAddress) AS right_trimmed_email", 850 | "UPPER(c.FirstName) AS upper_first_name", 851 | "LOWER(c.LastName) AS lower_last_name", 852 | "LENGTH(c.EmailAddress) AS email_length", 853 | "LEFT(p.ProductName, 10) AS product_start", 854 | "RIGHT(p.ProductName, 10) AS product_end", 855 | "SUBSTRING(p.ProductName, 1, 5) AS product_substr", 856 | // Concatenation 857 | "CONCAT(c.FirstName, ' ', c.LastName) AS full_name", 858 | "CONCAT_WS(' ', c.FirstName, c.LastName, c.EmailAddress) AS all_info", 859 | // Position and Search 860 | "POSITION('@' IN c.EmailAddress) AS at_symbol_pos", 861 | "STRPOS(c.EmailAddress, '@') AS email_at_pos", 862 | // Replacement and Modification 863 | "REPLACE(c.EmailAddress, '@adventure-works.com', '@newdomain.com') AS new_email", 864 | "TRANSLATE(c.FirstName, 'AEIOU', '12345') AS vowels_replaced", 865 | "REPEAT('*', 5) AS stars", 866 | "REVERSE(c.FirstName) AS reversed_name", 867 | // Padding 868 | "LPAD(c.CustomerKey::TEXT, 10, '0') AS padded_customer_id", 869 | "RPAD(c.FirstName, 20, '.') AS padded_name", 870 | // Case Formatting 871 | "INITCAP(LOWER(c.FirstName)) AS proper_case_name", 872 | // String Extraction 873 | "SPLIT_PART(c.EmailAddress, '@', 1) AS email_username", 874 | // Type Conversion 875 | 
"TO_CHAR(s.OrderDate, 'YYYY-MM-DD') AS formatted_date" 876 | ]) 877 | .agg([ 878 | "COUNT(*) AS total_records", 879 | "STRING_AGG(p.ProductName, ', ') AS all_products" 880 | ]) 881 | .filter("c.emailaddress IS NOT NULL") 882 | .group_by_all() 883 | .having("COUNT(*) > 1") 884 | .order_by(["c.CustomerKey"], [true]); 885 | 886 | let str_df = string_functions_df.elusion("df_joins").await?; 887 | str_df.display().await?; 888 | ``` 889 | #### Currently Available String functions 890 | ```rust 891 | 1.Basic String Functions: 892 | TRIM() - Remove leading/trailing spaces 893 | LTRIM() - Remove leading spaces 894 | RTRIM() - Remove trailing spaces 895 | UPPER() - Convert to uppercase 896 | LOWER() - Convert to lowercase 897 | LENGTH() or LEN() - Get string length 898 | LEFT() - Extract leftmost characters 899 | RIGHT() - Extract rightmost characters 900 | SUBSTRING() - Extract part of string 901 | 2. String concatenation: 902 | CONCAT() - Concatenate strings 903 | CONCAT_WS() - Concatenate with separator 904 | 3. String Position and Search: 905 | POSITION() - Find position of substring 906 | STRPOS() - Find position of substring 907 | INSTR() - Find position of substring 908 | LOCATE() - Find position of substring 909 | 4. String Replacement and Modification: 910 | REPLACE() - Replace all occurrences of substring 911 | TRANSLATE() - Replace characters 912 | OVERLAY() - Replace portion of string 913 | REPEAT() - Repeat string 914 | REVERSE() - Reverse string characters 915 | 5. String Pattern Matching: 916 | LIKE() - Pattern matching with wildcards 917 | REGEXP() or RLIKE() - Pattern matching with regular expressions 918 | 6. String Padding: 919 | LPAD() - Pad string on left 920 | RPAD() - Pad string on right 921 | SPACE() - Generate spaces 922 | 7. String Case Formatting: 923 | INITCAP() - Capitalize first letter of each word 924 | 8. String Extraction: 925 | SPLIT_PART() - Split string and get nth part 926 | SUBSTR() - Get substring 927 | 9. String Type Conversion: 928 | TO_CHAR() - Convert to string 929 | CAST() - Type conversion 930 | CONVERT() - Type conversion 931 | 10. 
Control Flow: 932 | CASE() 933 | ``` 934 | --- 935 | ### DATETIME FUNCTIONS 936 | #### Work best with YYYY-MM-DD format 937 | ```rust 938 | let dt_query = sales_order_df 939 | .select([ 940 | "customer_name", 941 | "order_date", 942 | "delivery_date" 943 | ]) 944 | .datetime_functions([ 945 | // Current date/time comparisons 946 | "CURRENT_DATE() AS today", 947 | "CURRENT_TIME() AS current_time", 948 | "CURRENT_TIMESTAMP() AS now", 949 | "NOW() AS now_timestamp", 950 | "TODAY() AS today_timestamp", 951 | 952 | // Date binning (for time-series analysis) 953 | "DATE_BIN('1 week', order_date, MAKE_DATE(2020, 1, 1)) AS weekly_bin", 954 | "DATE_BIN('1 month', order_date, MAKE_DATE(2020, 1, 1)) AS monthly_bin", 955 | 956 | // Date formatting 957 | "DATE_FORMAT(order_date, '%Y-%m-%d') AS formatted_date", 958 | "DATE_FORMAT(order_date, '%Y/%m/%d') AS formatted_date_alt", 959 | 960 | // Basic date components 961 | "DATE_PART('year', order_date) AS year", 962 | "DATE_PART('month', order_date) AS month", 963 | "DATE_PART('day', order_date) AS day", 964 | 965 | // Quarters and weeks 966 | "DATE_PART('quarter', order_date) AS order_quarter", 967 | "DATE_PART('week', order_date) AS order_week", 968 | 969 | // Day of week/year 970 | "DATE_PART('dow', order_date) AS day_of_week", 971 | "DATE_PART('doy', order_date) AS day_of_year", 972 | 973 | // Analysis 974 | "DATE_PART('day', delivery_date - order_date) AS delivery_days", 975 | "DATE_PART('day', CURRENT_DATE() - order_date) AS days_since_order", 976 | 977 | // Date truncation (alternative syntax) 978 | "DATE_TRUNC('week', order_date) AS week_start", 979 | "DATE_TRUNC('quarter', order_date) AS quarter_start", 980 | "DATE_TRUNC('month', order_date) AS month_start", 981 | "DATE_TRUNC('year', order_date) AS year_start", 982 | 983 | // Complex date calculations 984 | "CASE 985 | WHEN DATE_PART('month', order_date) <= 3 THEN 'Q1' 986 | WHEN DATE_PART('month', order_date) <= 6 THEN 'Q2' 987 | WHEN DATE_PART('month', order_date) <= 9 THEN 'Q3' 988 | ELSE 'Q4' 989 | END AS fiscal_quarter", 990 | 991 | // Date comparisons with current date 992 | "CASE 993 | WHEN order_date = CURRENT_DATE() THEN 'Today' 994 | WHEN DATE_PART('day', CURRENT_DATE() - order_date) <= 7 THEN 'Last Week' 995 | WHEN DATE_PART('day', CURRENT_DATE() - order_date) <= 30 THEN 'Last Month' 996 | ELSE 'Older' 997 | END AS order_recency", 998 | 999 | // Time windows 1000 | "CASE 1001 | WHEN DATE_BIN('1 week', order_date, CURRENT_DATE()) = DATE_BIN('1 week', CURRENT_DATE(), CURRENT_DATE()) 1002 | THEN 'This Week' 1003 | ELSE 'Previous Weeks' 1004 | END AS week_window", 1005 | 1006 | // Fiscal year calculations 1007 | "CASE 1008 | WHEN DATE_PART('month', order_date) >= 7 1009 | THEN DATE_PART('year', order_date) + 1 1010 | ELSE DATE_PART('year', order_date) 1011 | END AS fiscal_year", 1012 | 1013 | // Complex date logic - modified to work with Date32 1014 | "CASE 1015 | WHEN order_date < MAKE_DATE(2024, 1, 1) THEN 'Past' 1016 | ELSE 'Present' 1017 | END AS temporal_status", 1018 | 1019 | "CASE 1020 | WHEN DATE_PART('hour', CURRENT_TIMESTAMP()) < 12 THEN 'Morning' 1021 | ELSE 'Afternoon' 1022 | END AS time_of_day" 1023 | ]) 1024 | .order_by(["order_date"], [false]) 1025 | 1026 | let dt_res = dt_query.elusion("datetime_df").await?; 1027 | dt_res.display().await?; 1028 | ``` 1029 | #### Currently Available DateTime Functions 1030 | ```rust 1031 | CURRENT_DATE() 1032 | CURRENT_TIME() 1033 | CURRENT_TIMESTAMP() 1034 | NOW() 1035 | TODAY() 1036 | DATE_PART() 1037 | DATE_TRUNC() 1038 | DATE_BIN() 1039 | 
MAKE_DATE() 1040 | DATE_FORMAT() 1041 | ``` 1042 | --- 1043 | ### WINDOW functions 1044 | #### Aggregate, Ranking and Analytical functions 1045 | ```rust 1046 | let window_query = df_sales 1047 | .join(df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER") 1048 | .select(["s.OrderDate","c.FirstName","c.LastName","s.OrderQuantity"]) 1049 | //aggregated window functions 1050 | .window("SUM(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) as running_total") 1051 | .window("AVG(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS running_avg") 1052 | .window("MIN(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS running_min") 1053 | .window("MAX(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS running_max") 1054 | .window("COUNT(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS running_count") 1055 | //ranking window functions 1056 | .window("ROW_NUMBER() OVER (ORDER BY c.CustomerKey) AS customer_index") 1057 | .window("ROW_NUMBER() OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) as row_num") 1058 | .window("DENSE_RANK() OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS dense_rnk") 1059 | .window("PERCENT_RANK() OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS pct_rank") 1060 | .window("CUME_DIST() OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS cume_dist") 1061 | .window("NTILE(4) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS quartile") 1062 | // analytical window functions 1063 | .window("FIRST_VALUE(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS first_qty") 1064 | .window("LAST_VALUE(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS last_qty") 1065 | .window("LAG(s.OrderQuantity, 1, 0) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS prev_qty") 1066 | .window("LEAD(s.OrderQuantity, 1, 0) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS next_qty") 1067 | .window("NTH_VALUE(s.OrderQuantity, 3) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate) AS third_qty"); 1068 | 1069 | let window_df = window_query.elusion("result_window").await?; 1070 | window_df.display().await?; 1071 | ``` 1072 | #### Rolling Window Functions 1073 | ```rust 1074 | let rollin_query = df_sales 1075 | .join(df_customers, ["s.CustomerKey = c.CustomerKey"], "INNER") 1076 | .select(["s.OrderDate", "c.FirstName", "c.LastName", "s.OrderQuantity"]) 1077 | //aggregated rolling windows 1078 | .window("SUM(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate 1079 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total") 1080 | .window("AVG(s.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY s.OrderDate 1081 | ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS full_partition_avg"); 1082 | 1083 | let rollin_df = rollin_query.elusion("rollin_result").await?; 1084 | rollin_df.display().await?; 1085 | ``` 1086 | --- 1087 | ## JSON functions 1088 | ### .json() 1089 | #### function works with Columns that only have simple JSON values 1090 | #### example json structure: [{"Key1":"Value1","Key2":"Value2","Key3":"Value3"}] 1091 | #### example usage 1092 | ```rust 1093 | let path = "C:\\Borivoj\\RUST\\Elusion\\jsonFile.csv"; 1094 | let json_df = CustomDataFrame::new(path, "j").await?; 1095 | 1096 | let df_extracted = json_df.json([ 1097 | "ColumnName.'$Key1' AS column_name_1", 1098 | "ColumnName.'$Key2' AS column_name_2", 1099 | "ColumnName.'$Key3' AS 
column_name_3" 1100 | ]) 1101 | .select(["some_column1", "some_column2"]) 1102 | .elusion("json_extract").await?; 1103 | 1104 | df_extracted.display().await?; 1105 | ``` 1106 | ```rust 1107 | RESULT: 1108 | +---------------+---------------+---------------+---------------+---------------+ 1109 | | column_name_1 | column_name_2 | column_name_3 | some_column1 | some_column2 | 1110 | +---------------+---------------+---------------+---------------+---------------+ 1111 | | registrations | 2022-09-15 | CustomerCode | 779-0009E3370 | 646443D134762 | 1112 | | registrations | 2023-09-11 | CustomerCode | 770-00009ED61 | 463497C334762 | 1113 | | registrations | 2017-10-01 | CustomerCode | 889-000049C9E | 634697C134762 | 1114 | | registrations | 2019-03-26 | CustomerCode | 000-00006C4D5 | 446397D134762 | 1115 | | registrations | 2021-08-31 | CustomerCode | 779-0009E3370 | 463643D134762 | 1116 | | registrations | 2019-05-09 | CustomerCode | 770-00009ED61 | 634697C934762 | 1117 | | registrations | 2005-10-24 | CustomerCode | 889-000049C9E | 123397C334762 | 1118 | | registrations | 2023-02-14 | CustomerCode | 000-00006C4D5 | 932393D134762 | 1119 | | registrations | 2021-01-20 | CustomerCode | 779-0009E3370 | 323297C334762 | 1120 | | registrations | 2018-07-17 | CustomerCode | 000-00006C4D5 | 322097C921462 | 1121 | +---------------+---------------+---------------+---------------+---------------+ 1122 | ``` 1123 | ### .json_array() 1124 | #### function works with Columns that has Array of objects with pathern "column.'$ValueField:IdField=IdValue' AS column_alias" 1125 | The function parameters: 1126 | column: The column containing the JSON array 1127 | ValueField: The field to extract from matching objects 1128 | IdField: The field to use as identifier 1129 | IdValue: The value to match on the identifier field 1130 | column_alias: The output column name 1131 | 1132 | #### example json structure 1133 | ```rust 1134 | [ 1135 | {"Id":"Date","Value":"2022-09-15","ValueKind":"Date"}, 1136 | {"Id":"MadeBy","Value":"Borivoj Grujicic","ValueKind":"Text"}, 1137 | {"Id":"Timeline","Value":1.0,"ValueKind":"Number"}, 1138 | {"Id":"ETR_1","Value":1.0,"ValueKind":"Number"} 1139 | ] 1140 | ``` 1141 | #### example usage 1142 | ```rust 1143 | let multiple_values = df_json.json_array([ 1144 | "Value.'$Value:Id=Date' AS date", 1145 | "Value.'$Value:Id=MadeBy' AS made_by", 1146 | "Value.'$Value:Id=Timeline' AS timeline", 1147 | "Value.'$Value:Id=ETR_1' AS etr_1", 1148 | "Value.'$Value:Id=ETR_2' AS etr_2", 1149 | "Value.'$Value:Id=ETR_3' AS etr_3" 1150 | ]) 1151 | .select(["Id"]) 1152 | .elusion("multiple_values") 1153 | .await?; 1154 | 1155 | multiple_values.display().await?; 1156 | 1157 | RESULT: 1158 | +-----------------+-------------------+----------+-------+-------+-------+--------+ 1159 | | date | made_by | timeline | etr_1 | etr_2 | etr_3 | id | 1160 | +-----------------+-------------------+----------+-------+-------+-------+--------+ 1161 | | 2022-09-15 | Borivoj Grujicic | 1.0 | 1.0 | 1.0 | 1.0 | 77E10C | 1162 | | 2023-09-11 | | 5.0 | | | | 770C24 | 1163 | | 2017-10-01 | | | | | | 7795FA | 1164 | | 2019-03-26 | | 1.0 | | | | 77F2E6 | 1165 | | 2021-08-31 | | 5.0 | | | | 77926E | 1166 | | 2019-05-09 | | | | | | 77CC0F | 1167 | | 2005-10-24 | | | | | | 7728BA | 1168 | | 2023-02-14 | | | | | | 77F7F8 | 1169 | | 2021-01-20 | | | | | | 7731F6 | 1170 | | 2018-07-17 | | 3.0 | | | | 77FB18 | 1171 | +-----------------+-------------------+----------+-------+-------+-------+--------+ 1172 | ``` 1173 | --- 1174 | ## APPEND, 
APPEND_MANY
1175 | #### APPEND: Combines rows from two dataframes, keeping all rows
1176 | #### APPEND_MANY: Combines rows from many dataframes, keeping all rows
1177 | ```rust
1178 | let df1 = "C:\\Borivoj\\RUST\\Elusion\\API\\df1.json";
1179 | let df2 = "C:\\Borivoj\\RUST\\Elusion\\API\\df2.json";
1180 | let df3 = "C:\\Borivoj\\RUST\\Elusion\\API\\df3.json";
1181 | let df4 = "C:\\Borivoj\\RUST\\Elusion\\API\\df4.json";
1182 | let df5 = "C:\\Borivoj\\RUST\\Elusion\\API\\df5.json";
1183 | 
1184 | let df1 = CustomDataFrame::new(df1, "msales1").await?;
1185 | let df2 = CustomDataFrame::new(df2, "msales2").await?;
1186 | let df3 = CustomDataFrame::new(df3, "msales3").await?;
1187 | let df4 = CustomDataFrame::new(df4, "msales4").await?;
1188 | let df5 = CustomDataFrame::new(df5, "msales5").await?;
1189 | 
1190 | let res_df1 = df1.select(["Month", "TotalSales"]).string_functions(["'site1' AS Restaurant"]);
1191 | let result_df1 = res_df1.elusion("el1").await?;
1192 | 
1193 | let res_df2 = df2.select(["Month", "TotalSales"]).string_functions(["'site2' AS Restaurant"]);
1194 | let result_df2 = res_df2.elusion("el2").await?;
1195 | 
1196 | let res_df3 = df3.select(["Month", "TotalSales"]).string_functions(["'site3' AS Restaurant"]);
1197 | let result_df3 = res_df3.elusion("el3").await?;
1198 | 
1199 | let res_df4 = df4.select(["Month", "TotalSales"]).string_functions(["'site4' AS Restaurant"]);
1200 | let result_df4 = res_df4.elusion("el4").await?;
1201 | 
1202 | let res_df5 = df5.select(["Month", "TotalSales"]).string_functions(["'site5' AS Restaurant"]);
1203 | let result_df5 = res_df5.elusion("el5").await?;
1204 | 
1205 | // APPEND
1206 | let append_df = result_df1.append(result_df2).await?;
1207 | // APPEND_MANY
1208 | let append_many_df = result_df1.append_many([result_df2, result_df3, result_df4, result_df5]).await?;
1209 | ```
1210 | ---
1211 | ## UNION, UNION ALL, EXCEPT, INTERSECT
1212 | #### UNION: Combines rows from both dataframes, removing duplicates
1213 | #### UNION ALL: Combines rows from both dataframes, keeping duplicates
1214 | #### EXCEPT: Difference of two sets (only rows in the left dataframe that are not in the right).
1215 | #### INTERSECT: Intersection of two sets (only rows present in both). 
1216 | ```rust 1217 | //UNION 1218 | let df1 = sales_df.clone() 1219 | .join( 1220 | customers_df.clone(), ["s.CustomerKey = c.CustomerKey"], "INNER", 1221 | ) 1222 | .select(["c.FirstName", "c.LastName"]) 1223 | .string_functions([ 1224 | "TRIM(c.EmailAddress) AS trimmed_email", 1225 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 1226 | ]); 1227 | 1228 | let df2 = sales_df.clone() 1229 | .join( 1230 | customers_df.clone(), ["s.CustomerKey = c.CustomerKey"], "INNER", 1231 | ) 1232 | .select(["c.FirstName", "c.LastName"]) 1233 | .string_functions([ 1234 | "TRIM(c.EmailAddress) AS trimmed_email", 1235 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 1236 | ]); 1237 | 1238 | let result_df1 = df1.elusion("df1").await?; 1239 | let result_df2 = df2.elusion("df2").await?; 1240 | 1241 | let union_df = result_df1.union(result_df2).await?; 1242 | 1243 | let union_df_final = union_df.limit(100).elusion("union_df").await?; 1244 | union_df_final.display().await?; 1245 | 1246 | //UNION ALL 1247 | let union_all_df = result_df1.union_all(result_df2).await?; 1248 | //EXCEPT 1249 | let except_df = result_df1.except(result_df2).await?; 1250 | //INTERSECT 1251 | let intersect_df = result_df1.intersect(result_df2).await?; 1252 | 1253 | ``` 1254 | ## UNION_MANY, UNION_ALL_MANY 1255 | #### UNION_MANY: Combines rows from many dataframes, removing duplicates 1256 | #### UNION_ALL_MANY: Combines rows from many dataframes, keeping duplicates 1257 | ```rust 1258 | let df1 = "C:\\Borivoj\\RUST\\Elusion\\API\\df1.json"; 1259 | let df2 = "C:\\Borivoj\\RUST\\Elusion\\API\\df2.json"; 1260 | let df3 = "C:\\Borivoj\\RUST\\Elusion\\API\\df3.json"; 1261 | let df4 = "C:\\Borivoj\\RUST\\Elusion\\API\\df4.json"; 1262 | let df5 = "C:\\Borivoj\\RUST\\Elusion\\API\\df5.json"; 1263 | 1264 | let df1 = CustomDataFrame::new(df1, "msales").await?; 1265 | let df2 = CustomDataFrame::new(df2, "msales").await?; 1266 | let df3 = CustomDataFrame::new(df3, "msales").await?; 1267 | let df4 = CustomDataFrame::new(df4, "msales").await?; 1268 | let df5 = CustomDataFrame::new(df5, "msales").await?; 1269 | 1270 | let res_df1 = df1.select(["Month", "TotalSales"]).string_functions(["'df1' AS Sitename"]); 1271 | let result_df1 = res_df1.elusion("el1").await?; 1272 | 1273 | let res_df2 = df2.select(["Month", "TotalSales"]).string_functions(["'df2' AS Sitename"]); 1274 | let result_df2 = res_df2.elusion("el2").await?; 1275 | 1276 | let res_df3 = df3.select(["Month", "TotalSales"]).string_functions(["'df3' AS Sitename"]); 1277 | let result_df3 = res_df3.elusion("el3").await?; 1278 | 1279 | let res_df4 = df4.select(["Month", "TotalSales"]).string_functions(["'df4' AS Sitename"]); 1280 | let result_df4 = res_df4.elusion("el4").await?; 1281 | 1282 | let res_df5 = df5.select(["Month", "TotalSales"]).string_functions(["'df5' AS Sitename"]); 1283 | let resuld_df5 = res_df5.elusion("el5").await?; 1284 | 1285 | //UNION_MANY 1286 | let union_all_df = result_df1.union_many([result_df2, result_df3, result_df4, resuld_df5]).await?; 1287 | //UNION_ALL_MANY 1288 | let union_all_many_df = result_df1.union_all_many([result_df2, result_df3, result_df4, resuld_df5]).await?; 1289 | ``` 1290 | --- 1291 | ## PIVOT and UNPIVOT 1292 | #### Pivot and Unpivot functions are ASYNC function 1293 | #### They should be used separately from other functions: 1. directly on initial CustomDataFrame, 2. after .elusion() evaluation. 1294 | #### Future needs to be in final state so .await? 
must be used 1295 | ```rust 1296 | // PIVOT 1297 | // directly on initial CustomDataFrame 1298 | let sales_p = "C:\\Borivoj\\RUST\\Elusion\\SalesData2022.csv"; 1299 | let df_sales = CustomDataFrame::new(sales_p, "s").await?; 1300 | 1301 | let pivoted = df_sales 1302 | .pivot( 1303 | ["StockDate"], // Row identifiers 1304 | "TerritoryKey", // Column to pivot 1305 | "OrderQuantity", // Value to aggregate 1306 | "SUM" // Aggregation function 1307 | ).await?; 1308 | 1309 | let result_pivot = pivoted.elusion("pivoted_df").await?; 1310 | result_pivot.display().await?; 1311 | 1312 | // after .elusion() evaluation 1313 | let sales_path = "C:\\Borivoj\\RUST\\Elusion\\sales_order_report.csv"; 1314 | let sales_order_df = CustomDataFrame::new(sales_path, "sales").await?; 1315 | 1316 | let scalar_df = sales_order_df 1317 | .select([ 1318 | "customer_name", 1319 | "order_date", 1320 | "ABS(billable_value) AS abs_billable_value", 1321 | "ROUND(SQRT(billable_value), 2) AS SQRT_billable_value"]) 1322 | .filter("billable_value > 100.0") 1323 | .order_by(["order_date"], [true]) 1324 | .limit(10); 1325 | // elusion evaluation 1326 | let scalar_res = scalar_df.elusion("scalar_df").await?; 1327 | 1328 | let pivoted_scalar = scalar_res 1329 | .pivot( 1330 | ["customer_name"], // Row identifiers 1331 | "order_date", // Column to pivot 1332 | "abs_billable_value", // Value to aggregate 1333 | "SUM" // Aggregation function 1334 | ).await?; 1335 | 1336 | let pitvoted_scalar = pivoted_scalar.elusion("pivoted_df").await?; 1337 | pitvoted_scalar.display().await?; 1338 | 1339 | // UNPIVOT 1340 | let unpivoted = result_pivot 1341 | .unpivot( 1342 | ["StockDate"], // ID columns 1343 | ["TerritoryKey_1", "TerritoryKey_2"], // Value columns to unpivot 1344 | "Territory", // New name column 1345 | "Quantity" // New value column 1346 | ).await?; 1347 | 1348 | let result_unpivot = unpivoted.elusion("unpivoted_df").await?; 1349 | result_unpivot.display().await?; 1350 | 1351 | // example 2 1352 | let unpivot_scalar = scalar_res 1353 | .unpivot( 1354 | ["customer_name", "order_date"], // Keep these as identifiers 1355 | ["abs_billable_value", "sqrt_billable_value"], // Columns to unpivot 1356 | "measure_name", // Name for the measure column 1357 | "measure_value" // Name for the value column 1358 | ).await?; 1359 | 1360 | let result_unpivot_scalar = unpivot_scalar.elusion("unpivoted_df2").await?; 1361 | result_unpivot_scalar.display().await?; 1362 | ``` 1363 | --- 1364 | ## Statistical Functions 1365 | #### These Functions can give you quick statistical overview of your DataFrame columns and correlations 1366 | #### Currently available: display_stats(), display_null_analysis(), display_correlation_matrix() 1367 | ```rust 1368 | df.display_stats(&[ 1369 | "abs_billable_value", 1370 | "sqrt_billable_value", 1371 | "double_billable_value", 1372 | "percentage_billable" 1373 | ]).await?; 1374 | 1375 | === Column Statistics === 1376 | -------------------------------------------------------------------------------- 1377 | Column: abs_billable_value 1378 | ------------------------------------------------------------------------------ 1379 | | Metric | Value | Min | Max | 1380 | ------------------------------------------------------------------------------ 1381 | | Records | 10 | - | - | 1382 | | Non-null Records | 10 | - | - | 1383 | | Mean | 1025.71 | - | - | 1384 | | Standard Dev | 761.34 | - | - | 1385 | | Value Range | - | 67.4 | 2505.23 | 1386 | ------------------------------------------------------------------------------ 1387 | 
1388 | Column: sqrt_billable_value 1389 | ------------------------------------------------------------------------------ 1390 | | Metric | Value | Min | Max | 1391 | ------------------------------------------------------------------------------ 1392 | | Records | 10 | - | - | 1393 | | Non-null Records | 10 | - | - | 1394 | | Mean | 29.48 | - | - | 1395 | | Standard Dev | 13.20 | - | - | 1396 | | Value Range | - | 8.21 | 50.05 | 1397 | ------------------------------------------------------------------------------ 1398 | 1399 | // Display null analysis 1400 | // Keep None if you want all columns to be analized 1401 | df.display_null_analysis(None).await?; 1402 | 1403 | ---------------------------------------------------------------------------------------- 1404 | | Column | Total Rows | Null Count | Null Percentage | 1405 | ---------------------------------------------------------------------------------------- 1406 | | total_billable | 10 | 0 | 0.00% | 1407 | | order_count | 10 | 0 | 0.00% | 1408 | | customer_name | 10 | 0 | 0.00% | 1409 | | order_date | 10 | 0 | 0.00% | 1410 | | abs_billable_value | 10 | 0 | 0.00% | 1411 | ---------------------------------------------------------------------------------------- 1412 | 1413 | // Display correlation matrix 1414 | df.display_correlation_matrix(&[ 1415 | "abs_billable_value", 1416 | "sqrt_billable_value", 1417 | "double_billable_value", 1418 | "percentage_billable" 1419 | ]).await?; 1420 | 1421 | === Correlation Matrix === 1422 | ------------------------------------------------------------------------------------------- 1423 | | | abs_billable_va | sqrt_billable_v | double_billable | percentage_bill | 1424 | ------------------------------------------------------------------------------------------- 1425 | | abs_billable_va | 1.00 | 0.98 | 1.00 | 1.00 | 1426 | | sqrt_billable_v | 0.98 | 1.00 | 0.98 | 0.98 | 1427 | | double_billable | 1.00 | 0.98 | 1.00 | 1.00 | 1428 | | percentage_bill | 1.00 | 0.98 | 1.00 | 1.00 | 1429 | ------------------------------------------------------------------------------------------- 1430 | ``` 1431 | --- 1432 | ## EXTRACTING VALUES: extract_value_from_df() 1433 | #### Example how you can extract values from DataFrame and use it within REST API 1434 | ```rust 1435 | //create calendar dataframe 1436 | let date_calendar = CustomDataFrame::create_formatted_date_range_table( 1437 | "2025-01-01", 1438 | "2025-12-31", 1439 | "dt", 1440 | "date".to_string(), 1441 | DateFormat::HumanReadableTime, 1442 | true, 1443 | Weekday::Mon 1444 | ).await?; 1445 | 1446 | // take columns from Calendar 1447 | let week_range_2025 = date_calendar 1448 | .select(["DISTINCT(week_start)","week_end", "week_num"]) 1449 | .order_by(["week_num"], [true]) 1450 | .elusion("wr") 1451 | .await?; 1452 | 1453 | // create empty dataframe 1454 | let temp_df = CustomDataFrame::empty().await?; 1455 | 1456 | //populate empty dataframe with current week number 1457 | let current_week = temp_df 1458 | .datetime_functions([ 1459 | "CAST(DATE_PART('week', CURRENT_DATE()) as INT) AS current_week_num", 1460 | ]) 1461 | .elusion("cd").await?; 1462 | 1463 | // join data frames to get range for current week 1464 | let week_for_api = week_range_2025 1465 | .join(current_week,["wr.week_num == cd.current_week_num"], "INNER") 1466 | .select(["TRIM(wr.week_start) AS datefrom", "TRIM(wr.week_end) AS dateto"]) 1467 | .elusion("api_week") 1468 | .await?; 1469 | 1470 | // Extract Date Value from DataFrame based on column name and Row Index 1471 | let date_from = 
extract_value_from_df(&week_for_api, "datefrom", 0).await?; 1472 | let date_to = extract_value_from_df(&week_for_api, "dateto", 0).await?; 1473 | 1474 | //PRINT results for preview 1475 | week_for_api.display().await?; 1476 | 1477 | println!("Date from: {}", date_from); 1478 | println!("Date to: {}", date_to); 1479 | 1480 | RESULT: 1481 | +------------------+------------------+ 1482 | | datefrom | dateto | 1483 | +------------------+------------------+ 1484 | | 3 Mar 2025 00:00 | 9 Mar 2025 00:00 | 1485 | +------------------+------------------+ 1486 | 1487 | Date from: 3 Mar 2025 00:00 1488 | Date to: 9 Mar 2025 00:00 1489 | 1490 | NOW WE CAN USE THESE EXTRACTED VALUES: 1491 | 1492 | let post_df = ElusionApi::new(); 1493 | post_df.from_api_with_dates( 1494 | "https://jsonplaceholder.typicode.com/posts", // url 1495 | &date_from, // date from 1496 | &date_to, // date to 1497 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\rest_api_data.json", // path where json will be stored 1498 | ).await?; 1499 | ``` 1500 | ## EXTRACTING ROWS: extract_row_from_df() 1501 | #### Example how you can extract Row from DataFrame and use it within REST API. 1502 | ```rust 1503 | //create calendar dataframe 1504 | let date_calendar = CustomDataFrame::create_formatted_date_range_table( 1505 | "2025-01-01", 1506 | "2025-12-31", 1507 | "dt", 1508 | "date".to_string(), 1509 | DateFormat::IsoDate, 1510 | true, 1511 | Weekday::Mon 1512 | ).await?; 1513 | //take columns from calendar 1514 | let week_range_2025 = date_calendar 1515 | .select(["DISTINCT(week_start)","week_end", "week_num"]) 1516 | .order_by(["week_num"], [true]) 1517 | .elusion("wr") 1518 | .await?; 1519 | 1520 | // create empty dataframe 1521 | let temp_df = CustomDataFrame::empty().await?; 1522 | 1523 | //populate empty dataframe with current week number 1524 | let current_week = temp_df 1525 | .datetime_functions([ 1526 | "CAST(DATE_PART('week', CURRENT_DATE()) as INT) AS current_week_num", 1527 | ]) 1528 | .elusion("cd").await?; 1529 | 1530 | // join data frames to ge range for current week 1531 | let week_for_api = week_range_2025 1532 | .join(current_week,["wr.week_num == cd.current_week_num"], "INNER") 1533 | .select(["TRIM(wr.week_start) AS datefrom", "TRIM(wr.week_end) AS dateto"]) 1534 | .elusion("api_week") 1535 | .await?; 1536 | 1537 | // Extract Row Values from DataFrame based on Row Index 1538 | let row_values = extract_row_from_df(&week_for_api, 0).await?; 1539 | 1540 | // PRINT row for preview 1541 | println!("DataFrame row: {:?}", row_values); 1542 | 1543 | RESULT: 1544 | DataFrame row: {"datefrom": "2025-03-03", "dateto": "2025-03-09"} 1545 | 1546 | NOW WE CAN USE THESE EXTRACTED ROW: 1547 | 1548 | let post_df = ElusionApi::new(); 1549 | post_df.from_api_with_dates( 1550 | "https://jsonplaceholder.typicode.com/posts", // url 1551 | row_values.get("datefrom").unwrap_or(&String::new()), // date from 1552 | row_values.get("dateto").unwrap_or(&String::new()), // date to 1553 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\extraction_df2.json", // path where json will be stored 1554 | ).await?; 1555 | ``` 1556 | --- 1557 | # Postgres Database Connector 1558 | ### Create Config, Conn and Query, and pass it to from_postgres() function. 
1559 | ```rust
1560 | let pg_config = PostgresConfig {
1561 |     host: "localhost".to_string(),
1562 |     port: 5433,
1563 |     user: "borivoj".to_string(),
1564 |     password: "pass123".to_string(),
1565 |     database: "db_test".to_string(),
1566 |     pool_size: Some(5),
1567 | };
1568 | 
1569 | let conn = PostgresConnection::new(pg_config).await?;
1570 | 
1571 | // Option 2: you can use map_err() to wrap the connection error in a custom message
1572 | let conn = PostgresConnection::new(pg_config).await
1573 |     .map_err(|e| ElusionError::Custom(format!("PostgreSQL connection error: {}", e)))?;
1574 | 
1575 | let query = "
1576 |     SELECT
1577 |         c.id,
1578 |         c.name,
1579 |         s.product_name,
1580 |         SUM(s.quantity * s.price) as total_revenue
1581 |     FROM customers c
1582 |     LEFT JOIN sales s ON c.id = s.customer_id
1583 |     GROUP BY c.id, c.name, s.product_name
1584 |     ORDER BY total_revenue DESC
1585 | ";
1586 | 
1587 | let sales_by_customer_df = CustomDataFrame::from_postgres(&conn, query, "postgres_df").await?;
1588 | 
1589 | sales_by_customer_df.display().await?;
1590 | ```
1591 | # MySQL Database Connector
1592 | ### Create Config, Conn and Query, and pass it to from_mysql() function.
1593 | ```rust
1594 | let mysql_config = MySqlConfig {
1595 |     host: "localhost".to_string(),
1596 |     port: 3306,
1597 |     user: "borivoj".to_string(),
1598 |     password: "pass123".to_string(),
1599 |     database: "brewery".to_string(),
1600 |     pool_size: Some(5),
1601 | };
1602 | 
1603 | let conn = MySqlConnection::new(mysql_config).await?;
1604 | 
1605 | let mysql_query = "
1606 |     WITH ranked_sales AS (
1607 |         SELECT
1608 |             c.color AS brew_color,
1609 |             bd.beer_style,
1610 |             bd.location,
1611 |             SUM(bd.total_sales) AS total_sales
1612 |         FROM
1613 |             brewery_data bd
1614 |         JOIN
1615 |             colors c ON bd.Color = c.color_number
1616 |         WHERE
1617 |             bd.brew_date >= '2020-01-01' AND bd.brew_date <= '2020-03-01'
1618 |         GROUP BY
1619 |             c.color, bd.beer_style, bd.location
1620 |     )
1621 |     SELECT
1622 |         brew_color,
1623 |         beer_style,
1624 |         location,
1625 |         total_sales,
1626 |         ROW_NUMBER() OVER (PARTITION BY brew_color ORDER BY total_sales DESC) AS ranked
1627 |     FROM
1628 |         ranked_sales
1629 |     ORDER BY
1630 |         brew_color, total_sales DESC";
1631 | 
1632 | let df = CustomDataFrame::from_mysql(&conn, mysql_query, "mysql_df").await?;
1633 | 
1634 | df.display().await?;
1635 | ```
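Since the connectors return a regular CustomDataFrame, the queried data can be passed straight to any of the writers described later. Below is a minimal sketch (the output path is hypothetical) that persists the `df` returned by from_mysql() above as a local Parquet file:
```rust
// Persist the MySQL query result locally as Parquet.
// Arguments: write mode, target path (hypothetical), optional write options (kept as None).
df.write_to_parquet(
    "overwrite",
    "C:\\Borivoj\\RUST\\Elusion\\mysql_sales.parquet",
    None
).await?;
```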
1636 | ---
1637 | # AZURE Blob Storage Connector
1638 | ## Storage connector available for BLOB and DFS URL endpoints, with a provided SAS token
1639 | ### Currently supported file types: .JSON and .CSV
1640 | #### The DFS endpoint is "Data Lake Storage Gen2" and behaves more like a real file system, which makes reading operations more efficient, especially at large scale.
1641 | 
1642 | ### BLOB endpoint example
1643 | ```rust
1644 | let blob_url = "https://your_storage_account_name.blob.core.windows.net/your-container-name";
1645 | let sas_token = "your_sas_token";
1646 | 
1647 | let df = CustomDataFrame::from_azure_with_sas_token(
1648 |     blob_url,
1649 |     sas_token,
1650 |     Some("folder-name/file-name"), // FILTERING is optional. Can be None if you want to take everything from the container
1651 |     "data" // alias for registering table
1652 | ).await?;
1653 | 
1654 | let data_df = df.select(["*"]);
1655 | 
1656 | let test_data = data_df.elusion("data_df").await?;
1657 | test_data.display().await?;
1658 | ```
1659 | ### DFS endpoint example
1660 | 
1661 | ```rust
1662 | let dfs_url = "https://your_storage_account_name.dfs.core.windows.net/your-container-name";
1663 | let sas_token = "your_sas_token";
1664 | 
1665 | let df = CustomDataFrame::from_azure_with_sas_token(
1666 |     dfs_url,
1667 |     sas_token,
1668 |     Some("folder-name/file-name"), // FILTERING is optional. Can be None if you want to take everything from the container
1669 |     "data" // alias for registering table
1670 | ).await?;
1671 | 
1672 | let data_df = df.select(["*"]);
1673 | 
1674 | let test_data = data_df.elusion("data_df").await?;
1675 | test_data.display().await?;
1676 | ```
1677 | ---
1678 | # Pipeline Scheduler
1679 | ### Time is set according to UTC
1680 | 
1681 | #### Currently available job frequencies
1682 | ```rust
1683 | "1min", "2min", "5min", "10min", "15min", "30min",
1684 | "1h", "2h", "3h", "4h", "5h", "6h", "7h", "8h", "9h", "10h", "11h", "12h", "24h",
1685 | "2days", "3days", "4days", "5days", "6days", "7days", "14days", "30days"
1686 | ```
1687 | ### PipelineScheduler Example (parsing data from Azure BLOB Storage, DataFrame operation and writing to Parquet)
1688 | ```rust
1689 | use elusion::prelude::*;
1690 | 
1691 | #[tokio::main]
1692 | async fn main() -> ElusionResult<()> {
1693 | 
1694 |     // Create Pipeline Scheduler
1695 |     let scheduler = PipelineScheduler::new("5min", || async {
1696 | 
1697 |         let dfs_url = "https://your_storage_account_name.dfs.core.windows.net/your-container-name";
1698 |         let sas_token = "your_sas_token";
1699 |         // Read from Azure
1700 |         let header_df = CustomDataFrame::from_azure_with_sas_token(
1701 |             dfs_url,
1702 |             sas_token,
1703 |             Some("folder_name/"), // Optional: FILTERING can match any part of the string: file path, file name...
1704 |             "head"
1705 |         ).await?;
1706 | 
1707 |         // DataFrame operation
1708 |         let headers_payments = header_df
1709 |             .select(["Brand", "Id", "Name", "Item", "Bill", "Tax",
1710 |                 "ServCharge", "Percentage", "Discount", "Date"])
1711 |             .agg([
1712 |                 "SUM(Bill) AS total_bill",
1713 |                 "SUM(Tax) AS total_tax",
1714 |                 "SUM(ServCharge) AS total_service",
1715 |                 "AVG(Percentage) AS avg_percentage",
1716 |                 "COUNT(*) AS transaction_count",
1717 |                 "SUM(ServCharge) / SUM(Bill) * 100 AS service_ratio"
1718 |             ])
1719 |             .group_by(["Brand", "Date"])
1720 |             .filter("Bill > 0")
1721 |             .order_by(["total_bill"], [true]);
1722 | 
1723 |         let headers_data = headers_payments.elusion("headers_df").await?;
1724 | 
1725 |         // Write output
1726 |         headers_data
1727 |             .write_to_parquet(
1728 |                 "overwrite",
1729 |                 "C:\\Borivoj\\RUST\\Elusion\\Scheduler\\sales_data.parquet",
1730 |                 None
1731 |             )
1732 |             .await?;
1733 | 
1734 |         Ok(())
1735 | 
1736 |     }).await?;
1737 | 
1738 |     scheduler.shutdown().await?;
1739 | 
1740 |     Ok(())
1741 | }
1742 | 
1743 | ```
1744 | ---
1745 | # JSON files
1746 | ### Currently supported files can include: Fields, Arrays, Objects.
1747 | #### Best performance with flat JSON ("key":"value")
1748 | #### For JSON, all field types are inferred as VARCHAR/TEXT/STRING
1749 | ```rust
1750 | // example json structure with key:value pairs
1751 | {
1752 |     "name": "Adeel Solangi",
1753 |     "language": "Sindhi",
1754 |     "id": "V59OF92YF627HFY0",
1755 |     "bio": "Donec lobortis eleifend condimentum. 
Cras dictum dolor lacinia lectus vehicula rutrum.", 1756 | "version": 6.1 1757 | } 1758 | 1759 | let json_path = "C:\\Borivoj\\RUST\\Elusion\\test.json"; 1760 | let json_df = CustomDataFrame::new(json_path, "test").await?; 1761 | 1762 | let df = json_df.select(["*"]).limit(10); 1763 | 1764 | let result = df.elusion("df").await?; 1765 | result.display().await?; 1766 | 1767 | // example json structure with Fields and Arrays 1768 | [ 1769 | { 1770 | "id": "1", 1771 | "name": "Form 1", 1772 | "fields": [ 1773 | {"key": "first_name", "type": "text", "required": true}, 1774 | {"key": "age", "type": "number", "required": false}, 1775 | {"key": "email", "type": "email", "required": true} 1776 | ] 1777 | }, 1778 | { 1779 | "id": "2", 1780 | "name": "Form 2", 1781 | "fields": [ 1782 | {"key": "address", "type": "text", "required": false}, 1783 | {"key": "phone", "type": "tel", "required": true} 1784 | ] 1785 | }, 1786 | { 1787 | "id": "3", 1788 | "name": "Form 3", 1789 | "fields": [ 1790 | {"key": "notes", "type": "textarea", "required": false}, 1791 | {"key": "date", "type": "date", "required": true}, 1792 | {"key": "status", "type": "select", "required": true} 1793 | ] 1794 | } 1795 | ] 1796 | 1797 | let json_path = "C:\\Borivoj\\RUST\\Elusion\\test2.json"; 1798 | let json_df = CustomDataFrame::new(json_path, "test2").await?; 1799 | ``` 1800 | --- 1801 | # REST API 1802 | ### Creating JSON files from REST API's 1803 | #### Customizable Headers, Params, Pagination, Date Ranges... 1804 | ### FROM API 1805 | ```rust 1806 | // example 1 1807 | let posts_df = ElusionApi::new(); 1808 | posts_df 1809 | .from_api( 1810 | "https://jsonplaceholder.typicode.com/posts", // url 1811 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\posts_data.json" // path where json will be stored 1812 | ).await?; 1813 | 1814 | // example 2 1815 | let users_df = ElusionApi::new(); 1816 | users_df.from_api( 1817 | "https://jsonplaceholder.typicode.com/users", 1818 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\users_data.json", 1819 | ).await?; 1820 | 1821 | // example 3 1822 | let ceo = ElusionApi::new(); 1823 | ceo.from_api( 1824 | "https://dog.ceo/api/breeds/image/random/3", 1825 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\ceo_data.json" 1826 | ).await?; 1827 | ``` 1828 | ### FROM API WITH HEADERS 1829 | ```rust 1830 | // example 1 1831 | let mut headers = HashMap::new(); 1832 | headers.insert("Custom-Header".to_string(), "test-value".to_string()); 1833 | 1834 | let bin_df = ElusionApi::new(); 1835 | bin_df.from_api_with_headers( 1836 | "https://httpbin.org/headers", // url 1837 | headers, // headers 1838 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\bin_data.json", // path where json will be stored 1839 | ).await?; 1840 | 1841 | // example 2 1842 | let mut headers = HashMap::new(); 1843 | headers.insert("Accept".to_string(), "application/vnd.github.v3+json".to_string()); 1844 | headers.insert("User-Agent".to_string(), "elusion-dataframe-test".to_string()); 1845 | 1846 | let git_hub = ElusionApi::new(); 1847 | git_hub.from_api_with_headers( 1848 | "https://api.github.com/search/repositories?q=rust+language:rust&sort=stars&order=desc", 1849 | headers, 1850 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\git_hub_data.json" 1851 | ).await?; 1852 | 1853 | // example 3 1854 | let mut headers = HashMap::new(); 1855 | headers.insert("Accept".to_string(), "application/json".to_string()); 1856 | headers.insert("X-Version".to_string(), "1".to_string()); 1857 | 1858 | let pokemon_df = ElusionApi::new(); 1859 | pokemon_df.from_api_with_headers( 1860 | 
"https://pokeapi.co/api/v2/pokemon", 1861 | headers, 1862 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\pokemon_data.json" 1863 | ).await?; 1864 | ``` 1865 | ### FROM API WITH PARAMS 1866 | ```rust 1867 | // Using OpenLibrary API with params 1868 | let mut params = HashMap::new(); 1869 | params.insert("q", "rust programming"); 1870 | params.insert("limit", "10"); 1871 | 1872 | let open_lib = ElusionApi::new(); 1873 | open_lib.from_api_with_params( 1874 | "https://openlibrary.org/search.json", // url 1875 | params, // params 1876 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\open_lib_data.json", // path where json will be stored 1877 | ).await?; 1878 | 1879 | // Random User Generator API with params 1880 | let mut params = HashMap::new(); 1881 | params.insert("results", "10"); 1882 | params.insert("nat", "us,gb"); 1883 | 1884 | let generator = ElusionApi::new(); 1885 | generator.from_api_with_params( 1886 | "https://randomuser.me/api", 1887 | params, 1888 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\generator_data.json" 1889 | ).await?; 1890 | 1891 | // JSON Placeholder with multiple endpoints 1892 | let mut params = HashMap::new(); 1893 | params.insert("userId", "1"); 1894 | params.insert("_limit", "5"); 1895 | 1896 | let multi = ElusionApi::new(); 1897 | multi.from_api_with_params( 1898 | "https://jsonplaceholder.typicode.com/posts", 1899 | params, 1900 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\multi_data.json" 1901 | ).await?; 1902 | 1903 | // NASA Astronomy Picture of the Day 1904 | let mut params = HashMap::new(); 1905 | params.insert("count", "5"); 1906 | params.insert("thumbs", "true"); 1907 | 1908 | let nasa = ElusionApi::new(); 1909 | nasa.from_api_with_params( 1910 | "https://api.nasa.gov/planetary/apod", 1911 | params, 1912 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\nasa_pics_data.json" 1913 | ).await?; 1914 | 1915 | // example 5 1916 | let mut params = HashMap::new(); 1917 | params.insert("brand", "elusion"); 1918 | params.insert("password", "some_password"); 1919 | params.insert("siteid", "993"); 1920 | params.insert("Datefrom", "01 jan 2025 06:00"); 1921 | params.insert("Dateto", "31 jan 2025 06:00"); 1922 | params.insert("user", "borivoj"); 1923 | 1924 | let api = ElusionApi::new(); 1925 | api.from_api_with_params( 1926 | "https://salesapi.net.co.rs/SSPAPI/api/data", 1927 | params, 1928 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\sales_jan_2025.json" 1929 | ).await?; 1930 | ``` 1931 | ### FROM API WITH PARAMS AND HEADERS 1932 | ```rust 1933 | let mut params = HashMap::new(); 1934 | params.insert("since", "2024-01-01T00:00:00Z"); 1935 | params.insert("until", "2024-01-07T23:59:59Z"); 1936 | 1937 | let mut headers = HashMap::new(); 1938 | headers.insert("Accept".to_string(), "application/vnd.github.v3+json".to_string()); 1939 | headers.insert("User-Agent".to_string(), "elusion-dataframe-test".to_string()); 1940 | 1941 | let commits_df = ElusionApi::new(); 1942 | commits_df.from_api_with_params_and_headers( 1943 | "https://api.github.com/repos/rust-lang/rust/commits", // url 1944 | params, // params 1945 | headers, // headers 1946 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\commits_data.json", // path where json will be stored 1947 | ).await?; 1948 | ``` 1949 | ### FROM API WITH DATES 1950 | ```rust 1951 | // example 1 1952 | let post_df = ElusionApi::new(); 1953 | post_df.from_api_with_dates( 1954 | "https://jsonplaceholder.typicode.com/posts", // url 1955 | "2024-01-01", // date from 1956 | "2024-01-07", // date to 1957 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\post_data.json", // path where json will be stored 1958 | ).await?; 
1959 | 1960 | // Example 2: COVID-19 historical data 1961 | let covid_df = ElusionApi::new(); 1962 | covid_df.from_api_with_dates( 1963 | "https://disease.sh/v3/covid-19/historical/all", 1964 | "2024-01-01", 1965 | "2024-01-07", 1966 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\covid_data.json" 1967 | ).await?; 1968 | ``` 1969 | ### FROM API WITH PAGINATION 1970 | ```rust 1971 | // example 1 1972 | let reqres = ElusionApi::new(); 1973 | reqres.from_api_with_pagination( 1974 | "https://reqres.in/api/users", 1975 | 1, // page 1976 | 10, // per_page 1977 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\reqres_data.json" 1978 | ).await?; 1979 | ``` 1980 | ### FROM API WITH SORT 1981 | ```rust 1982 | let movie_db = ElusionApi::new(); 1983 | movie_db.from_api_with_sort( 1984 | "https://api.themoviedb.org/3/discover/movie", // base url 1985 | "popularity", // sort field 1986 | "desc", // order 1987 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\popular_movies.json" 1988 | ).await?; 1989 | ``` 1990 | ### FROM API WITH HEADERS AND SORT 1991 | ```rust 1992 | let mut headers = HashMap::new(); 1993 | headers.insert("Authorization".to_string(), "Bearer YOUR_TMDB_API_KEY".to_string()); 1994 | headers.insert("accept".to_string(), "application/json".to_string()); 1995 | 1996 | let movie_db = ElusionApi::new(); 1997 | movie_db.from_api_with_headers_and_sort( 1998 | "https://api.themoviedb.org/3/discover/movie", // base url 1999 | headers, // headers 2000 | "popularity", // sort field 2001 | "desc", // order 2002 | "C:\\Borivoj\\RUST\\Elusion\\JSON\\popular_movies1.json" 2003 | ).await?; 2004 | ``` 2005 | --- 2006 | # WRITERS 2007 | 2008 | ## Writing to EXCEL File ***needs excel feature enabled 2009 | 2010 | #### EXCEL writer can only write or overwrite, so only 2 arguments needed 2011 | #### 1. Path, 2. Optional Sheet name. (default is Sheet1) 2012 | ```rust 2013 | df.write_to_excel( 2014 | "C:\\Borivoj\\RUST\\Elusion\\Excel\\sales2.xlsx", //path 2015 | Some("string_interop") // Optional sheet name. Can be None 2016 | ).await?; 2017 | ``` 2018 | ## Writing to Parquet File 2019 | #### We have 2 writing modes: **Overwrite** and **Append** 2020 | ```rust 2021 | // overwrite existing file 2022 | df.write_to_parquet( 2023 | "overwrite", 2024 | "C:\\Path\\To\\Your\\test.parquet", 2025 | None // I've set WriteOptions to default for writing Parquet files, so keep it None 2026 | ) 2027 | .await?; 2028 | 2029 | // append to exisiting file 2030 | df.write_to_parquet( 2031 | "append", 2032 | "C:\\Path\\To\\Your\\test.parquet", 2033 | None // I've set WriteOptions to default for writing Parquet files, so keep it None 2034 | ) 2035 | .await?; 2036 | ``` 2037 | ## Writing to CSV File 2038 | 2039 | #### CSV Writing options are **mandatory** 2040 | ##### has_headers: TRUE is dynamically set for Overwrite mode, and FALSE for Append mode. 
2041 | ```rust
2042 | let custom_csv_options = CsvWriteOptions {
2043 |     delimiter: b',',
2044 |     escape: b'\\',
2045 |     quote: b'"',
2046 |     double_quote: false,
2047 |     null_value: "NULL".to_string(),
2048 | };
2049 | ```
2050 | #### We have 2 writing modes: Overwrite and Append
2051 | ```rust
2052 | // overwrite existing file
2053 | df.write_to_csv(
2054 |     "overwrite",
2055 |     "C:\\Borivoj\\RUST\\Elusion\\agg_sales.csv",
2056 |     custom_csv_options
2057 | )
2058 | .await?;
2059 | 
2060 | // append to existing file
2061 | df.write_to_csv(
2062 |     "append",
2063 |     "C:\\Borivoj\\RUST\\Elusion\\agg_sales.csv",
2064 |     custom_csv_options
2065 | )
2066 | .await?;
2067 | 
2068 | ```
2069 | ## Writing to JSON File
2070 | 
2071 | #### JSON writer can only overwrite, so only 2 arguments are needed:
2072 | #### 1. Path, 2. Whether you want pretty-printed JSON or not (true or false)
2073 | ```rust
2074 | df.write_to_json(
2075 |     "C:\\Borivoj\\RUST\\Elusion\\date_table.json", // path
2076 |     true // pretty-printed JSON, false for compact JSON
2077 | ).await?;
2078 | ```
2079 | ## Writing to DELTA table / lake
2080 | #### We can write to delta in 2 modes: **Overwrite** and **Append**
2081 | #### The partitioning column is OPTIONAL; if you decide to partition by a column, make sure you don't need it afterwards, as you won't be able to read it back into the dataframe
2082 | #### Once a delta table is written with a partitioning column, any APPEND to it must use the same partitioning column
2083 | ```rust
2084 | // Overwrite
2085 | df.write_to_delta_table(
2086 |     "overwrite",
2087 |     "C:\\Borivoj\\RUST\\Elusion\\agg_sales",
2088 |     Some(vec!["order_date".into()]),
2089 | )
2090 | .await
2091 | .expect("Failed to overwrite Delta table");
2092 | // Append
2093 | df.write_to_delta_table(
2094 |     "append",
2095 |     "C:\\Borivoj\\RUST\\Elusion\\agg_sales",
2096 |     Some(vec!["order_date".into()]),
2097 | )
2098 | .await
2099 | .expect("Failed to append to Delta table");
2100 | ```
2101 | ## Writing Parquet to Azure BLOB Storage
2102 | #### We have 2 writing options: "overwrite" and "append"
2103 | #### Writing is set to Default, Compression: SNAPPY and Parquet 2.0
2104 | #### Threshold file size is 1GB
2105 | ```rust
2106 | let df = CustomDataFrame::new(csv_data, "sales").await?;
2107 | 
2108 | let query = df.select(["*"]);
2109 | 
2110 | let data = query.elusion("df_sales").await?;
2111 | 
2112 | let url_to_folder = "https://your_storage_account_name.dfs.core.windows.net/your-container-name/folder/sales.parquet";
2113 | let sas_write_token = "your_sas_token"; // make sure the SAS token has write permissions
2114 | 
2115 | data.write_parquet_to_azure_with_sas(
2116 |     "overwrite",
2117 |     url_to_folder,
2118 |     sas_write_token
2119 | ).await?;
2120 | 
2121 | // append version
2122 | data.write_parquet_to_azure_with_sas(
2123 |     "append",
2124 |     url_to_folder,
2125 |     sas_write_token
2126 | ).await?;
2127 | ```
2128 | ## Writing JSON to Azure BLOB Storage
2129 | #### Can only create a new file or overwrite an existing one
2130 | #### Threshold file size is 1GB
2131 | ```rust
2132 | let df = CustomDataFrame::new(csv_data, "sales").await?;
2133 | 
2134 | let query = df.select(["*"]);
2135 | 
2136 | let data = query.elusion("df_sales").await?;
2137 | 
2138 | let url_to_folder = "https://your_storage_account_name.dfs.core.windows.net/your-container-name/folder/data.json";
2139 | let sas_write_token = "your_sas_token"; // make sure the SAS token has write permissions
2140 | 
2141 | data.write_json_to_azure_with_sas(
2142 | 
url_to_folder, 2143 | sas_write_token, 2144 | true // Set to true for pretty-printed JSON, false for compact JSON 2145 | ).await?; 2146 | ``` 2147 | --- 2148 | # REPORTING 2149 | ### CREATING REPORT with Interactive Plots/Visuals and Tables 2150 | ### Export Table data to EXCEL and CSV 2151 | #### Currently available Interactive Plots: TimeSeries, Box, Bar, Histogram, Pie, Donut, Scatter... 2152 | #### Interactive Tables can: Paginate pages, Filter, Reorder, Resize columns... 2153 | ```rust 2154 | let ord = "C:\\Borivoj\\RUST\\Elusion\\sales_order_report.csv"; 2155 | let sales_order_df = CustomDataFrame::new(ord, "ord").await?; 2156 | 2157 | let mix_query = sales_order_df.clone() 2158 | .select([ 2159 | "customer_name", 2160 | "order_date", 2161 | "ABS(billable_value) AS abs_billable_value", 2162 | "ROUND(SQRT(billable_value), 2) AS SQRT_billable_value", 2163 | "billable_value * 2 AS double_billable_value", // Multiplication 2164 | "billable_value / 100 AS percentage_billable" // Division 2165 | ]) 2166 | .agg([ 2167 | "ROUND(AVG(ABS(billable_value)), 2) AS avg_abs_billable", 2168 | "SUM(billable_value) AS total_billable", 2169 | "MAX(ABS(billable_value)) AS max_abs_billable", 2170 | "SUM(billable_value) * 2 AS double_total_billable", // Operator-based aggregation 2171 | "SUM(billable_value) / 100 AS percentage_total_billable" // Operator-based aggregation 2172 | ]) 2173 | .filter("billable_value > 50.0") 2174 | .group_by_all() 2175 | .order_by_many([ 2176 | ("total_billable", false), // Order by total_billable descending 2177 | ("max_abs_billable", true), // Then by max_abs_billable ascending 2178 | ]); 2179 | 2180 | let mix_res = mix_query.elusion("scalar_df").await?; 2181 | 2182 | //INTERACTIVE PLOTS 2183 | // Line plot showing sales over time 2184 | let line = mix_res.plot_line( 2185 | "order_date", // - x_col: column name for x-axis (can be date or numeric) 2186 | "double_billable_value", // - y_col: column name for y-axis 2187 | true, // - show_markers: true to show points, false for line only 2188 | Some("Sales over time") // - title: optional custom title (can be None) 2189 | ).await?; 2190 | 2191 | // Bar plot showing aggregated values 2192 | let bars = mix_res 2193 | .plot_bar( 2194 | "customer_name", // X-axis: Customer names 2195 | "total_billable", // Y-axis: Total billable amount 2196 | Some("Customer Total Sales") // Title of the plot 2197 | ).await?; 2198 | 2199 | // Time series showing sales trend 2200 | let time_series = mix_res 2201 | .plot_time_series( 2202 | "order_date", // X-axis: Date column (must be Date32 type) 2203 | "total_billable", // Y-axis: Total billable amount 2204 | true, // Show markers on the line 2205 | Some("Sales Trend Over Time") // Title of the plot 2206 | ).await?; 2207 | 2208 | // Histogram showing distribution of abs billable values 2209 | let histogram = mix_res 2210 | .plot_histogram( 2211 | "abs_billable_value", // Data column for distribution analysis 2212 | Some("Distribution of Sale Values") // Title of the plot 2213 | ).await?; 2214 | 2215 | // Box plot showing abs billable value distribution 2216 | let box_plot = mix_res 2217 | .plot_box( 2218 | "abs_billable_value", // Value column for box plot 2219 | Some("customer_name"), // Optional grouping column 2220 | Some("Sales Distribution by Customer") // Title of the plot 2221 | ).await?; 2222 | 2223 | // Scatter plot showing relationship between original and doubled values 2224 | let scatter = mix_res 2225 | .plot_scatter( 2226 | "abs_billable_value", // X-axis: Original values 2227 | 
"double_billable_value", // Y-axis: Doubled values 2228 | Some(8) // Optional marker size 2229 | ).await?; 2230 | 2231 | // Pie chart showing sales distribution 2232 | let pie = mix_res 2233 | .plot_pie( 2234 | "customer_name", // Labels for pie segments 2235 | "total_billable", // Values for pie segments 2236 | Some("Sales Share by Customer") // Title of the plot 2237 | ).await?; 2238 | 2239 | // Donut chart alternative view 2240 | let donut = mix_res 2241 | .plot_donut( 2242 | "customer_name", // Labels for donut segments 2243 | "percentage_total_billable", // Values as percentages 2244 | Some("Percentage Distribution") // Title of the plot 2245 | ).await?; 2246 | 2247 | // Create Tables to add to report 2248 | let summary_table = mix_res.clone() //Clone for multiple usages 2249 | .select([ 2250 | "customer_name", 2251 | "total_billable", 2252 | "avg_abs_billable", 2253 | "max_abs_billable", 2254 | "percentage_total_billable" 2255 | ]) 2256 | .order_by_many([ 2257 | ("total_billable", false) 2258 | ]) 2259 | .elusion("summary") 2260 | .await?; 2261 | 2262 | let transactions_table = mix_res 2263 | .select([ 2264 | "customer_name", 2265 | "order_date", 2266 | "abs_billable_value", 2267 | "double_billable_value", 2268 | "percentage_billable" 2269 | ]) 2270 | .order_by_many([ 2271 | ("order_date", false), 2272 | ("abs_billable_value", false) 2273 | ]) 2274 | .elusion("transactions") 2275 | .await?; 2276 | 2277 | // Create comprehensive dashboard with all plots 2278 | let plots = [ 2279 | (&line, "Sales Line"), // Line based analysis 2280 | (&time_series, "Sales Timeline"), // Time-based analysis 2281 | (&bars, "Customer Sales"), // Customer comparison 2282 | (&histogram, "Sales Distribution"), // Value distribution 2283 | (&scatter, "Value Comparison"), // Value relationships 2284 | (&box_plot, "Customer Distributions"), // Statistical distribution 2285 | (&pie, "Sales Share"), // Share analysis 2286 | (&donut, "Percentage View"), // Percentage breakdown 2287 | ]; 2288 | 2289 | // Add tables array 2290 | let tables = [ 2291 | (&summary_table, "Customer Summary"), 2292 | (&transactions_table, "Transaction Details") 2293 | ]; 2294 | 2295 | let layout = ReportLayout { 2296 | grid_columns: 2, // Arrange plots in 2 columns 2297 | grid_gap: 30, // 30px gap between plots 2298 | max_width: 1600, // Maximum width of 1600px 2299 | plot_height: 450, // Each plot 450px high 2300 | table_height: 500, // Height for tables 2301 | }; 2302 | 2303 | let table_options = TableOptions { 2304 | pagination: true, // Enable pagination for tables 2305 | page_size: 15, // Show 15 rows per page 2306 | enable_sorting: true, // Allow column sorting 2307 | enable_filtering: true, // Allow column filtering 2308 | enable_column_menu: true, // Show column menu (sort/filter/hide options) 2309 | theme: "ag-theme-alpine".to_string(), // Use Alpine theme for modern look 2310 | }; 2311 | 2312 | // Generate the enhanced interactive report with all plots and tables 2313 | CustomDataFrame::create_report( 2314 | Some(&plots), // plots (Optional) 2315 | Some(&tables), // tables (Optional) 2316 | "Interactive Sales Analysis Dashboard", // report_title 2317 | "C:\\Borivoj\\RUST\\Elusion\\Plots\\interactive_aggrid_dashboard.html", // filename 2318 | Some(layout), // layout_config (Optional) 2319 | Some(table_options) // table_options (Optional) 2320 | ).await?; 2321 | ``` 2322 | ### Dashboard Demo 2323 | ![Dash](./images/interactivedash3.gif) 2324 | --- 2325 | ### License 2326 | Elusion is distributed under the [MIT 
License](https://opensource.org/licenses/MIT). 2327 | However, since it builds upon [DataFusion](https://datafusion.apache.org/), which is distributed under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), some parts of this project are subject to the terms of the Apache License 2.0. 2328 | For full details, see the [LICENSE.txt file](LICENSE.txt). 2329 | 2330 | ### Acknowledgments 2331 | This library leverages the power of Rust's type system and libraries like [DataFusion](https://datafusion.apache.org/), 2332 | Apache Arrow, Tokio Cron Scheduler, and Tokio, among others, for efficient query processing. Special thanks to the open-source community for making this project possible. 2333 | 2334 | ## Where you can find me: 2335 | borivoj.grujicic@gmail.com 2336 | -------------------------------------------------------------------------------- /benches/benchmark.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use elusion::prelude::*; 3 | 4 | // Helper function to set up test DataFrames 5 | async fn setup_test_dataframes() -> ElusionResult<(CustomDataFrame, CustomDataFrame, CustomDataFrame, CustomDataFrame)> { 6 | let sales_path = "C:\\Borivoj\\RUST\\Elusion\\SalesData2022.csv"; 7 | let customer_path = "C:\\Borivoj\\RUST\\Elusion\\Customers.csv"; 8 | let products_path = "C:\\Borivoj\\RUST\\Elusion\\Products.csv"; 9 | let sales_order_path = "C:\\Borivoj\\RUST\\Elusion\\sales_order_report2.csv"; 10 | 11 | let sales_df = CustomDataFrame::new(sales_path, "se").await?; 12 | let customers_df = CustomDataFrame::new(customer_path, "c").await?; 13 | let products_df = CustomDataFrame::new(products_path, "p").await?; 14 | let order_df = CustomDataFrame::new(sales_order_path, "o").await?; 15 | 16 | Ok((sales_df, customers_df, products_df, order_df)) 17 | } 18 | 19 | fn benchmark_joins(c: &mut Criterion) { 20 | let rt = tokio::runtime::Runtime::new().unwrap(); 21 | let (sales_df, customers_df, products_df, _) = rt.block_on(setup_test_dataframes()).unwrap(); 22 | 23 | let mut group = c.benchmark_group("Joins"); 24 | 25 | // Single Join Benchmark 26 | group.bench_function("single_join", |b| b.iter(|| { 27 | rt.block_on(async { 28 | sales_df.clone() 29 | .join( 30 | customers_df.clone(), 31 | ["se.CustomerKey = c.CustomerKey"], 32 | "INNER" 33 | ) 34 | .select([ 35 | "se.OrderDate", 36 | "c.FirstName", 37 | "c.LastName", 38 | "se.OrderQuantity" 39 | ]) 40 | .elusion("bench_join") 41 | .await 42 | .unwrap() 43 | }) 44 | })); 45 | 46 | // Multiple Joins Benchmark 47 | group.bench_function("multiple_joins", |b| b.iter(|| { 48 | rt.block_on(async { 49 | sales_df.clone() 50 | .join_many([ 51 | (customers_df.clone(), ["se.CustomerKey = c.CustomerKey"], "INNER"), 52 | (products_df.clone(), ["se.ProductKey = p.ProductKey"], "INNER"), 53 | ]) 54 | .select([ 55 | "c.CustomerKey", 56 | "c.FirstName", 57 | "c.LastName", 58 | "p.ProductName", 59 | ]) 60 | .elusion("bench_many_joins") 61 | .await 62 | .unwrap() 63 | }) 64 | })); 65 | 66 | group.finish(); 67 | } 68 | 69 | fn benchmark_aggregations(c: &mut Criterion) { 70 | let rt = tokio::runtime::Runtime::new().unwrap(); 71 | let (sales_df, customers_df, _, _) = rt.block_on(setup_test_dataframes()).unwrap(); 72 | 73 | let mut group = c.benchmark_group("Aggregations"); 74 | 75 | group.bench_function("simple_agg", |b| b.iter(|| { 76 | rt.block_on(async { 77 | sales_df.clone() 78 | .agg([ 79 | "SUM(se.OrderQuantity) AS total_quantity", 80 | "AVG(se.OrderQuantity) AS 
avg_quantity", 81 | ]) 82 | .elusion("bench_agg") 83 | .await 84 | .unwrap(); 85 | }) 86 | })); 87 | 88 | group.bench_function("complex_agg_with_join", |b| b.iter(|| { 89 | rt.block_on(async { 90 | sales_df.clone() 91 | .join( 92 | customers_df.clone(), // Use the destructured customers_df 93 | ["se.CustomerKey = c.CustomerKey"], 94 | "INNER" 95 | ) 96 | .select([ 97 | "c.FirstName", 98 | "c.LastName" 99 | ]) 100 | .agg([ 101 | "SUM(se.OrderQuantity) AS total_quantity", 102 | "AVG(se.OrderQuantity) AS avg_quantity" 103 | ]) 104 | .group_by(["c.FirstName", "c.LastName"]) 105 | .elusion("bench_complex_agg") 106 | .await 107 | .unwrap(); 108 | }) 109 | })); 110 | 111 | group.finish(); 112 | } 113 | 114 | fn benchmark_multiple_groupings(c: &mut Criterion) { 115 | let rt = tokio::runtime::Runtime::new().unwrap(); 116 | let ( _, _, _,order_df) = rt.block_on(setup_test_dataframes()).unwrap(); 117 | 118 | let mut group = c.benchmark_group("Multiple_Groupings"); 119 | 120 | group.bench_function("agg_multiple_groupings", |b| b.iter(|| { 121 | rt.block_on(async { 122 | order_df.clone() 123 | .select(["customer_name"]) 124 | .agg([ 125 | "SUM(order_value) AS total_value" 126 | ]) 127 | .group_by(["customer_name"]) 128 | .elusion("agg_multiple_groupings") 129 | .await 130 | .unwrap(); 131 | }) 132 | })); 133 | 134 | group.finish(); 135 | } 136 | 137 | 138 | 139 | fn benchmark_window_functions(c: &mut Criterion) { 140 | let rt = tokio::runtime::Runtime::new().unwrap(); 141 | let (sales_df, customers_df, _, _) = rt.block_on(setup_test_dataframes()).unwrap(); 142 | 143 | let mut group = c.benchmark_group("Window_Functions"); 144 | 145 | group.bench_function("basic_window_functions", |b| b.iter(|| { 146 | rt.block_on(async { 147 | sales_df.clone() 148 | .join(customers_df.clone(), ["se.CustomerKey = c.CustomerKey"], "INNER") 149 | .select([ 150 | "se.OrderDate", 151 | "c.FirstName", 152 | "c.LastName", 153 | "se.OrderQuantity" 154 | ]) 155 | // Aggregated window functions 156 | .window("SUM(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS running_total") 157 | .window("AVG(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS running_avg") 158 | // Ranking window functions 159 | .window("ROW_NUMBER() OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS row_num") 160 | .window("DENSE_RANK() OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS dense_rnk") 161 | .limit(10) 162 | .elusion("bench_window_functions") 163 | .await 164 | .unwrap(); 165 | }) 166 | })); 167 | 168 | group.bench_function("advanced_window_functions", |b| b.iter(|| { 169 | rt.block_on(async { 170 | sales_df.clone() 171 | .join(customers_df.clone(), ["se.CustomerKey = c.CustomerKey"], "INNER") 172 | .select([ 173 | "se.OrderDate", 174 | "c.FirstName", 175 | "c.LastName", 176 | "se.OrderQuantity" 177 | ]) 178 | // Analytical window functions 179 | .window("FIRST_VALUE(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS first_qty") 180 | .window("LAST_VALUE(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS last_qty") 181 | .window("LAG(se.OrderQuantity, 1, 0) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS prev_qty") 182 | .window("LEAD(se.OrderQuantity, 1, 0) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate) AS next_qty") 183 | .limit(10) 184 | .elusion("bench_advanced_window_functions") 185 | .await 186 | .unwrap(); 187 | }) 188 | })); 189 | 190 | group.finish(); 191 | } 192 | 193 | fn 
benchmark_window_functions_with_frames(c: &mut Criterion) { 194 | let rt = tokio::runtime::Runtime::new().unwrap(); 195 | let (sales_df, customers_df, _, _) = rt.block_on(setup_test_dataframes()).unwrap(); 196 | 197 | let mut group = c.benchmark_group("Window_Functions_With_Frames"); 198 | 199 | group.bench_function("aggregated_rolling_windows", |b| b.iter(|| { 200 | rt.block_on(async { 201 | sales_df.clone() 202 | .join(customers_df.clone(), ["se.CustomerKey = c.CustomerKey"], "INNER") 203 | .select(["se.OrderDate", "c.FirstName", "c.LastName", "se.OrderQuantity"]) 204 | // Aggregated rolling windows 205 | .window("SUM(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total") 206 | .window("AVG(se.OrderQuantity) OVER (PARTITION BY c.CustomerKey ORDER BY se.OrderDate ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS full_partition_avg") 207 | .limit(10) 208 | .elusion("bench_rolling_windows") 209 | .await 210 | .unwrap(); 211 | }) 212 | })); 213 | 214 | group.finish(); 215 | } 216 | 217 | fn benchmark_pivot(c: &mut Criterion) { 218 | let rt = tokio::runtime::Runtime::new().unwrap(); 219 | let (sales_df, _, _, _) = rt.block_on(setup_test_dataframes()).unwrap(); 220 | 221 | let mut group = c.benchmark_group("Pivot"); 222 | 223 | group.bench_function("pivot_operation", |b| b.iter(|| { 224 | rt.block_on(async { 225 | sales_df.clone() 226 | .pivot( 227 | ["StockDate"], // Row identifiers 228 | "TerritoryKey", // Column to pivot 229 | "OrderQuantity", // Value to aggregate 230 | "SUM" // Aggregation function 231 | ) 232 | .await 233 | .unwrap() 234 | .elusion("bench_pivot") 235 | .await 236 | .unwrap(); 237 | }) 238 | })); 239 | 240 | group.finish(); 241 | } 242 | 243 | fn benchmark_unpivot(c: &mut Criterion) { 244 | let rt = tokio::runtime::Runtime::new().unwrap(); 245 | let (pivoted_df, _) = rt.block_on(async { 246 | let (sales_df, _, _, _) = setup_test_dataframes().await.unwrap(); 247 | let pivoted = sales_df.clone() 248 | .pivot( 249 | ["StockDate"], 250 | "TerritoryKey", 251 | "OrderQuantity", 252 | "SUM" 253 | ) 254 | .await 255 | .unwrap() 256 | .elusion("pivoted_df") 257 | .await 258 | .unwrap(); 259 | (pivoted, ()) 260 | }); 261 | 262 | let mut group = c.benchmark_group("Unpivot"); 263 | 264 | group.bench_function("unpivot_operation", |b| b.iter(|| { 265 | rt.block_on(async { 266 | pivoted_df.clone() 267 | .unpivot( 268 | ["StockDate"], // ID columns 269 | ["TerritoryKey_1", "TerritoryKey_2"], // Value columns to unpivot 270 | "Territory", // New name column 271 | "Quantity" // New value column 272 | ) 273 | .await 274 | .unwrap() 275 | .elusion("bench_unpivot") 276 | .await 277 | .unwrap(); 278 | }) 279 | })); 280 | 281 | group.finish(); 282 | } 283 | 284 | fn benchmark_string_functions(c: &mut Criterion) { 285 | let rt = tokio::runtime::Runtime::new().unwrap(); 286 | let (sales_df, customers_df, products_df, _) = rt.block_on(setup_test_dataframes()).unwrap(); 287 | 288 | let mut group = c.benchmark_group("String_Functions"); 289 | 290 | group.bench_function("string_functions_query", |b| b.iter(|| { 291 | rt.block_on(async { 292 | sales_df.clone() 293 | .join_many([ 294 | (customers_df.clone(), ["se.CustomerKey = c.CustomerKey"], "INNER"), 295 | (products_df.clone(), ["se.ProductKey = p.ProductKey"], "INNER"), 296 | ]) 297 | .select([ 298 | "c.CustomerKey", 299 | "c.FirstName", 300 | "c.LastName", 301 | "c.EmailAddress", 302 | "p.ProductName" 303 | ]) 304 | .string_functions([ 305 | 
"TRIM(c.EmailAddress) AS trimmed_email", 306 | "LTRIM(c.EmailAddress) AS left_trimmed_email", 307 | "RTRIM(c.EmailAddress) AS right_trimmed_email", 308 | "UPPER(c.FirstName) AS upper_first_name", 309 | "LOWER(c.LastName) AS lower_last_name", 310 | "LENGTH(c.EmailAddress) AS email_length", 311 | "LEFT(p.ProductName, 10) AS product_start", 312 | "RIGHT(p.ProductName, 10) AS product_end", 313 | "SUBSTRING(p.ProductName, 1, 5) AS product_substr", 314 | // Concatenation 315 | "CONCAT(c.FirstName, ' ', c.LastName) AS full_name", 316 | "CONCAT_WS(' ', c.FirstName, c.LastName, c.EmailAddress) AS all_info", 317 | // Position and Search 318 | "POSITION('@' IN c.EmailAddress) AS at_symbol_pos", 319 | "STRPOS(c.EmailAddress, '@') AS email_at_pos", 320 | // Replacement and Modification 321 | "REPLACE(c.EmailAddress, '@adventure-works.com', '@newdomain.com') AS new_email", 322 | "TRANSLATE(c.FirstName, 'AEIOU', '12345') AS vowels_replaced", 323 | "REPEAT('*', 5) AS stars", 324 | "REVERSE(c.FirstName) AS reversed_name", 325 | // Padding 326 | "LPAD(c.CustomerKey::TEXT, 10, '0') AS padded_customer_id", 327 | "RPAD(c.FirstName, 20, '.') AS padded_name", 328 | // Case Formatting 329 | "INITCAP(LOWER(c.FirstName)) AS proper_case_name", 330 | // String Extraction 331 | "SPLIT_PART(c.EmailAddress, '@', 1) AS email_username", 332 | ]) 333 | .agg([ 334 | "COUNT(*) AS total_records", 335 | "STRING_AGG(p.ProductName, ', ') AS all_products" 336 | ]) 337 | .filter("c.emailaddress IS NOT NULL") 338 | .group_by_all() 339 | .having("COUNT(*) > 1") 340 | .order_by(["c.CustomerKey"], [true]) 341 | .limit(10) 342 | .elusion("bench_string_functions") 343 | .await 344 | .unwrap(); 345 | }) 346 | })); 347 | 348 | group.finish(); 349 | } 350 | 351 | pub fn benchmark_appending(c: &mut Criterion) { 352 | let rt = tokio::runtime::Runtime::new().unwrap(); 353 | let (sales_df, customers_df, _, _) = rt.block_on(setup_test_dataframes()).unwrap(); 354 | 355 | let mut group = c.benchmark_group("Union_Intersect_Operations"); 356 | group.sample_size(10); 357 | 358 | // Benchmark Union with String Functions 359 | group.bench_function("union_string_functions", |b| b.iter(|| { 360 | rt.block_on(async { 361 | // First DataFrame 362 | let df1 = sales_df.clone() 363 | .join( 364 | customers_df.clone(), 365 | ["se.CustomerKey = c.CustomerKey"], 366 | "INNER" 367 | ) 368 | .select(["c.FirstName", "c.LastName"]) 369 | .string_functions([ 370 | "TRIM(c.EmailAddress) AS trimmed_email", 371 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 372 | ]); 373 | 374 | // Second DataFrame 375 | let df2 = sales_df.clone() 376 | .join( 377 | customers_df.clone(), 378 | ["se.CustomerKey = c.CustomerKey"], 379 | "INNER" 380 | ) 381 | .select(["c.FirstName", "c.LastName"]) 382 | .string_functions([ 383 | "TRIM(c.EmailAddress) AS trimmed_email", 384 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 385 | ]); 386 | 387 | // Execute transformations and union 388 | let result_df1 = df1.elusion("df1").await.unwrap(); 389 | let result_df2 = df2.elusion("df2").await.unwrap(); 390 | 391 | let union_df = result_df1.union(result_df2).await.unwrap(); 392 | let _ = union_df.limit(100).elusion("union_re").await.unwrap(); 393 | }) 394 | })); 395 | 396 | // Benchmark Intersect with String Functions 397 | group.bench_function("intersect_string_functions", |b| b.iter(|| { 398 | rt.block_on(async { 399 | // First DataFrame 400 | let df1 = sales_df.clone() 401 | .join( 402 | customers_df.clone(), 403 | ["se.CustomerKey = c.CustomerKey"], 404 | 
"INNER" 405 | ) 406 | .select(["c.FirstName", "c.LastName"]) 407 | .string_functions([ 408 | "TRIM(c.EmailAddress) AS trimmed_email", 409 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 410 | ]); 411 | 412 | // Second DataFrame - same structure for intersection 413 | let df2 = sales_df.clone() 414 | .join( 415 | customers_df.clone(), 416 | ["se.CustomerKey = c.CustomerKey"], 417 | "INNER" 418 | ) 419 | .select(["c.FirstName", "c.LastName"]) 420 | .string_functions([ 421 | "TRIM(c.EmailAddress) AS trimmed_email", 422 | "CONCAT(TRIM(c.FirstName), ' ', TRIM(c.LastName)) AS full_name", 423 | ]); 424 | 425 | // Execute transformations and intersect 426 | let result_df1 = df1.elusion("df1_intersect").await.unwrap(); 427 | let result_df2 = df2.elusion("df2_intersect").await.unwrap(); 428 | 429 | let intersect_df = result_df1.intersect(result_df2).await.unwrap(); 430 | let _ = intersect_df.limit(100).elusion("intersect_result").await.unwrap(); 431 | }) 432 | })); 433 | 434 | // Benchmark Union with Aggregations 435 | group.bench_function("union_with_aggregations", |b| b.iter(|| { 436 | rt.block_on(async { 437 | // First DataFrame with aggregations 438 | let df1 = sales_df.clone() 439 | .join( 440 | customers_df.clone(), 441 | ["se.CustomerKey = c.CustomerKey"], 442 | "INNER" 443 | ) 444 | .select(["c.FirstName", "c.LastName"]) 445 | .agg([ 446 | "SUM(s.SalesAmount) as total_sales", 447 | "COUNT(*) as transaction_count" 448 | ]) 449 | .group_by_all(); 450 | 451 | // Second DataFrame with aggregations 452 | let df2 = sales_df.clone() 453 | .join( 454 | customers_df.clone(), 455 | ["se.CustomerKey = c.CustomerKey"], 456 | "INNER" 457 | ) 458 | .select(["c.FirstName", "c.LastName"]) 459 | .agg([ 460 | "SUM(s.SalesAmount) as total_sales", 461 | "COUNT(*) as transaction_count" 462 | ]) 463 | .group_by_all(); 464 | 465 | // Execute transformations and union 466 | let result_df1 = df1.elusion("df1_agg").await.unwrap(); 467 | let result_df2 = df2.elusion("df2_agg").await.unwrap(); 468 | 469 | let union_df = result_df1.union(result_df2).await.unwrap(); 470 | let _ = union_df.limit(100).elusion("union_agg_result").await.unwrap(); 471 | }) 472 | })); 473 | 474 | // Benchmark Intersect with Aggregations 475 | group.bench_function("intersect_with_aggregations", |b| b.iter(|| { 476 | rt.block_on(async { 477 | // First DataFrame with aggregations 478 | let df1 = sales_df.clone() 479 | .join( 480 | customers_df.clone(), 481 | ["se.CustomerKey = c.CustomerKey"], 482 | "INNER" 483 | ) 484 | .select(["c.FirstName", "c.LastName"]) 485 | .agg([ 486 | "SUM(s.SalesAmount) as total_sales", 487 | "COUNT(*) as transaction_count" 488 | ]) 489 | .group_by_all(); 490 | 491 | // Second DataFrame with aggregations 492 | let df2 = sales_df.clone() 493 | .join( 494 | customers_df.clone(), 495 | ["se.CustomerKey = c.CustomerKey"], 496 | "INNER" 497 | ) 498 | .select(["c.FirstName", "c.LastName"]) 499 | .agg([ 500 | "SUM(s.SalesAmount) as total_sales", 501 | "COUNT(*) as transaction_count" 502 | ]) 503 | .group_by_all(); 504 | 505 | // Execute transformations and intersect 506 | let result_df1 = df1.elusion("df1_agg_intersect").await.unwrap(); 507 | let result_df2 = df2.elusion("df2_agg_intersect").await.unwrap(); 508 | 509 | let intersect_df = result_df1.intersect(result_df2).await.unwrap(); 510 | let _ = intersect_df.limit(100).elusion("intersect_agg_result").await.unwrap(); 511 | }) 512 | })); 513 | 514 | group.finish(); 515 | } 516 | 517 | fn benchmark_mysql_operations(c: &mut Criterion) { 518 | let rt = 
tokio::runtime::Runtime::new().unwrap(); 519 | 520 | let mut group = c.benchmark_group("MySQL_Operations"); 521 | group.sample_size(10); // Reduce sample size for database operations 522 | 523 | // Benchmark basic MySQL query 524 | group.bench_function("basic_mysql_query", |b| b.iter(|| { 525 | rt.block_on(async { 526 | let mysql_config = MySqlConfig { 527 | host: "localhost".to_string(), 528 | port: 3306, 529 | user: "databora".to_string(), 530 | password: "!Djavolak1".to_string(), 531 | database: "brewery".to_string(), 532 | pool_size: Some(5), 533 | }; 534 | 535 | let conn = MySqlConnection::new(mysql_config).await.unwrap(); 536 | 537 | // Simple query 538 | let query = "SELECT * FROM brewery_data LIMIT 10"; 539 | let df = CustomDataFrame::from_mysql(&conn, query, "basic_mysql_data").await.unwrap(); 540 | let _ = df.limit(10).elusion("basic_result").await.unwrap(); 541 | 542 | }) 543 | })); 544 | 545 | // Benchmark complex MySQL query with CTE, JOINS and window functions 546 | group.bench_function("complex_mysql_query", |b| b.iter(|| { 547 | rt.block_on(async { 548 | let mysql_config = MySqlConfig { 549 | host: "localhost".to_string(), 550 | port: 3306, 551 | user: "databora".to_string(), 552 | password: "!Djavolak1".to_string(), 553 | database: "brewery".to_string(), 554 | pool_size: Some(5), 555 | }; 556 | 557 | let conn = MySqlConnection::new(mysql_config).await.unwrap(); 558 | 559 | // Complex query with CTE, JOIN, and window functions 560 | let mysql_query = " 561 | WITH ranked_sales AS ( 562 | SELECT 563 | c.color AS brew_color, 564 | bd.beer_style, 565 | bd.location, 566 | SUM(bd.total_sales) AS total_sales 567 | FROM 568 | brewery_data bd 569 | JOIN 570 | colors c ON bd.Color = c.color_number 571 | WHERE 572 | bd.brew_date >= '2020-01-01' AND bd.brew_date <= '2020-03-01' 573 | GROUP BY 574 | c.color, bd.beer_style, bd.location 575 | ) 576 | SELECT 577 | brew_color, 578 | beer_style, 579 | location, 580 | total_sales, 581 | ROW_NUMBER() OVER (PARTITION BY brew_color ORDER BY total_sales DESC) AS ranked 582 | FROM 583 | ranked_sales 584 | ORDER BY 585 | brew_color, total_sales DESC"; 586 | 587 | let df = CustomDataFrame::from_mysql(&conn, mysql_query, "mysql_data").await.unwrap(); 588 | let _ = df.limit(100).elusion("complex_result").await.unwrap(); 589 | 590 | }) 591 | })); 592 | 593 | // Benchmark MySQL query with post-processing 594 | group.bench_function("mysql_with_processing", |b| b.iter(|| { 595 | rt.block_on(async { 596 | let mysql_config = MySqlConfig { 597 | host: "localhost".to_string(), 598 | port: 3306, 599 | user: "databora".to_string(), 600 | password: "!Djavolak1".to_string(), 601 | database: "brewery".to_string(), 602 | pool_size: Some(5), 603 | }; 604 | 605 | let conn = MySqlConnection::new(mysql_config).await.unwrap(); 606 | 607 | let query = "SELECT * FROM brewery_data"; 608 | let df = CustomDataFrame::from_mysql(&conn, query, "process_mysql_data").await.unwrap(); 609 | // Apply additional processing with Elusion 610 | let _ = df 611 | .select([ 612 | "brew_date", 613 | "beer_style", 614 | "location", 615 | "total_sales" 616 | ]) 617 | .filter("total_sales > 1000") 618 | .agg([ 619 | "SUM(total_sales) AS total_revenue", 620 | "AVG(total_sales) AS avg_revenue", 621 | "COUNT(*) AS sale_count" 622 | ]) 623 | .group_by([ 624 | "beer_style", 625 | "location" 626 | ]) 627 | .order_by(["total_revenue"], [false]) // DESC order 628 | .limit(20) 629 | .elusion("processed_result") 630 | .await 631 | .unwrap(); 632 | 633 | }) 634 | })); 635 | 636 | group.finish(); 637 | } 
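// NOTE (editorial sketch, not part of the original source): the benchmark groups
// above are registered with Criterion below, so the whole suite runs with
// `cargo bench`, and a single group can be selected by name, e.g.
// `cargo bench -- MySQL_Operations`.
//
// The MySQL benchmarks assume a reachable local `brewery` database. Rather than
// hard-coding credentials as above, the config could be built from environment
// variables; the helper name and variable names here are illustrative only:
//
// fn mysql_config_from_env() -> MySqlConfig {
//     MySqlConfig {
//         host: std::env::var("MYSQL_HOST").unwrap_or_else(|_| "localhost".to_string()),
//         port: 3306,
//         user: std::env::var("MYSQL_USER").expect("MYSQL_USER not set"),
//         password: std::env::var("MYSQL_PASSWORD").expect("MYSQL_PASSWORD not set"),
//         database: std::env::var("MYSQL_DATABASE").unwrap_or_else(|_| "brewery".to_string()),
//         pool_size: Some(5),
//     }
// }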
638 | 639 | criterion_group!( 640 | benches, 641 | benchmark_joins, 642 | benchmark_multiple_groupings, 643 | benchmark_aggregations, 644 | benchmark_window_functions, 645 | benchmark_window_functions_with_frames, 646 | benchmark_pivot, 647 | benchmark_unpivot, 648 | benchmark_string_functions, 649 | benchmark_appending, 650 | benchmark_mysql_operations 651 | ); 652 | criterion_main!(benches); 653 | -------------------------------------------------------------------------------- /images/bar.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataBora/elusion/eeebe26b0637e66835581d4b8b34d133954f61cd/images/bar.PNG -------------------------------------------------------------------------------- /images/elusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataBora/elusion/eeebe26b0637e66835581d4b8b34d133954f61cd/images/elusion.png -------------------------------------------------------------------------------- /images/interactivedash3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataBora/elusion/eeebe26b0637e66835581d4b8b34d133954f61cd/images/interactivedash3.gif -------------------------------------------------------------------------------- /images/platformcom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataBora/elusion/eeebe26b0637e66835581d4b8b34d133954f61cd/images/platformcom.png -------------------------------------------------------------------------------- /images/report.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataBora/elusion/eeebe26b0637e66835581d4b8b34d133954f61cd/images/report.PNG -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod prelude; 2 | pub mod error; 3 | 4 | pub use prelude::*; 5 | 6 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use elusion::prelude::*; 2 | 3 | #[tokio::main] 4 | async fn main() -> ElusionResult<()> { 5 | 6 | println!("Hello, Elusion!"); 7 | 8 | Ok(()) 9 | } 10 | 11 | -------------------------------------------------------------------------------- /src/prelude.rs: -------------------------------------------------------------------------------- 1 | pub use crate::PipelineScheduler; 2 | 3 | pub use crate::{CustomDataFrame, AliasedDataFrame, CsvWriteOptions}; 4 | pub use crate::{ElusionError, ElusionResult}; 5 | //====== postgres 6 | pub use crate::{PostgresConfig, PostgresConnection}; 7 | //========= mysql 8 | pub use crate::{MySqlConfig, MySqlConnection}; 9 | //====== dashboard 10 | pub use crate::{ReportLayout, TableOptions}; 11 | 12 | pub use crate::DateFormat; 13 | pub use crate::{extract_row_from_df, extract_value_from_df}; 14 | 15 | pub use regex::Regex; 16 | pub use datafusion::prelude::*; 17 | pub use datafusion::error::DataFusionError; 18 | pub use futures::future::BoxFuture; 19 | pub use datafusion::datasource::MemTable; 20 | pub use std::sync::Arc; 21 | pub use arrow::datatypes::{Field, DataType as ArrowDataType, Schema, SchemaRef}; 22 | pub use chrono::NaiveDate; 23 | pub use arrow::array::{StringBuilder, 
ArrayRef, ArrayBuilder, Float64Builder, Int64Builder, UInt64Builder}; 24 | 25 | pub use arrow::record_batch::RecordBatch; 26 | pub use ArrowDataType::*; 27 | pub use arrow::csv::writer::WriterBuilder; 28 | 29 | // ========= CSV 30 | pub use std::fs::{self, File, OpenOptions}; 31 | pub use std::io::{self, Read, Write, BufWriter}; 32 | 33 | //============ WRITERS 34 | pub use datafusion::prelude::SessionContext; 35 | pub use datafusion::dataframe::{DataFrame,DataFrameWriteOptions}; 36 | 37 | // ========= JSON 38 | pub use serde_json::{json, Map, Value}; 39 | pub use serde::{Deserialize, Serialize}; 40 | pub use std::collections::{HashMap, HashSet}; 41 | pub use arrow::error::Result as ArrowResult; 42 | pub use datafusion::arrow::datatypes::TimeUnit; 43 | //---json writer 44 | pub use arrow::array::{ListArray,TimestampMicrosecondArray,TimestampMillisecondArray,TimestampSecondArray,LargeBinaryArray,BinaryArray,LargeStringArray,Float32Array,UInt64Array,UInt32Array,BooleanArray}; 45 | 46 | //delta 47 | pub use std::result::Result; 48 | pub use std::path::{Path as LocalPath, PathBuf}; 49 | pub use deltalake::operations::DeltaOps; 50 | pub use deltalake::writer::{RecordBatchWriter, WriteMode, DeltaWriter}; 51 | pub use deltalake::{open_table, DeltaTableBuilder, DeltaTableError, ObjectStore, Path as DeltaPath}; 52 | pub use deltalake::protocol::SaveMode; 53 | pub use deltalake::kernel::{DataType as DeltaType, Metadata, Protocol, StructType}; 54 | pub use deltalake::kernel::StructField; 55 | pub use futures::StreamExt; 56 | pub use deltalake::storage::object_store::local::LocalFileSystem; 57 | // use object_store::path::Path as ObjectStorePath; 58 | 59 | // =========== ERROR 60 | 61 | pub use std::fmt::{self, Debug}; 62 | pub use std::error::Error; 63 | 64 | // PIVOT 65 | pub use arrow::compute; 66 | pub use arrow::array::StringArray; 67 | 68 | //PLOTTING 69 | #[cfg(feature = "dashboard")] 70 | pub use plotly::{Plot, Scatter, Bar, Histogram, BoxPlot, Pie}; 71 | #[cfg(feature = "dashboard")] 72 | pub use plotly::common::{Mode, Line, Marker, Orientation}; 73 | #[cfg(feature = "dashboard")] 74 | pub use plotly::layout::{Axis, Layout}; 75 | #[cfg(feature = "dashboard")] 76 | pub use plotly::color::Rgb; 77 | #[cfg(feature = "dashboard")] 78 | pub use plotly::layout::update_menu::{Button,UpdateMenu,UpdateMenuDirection}; 79 | #[cfg(feature = "dashboard")] 80 | pub use plotly::layout::{DragMode, RangeSlider}; 81 | 82 | pub use arrow::array::{Array, Float64Array,Int64Array,Int32Array,TimestampNanosecondArray, Date64Array,Date32Array}; 83 | #[cfg(feature = "dashboard")] 84 | pub use std::cmp::Ordering; 85 | 86 | #[cfg(not(feature = "dashboard"))] 87 | pub struct Plot; 88 | #[cfg(not(feature = "dashboard"))] 89 | pub struct Scatter; 90 | #[cfg(not(feature = "dashboard"))] 91 | pub struct Bar; 92 | #[cfg(not(feature = "dashboard"))] 93 | pub struct Histogram; 94 | #[cfg(not(feature = "dashboard"))] 95 | pub struct BoxPlot; 96 | #[cfg(not(feature = "dashboard"))] 97 | pub struct Pie; 98 | #[cfg(not(feature = "dashboard"))] 99 | pub struct Mode; 100 | #[cfg(not(feature = "dashboard"))] 101 | pub struct Line; 102 | #[cfg(not(feature = "dashboard"))] 103 | pub struct Marker; 104 | #[cfg(not(feature = "dashboard"))] 105 | pub struct Orientation; 106 | #[cfg(not(feature = "dashboard"))] 107 | pub struct Axis; 108 | #[cfg(not(feature = "dashboard"))] 109 | pub struct Layout; 110 | #[cfg(not(feature = "dashboard"))] 111 | pub struct Rgb; 112 | #[cfg(not(feature = "dashboard"))] 113 | pub struct Button; 114 | 
#[cfg(not(feature = "dashboard"))] 115 | pub struct UpdateMenu; 116 | #[cfg(not(feature = "dashboard"))] 117 | pub struct UpdateMenuDirection; 118 | #[cfg(not(feature = "dashboard"))] 119 | pub struct DragMode; 120 | #[cfg(not(feature = "dashboard"))] 121 | pub struct RangeSlider; 122 | 123 | // STATISTICS 124 | pub use datafusion::common::ScalarValue; 125 | 126 | // ========== AZURE 127 | #[cfg(feature = "azure")] 128 | pub use azure_storage_blobs::prelude::*; 129 | #[cfg(feature = "azure")] 130 | pub use azure_storage::StorageCredentials; 131 | #[cfg(feature = "azure")] 132 | pub use azure_storage::CloudLocation; 133 | pub use futures::stream; 134 | pub use std::io::BufReader; 135 | pub use futures::pin_mut; 136 | pub use csv::ReaderBuilder; 137 | pub use csv::Trim::All; 138 | pub use serde_json::Deserializer; 139 | // ==== writing 140 | #[cfg(feature = "azure")] 141 | pub use azure_storage_blobs::blob::{BlockList, BlobBlockType}; 142 | pub use bytes::Bytes; 143 | pub use datafusion::parquet::basic::Compression; 144 | pub use datafusion::parquet::file::properties::{WriterProperties, WriterVersion}; 145 | pub use datafusion::parquet::arrow::ArrowWriter; 146 | pub use base64::engine::general_purpose::STANDARD; 147 | pub use base64::Engine; 148 | pub use futures::TryStreamExt; 149 | pub use tempfile::Builder; 150 | 151 | // ======== Scheduler 152 | pub use std::future::Future; 153 | pub use tokio_cron_scheduler::{JobScheduler, Job}; 154 | 155 | // ======== From API 156 | #[cfg(feature = "api")] 157 | pub use reqwest::Client; 158 | #[cfg(feature = "api")] 159 | pub use urlencoding::encode; 160 | 161 | pub use crate::ElusionApi; 162 | 163 | #[cfg(not(feature = "api"))] 164 | pub struct Client; 165 | 166 | 167 | // ========= VIEWS and Cache 168 | pub use std::hash::{Hash, Hasher}; 169 | pub use std::collections::hash_map::DefaultHasher; 170 | pub use chrono::{DateTime, Utc}; 171 | pub use std::sync::Mutex; 172 | pub use lazy_static::lazy_static; 173 | 174 | // =========== DATE TABLE BUILDER 175 | pub use arrow::array::Int32Builder; 176 | pub use arrow::array::BooleanBuilder; 177 | pub use chrono::{Datelike, Weekday, Duration, NaiveDateTime, NaiveTime}; 178 | 179 | // ========= EXCEL 180 | #[cfg(feature = "excel")] 181 | pub use rust_xlsxwriter::{Format, Workbook, ExcelDateTime}; 182 | #[cfg(feature = "excel")] 183 | pub use arrow::array::{Int8Array, Int16Array,UInt8Array, UInt16Array}; 184 | 185 | pub use calamine::DataType as CalamineDataType; 186 | pub use calamine::{Reader, Xlsx, open_workbook}; --------------------------------------------------------------------------------