├── .cargo └── config.toml ├── .github └── workflows │ ├── release.yml │ └── rust.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── bench_results ├── anomaly0 ├── anomaly1 ├── anomaly2 ├── baseline0 ├── baseline1 ├── baseline10 ├── baseline11 ├── baseline12 ├── baseline2 ├── baseline3 ├── baseline4 ├── baseline5 ├── baseline6 ├── baseline7 ├── baseline8 ├── baseline9 └── latest ├── benches └── basic.rs ├── export.py ├── float_values.bin ├── float_values.txt ├── git_hooks ├── README.md └── pre-push ├── locustdb-client ├── .appveyor.yml ├── .github │ └── dependabot.yml ├── .gitignore ├── .travis.yml ├── Cargo.toml ├── README.md ├── release.sh ├── src │ ├── client.rs │ └── lib.rs └── tests │ └── web.rs ├── locustdb-compression-utils ├── Cargo.lock ├── Cargo.toml ├── README.md ├── examples │ └── gorilla_time.rs └── src │ ├── lib.rs │ ├── test_data.rs │ └── xor_float │ ├── double.rs │ ├── mod.rs │ └── single.rs ├── locustdb-derive ├── Cargo.lock ├── Cargo.toml └── src │ ├── ast_builder.rs │ ├── enum_syntax.rs │ ├── lib.rs │ └── reify_types.rs ├── locustdb-serialization ├── Cargo.lock ├── Cargo.toml ├── README.md ├── schemas │ ├── api.capnp │ ├── dbmeta.capnp │ ├── partition_segment.capnp │ ├── rust.capnp │ └── wal_segment.capnp └── src │ ├── api.rs │ ├── api_capnp.rs │ ├── dbmeta_capnp.rs │ ├── event_buffer.rs │ ├── lib.rs │ ├── partition_segment_capnp.rs │ └── wal_segment_capnp.rs ├── memsize_results ├── baseline0 ├── baseline1 ├── baseline2 └── baseline3 ├── rust-toolchain ├── samples ├── example_row ├── example_row_sparse └── headers ├── src ├── bin │ ├── db_bench.rs │ ├── db_inspector.rs │ ├── load_generator.rs │ ├── log.rs │ ├── profile.rs │ ├── repl │ │ ├── fmt_table.rs │ │ ├── main.rs │ │ ├── print_results.rs │ │ └── unicode.rs │ └── show.rs ├── bitvec.rs ├── disk_store │ ├── azure_writer.rs │ ├── file_writer.rs │ ├── gcs_writer.rs │ ├── meta_store.rs │ ├── mod.rs │ ├── noop_storage.rs │ ├── 
partition_segment.rs │ ├── storage.rs │ └── wal_segment.rs ├── engine │ ├── data_types │ │ ├── byte_slices.rs │ │ ├── data.rs │ │ ├── mod.rs │ │ ├── nullable_vec_data.rs │ │ ├── scalar_data.rs │ │ ├── types.rs │ │ ├── val_rows.rs │ │ └── vec_data.rs │ ├── execution │ │ ├── batch_merging.rs │ │ ├── buffer.rs │ │ ├── executor.rs │ │ ├── mod.rs │ │ ├── query_task.rs │ │ └── scratchpad.rs │ ├── mod.rs │ ├── operators │ │ ├── aggregate.rs │ │ ├── aggregator.rs │ │ ├── assemble_nullable.rs │ │ ├── binary_operator.rs │ │ ├── bit_unpack.rs │ │ ├── bool_op.rs │ │ ├── buffer_stream.rs │ │ ├── collect.rs │ │ ├── column_ops.rs │ │ ├── combine_null_maps.rs │ │ ├── compact.rs │ │ ├── compact_nullable.rs │ │ ├── compact_nullable_nullable.rs │ │ ├── compact_with_nullable.rs │ │ ├── comparator.rs │ │ ├── comparison_operators.rs │ │ ├── constant.rs │ │ ├── constant_expand.rs │ │ ├── constant_vec.rs │ │ ├── delta_decode.rs │ │ ├── dict_lookup.rs │ │ ├── empty.rs │ │ ├── encode_const.rs │ │ ├── exists.rs │ │ ├── filter.rs │ │ ├── filter_nullable.rs │ │ ├── functions.rs │ │ ├── fuse_nulls.rs │ │ ├── get_null_map.rs │ │ ├── hashmap_grouping.rs │ │ ├── hashmap_grouping_byte_slices.rs │ │ ├── hashmap_grouping_val_rows.rs │ │ ├── identity.rs │ │ ├── indices.rs │ │ ├── is_null.rs │ │ ├── lz4_decode.rs │ │ ├── make_nullable.rs │ │ ├── map_operator.rs │ │ ├── merge.rs │ │ ├── merge_aggregate.rs │ │ ├── merge_deduplicate.rs │ │ ├── merge_deduplicate_partitioned.rs │ │ ├── merge_drop.rs │ │ ├── merge_keep.rs │ │ ├── merge_partitioned.rs │ │ ├── mod.rs │ │ ├── nonzero_compact.rs │ │ ├── nonzero_indices.rs │ │ ├── null_to_i64.rs │ │ ├── null_to_val.rs │ │ ├── null_to_vec.rs │ │ ├── null_vec.rs │ │ ├── null_vec_like.rs │ │ ├── numeric_operators.rs │ │ ├── parameterized_vec_vec_int_op.rs │ │ ├── partition.rs │ │ ├── pco_decode.rs │ │ ├── propagate_nullability.rs │ │ ├── scalar_f64.rs │ │ ├── scalar_i64.rs │ │ ├── scalar_i64_to_scalar_f64.rs │ │ ├── scalar_str.rs │ │ ├── select.rs │ │ ├── 
slice_pack.rs │ │ ├── slice_unpack.rs │ │ ├── sort_by.rs │ │ ├── sort_by_slices.rs │ │ ├── sort_by_val_rows.rs │ │ ├── stream_buffer.rs │ │ ├── subpartition.rs │ │ ├── to_val.rs │ │ ├── top_n.rs │ │ ├── type_conversion.rs │ │ ├── unhexpack_strings.rs │ │ ├── unpack_strings.rs │ │ ├── val_rows_pack.rs │ │ ├── val_rows_unpack.rs │ │ └── vector_operator.rs │ └── planning │ │ ├── filter.rs │ │ ├── mod.rs │ │ ├── planner.rs │ │ ├── query.rs │ │ └── query_plan.rs ├── errors.rs ├── ingest │ ├── alias_method_fork.rs │ ├── buffer.rs │ ├── colgen.rs │ ├── csv_loader.rs │ ├── extractor.rs │ ├── input_column.rs │ ├── mod.rs │ ├── nyc_taxi_data.rs │ ├── raw_val.rs │ └── schema.rs ├── lib.rs ├── locustdb.rs ├── logging_client │ └── mod.rs ├── mem_store │ ├── codec.rs │ ├── column.rs │ ├── column_buffer.rs │ ├── floats.rs │ ├── integers.rs │ ├── lru.rs │ ├── lz4.rs │ ├── mixed_column.rs │ ├── mod.rs │ ├── partition.rs │ ├── raw_col.rs │ ├── strings.rs │ ├── table.rs │ ├── tree.rs │ └── value.rs ├── observability │ ├── metrics.rs │ ├── mod.rs │ ├── perf_counter.rs │ └── simple_trace.rs ├── python.rs ├── scheduler │ ├── disk_read_scheduler.rs │ ├── inner_locustdb.rs │ ├── mod.rs │ ├── shared_sender.rs │ └── task.rs ├── server │ └── mod.rs ├── stringpack.rs ├── syntax │ ├── expression.rs │ ├── limit.rs │ ├── mod.rs │ └── parser.rs └── unit_fmt.rs ├── system_dependencies.sh ├── templates ├── index.html ├── plot.html └── table.html ├── test_data ├── edge_cases.csv ├── nyc-taxi.csv.gz ├── small.csv └── tiny.csv ├── tests ├── ingestion_test.rs └── query_tests.rs └── wandb_data_import.py /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.x86_64-apple-darwin] 2 | rustflags = [ 3 | "-C", "link-arg=-undefined", 4 | "-C", "link-arg=dynamic_lookup", 5 | ] -------------------------------------------------------------------------------- /.github/workflows/rust.yml: 
-------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | name: Test 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | matrix: 12 | include: 13 | - os: ubuntu-latest 14 | - os: macos-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - uses: actions-rs/toolchain@v1 19 | with: 20 | components: rustfmt, clippy 21 | 22 | - name: Cache cargo registry 23 | uses: actions/cache@v3 24 | with: 25 | path: ~/.cargo/registry 26 | key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} 27 | - name: Cache cargo index 28 | uses: actions/cache@v3 29 | with: 30 | path: ~/.cargo/git 31 | key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }} 32 | - name: Cache cargo build 33 | uses: actions/cache@v3 34 | with: 35 | path: target 36 | key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }} 37 | 38 | - name: Install Dependencies 39 | run: ./system_dependencies.sh 40 | 41 | - name: Check 42 | run: cargo check --all-features --all-targets 43 | - name: Clippy 44 | run: cargo clippy --all-features --all-targets -- --deny clippy::all 45 | - name: Run tests 46 | run: cargo test 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | locustdb-derive/target/ 5 | locustdb-compression-utils/target/ 6 | locustdb-serialization/target/ 7 | /data/ 8 | **/node_modules/ 9 | **/.locustDB_history 10 | 11 | # Large benchmarking datasets 12 | test_data/yellow_tripdata_2009-01.csv 13 | test_data/nyc-taxi-data 14 | 15 | # IntelliJ 16 | .idea 17 | locustdb.iml 18 | 19 | # readline 20 | .locustdb_history 21 | 22 | # vim 23 | rusty-tags.vi 24 | 25 | # default rocksdb data dir 26 | rocksdb 27 | 28 | # OS X 29 | .DS_Store 30 | 31 | # VSCode 32 
| .vscode 33 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | 7 | ## [Unreleased] 8 | 9 | ## [0.2.1] - 2019-02-17 10 | ### Added 11 | - Unary `NOT` 12 | - `NOT LIKE` 13 | 14 | ### Fixed 15 | - Fix LIKE operator not matching entire expression 16 | 17 | ## [0.2.0] - 2018-12-31 18 | ### Added 19 | - Reliable parser and full set of basic SQL functionality except joins 20 | 21 | [Unreleased]: https://github.com/cswinter/LocustDB/compare/v0.2.1...HEAD 22 | [0.2.1]: https://github.com/cswinter/LocustDB/compare/v0.2.0...v0.2.1 23 | [0.2.0]: https://github.com/cswinter/LocustDB/compare/v0.1.0-alpha...v0.2.0 24 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to LocustDB 2 | 3 | Thank you for your interest in contributing to LocustDB! Good places to start are [this blog post][blogpost], the [README.md][readme] and the [issue tracker][issues]. I can also recommend [this blogpost][diving-into], which describes a general strategy for making changes to a codebase you've never worked with before. 4 | 5 | If you have any questions about LocustDB, feel free to ask on [gitter][gitter]. 6 | 7 | ## Working on issues 8 | If you're looking for somewhere to start, check out the [good first issue tag][good-first-issue]. 9 | 10 | Feel free to ask for guidelines on how to tackle a problem on [gitter][gitter] or open a [new issue][new-issue]. 
11 | This is especially important if you want to add new features to LocustDB or make large changes to the already existing code base. LocustDB's core developers will do their best to provide help. 12 | 13 | Various issues have a corresponding TODO(#ISSUE\_NUMBER) in the relevant section of the code. 14 | 15 | If you start working on an already-filed issue, post a comment on this issue to let people know that somebody is working on it. Feel free to ask for comments if you are unsure about the solution you would like to submit. 16 | 17 | We use the "fork and pull" model [described here][development-models], where contributors push changes to their personal fork and create pull requests to bring those changes into the source repository. 18 | 19 | Your basic steps to get going: 20 | 21 | * Fork LocustDB and create a branch from master for the issue you are working on. 22 | * Please adhere to the code style that you see around the location you are working on. 23 | * [Commit as you go][githelp]. 24 | * Include tests that cover all non-trivial code. Usually the easiest way of doing that is to add a new integration test to `tests/query_tests.rs`. 25 | * If you are adding a performance optimisation, make sure there is a benchmark case in `benches/basic.rs` that covers the optimisation. 26 | * Make sure `cargo test` passes and the benchmarks can still be compiled (`cargo check --bench basic`). Running clippy and rustfmt is encouraged, but the existing code is not currently compliant. 27 | * Push your commits to GitHub and create a pull request against LocustDB's `master` branch. 
28 | 29 | [githelp]: https://dont-be-afraid-to-commit.readthedocs.io/en/latest/git/commandlinegit.html 30 | [development-models]: https://help.github.com/articles/about-collaborative-development-models/ 31 | [gitter]: https://gitter.im/LocustDB/Lobby 32 | [issues]: https://github.com/cswinter/LocustDB/issues 33 | [new-issue]: https://github.com/cswinter/LocustDB/issues/new 34 | [good-first-issue]: https://github.com/cswinter/LocustDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22 35 | [blogpost]: https://clemenswinter.com/2018/07/09/how-to-analyze-billions-of-records-per-second-on-a-single-desktop-pc/ 36 | [readme]: https://github.com/cswinter/LocustDB/blob/master/README.md 37 | [diving-into]: http://www.lihaoyi.com/post/DivingIntoOtherPeoplesCode.html 38 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Clemens Winter "] 3 | description = "Embeddable high-performance analytics database." 
4 | edition = "2021" 5 | license-file = "LICENSE" 6 | name = "locustdb" 7 | version = "0.5.6" 8 | repository = "https://github.com/cswinter/LocustDB" 9 | readme = "README.md" 10 | 11 | [lib] 12 | crate-type = ["cdylib", "rlib"] 13 | 14 | [dependencies] 15 | actix-cors = "0.7" 16 | actix-web = "4" 17 | aliasmethod = "0.4" 18 | azure_core = { version = "0.19.0", features = ["enable_reqwest_rustls"], default-features = false } 19 | azure_identity = { version = "0.19.0", features = ["enable_reqwest_rustls"], default-features = false } 20 | azure_storage = { version = "0.19.0", features = ["enable_reqwest_rustls"], default-features = false } 21 | azure_storage_blobs = { version = "0.19.0", features = ["enable_reqwest_rustls"], default-features = false } 22 | bit-vec = "0.8" 23 | blake2 = "0.10" 24 | byteorder = "1.5" 25 | capnp = "0.21" 26 | chrono = "0.4" 27 | clap = "4.5" 28 | csv = "1" 29 | datasize = "0.2.15" 30 | env_logger = "0.11" 31 | flate2 = "1.1" 32 | fnv = "1.0" 33 | futures = "0.3" 34 | google-cloud-storage = { version = "0.24", features = ["rustls-tls", "auth"], default-features = false } 35 | hex = "0.4" 36 | itertools = "0.14" 37 | lazy_static = "1.5.0" 38 | locustdb-compression-utils = {path = "./locustdb-compression-utils", version = "0.2.0"} 39 | locustdb-derive = {path = "./locustdb-derive", version = "0.2.2"} 40 | locustdb-serialization = {path = "./locustdb-serialization", version = "0.2.2"} 41 | log = {features = ["max_level_trace", "release_max_level_debug"], version = "0.4"} 42 | lru = "0.14" 43 | lz4_flex = { version = "0.11" } 44 | num = "0.4" 45 | num_cpus = "1.16" 46 | ordered-float = { version = "5", features = ["serde"] } 47 | pco = "0.4.2" 48 | prometheus = "0.14.0" 49 | pyo3 = {features = ["extension-module"], version = "0.24.2", optional = true} 50 | rand = "0.9" 51 | rand_xorshift = "0.4.0" 52 | random_word = { version = "0.5", features = ["en", "fr", "de"] } 53 | regex = "1" 54 | reqwest = { version = "0.12", default-features = false, 
features = ["json", "rustls-tls"] } 55 | rustyline = "15.0" 56 | scoped_threadpool = "0.1" 57 | seahash = "4.1" 58 | serde = { version = "1.0", features = ["derive"] } 59 | serde_json = "1.0" 60 | sha2 = "0.10" 61 | sqlparser = "0.56" 62 | std-semaphore = "0.1" 63 | structopt = "0.3" 64 | systemstat = "0.2.4" 65 | tempfile = "3" 66 | tera = "1" 67 | thiserror = "2.0.12" 68 | threadpool = "1.8.1" 69 | time = "0.3" 70 | tokio = { version = "1", features = ["full"] } 71 | tokio-util = "0.7" 72 | unicode-segmentation = "1" 73 | unicode-width = "0.2" 74 | walkdir = "2.5.0" 75 | 76 | [dev-dependencies] 77 | pretty_assertions = "1" 78 | 79 | [features] 80 | default = [] 81 | python = ["pyo3"] 82 | 83 | 84 | [profile.release] 85 | codegen-units = 1 86 | debug = true 87 | lto = true 88 | opt-level = 3 89 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Dropbox, Inc. 2 | Copyright (c) 2018 Clemens Winter 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | -------------------------------------------------------------------------------- /bench_results/anomaly0: -------------------------------------------------------------------------------- 1 | batchsize 2^16 2 | 3 | running 8 tests 4 | test count_by_passenger_count ... bench: 11,674,768 ns/iter (+/- 2,082,851) 5 | test count_group_by_vendor_id_and_passenger_count ... 
bench: 878,825,549 ns/iter (+/- 11,190,933) 6 | test q1_count_cab_type ... bench: 13,311,756 ns/iter (+/- 3,939,847) 7 | test q2_avg_total_amount_by_passenger_count ... bench: 247,310,599 ns/iter (+/- 10,812,396) 8 | test q3_count_passenger_count_pickup_year ... bench: 1,491,088,185 ns/iter (+/- 42,808,753) 9 | test select_passenger_count_sparse_filter ... bench: 1,258,719,035 ns/iter (+/- 11,086,208) 10 | test select_star_limit_10000 ... bench: 374,562,778 ns/iter (+/- 25,321,861) 11 | test sum_total_amt_group_by_passenger_count ... bench: 245,749,678 ns/iter (+/- 775,610) 12 | -------------------------------------------------------------------------------- /bench_results/anomaly1: -------------------------------------------------------------------------------- 1 | batchsize 2^10 2 | 3 | running 8 tests 4 | test count_by_passenger_count ... bench: 66,711,448 ns/iter (+/- 8,744,116) 5 | test count_by_vendor_id_and_passenger_count ... bench: 118,967,104 ns/iter (+/- 6,259,125) 6 | test q1_count_cab_type ... bench: 67,642,275 ns/iter (+/- 4,876,545) 7 | test q2_avg_total_amount_by_passenger_count ... bench: 89,427,755 ns/iter (+/- 6,275,577) 8 | test q3_count_by_passenger_count_pickup_year ... bench: 6,359,979,005 ns/iter (+/- 310,230,539) 9 | test select_passenger_count_sparse_filter ... bench: 203,733,119 ns/iter (+/- 6,924,933) 10 | test select_star_limit_10000 ... bench: 889,329,736 ns/iter (+/- 16,377,040) 11 | test sum_total_amt_by_passenger_count ... bench: 78,117,183 ns/iter (+/- 11,485,842) 12 | -------------------------------------------------------------------------------- /bench_results/anomaly2: -------------------------------------------------------------------------------- 1 | batchsize 2^20 2 | 3 | running 8 tests 4 | test count_by_passenger_count ... bench: 12,416,697 ns/iter (+/- 1,290,632) 5 | test count_by_vendor_id_and_passenger_count ... bench: 233,845,898 ns/iter (+/- 12,554,651) 6 | test q1_count_cab_type ... 
bench: 13,473,580 ns/iter (+/- 909,745) 7 | test q2_avg_total_amount_by_passenger_count ... bench: 74,571,471 ns/iter (+/- 4,250,941) 8 | test q3_count_by_passenger_count_pickup_year ... bench: 342,046,676 ns/iter (+/- 25,501,960) 9 | test select_passenger_count_sparse_filter ... bench: 242,676,708 ns/iter (+/- 11,081,106) 10 | test select_star_limit_10000 ... bench: 1,595,645,667 ns/iter (+/- 114,784,124) 11 | test sum_total_amt_by_passenger_count ... bench: 71,744,061 ns/iter (+/- 3,311,822) 12 | -------------------------------------------------------------------------------- /bench_results/baseline0: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 8 tests 22 | test count_by_passenger_count ... bench: 26,938,870 ns/iter (+/- 2,675,577) 23 | test count_by_vendor_id_and_passenger_count ... bench: 550,722,493 ns/iter (+/- 16,909,930) 24 | test q1_count_cab_type ... bench: 30,656,683 ns/iter (+/- 2,475,807) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 190,362,991 ns/iter (+/- 4,582,829) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 828,067,692 ns/iter (+/- 29,823,670) 27 | test select_passenger_count_sparse_filter ... bench: 555,553,199 ns/iter (+/- 59,422,977) 28 | test select_star_limit_10000 ... bench: 972,255,085 ns/iter (+/- 25,239,661) 29 | test sum_total_amt_by_passenger_count ... 
bench: 184,066,713 ns/iter (+/- 6,177,480) 30 | 31 | test result: ok. 0 passed; 0 failed; 0 ignored; 8 measured; 0 filtered out 32 | 33 | -------------------------------------------------------------------------------- /bench_results/baseline1: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 8 tests 22 | test count_by_passenger_count ... bench: 26,070,934 ns/iter (+/- 1,594,412) 23 | test count_by_vendor_id_and_passenger_count ... bench: 499,509,746 ns/iter (+/- 17,562,185) 24 | test q1_count_cab_type ... bench: 29,879,445 ns/iter (+/- 1,041,851) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 175,384,548 ns/iter (+/- 4,727,845) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 783,307,490 ns/iter (+/- 24,737,787) 27 | test select_passenger_count_sparse_filter ... bench: 538,106,785 ns/iter (+/- 10,699,233) 28 | test select_star_limit_10000 ... bench: 927,526,976 ns/iter (+/- 20,292,403) 29 | test sum_total_amt_by_passenger_count ... bench: 168,620,796 ns/iter (+/- 5,159,333) 30 | 31 | test result: ok. 
0 passed; 0 failed; 0 ignored; 8 measured; 0 filtered out 32 | 33 | -------------------------------------------------------------------------------- /bench_results/baseline10: -------------------------------------------------------------------------------- 1 | 2 | running 5 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test engine::vector_op::top_n::tests::test_heap_replace ... ignored 5 | test syntax::parser::tests::test_last_hour ... ignored 6 | test syntax::parser::tests::test_select_star ... ignored 7 | test syntax::parser::tests::test_to_year ... ignored 8 | 9 | test result: ok. 0 passed; 0 failed; 5 ignored; 0 measured; 0 filtered out 10 | 11 | 12 | running 0 tests 13 | 14 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 15 | 16 | 17 | running 0 tests 18 | 19 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 20 | 21 | 22 | running 13 tests 23 | test count_by_passenger_count ... bench: 15,627,532 ns/iter (+/- 655,985) 24 | test count_by_vendor_id_and_passenger_count ... bench: 71,846,441 ns/iter (+/- 2,994,324) 25 | test q1_count_cab_type ... bench: 18,137,258 ns/iter (+/- 682,612) 26 | test q2_avg_total_amount_by_passenger_count ... bench: 44,917,954 ns/iter (+/- 3,062,919) 27 | test q3_count_by_passenger_count_pickup_year ... bench: 60,057,811 ns/iter (+/- 1,042,005) 28 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 203,147,333 ns/iter (+/- 4,938,887) 29 | test q5_sparse_filter ... bench: 89,310,448 ns/iter (+/- 3,107,625) 30 | test q6_top_n ... bench: 28,955,535 ns/iter (+/- 2,397,557) 31 | test q7_hashmap_grouping ... bench: 387,756,233 ns/iter (+/- 7,447,014) 32 | test q8_group_by_trip_id ... ignored 33 | test select_passenger_count_sparse_filter ... bench: 247,671,658 ns/iter (+/- 3,402,559) 34 | test select_star_limit_10000 ... ignored 35 | test sum_total_amt_by_passenger_count ... 
bench: 40,696,847 ns/iter (+/- 1,339,187) 36 | 37 | test result: ok. 0 passed; 0 failed; 2 ignored; 11 measured; 0 filtered out 38 | 39 | -------------------------------------------------------------------------------- /bench_results/baseline11: -------------------------------------------------------------------------------- 1 | 2 | running 5 tests 3 | test engine::vector_op::subpartition::tests::test_multipass_grouping ... ignored 4 | test engine::vector_op::top_n::tests::test_heap_replace ... ignored 5 | test syntax::parser::tests::test_last_hour ... ignored 6 | test syntax::parser::tests::test_select_star ... ignored 7 | test syntax::parser::tests::test_to_year ... ignored 8 | 9 | test result: ok. 0 passed; 0 failed; 5 ignored; 0 measured; 0 filtered out 10 | 11 | 12 | running 0 tests 13 | 14 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 15 | 16 | 17 | running 0 tests 18 | 19 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 20 | 21 | 22 | running 0 tests 23 | 24 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 25 | 26 | 27 | running 13 tests 28 | test count_by_passenger_count ... bench: 18,294,162 ns/iter (+/- 2,496,582) 29 | test count_by_vendor_id_and_passenger_count ... bench: 76,762,635 ns/iter (+/- 3,339,295) 30 | test q1_count_cab_type ... bench: 20,776,009 ns/iter (+/- 3,198,444) 31 | test q2_avg_total_amount_by_passenger_count ... bench: 49,578,845 ns/iter (+/- 3,293,207) 32 | test q3_count_by_passenger_count_pickup_year ... bench: 63,038,754 ns/iter (+/- 3,402,668) 33 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 208,468,489 ns/iter (+/- 3,078,098) 34 | test q5_sparse_filter ... bench: 99,581,002 ns/iter (+/- 5,713,491) 35 | test q6_top_n ... bench: 31,527,822 ns/iter (+/- 3,030,209) 36 | test q7_hashmap_grouping ... bench: 311,869,086 ns/iter (+/- 6,209,690) 37 | test q8_group_by_trip_id ... 
ignored 38 | test select_passenger_count_sparse_filter ... bench: 255,659,998 ns/iter (+/- 6,248,932) 39 | test select_star_limit_10000 ... ignored 40 | test sum_total_amt_by_passenger_count ... bench: 44,787,624 ns/iter (+/- 3,711,254) 41 | 42 | test result: ok. 0 passed; 0 failed; 2 ignored; 11 measured; 0 filtered out 43 | 44 | -------------------------------------------------------------------------------- /bench_results/baseline12: -------------------------------------------------------------------------------- 1 | 2 | running 5 tests 3 | test engine::vector_op::subpartition::tests::test_multipass_grouping ... ignored 4 | test engine::vector_op::top_n::tests::test_heap_replace ... ignored 5 | test syntax::parser::tests::test_last_hour ... ignored 6 | test syntax::parser::tests::test_select_star ... ignored 7 | test syntax::parser::tests::test_to_year ... ignored 8 | 9 | test result: ok. 0 passed; 0 failed; 5 ignored; 0 measured; 0 filtered out 10 | 11 | 12 | running 0 tests 13 | 14 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 15 | 16 | 17 | running 0 tests 18 | 19 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 20 | 21 | 22 | running 0 tests 23 | 24 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 25 | 26 | 27 | running 13 tests 28 | test count_by_passenger_count ... bench: 16,129,780 ns/iter (+/- 739,944) 29 | test count_by_vendor_id_and_passenger_count ... bench: 71,512,490 ns/iter (+/- 1,099,241) 30 | test q1_count_cab_type ... bench: 18,421,078 ns/iter (+/- 609,591) 31 | test q2_avg_total_amount_by_passenger_count ... bench: 45,844,627 ns/iter (+/- 792,661) 32 | test q3_count_by_passenger_count_pickup_year ... bench: 60,440,313 ns/iter (+/- 1,171,763) 33 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 199,199,110 ns/iter (+/- 2,615,830) 34 | test q5_sparse_filter ... bench: 81,682,932 ns/iter (+/- 1,575,449) 35 | test q6_top_n ... 
bench: 22,215,360 ns/iter (+/- 984,098) 36 | test q7_hashmap_grouping ... bench: 299,474,974 ns/iter (+/- 9,163,364) 37 | test q8_group_by_trip_id ... ignored 38 | test select_passenger_count_sparse_filter ... bench: 229,560,195 ns/iter (+/- 7,530,226) 39 | test select_star_limit_10000 ... ignored 40 | test sum_total_amt_by_passenger_count ... bench: 41,712,664 ns/iter (+/- 765,522) 41 | 42 | test result: ok. 0 passed; 0 failed; 2 ignored; 11 measured; 0 filtered out 43 | 44 | -------------------------------------------------------------------------------- /bench_results/baseline2: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 8 tests 22 | test count_by_passenger_count ... bench: 27,406,000 ns/iter (+/- 6,282,199) 23 | test count_by_vendor_id_and_passenger_count ... bench: 496,478,007 ns/iter (+/- 16,991,732) 24 | test q1_count_cab_type ... bench: 29,601,765 ns/iter (+/- 1,532,514) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 126,378,488 ns/iter (+/- 4,177,675) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 782,609,520 ns/iter (+/- 30,886,586) 27 | test select_passenger_count_sparse_filter ... bench: 535,309,369 ns/iter (+/- 11,938,554) 28 | test select_star_limit_10000 ... bench: 918,701,430 ns/iter (+/- 20,417,984) 29 | test sum_total_amt_by_passenger_count ... 
bench: 116,583,969 ns/iter (+/- 2,898,219) 30 | 31 | test result: ok. 0 passed; 0 failed; 0 ignored; 8 measured; 0 filtered out 32 | 33 | -------------------------------------------------------------------------------- /bench_results/baseline3: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 8 tests 22 | test count_by_passenger_count ... bench: 25,243,860 ns/iter (+/- 1,027,409) 23 | test count_by_vendor_id_and_passenger_count ... bench: 500,672,878 ns/iter (+/- 17,897,606) 24 | test q1_count_cab_type ... bench: 28,203,723 ns/iter (+/- 2,043,693) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 129,087,699 ns/iter (+/- 4,483,727) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 786,015,826 ns/iter (+/- 33,239,775) 27 | test select_passenger_count_sparse_filter ... bench: 529,497,008 ns/iter (+/- 11,370,622) 28 | test select_star_limit_10000 ... bench: 920,571,002 ns/iter (+/- 25,253,936) 29 | test sum_total_amt_by_passenger_count ... bench: 120,091,337 ns/iter (+/- 3,841,109) 30 | 31 | test result: ok. 
0 passed; 0 failed; 0 ignored; 8 measured; 0 filtered out 32 | 33 | -------------------------------------------------------------------------------- /bench_results/baseline4: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 8 tests 22 | test count_by_passenger_count ... bench: 27,511,249 ns/iter (+/- 2,073,721) 23 | test count_by_vendor_id_and_passenger_count ... bench: 83,954,577 ns/iter (+/- 3,050,063) 24 | test q1_count_cab_type ... bench: 30,058,291 ns/iter (+/- 1,549,527) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 63,421,217 ns/iter (+/- 1,623,546) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 305,009,611 ns/iter (+/- 9,044,703) 27 | test select_passenger_count_sparse_filter ... bench: 375,427,738 ns/iter (+/- 9,443,649) 28 | test select_star_limit_10000 ... bench: 961,663,022 ns/iter (+/- 24,624,939) 29 | test sum_total_amt_by_passenger_count ... bench: 50,217,733 ns/iter (+/- 3,677,470) 30 | 31 | test result: ok. 0 passed; 0 failed; 0 ignored; 8 measured; 0 filtered out 32 | 33 | -------------------------------------------------------------------------------- /bench_results/baseline5: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... 
ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 9 tests 22 | test count_by_passenger_count ... bench: 29,199,064 ns/iter (+/- 1,034,507) 23 | test count_by_vendor_id_and_passenger_count ... bench: 89,481,240 ns/iter (+/- 3,335,758) 24 | test q1_count_cab_type ... bench: 31,903,802 ns/iter (+/- 1,176,215) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 67,294,496 ns/iter (+/- 1,043,722) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 318,842,838 ns/iter (+/- 6,398,757) 27 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 760,724,705 ns/iter (+/- 34,517,497) 28 | test select_passenger_count_sparse_filter ... bench: 390,120,439 ns/iter (+/- 7,071,757) 29 | test select_star_limit_10000 ... bench: 977,810,685 ns/iter (+/- 24,288,318) 30 | test sum_total_amt_by_passenger_count ... bench: 51,671,627 ns/iter (+/- 1,322,935) 31 | 32 | test result: ok. 0 passed; 0 failed; 0 ignored; 9 measured; 0 filtered out 33 | 34 | -------------------------------------------------------------------------------- /bench_results/baseline6: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 
0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 9 tests 22 | test count_by_passenger_count ... bench: 15,724,438 ns/iter (+/- 1,072,153) 23 | test count_by_vendor_id_and_passenger_count ... bench: 72,069,173 ns/iter (+/- 2,089,003) 24 | test q1_count_cab_type ... bench: 18,288,624 ns/iter (+/- 1,001,521) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 44,159,055 ns/iter (+/- 2,239,541) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 291,800,859 ns/iter (+/- 6,616,003) 27 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 717,384,932 ns/iter (+/- 17,664,140) 28 | test select_passenger_count_sparse_filter ... bench: 367,080,147 ns/iter (+/- 9,438,398) 29 | test select_star_limit_10000 ... bench: 939,382,492 ns/iter (+/- 28,662,138) 30 | test sum_total_amt_by_passenger_count ... bench: 39,902,086 ns/iter (+/- 1,238,907) 31 | 32 | test result: ok. 0 passed; 0 failed; 0 ignored; 9 measured; 0 filtered out 33 | 34 | -------------------------------------------------------------------------------- /bench_results/baseline7: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 
0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 9 tests 22 | test count_by_passenger_count ... bench: 15,633,172 ns/iter (+/- 1,052,019) 23 | test count_by_vendor_id_and_passenger_count ... bench: 71,932,129 ns/iter (+/- 1,938,891) 24 | test q1_count_cab_type ... bench: 18,194,617 ns/iter (+/- 689,133) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 45,237,507 ns/iter (+/- 1,201,930) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 59,444,430 ns/iter (+/- 1,278,476) 27 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 202,641,239 ns/iter (+/- 5,968,827) 28 | test select_passenger_count_sparse_filter ... bench: 366,709,186 ns/iter (+/- 4,443,679) 29 | test select_star_limit_10000 ... ignored 30 | test sum_total_amt_by_passenger_count ... bench: 41,048,125 ns/iter (+/- 2,398,789) 31 | 32 | test result: ok. 0 passed; 0 failed; 1 ignored; 8 measured; 0 filtered out 33 | 34 | -------------------------------------------------------------------------------- /bench_results/baseline8: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 12 tests 22 | test count_by_passenger_count ... bench: 15,705,040 ns/iter (+/- 784,971) 23 | test count_by_vendor_id_and_passenger_count ... bench: 72,046,240 ns/iter (+/- 2,547,455) 24 | test q1_count_cab_type ... 
bench: 18,197,313 ns/iter (+/- 927,398) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 46,106,829 ns/iter (+/- 4,138,488) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 60,239,773 ns/iter (+/- 1,136,396) 27 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 203,058,966 ns/iter (+/- 3,650,031) 28 | test q5_sparse_filter ... bench: 405,593,372 ns/iter (+/- 5,204,887) 29 | test q6_top_n ... ignored 30 | test q7_group_by_trip_id ... ignored 31 | test select_passenger_count_sparse_filter ... bench: 367,930,764 ns/iter (+/- 9,318,326) 32 | test select_star_limit_10000 ... ignored 33 | test sum_total_amt_by_passenger_count ... bench: 41,696,022 ns/iter (+/- 10,565,047) 34 | 35 | test result: ok. 0 passed; 0 failed; 3 ignored; 9 measured; 0 filtered out 36 | 37 | -------------------------------------------------------------------------------- /bench_results/baseline9: -------------------------------------------------------------------------------- 1 | 2 | running 5 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test engine::vector_op::top_n::tests::test_heap_replace ... ignored 5 | test syntax::parser::tests::test_last_hour ... ignored 6 | test syntax::parser::tests::test_select_star ... ignored 7 | test syntax::parser::tests::test_to_year ... ignored 8 | 9 | test result: ok. 0 passed; 0 failed; 5 ignored; 0 measured; 0 filtered out 10 | 11 | 12 | running 0 tests 13 | 14 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 15 | 16 | 17 | running 0 tests 18 | 19 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 20 | 21 | 22 | running 12 tests 23 | test count_by_passenger_count ... bench: 16,737,115 ns/iter (+/- 978,516) 24 | test count_by_vendor_id_and_passenger_count ... bench: 77,106,421 ns/iter (+/- 8,522,187) 25 | test q1_count_cab_type ... bench: 19,302,662 ns/iter (+/- 1,208,816) 26 | test q2_avg_total_amount_by_passenger_count ... 
bench: 47,660,235 ns/iter (+/- 1,403,601) 27 | test q3_count_by_passenger_count_pickup_year ... bench: 53,953,279 ns/iter (+/- 2,222,464) 28 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 215,077,493 ns/iter (+/- 5,359,731) 29 | test q5_sparse_filter ... bench: 95,569,850 ns/iter (+/- 3,434,476) 30 | test q6_top_n ... bench: 30,493,132 ns/iter (+/- 2,212,544) 31 | test q7_group_by_trip_id ... ignored 32 | test select_passenger_count_sparse_filter ... bench: 266,626,179 ns/iter (+/- 7,785,819) 33 | test select_star_limit_10000 ... ignored 34 | test sum_total_amt_by_passenger_count ... bench: 43,133,618 ns/iter (+/- 2,111,243) 35 | 36 | test result: ok. 0 passed; 0 failed; 2 ignored; 10 measured; 0 filtered out 37 | 38 | -------------------------------------------------------------------------------- /bench_results/latest: -------------------------------------------------------------------------------- 1 | 2 | running 7 tests 3 | test engine::vector_op::subpartition::tests::test_multipass_grouping ... ignored 4 | test engine::vector_op::top_n::tests::test_heap_replace ... ignored 5 | test ingest::alias_method_fork::test_new_alias_table ... ignored 6 | test mem_store::codec::tests::test_ensure_property ... ignored 7 | test syntax::parser::tests::test_select_star ... ignored 8 | test syntax::parser::tests::test_to_year ... ignored 9 | test unit_fmt::tests::test_format ... ignored 10 | 11 | test result: ok. 0 passed; 0 failed; 7 ignored; 0 measured; 0 filtered out 12 | 13 | 14 | running 0 tests 15 | 16 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 17 | 18 | 19 | running 0 tests 20 | 21 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 22 | 23 | 24 | running 0 tests 25 | 26 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 27 | 28 | 29 | running 10 tests 30 | test avg_total_amount_by_passenger_count ... 
bench: 53,240,800 ns/iter (+/- 1,049,080) 31 | test count_by_passenger_count ... bench: 16,645,900 ns/iter (+/- 768,500) 32 | test count_by_passenger_count_pickup_year_trip_distance ... bench: 22,535,800 ns/iter (+/- 626,580) 33 | test count_by_vendor_id_and_passenger_count ... bench: 77,903,100 ns/iter (+/- 1,577,070) 34 | test count_cab_type ... bench: 19,905,100 ns/iter (+/- 476,580) 35 | test group_by_trip_id ... bench: 11,085,700 ns/iter (+/- 1,029,360) 36 | test hashmap_grouping ... bench: 38,412,200 ns/iter (+/- 1,348,840) 37 | test sparse_filter ... bench: 86,380,900 ns/iter (+/- 1,570,090) 38 | test sum_total_amt_by_passenger_count ... bench: 50,006,600 ns/iter (+/- 1,502,090) 39 | test top_n ... bench: 17,454,800 ns/iter (+/- 622,920) 40 | 41 | test result: ok. 0 passed; 0 failed; 0 ignored; 10 measured; 0 filtered out 42 | 43 | -------------------------------------------------------------------------------- /export.py: -------------------------------------------------------------------------------- 1 | from locustdb import Client 2 | import numpy as np 3 | import argparse 4 | 5 | # parse argument (single positinal arge for column name) 6 | parser = argparse.ArgumentParser(description='Query locustDB') 7 | parser.add_argument('column_name', type=str, help='column name to query') 8 | args = parser.parse_args() 9 | colname = args.column_name 10 | 11 | client = Client("http://localhost:8080") 12 | results = client.query(f'SELECT "{colname}" FROM "avid-wildflower-3446"') 13 | 14 | # replace brakets with underscores 15 | scolname = colname.replace('[', '_').replace(']', '_') 16 | 17 | with open(scolname + ".txt", "w") as f: 18 | np.savetxt(f, [(r if r is not None else np.nan) for r in results[colname]]) 19 | -------------------------------------------------------------------------------- /float_values.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cswinter/LocustDB/016efd84bbae9781c93ecffda63422e5fefb8e93/float_values.bin -------------------------------------------------------------------------------- /git_hooks/README.md: -------------------------------------------------------------------------------- 1 | Git hook for running tests and compiling benchmark before pushing. 2 | 3 | Activate by linking to the `pre-push` script from `.git/hooks`: 4 | 5 | ```bash 6 | ln -s git_hooks/pre-push .git/hooks/pre-push 7 | ``` 8 | 9 | If you are using git bash on Windows and can't use symbolic links, and you may want to just copy the file instead: 10 | 11 | ```bash 12 | cp git_hooks/pre-push .git/hooks/pre-push 13 | ``` 14 | -------------------------------------------------------------------------------- /git_hooks/pre-push: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu 3 | 4 | check_char='\xE2\x9C\x93' 5 | cross_char='\xE2\x9D\x8C' 6 | green='\033[0;32m' 7 | red='\033[0;31m' 8 | nc='\033[0m' 9 | check="$green$check_char$nc" 10 | cross="$red$cross_char$nc" 11 | errors=0 12 | 13 | 14 | ## Require_clean_work_tree ## 15 | # Update the index 16 | git update-index -q --ignore-submodules --refresh 17 | err=0 18 | 19 | # Disallow unstaged changes in the working tree 20 | if ! git diff-files --quiet --ignore-submodules -- 21 | then 22 | echo -e "you have unstaged changes. $cross" 23 | git diff-files --name-status -r --ignore-submodules -- >&2 24 | err=1 25 | fi 26 | 27 | # Disallow uncommitted changes in the index 28 | if ! git diff-index --cached --quiet HEAD --ignore-submodules -- 29 | then 30 | echo -e "your index contains uncommitted changes. $cross" 31 | git diff-index --cached --name-status -r --ignore-submodules HEAD -- >&2 32 | err=1 33 | fi 34 | 35 | if [ $err = 1 ] 36 | then 37 | echo -e "Please commit or stash them." 38 | exit 1 39 | fi 40 | 41 | : ' 42 | echo -n "Checking formatting... 
" 43 | diff=$(cargo fmt -- --write-mode diff) 44 | stripped_diff=$(echo "$diff" | sed -e '/^Diff of/d' -e '/^$/d') 45 | 46 | if [ -z "$stripped_diff" ]; then 47 | echo -e "$check" 48 | else 49 | echo -e "$cross" 50 | echo "$diff" 51 | errors=1 52 | fi 53 | ' 54 | 55 | echo -n "Running tests... " 56 | if result=$(cargo +nightly test --color always 2>&1); then 57 | echo -e "$check" 58 | else 59 | echo -e "$cross" 60 | echo "$result" 61 | errors=1 62 | fi 63 | 64 | echo -n "Compiling benchmarks... " 65 | if result=$(cargo +nightly check --bench basic --color always 2>&1); then 66 | echo -e "$check" 67 | else 68 | echo -e "$cross" 69 | echo "$result" 70 | errors=1 71 | fi 72 | 73 | 74 | if [ "$errors" != 0 ]; then 75 | echo "Failed" 76 | exit 1 77 | else 78 | echo "OK" 79 | fi 80 | -------------------------------------------------------------------------------- /locustdb-client/.appveyor.yml: -------------------------------------------------------------------------------- 1 | install: 2 | - appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe 3 | - if not defined RUSTFLAGS rustup-init.exe -y --default-host x86_64-pc-windows-msvc --default-toolchain nightly 4 | - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin 5 | - rustc -V 6 | - cargo -V 7 | 8 | build: false 9 | 10 | test_script: 11 | - cargo test --locked 12 | -------------------------------------------------------------------------------- /locustdb-client/.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "08:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /locustdb-client/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | bin/ 5 | pkg/ 6 | wasm-pack.log 
7 | -------------------------------------------------------------------------------- /locustdb-client/.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | sudo: false 3 | 4 | cache: cargo 5 | 6 | matrix: 7 | include: 8 | 9 | # Builds with wasm-pack. 10 | - rust: beta 11 | env: RUST_BACKTRACE=1 12 | addons: 13 | firefox: latest 14 | chrome: stable 15 | before_script: 16 | - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) 17 | - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) 18 | - cargo install-update -a 19 | - curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh -s -- -f 20 | script: 21 | - cargo generate --git . --name testing 22 | # Having a broken Cargo.toml (in that it has curlies in fields) anywhere 23 | # in any of our parent dirs is problematic. 24 | - mv Cargo.toml Cargo.toml.tmpl 25 | - cd testing 26 | - wasm-pack build 27 | - wasm-pack test --chrome --firefox --headless 28 | 29 | # Builds on nightly. 30 | - rust: nightly 31 | env: RUST_BACKTRACE=1 32 | before_script: 33 | - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) 34 | - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) 35 | - cargo install-update -a 36 | - rustup target add wasm32-unknown-unknown 37 | script: 38 | - cargo generate --git . 
--name testing 39 | - mv Cargo.toml Cargo.toml.tmpl 40 | - cd testing 41 | - cargo check 42 | - cargo check --target wasm32-unknown-unknown 43 | - cargo check --no-default-features 44 | - cargo check --target wasm32-unknown-unknown --no-default-features 45 | - cargo check --no-default-features --features console_error_panic_hook 46 | - cargo check --target wasm32-unknown-unknown --no-default-features --features console_error_panic_hook 47 | - cargo check --no-default-features --features "console_error_panic_hook wee_alloc" 48 | - cargo check --target wasm32-unknown-unknown --no-default-features --features "console_error_panic_hook wee_alloc" 49 | 50 | # Builds on beta. 51 | - rust: beta 52 | env: RUST_BACKTRACE=1 53 | before_script: 54 | - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) 55 | - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) 56 | - cargo install-update -a 57 | - rustup target add wasm32-unknown-unknown 58 | script: 59 | - cargo generate --git . --name testing 60 | - mv Cargo.toml Cargo.toml.tmpl 61 | - cd testing 62 | - cargo check 63 | - cargo check --target wasm32-unknown-unknown 64 | - cargo check --no-default-features 65 | - cargo check --target wasm32-unknown-unknown --no-default-features 66 | - cargo check --no-default-features --features console_error_panic_hook 67 | - cargo check --target wasm32-unknown-unknown --no-default-features --features console_error_panic_hook 68 | # Note: no enabling the `wee_alloc` feature here because it requires 69 | # nightly for now. 
70 | -------------------------------------------------------------------------------- /locustdb-client/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "locustdb-client" 3 | version = "0.5.5" 4 | authors = ["Clemens Winter "] 5 | edition = "2018" 6 | 7 | [lib] 8 | crate-type = ["cdylib", "rlib"] 9 | 10 | [features] 11 | default = ["console_error_panic_hook"] 12 | 13 | [dependencies] 14 | reqwest = { version = "0.12", default_features = false, features = ["json", "rustls-tls"] } 15 | serde = { version = "1.0", features = ["derive"] } 16 | serde-wasm-bindgen = "0.6.5" 17 | serde_json = "1.0" 18 | wasm-bindgen = "0.2.84" 19 | wasm-bindgen-futures = "0.4.42" 20 | locustdb-compression-utils = {path = "../locustdb-compression-utils" } 21 | locustdb-serialization = {path = "../locustdb-serialization", version = "0.2.1" } 22 | wasm-logger = "0.2.0" 23 | log = "0.4" 24 | 25 | # The `console_error_panic_hook` crate provides better debugging of panics by 26 | # logging them with `console.error`. This is great for development, but requires 27 | # all the `std::fmt` and `std::panicking` infrastructure, so isn't great for 28 | # code size when deploying. 29 | console_error_panic_hook = { version = "0.1.7", optional = true } 30 | web-sys = { version = "0.3.69", features = ["Performance", "Window"] } 31 | 32 | [dev-dependencies] 33 | wasm-bindgen-test = "0.3.34" 34 | 35 | [profile.release] 36 | # Tell `rustc` to optimize for small code size. 37 | opt-level = "s" 38 | debug = true 39 | -------------------------------------------------------------------------------- /locustdb-client/README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |

wasm-pack-template

4 | 5 | A template for kick starting a Rust and WebAssembly project using wasm-pack. 6 | 7 |

8 | Build Status 9 |

10 | 11 |

12 | Tutorial 13 | | 14 | Chat 15 |

16 | 17 | Built with 🦀🕸 by The Rust and WebAssembly Working Group 18 |
19 | 20 | ## About 21 | 22 | [**📚 Read this template tutorial! 📚**][template-docs] 23 | 24 | This template is designed for compiling Rust libraries into WebAssembly and 25 | publishing the resulting package to NPM. 26 | 27 | Be sure to check out [other `wasm-pack` tutorials online][tutorials] for other 28 | templates and usages of `wasm-pack`. 29 | 30 | [tutorials]: https://rustwasm.github.io/docs/wasm-pack/tutorials/index.html 31 | [template-docs]: https://rustwasm.github.io/docs/wasm-pack/tutorials/npm-browser-packages/index.html 32 | 33 | ## 🚴 Usage 34 | 35 | ### 🐑 Use `cargo generate` to Clone this Template 36 | 37 | [Learn more about `cargo generate` here.](https://github.com/ashleygwilliams/cargo-generate) 38 | 39 | ``` 40 | cargo generate --git https://github.com/rustwasm/wasm-pack-template.git --name my-project 41 | cd my-project 42 | ``` 43 | 44 | ### 🛠️ Build with `wasm-pack build` 45 | 46 | ``` 47 | wasm-pack build 48 | ``` 49 | 50 | ### 🔬 Test in Headless Browsers with `wasm-pack test` 51 | 52 | ``` 53 | wasm-pack test --headless --firefox 54 | ``` 55 | 56 | ### 🎁 Publish to NPM with `wasm-pack publish` 57 | 58 | ``` 59 | wasm-pack build --scope cswinter 60 | wasm-pack publish 61 | ``` 62 | 63 | ## 🔋 Batteries Included 64 | 65 | * [`wasm-bindgen`](https://github.com/rustwasm/wasm-bindgen) for communicating 66 | between WebAssembly and JavaScript. 67 | * [`console_error_panic_hook`](https://github.com/rustwasm/console_error_panic_hook) 68 | for logging panic messages to the developer console. 69 | * `LICENSE-APACHE` and `LICENSE-MIT`: most Rust projects are licensed this way, so these are included for you 70 | 71 | ## License 72 | 73 | Licensed under either of 74 | 75 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 76 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 77 | 78 | at your option. 
79 | 80 | ### Contribution 81 | 82 | Unless you explicitly state otherwise, any contribution intentionally 83 | submitted for inclusion in the work by you, as defined in the Apache-2.0 84 | license, shall be dual licensed as above, without any additional terms or 85 | conditions. 86 | -------------------------------------------------------------------------------- /locustdb-client/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wasm-pack build --scope=cswinter 4 | cd pkg 5 | npm publish --access-public 6 | -------------------------------------------------------------------------------- /locustdb-client/src/lib.rs: -------------------------------------------------------------------------------- 1 | use wasm_bindgen::prelude::*; 2 | mod client; 3 | 4 | #[wasm_bindgen] 5 | extern "C" { 6 | fn alert(s: &str); 7 | 8 | #[wasm_bindgen(js_namespace = console)] 9 | fn log(s: &str); 10 | } 11 | 12 | #[wasm_bindgen] 13 | pub fn greet() { 14 | alert("Hello, locustdb-client!"); 15 | } 16 | 17 | pub use client::Client; -------------------------------------------------------------------------------- /locustdb-client/tests/web.rs: -------------------------------------------------------------------------------- 1 | //! Test suite for the Web and headless browsers. 2 | 3 | #![cfg(target_arch = "wasm32")] 4 | 5 | extern crate wasm_bindgen_test; 6 | use wasm_bindgen_test::*; 7 | 8 | wasm_bindgen_test_configure!(run_in_browser); 9 | 10 | #[wasm_bindgen_test] 11 | fn pass() { 12 | assert_eq!(1 + 1, 2); 13 | } 14 | -------------------------------------------------------------------------------- /locustdb-compression-utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "locustdb-compression-utils" 3 | version = "0.2.0" 4 | edition = "2021" 5 | description = "Utilities for compressing and decompressing sequences used in LocustDB." 
6 | license-file = "../LICENSE" 7 | 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 9 | 10 | [dependencies] 11 | serde = { version = "1.0", features = ["derive"] } 12 | serde_json = "1.0" 13 | log = "0.4" 14 | bitbuffer = "0.10" 15 | pco = "0.2.3" 16 | 17 | [dev-dependencies] 18 | clap = { version = "4", features = ["derive"] } 19 | rand = { version = "0.8.5", features = ["small_rng"] } 20 | pretty_assertions = "1" 21 | 22 | [profile.release] 23 | debug = true 24 | 25 | [profile.dev] 26 | debug = true 27 | -------------------------------------------------------------------------------- /locustdb-compression-utils/README.md: -------------------------------------------------------------------------------- 1 | # LocustDB Compression Utils 2 | 3 | Collection of utils for compressing a series of values. 4 | 5 | ## XOR float compression 6 | 7 | One of compression algorithms implemented is a variant of the XOR float compression algorithm described in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf). The "gorilla_time" example program creates visualizations of the compression algorithm. 
You can run the visualization with: 8 | 9 | ```bash 10 | cargo run --example gorilla_time -- --verbose 11 | ``` 12 | 13 | Run the following command to see more options: 14 | 15 | ```bash 16 | cargo run --example gorilla_time -- --help 17 | ``` -------------------------------------------------------------------------------- /locustdb-compression-utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | 2 | pub mod xor_float; 3 | pub mod test_data; -------------------------------------------------------------------------------- /locustdb-compression-utils/src/xor_float/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod double; 2 | pub mod single; 3 | 4 | #[derive(Debug, PartialEq)] 5 | pub enum Error { 6 | Eof, 7 | } 8 | 9 | // Special NaN value that we use to represent NULLs in the data. 10 | // Can't use f64::from_bits because it is not a canonical NaN value. 11 | #[allow(clippy::transmute_int_to_float)] 12 | pub const NULL: f64 = unsafe { std::mem::transmute::(0x7ffa_aaaa_aaaa_aaaau64) }; -------------------------------------------------------------------------------- /locustdb-derive/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 4 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "0.7.18" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "lazy_static" 16 | version = "1.2.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" 19 | 20 | [[package]] 21 | name = "locustdb-derive" 22 | version = "0.2.2" 23 | dependencies = [ 24 | "lazy_static", 25 | "proc-macro2", 26 | "quote", 27 | "regex", 28 | "syn", 29 | ] 30 | 31 | [[package]] 32 | name = "memchr" 33 | version = "2.5.0" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 36 | 37 | [[package]] 38 | name = "proc-macro2" 39 | version = "1.0.39" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" 42 | dependencies = [ 43 | "unicode-ident", 44 | ] 45 | 46 | [[package]] 47 | name = "quote" 48 | version = "1.0.18" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" 51 | dependencies = [ 52 | "proc-macro2", 53 | ] 54 | 55 | [[package]] 56 | name = "regex" 57 | version = "1.5.5" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" 60 | dependencies = [ 61 | "aho-corasick", 62 | "memchr", 63 | "regex-syntax", 64 | ] 65 | 66 | [[package]] 67 | name = "regex-syntax" 68 | version = "0.6.26" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" 71 | 72 | 
[[package]] 73 | name = "syn" 74 | version = "1.0.96" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" 77 | dependencies = [ 78 | "proc-macro2", 79 | "quote", 80 | "unicode-ident", 81 | ] 82 | 83 | [[package]] 84 | name = "unicode-ident" 85 | version = "1.0.0" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" 88 | -------------------------------------------------------------------------------- /locustdb-derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Clemens Winter "] 3 | description = "Macros used internally by locustdb crate." 4 | edition = "2018" 5 | license = "Apache-2.0" 6 | name = "locustdb-derive" 7 | version = "0.2.2" 8 | repository = "https://github.com/cswinter/locustdb" 9 | 10 | [dependencies] 11 | lazy_static = "1.2" 12 | proc-macro2 = {version = "1.0"} 13 | quote = "1.0" 14 | regex = "1.5" 15 | syn = {features = ["full", "fold", "extra-traits"], version = "1.0"} 16 | 17 | [lib] 18 | proc-macro = true 19 | -------------------------------------------------------------------------------- /locustdb-derive/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(proc_macro_hygiene, proc_macro_diagnostic)] 2 | #![recursion_limit = "128"] 3 | extern crate proc_macro; 4 | #[macro_use] 5 | extern crate quote; 6 | extern crate syn; 7 | #[macro_use] 8 | extern crate lazy_static; 9 | extern crate regex; 10 | 11 | mod reify_types; 12 | mod enum_syntax; 13 | mod ast_builder; 14 | 15 | use self::proc_macro::TokenStream; 16 | 17 | #[proc_macro] 18 | pub fn reify_types(input: TokenStream) -> TokenStream { 19 | reify_types::reify_types(input) 20 | } 21 | 22 | #[proc_macro_derive(EnumSyntax)] 23 | pub fn enum_syntax(input: TokenStream) 
-> TokenStream { 24 | enum_syntax::enum_syntax(input) 25 | } 26 | 27 | #[proc_macro_derive(ASTBuilder, attributes(newstyle, input, internal, output, nohash))] 28 | pub fn ast_builder(input: TokenStream) -> TokenStream { ast_builder::ast_builder(input) } 29 | -------------------------------------------------------------------------------- /locustdb-serialization/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 4 4 | 5 | [[package]] 6 | name = "capnp" 7 | version = "0.21.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "b1d1b4a00e80b7c4b1a49e845365f25c9d8fd0a19c9cd8d66f68afea47b1f020" 10 | dependencies = [ 11 | "embedded-io", 12 | ] 13 | 14 | [[package]] 15 | name = "diff" 16 | version = "0.1.13" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" 19 | 20 | [[package]] 21 | name = "embedded-io" 22 | version = "0.6.1" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" 25 | 26 | [[package]] 27 | name = "locustdb-serialization" 28 | version = "0.2.2" 29 | dependencies = [ 30 | "capnp", 31 | "pretty_assertions", 32 | "serde", 33 | ] 34 | 35 | [[package]] 36 | name = "pretty_assertions" 37 | version = "1.4.0" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" 40 | dependencies = [ 41 | "diff", 42 | "yansi", 43 | ] 44 | 45 | [[package]] 46 | name = "proc-macro2" 47 | version = "1.0.81" 48 | source = "registry+https://github.com/rust-lang/crates.io-index" 49 | checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" 50 | dependencies = [ 51 | "unicode-ident", 52 
| ] 53 | 54 | [[package]] 55 | name = "quote" 56 | version = "1.0.36" 57 | source = "registry+https://github.com/rust-lang/crates.io-index" 58 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 59 | dependencies = [ 60 | "proc-macro2", 61 | ] 62 | 63 | [[package]] 64 | name = "serde" 65 | version = "1.0.198" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc" 68 | dependencies = [ 69 | "serde_derive", 70 | ] 71 | 72 | [[package]] 73 | name = "serde_derive" 74 | version = "1.0.198" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9" 77 | dependencies = [ 78 | "proc-macro2", 79 | "quote", 80 | "syn", 81 | ] 82 | 83 | [[package]] 84 | name = "syn" 85 | version = "2.0.60" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" 88 | dependencies = [ 89 | "proc-macro2", 90 | "quote", 91 | "unicode-ident", 92 | ] 93 | 94 | [[package]] 95 | name = "unicode-ident" 96 | version = "1.0.12" 97 | source = "registry+https://github.com/rust-lang/crates.io-index" 98 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 99 | 100 | [[package]] 101 | name = "yansi" 102 | version = "0.5.1" 103 | source = "registry+https://github.com/rust-lang/crates.io-index" 104 | checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" 105 | -------------------------------------------------------------------------------- /locustdb-serialization/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "locustdb-serialization" 3 | version = "0.2.2" 4 | edition = "2021" 5 | description = "Serialization formats used by LocustDB for peristent storage and client/server 
communication." 6 | license-file = "../LICENSE" 7 | repository = "https://github.com/cswinter/locustdb" 8 | 9 | [dependencies] 10 | capnp = "0.21" 11 | serde = { version = "1.0", features = ["derive"] } 12 | 13 | [dev-dependencies] 14 | pretty_assertions = "1" 15 | -------------------------------------------------------------------------------- /locustdb-serialization/README.md: -------------------------------------------------------------------------------- 1 | # LocustDB Serialization 2 | 3 | Util crate that defines Cap'n Proto schema and serialization/deserialization logic for data structures used for persistent storage and client-server communication in LocustDB. 4 | 5 | To regenerate the Cap'n Proto definitions, follow this process: 6 | 7 | 1. [Install the Cap'n Proto CLI tool][install-capnproto] 8 | 2. `cargo install capnpc` 9 | 3. `capnp compile -orust:src --src-prefix=schemas schemas/{dbmeta,partition_segment,wal_segment,api}.capnp` 10 | -------------------------------------------------------------------------------- /locustdb-serialization/schemas/api.capnp: -------------------------------------------------------------------------------- 1 | @0x88a093a148e409e4; 2 | 3 | 4 | struct QueryResponse { 5 | columns @0 :List(Column); 6 | } 7 | 8 | struct MultiQueryResponse { 9 | responses @0 :List(QueryResponse); 10 | } 11 | 12 | struct Column { 13 | name @0 :Text; 14 | 15 | data :union { 16 | f64 @1 :List(Float64); 17 | i64 @2 :List(Int64); 18 | string @3 :List(Text); 19 | mixed @4 :List(AnyVal); 20 | null @5 :UInt64; 21 | xorF64 @6 :Data; 22 | deltaEncodedI8 :group { 23 | first @7 :Int64; 24 | data @8 :List(Int8); 25 | } 26 | deltaEncodedI16 :group { 27 | first @9 :Int64; 28 | data @10 :List(Int16); 29 | } 30 | deltaEncodedI32 :group { 31 | first @11 :Int64; 32 | data @12 :List(Int32); 33 | } 34 | doubleDeltaEncodedI8 :group { 35 | first @13 :Int64; 36 | second @14 :Int64; 37 | data @15 :List(Int8); 38 | } 39 | doubleDeltaEncodedI16 :group { 40 | first @16 
:Int64; 41 | second @17 :Int64; 42 | data @18 :List(Int16); 43 | } 44 | doubleDeltaEncodedI32 :group { 45 | first @19 :Int64; 46 | second @20 :Int64; 47 | data @21 :List(Int32); 48 | } 49 | range :group { 50 | start @22 :Int64; 51 | len @23 :UInt64; 52 | step @24 :Int64; 53 | } 54 | } 55 | } 56 | 57 | struct AnyVal { 58 | union { 59 | f64 @0 :Float64; 60 | i64 @1 :Int64; 61 | string @2 :Text; 62 | null @3 :Void; 63 | } 64 | } -------------------------------------------------------------------------------- /locustdb-serialization/schemas/dbmeta.capnp: -------------------------------------------------------------------------------- 1 | @0xafa9b81d5e8e2ef5; 2 | struct DBMeta { 3 | nextWalId @0 :UInt64; 4 | partitions @1 :List(PartitionMetadata); 5 | 6 | compressedStrings @3 :Data; # [v2], deprecated in v3 in favor of column range 7 | lengthsCompressedStrings @4 :List(UInt16); # [v2], deprecated in v3 in favor of column range 8 | strings @2 :List(Text); # [v1] unused in legacy format and deprecated in new format 9 | } 10 | 11 | struct PartitionMetadata { 12 | id @0 :UInt64; 13 | tablename @1 :Text; 14 | offset @2 :UInt64; 15 | len @3 :UInt64; 16 | subpartitions @4 :List(SubpartitionMetadata); 17 | } 18 | 19 | struct SubpartitionMetadata { 20 | sizeBytes @0 :UInt64; 21 | subpartitionKey @1 :Text; 22 | # Name of the largest column in the subpartition 23 | lastColumn @5 :Text; # [v3] 24 | 25 | columns @2 :List(Text); # [v0] deprecated in favor of internedColumns 26 | internedColumns @3 :List(UInt64); # [v1] deprecated in favor of compressedInternedColumns 27 | compressedInternedColumns @4 :Data; # [v2..] 
deprecated in favor of lastColumn 28 | } -------------------------------------------------------------------------------- /locustdb-serialization/schemas/partition_segment.capnp: -------------------------------------------------------------------------------- 1 | @0xc2e3685626e6e832; 2 | 3 | struct PartitionSegment { 4 | columns @0 :List(Column); 5 | } 6 | 7 | struct Column { 8 | name @0 :Text; 9 | len @1 :UInt64; 10 | range: union { 11 | range @2 :Range; 12 | empty @3 :Void; 13 | } 14 | codec @4 :List(CodecOp); 15 | data @5 :List(DataSection); 16 | } 17 | 18 | struct Range { 19 | start @0 :Int64; 20 | end @1 :Int64; 21 | } 22 | 23 | struct CodecOp { 24 | union { 25 | add @0 :Add; 26 | delta @1 :EncodingType; 27 | toI64 @2 :EncodingType; 28 | pushDataSection @3 :UInt64; 29 | dictLookup @4 :EncodingType; 30 | lz4 @5 :LZ4; 31 | unpackStrings @6 :Void; 32 | unhexpackStrings @7 :UnhexpackStrings; 33 | nullable @8 :Void; 34 | pco @9 :Pco; 35 | } 36 | } 37 | 38 | struct DataSection { 39 | union { 40 | u8 @0 :List(UInt8); 41 | u16 @1 :List(UInt16); 42 | u32 @2 :List(UInt32); 43 | u64 @3 :List(UInt64); 44 | i64 @4 :List(Int64); 45 | null @5 :UInt64; 46 | f64 @6 :List(Float64); 47 | bitvec @7 :List(UInt8); 48 | lz4 :group { 49 | decodedBytes @8 :UInt64; 50 | bytesPerElement @9 :UInt64; 51 | data @10 :List(UInt8); 52 | } 53 | pco :group { 54 | decodedBytes @11 :UInt64; 55 | bytesPerElement @12 :UInt64; 56 | data @13 :List(UInt8); 57 | isFp32 @14 :Bool; 58 | } 59 | } 60 | } 61 | 62 | struct Add { 63 | type @0 :EncodingType; 64 | amount @1 :Int64; 65 | } 66 | 67 | struct LZ4 { 68 | type @0 :EncodingType; 69 | lenDecoded @1 :UInt64; 70 | } 71 | 72 | struct Pco { 73 | type @0 :EncodingType; 74 | lenDecoded @1 :UInt64; 75 | isFp32 @2 :Bool; 76 | } 77 | 78 | struct UnhexpackStrings { 79 | uppercase @0 :Bool; 80 | totalBytes @1 :UInt64; 81 | } 82 | 83 | enum EncodingType { 84 | u8 @0; 85 | u16 @1; 86 | u32 @2; 87 | u64 @3; 88 | i64 @4; 89 | null @5; 90 | f64 @6; 91 | bitvec @7; 92 
| } -------------------------------------------------------------------------------- /locustdb-serialization/schemas/rust.capnp: -------------------------------------------------------------------------------- 1 | # This file contains annotations that are recognized by the capnpc-rust code generator. 2 | 3 | @0x83b3c14c3c8dd083; 4 | 5 | annotation name @0xc2fe4c6d100166d0 (field, struct, enum, enumerant, union, group) :Text; 6 | # Rename something in the generated code. The value that you specify in this 7 | # annotation should follow capnp capitalization conventions. So, for example, 8 | # a struct should use CamelCase capitalization like `StructFoo`, even though 9 | # that will get translated to a `struct_foo` module in the generated Rust code. 10 | # 11 | # TODO: support annotating more kinds of things with this. 12 | 13 | annotation parentModule @0xabee386cd1450364 (file) :Text; 14 | # A Rust module path indicating where the generated code will be included. 15 | # For example, if this is set to "foo::bar" and the schema file is named 16 | # "baz.capnp", then you could include the generated code like this: 17 | # 18 | # pub mod foo { 19 | # pub mod bar { 20 | # pub mod baz_capnp { 21 | # include!(concat!(env!("OUT_DIR"), "/baz_capnp.rs")); 22 | # } 23 | # } 24 | # } -------------------------------------------------------------------------------- /locustdb-serialization/schemas/wal_segment.capnp: -------------------------------------------------------------------------------- 1 | @0xdb2bd6b471f245ca; 2 | 3 | struct WalSegment { 4 | id @0 :UInt64; 5 | data @1 :TableSegmentList; 6 | } 7 | 8 | struct TableSegmentList { 9 | data @0 :List(TableSegment); 10 | } 11 | 12 | struct TableSegment { 13 | name @0 :Text; 14 | len @1 :UInt64; 15 | columns @2 :List(Column); 16 | } 17 | 18 | struct Column { 19 | name @0 :Text; 20 | 21 | data :union { 22 | f64 @1 :List(Float64); 23 | sparseF64 :group { 24 | indices @2 :List(UInt64); 25 | values @3 :List(Float64); 26 | } 27 | i64 @4 
:List(Int64); 28 | string @5 :List(Text); 29 | empty @6 :Void; 30 | sparseI64 :group { 31 | indices @7 :List(UInt64); 32 | values @8 :List(Int64); 33 | } 34 | mixed @9 :List(AnyVal); 35 | } 36 | } 37 | 38 | struct AnyVal { 39 | value :union { 40 | f64 @0 :Float64; 41 | i64 @1 :Int64; 42 | string @2 :Text; 43 | null @3: Void; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /locustdb-serialization/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod dbmeta_capnp; 2 | pub mod partition_segment_capnp; 3 | pub mod wal_segment_capnp; 4 | pub mod api_capnp; 5 | pub mod api; 6 | pub mod event_buffer; 7 | 8 | 9 | pub fn default_reader_options() -> capnp::message::ReaderOptions { 10 | let mut options = capnp::message::ReaderOptions::new(); 11 | // Allow messages up to 8 GiB 12 | options.traversal_limit_in_words(Some(1024 * 1024 * 1024)); 13 | options 14 | } -------------------------------------------------------------------------------- /memsize_results/baseline0: -------------------------------------------------------------------------------- 1 | # Table `test` (20000000 rows, 3951.96 MiB) # 2 | rate_code_id: 19.10MiB 3 | pickup_ntaname: 40.27MiB 4 | dropoff_boroct2010: 76.32MiB 5 | dropoff_latitude: 38.17MiB 6 | store_and_fwd_flag: 38.19MiB 7 | pickup_boroct2010: 76.32MiB 8 | vendor_id: 19.10MiB 9 | pickup_ntacode: 39.56MiB 10 | total_amount: 55.42MiB 11 | pickup_boroname: 38.24MiB 12 | payment_type: 19.10MiB 13 | average_wind_speed: 38.18MiB 14 | dropoff_boroname: 38.24MiB 15 | precipitation: 34.92MiB 16 | tolls_amount: 39.80MiB 17 | dropoff_ct2010: 76.32MiB 18 | dropoff_ntacode: 39.70MiB 19 | pickup_ct2010: 76.32MiB 20 | fare_amount: 52.42MiB 21 | snowfall: 25.99MiB 22 | dropoff_borocode: 19.10MiB 23 | tip_amount: 39.17MiB 24 | pickup_latitude: 38.30MiB 25 | trip_type: 18.22MiB 26 | extra: 25.54MiB 27 | trip_id: 76.32MiB 28 | pickup_datetime: 76.32MiB 29 | 
dropoff_cdeligibil: 38.20MiB 30 | passenger_count: 19.10MiB 31 | trip_distance: 60.55MiB 32 | cab_type: 38.19MiB 33 | ehail_fee: 19.10MiB 34 | pickup_longitude: 76.32MiB 35 | dropoff_ntaname: 40.51MiB 36 | pickup_cdeligibil: 38.20MiB 37 | dropoff_nyct2010_gid: 38.18MiB 38 | dropoff_datetime: 76.32MiB 39 | pickup: 971.11MiB 40 | pickup_ctlabel: 38.17MiB 41 | dropoff_puma: 38.17MiB 42 | dropoff: 971.14MiB 43 | mta_tax: 19.35MiB 44 | dropoff_ctlabel: 38.17MiB 45 | snow_depth: 27.11MiB 46 | pickup_puma: 38.17MiB 47 | max_temperature: 19.10MiB 48 | pickup_nyct2010_gid: 38.18MiB 49 | pickup_borocode: 19.10MiB 50 | improvement_surcharge: 19.10MiB 51 | dropoff_longitude: 76.32MiB 52 | min_temperature: 19.10MiB 53 | -------------------------------------------------------------------------------- /memsize_results/baseline1: -------------------------------------------------------------------------------- 1 | # Table `test` (20000000 rows, 2009.72 MiB) # 2 | tip_amount: 39.17MiB 3 | dropoff_nyct2010_gid: 38.18MiB 4 | dropoff_datetime: 76.32MiB 5 | dropoff_borocode: 19.10MiB 6 | pickup_ntacode: 39.56MiB 7 | pickup_borocode: 19.10MiB 8 | dropoff_ntacode: 39.70MiB 9 | snowfall: 25.99MiB 10 | trip_id: 76.32MiB 11 | pickup_latitude: 38.30MiB 12 | extra: 25.54MiB 13 | dropoff_boroct2010: 76.32MiB 14 | store_and_fwd_flag: 38.19MiB 15 | dropoff_longitude: 76.32MiB 16 | dropoff_ntaname: 40.51MiB 17 | min_temperature: 19.10MiB 18 | trip_distance: 60.55MiB 19 | pickup_boroname: 38.24MiB 20 | dropoff_ct2010: 76.32MiB 21 | pickup_nyct2010_gid: 38.18MiB 22 | pickup_ntaname: 40.27MiB 23 | max_temperature: 19.10MiB 24 | dropoff_latitude: 38.17MiB 25 | vendor_id: 19.10MiB 26 | total_amount: 55.42MiB 27 | pickup_puma: 38.17MiB 28 | pickup_boroct2010: 76.32MiB 29 | precipitation: 34.92MiB 30 | payment_type: 19.10MiB 31 | pickup_datetime: 76.32MiB 32 | average_wind_speed: 38.18MiB 33 | pickup_longitude: 76.32MiB 34 | fare_amount: 52.42MiB 35 | improvement_surcharge: 19.10MiB 36 | snow_depth: 
27.11MiB 37 | pickup_ctlabel: 38.17MiB 38 | dropoff_puma: 38.17MiB 39 | rate_code_id: 19.10MiB 40 | passenger_count: 19.10MiB 41 | pickup_cdeligibil: 38.20MiB 42 | dropoff_cdeligibil: 38.20MiB 43 | mta_tax: 19.35MiB 44 | dropoff_ctlabel: 38.17MiB 45 | trip_type: 18.22MiB 46 | pickup_ct2010: 76.32MiB 47 | tolls_amount: 39.80MiB 48 | ehail_fee: 19.10MiB 49 | dropoff_boroname: 38.24MiB 50 | cab_type: 38.19MiB 51 | -------------------------------------------------------------------------------- /memsize_results/baseline2: -------------------------------------------------------------------------------- 1 | # Table `test` (100000000 rows, 9336.90 MiB) # 2 | average_wind_speed: 190.74MiB 3 | cab_type: 190.75MiB 4 | dropoff_borocode: 78.30MiB 5 | dropoff_boroct2010: 313.18MiB 6 | dropoff_boroname: 156.62MiB 7 | dropoff_cdeligibil: 156.60MiB 8 | dropoff_ct2010: 313.18MiB 9 | dropoff_ctlabel: 156.60MiB 10 | dropoff_datetime: 381.48MiB 11 | dropoff_latitude: 199.67MiB 12 | dropoff_longitude: 330.26MiB 13 | dropoff_ntacode: 157.02MiB 14 | dropoff_ntaname: 157.25MiB 15 | dropoff_nyct2010_gid: 156.60MiB 16 | dropoff_puma: 156.60MiB 17 | ehail_fee: 95.38MiB 18 | extra: 150.52MiB 19 | fare_amount: 315.04MiB 20 | improvement_surcharge: 95.38MiB 21 | max_temperature: 95.38MiB 22 | min_temperature: 95.38MiB 23 | mta_tax: 110.38MiB 24 | passenger_count: 95.38MiB 25 | payment_type: 129.53MiB 26 | pickup_borocode: 78.30MiB 27 | pickup_boroct2010: 313.18MiB 28 | pickup_boroname: 156.61MiB 29 | pickup_cdeligibil: 156.60MiB 30 | pickup_ct2010: 313.18MiB 31 | pickup_ctlabel: 156.60MiB 32 | pickup_datetime: 381.48MiB 33 | pickup_latitude: 209.67MiB 34 | pickup_longitude: 330.26MiB 35 | pickup_ntacode: 157.01MiB 36 | pickup_ntaname: 157.23MiB 37 | pickup_nyct2010_gid: 156.60MiB 38 | pickup_puma: 156.60MiB 39 | precipitation: 160.45MiB 40 | rate_code_id: 62.23MiB 41 | snow_depth: 151.60MiB 42 | snowfall: 149.60MiB 43 | store_and_fwd_flag: 157.60MiB 44 | tip_amount: 216.74MiB 45 | tolls_amount: 
234.74MiB 46 | total_amount: 315.04MiB 47 | trip_distance: 315.04MiB 48 | trip_id: 381.48MiB 49 | trip_type: 62.23MiB 50 | vendor_id: 129.53MiB -------------------------------------------------------------------------------- /memsize_results/baseline3: -------------------------------------------------------------------------------- 1 | # Table `test` (100000000 rows, 8484.65 MiB) # 2 | average_wind_speed: 190.74MiB 3 | cab_type: 95.38MiB 4 | dropoff_borocode: 78.30MiB 5 | dropoff_boroct2010: 313.18MiB 6 | dropoff_boroname: 78.32MiB 7 | dropoff_cdeligibil: 78.31MiB 8 | dropoff_ct2010: 313.18MiB 9 | dropoff_ctlabel: 156.60MiB 10 | dropoff_datetime: 381.48MiB 11 | dropoff_latitude: 199.67MiB 12 | dropoff_longitude: 330.26MiB 13 | dropoff_ntacode: 78.73MiB 14 | dropoff_ntaname: 78.96MiB 15 | dropoff_nyct2010_gid: 156.60MiB 16 | dropoff_puma: 156.60MiB 17 | ehail_fee: 95.38MiB 18 | extra: 150.52MiB 19 | fare_amount: 315.04MiB 20 | improvement_surcharge: 95.38MiB 21 | max_temperature: 95.38MiB 22 | min_temperature: 95.38MiB 23 | mta_tax: 110.37MiB 24 | passenger_count: 95.38MiB 25 | payment_type: 95.38MiB 26 | pickup_borocode: 78.30MiB 27 | pickup_boroct2010: 313.18MiB 28 | pickup_boroname: 78.32MiB 29 | pickup_cdeligibil: 78.31MiB 30 | pickup_ct2010: 313.18MiB 31 | pickup_ctlabel: 156.60MiB 32 | pickup_datetime: 381.48MiB 33 | pickup_latitude: 209.67MiB 34 | pickup_longitude: 330.26MiB 35 | pickup_ntacode: 78.72MiB 36 | pickup_ntaname: 78.94MiB 37 | pickup_nyct2010_gid: 156.60MiB 38 | pickup_puma: 156.60MiB 39 | precipitation: 160.45MiB 40 | rate_code_id: 62.23MiB 41 | snow_depth: 151.60MiB 42 | snowfall: 149.60MiB 43 | store_and_fwd_flag: 95.38MiB 44 | tip_amount: 216.74MiB 45 | tolls_amount: 234.74MiB 46 | total_amount: 315.04MiB 47 | trip_distance: 315.04MiB 48 | trip_id: 381.48MiB 49 | trip_type: 62.23MiB 50 | vendor_id: 95.38MiB -------------------------------------------------------------------------------- /rust-toolchain: 
-------------------------------------------------------------------------------- 1 | nightly-2025-03-28 2 | -------------------------------------------------------------------------------- /samples/example_row: -------------------------------------------------------------------------------- 1 | trip_id 39999994 2 | vendor_id 2 3 | pickup_datetime 2016-03-13 14:58:31 4 | dropoff_datetime 2016-03-13 15:04:05 5 | store_and_fwd_flag N 6 | rate_code_id 1 7 | pickup_longitude -73.924003601074219 8 | pickup_latitude 40.743988037109375 9 | dropoff_longitude -73.93267822265625 10 | dropoff_latitude 40.752838134765625 11 | passenger_count 1 12 | trip_distance 0.93 13 | fare_amount 6 14 | extra 0 15 | mta_tax 0.5 16 | tip_amount 0.5 17 | tolls_amount 0 18 | ehail_fee 19 | improvement_surcharge 0.3 20 | total_amount 7.3 21 | payment_type 1 22 | trip_type 1 23 | pickup 0101000020E6100000000000E0227B52C0000000003B5F4440 24 | dropoff 0101000020E610000000000000B17B52C0000000005D604440 25 | cab_type green 26 | precipitation 0.00 27 | snow_depth 0 28 | snowfall 0.0 29 | max_temperature 62 30 | min_temperature 50 31 | average_wind_speed 3.58 32 | pickup_nyct2010_gid 2064 33 | pickup_ctlabel 181.01 34 | pickup_borocode 4 35 | pickup_boroname Queens 36 | pickup_ct2010 018101 37 | pickup_boroct2010 4018101 38 | pickup_cdeligibil E 39 | pickup_ntacode QN31 40 | pickup_ntaname Hunters Point-Sunnyside-West Maspeth 41 | pickup_puma 4109 42 | dropoff_nyct2010_gid 542 43 | dropoff_ctlabel 31 44 | dropoff_borocode 4 45 | dropoff_boroname Queens 46 | dropoff_ct2010 003100 47 | dropoff_boroct2010 4003100 48 | dropoff_cdeligibil E 49 | dropoff_ntacode QN68 50 | dropoff_ntaname Queensbridge-Ravenswood-Long Island City 51 | dropoff_puma 4101 52 | -------------------------------------------------------------------------------- /samples/example_row_sparse: -------------------------------------------------------------------------------- 1 | trip_id 1 2 | vendor_id 2 3 | pickup_datetime 2013-08-01 
08:14:37 4 | dropoff_datetime 2013-08-01 09:09:06 5 | store_and_fwd_flag N 6 | rate_code_id 1 7 | pickup_longitude 8 | pickup_latitude 9 | dropoff_longitude 10 | dropoff_latitude 11 | passenger_count 1 12 | trip_distance 0.00 13 | fare_amount 21.25 14 | extra 0 15 | mta_tax 0 16 | tip_amount 0 17 | tolls_amount 0 18 | ehail_fee 19 | improvement_surcharge 20 | total_amount 21.25 21 | payment_type 2 22 | trip_type 23 | pickup 24 | dropoff 25 | cab_type green 26 | precipitation 0.65 27 | snow_depth 0 28 | snowfall 0.0 29 | max_temperature 76 30 | min_temperature 66 31 | average_wind_speed 2.91 32 | pickup_nyct2010_gid 33 | pickup_ctlabel 34 | pickup_borocode 35 | pickup_boroname 36 | pickup_ct2010 37 | pickup_boroct2010 38 | pickup_cdeligibil 39 | pickup_ntacode 40 | pickup_ntaname 41 | pickup_puma 42 | dropoff_nyct2010_gid 43 | dropoff_ctlabel 44 | dropoff_borocode 45 | dropoff_boroname 46 | dropoff_ct2010 47 | dropoff_boroct2010 48 | dropoff_cdeligibil 49 | dropoff_ntacode 50 | dropoff_ntaname 51 | dropoff_puma 52 | -------------------------------------------------------------------------------- /samples/headers: -------------------------------------------------------------------------------- 1 | trip_id 2 | vendor_id 3 | pickup_datetime 4 | dropoff_datetime 5 | store_and_fwd_flag 6 | rate_code_id 7 | pickup_longitude 8 | pickup_latitude 9 | dropoff_longitude 10 | dropoff_latitude 11 | passenger_count 12 | trip_distance 13 | fare_amount 14 | extra 15 | mta_tax 16 | tip_amount 17 | tolls_amount 18 | ehail_fee 19 | improvement_surcharge 20 | total_amount 21 | payment_type 22 | trip_type 23 | pickup 24 | dropoff 25 | cab_type 26 | precipitation 27 | snow_depth 28 | snowfall 29 | max_temperature 30 | min_temperature 31 | average_wind_speed 32 | pickup_nyct2010_gid 33 | pickup_ctlabel 34 | pickup_borocode 35 | pickup_boroname 36 | pickup_ct2010 37 | pickup_boroct2010 38 | pickup_cdeligibil 39 | pickup_ntacode 40 | pickup_ntaname 41 | pickup_puma 42 | dropoff_nyct2010_gid 
43 | dropoff_ctlabel 44 | dropoff_borocode 45 | dropoff_boroname 46 | dropoff_ct2010 47 | dropoff_boroct2010 48 | dropoff_cdeligibil 49 | dropoff_ntacode 50 | dropoff_ntaname 51 | dropoff_puma 52 | -------------------------------------------------------------------------------- /src/bin/load_generator.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use locustdb::logging_client::BufferFullPolicy; 4 | use locustdb_serialization::api::any_val_syntax::vf64; 5 | use structopt::StructOpt; 6 | use tokio::time; 7 | 8 | #[derive(StructOpt, Debug)] 9 | #[structopt( 10 | name = "LocustDB Logger Test", 11 | about = "Log basic system stats to LocustDB.", 12 | author = "Clemens Winter " 13 | )] 14 | struct Opt { 15 | /// Address of LocustDB server 16 | #[structopt(long, name = "ADDR", default_value = "http://localhost:8080")] 17 | addr: String, 18 | 19 | /// Logging interval in milliseconds 20 | #[structopt(long, name = "INTERVAL", default_value = "100")] 21 | interval: u64, 22 | 23 | /// Number of active tables 24 | #[structopt(long, name = "TABLES", default_value = "10")] 25 | tables: u64, 26 | 27 | /// Number of rows logged per table per interval 28 | #[structopt(long, name = "ROWCOUNT")] 29 | rowcount: Option>, 30 | 31 | /// Number of columns logged per row 32 | #[structopt(long, name = "COLUMNS", default_value = "20")] 33 | columns: u64, 34 | 35 | /// Prefix for table names 36 | #[structopt(long, name = "PREFIX", default_value = "")] 37 | table_prefix: String, 38 | } 39 | 40 | #[tokio::main] 41 | async fn main() { 42 | env_logger::init(); 43 | let Opt { 44 | addr, 45 | interval, 46 | tables: n_tables, 47 | rowcount, 48 | columns, 49 | table_prefix, 50 | } = Opt::from_args(); 51 | let rowcount = rowcount.unwrap_or_else(Vec::new); 52 | let tables: Vec<_> = (0..n_tables) 53 | .map(|i| format!("{table_prefix}{}_{i}", random_word::get(random_word::Lang::En),)) 54 | .collect(); 55 | let mut log = 
locustdb::logging_client::LoggingClient::new( 56 | Duration::from_secs(1), 57 | &addr, 58 | 1 << 28, 59 | BufferFullPolicy::Block, 60 | None, 61 | ); 62 | let mut interval = time::interval(Duration::from_millis(interval)); 63 | 64 | loop { 65 | interval.tick().await; 66 | for (i, table) in tables.iter().enumerate() { 67 | for _ in 0..(rowcount.get(i).cloned().unwrap_or(1)) { 68 | log.log( 69 | table, 70 | (0..columns).map(|c| (format!("col_{c}"), vf64(rand::random::()))), 71 | ); 72 | } 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/bin/log.rs: -------------------------------------------------------------------------------- 1 | use locustdb_serialization::api::any_val_syntax::vf64; 2 | use locustdb_serialization::api::AnyVal; 3 | use rand::Rng; 4 | use std::mem; 5 | use std::time::Duration; 6 | 7 | use locustdb::logging_client::BufferFullPolicy; 8 | use structopt::StructOpt; 9 | use systemstat::{Platform, System}; 10 | use tokio::time; 11 | 12 | #[derive(StructOpt, Debug)] 13 | #[structopt( 14 | name = "LocustDB Logger Test", 15 | about = "Log basic system stats to LocustDB.", 16 | author = "Clemens Winter " 17 | )] 18 | struct Opt { 19 | /// Address of LocustDB server 20 | #[structopt(long, name = "ADDR", default_value = "http://localhost:8080")] 21 | addr: String, 22 | 23 | /// Logging interval in milliseconds 24 | #[structopt(long, name = "INTERVAL", default_value = "100")] 25 | interval: u64, 26 | 27 | /// Prefix for table names 28 | #[structopt(long, name = "PREFIX", default_value = "")] 29 | table_prefix: String, 30 | 31 | /// Interval multiplier for step 32 | #[structopt(long, name = "STEP_MULTIPLIER", default_value = "1")] 33 | step_interval: i64, 34 | 35 | /// Additional noise added to each value 36 | #[structopt(long, name = "NOISE", default_value = "0.0")] 37 | noise: f64, 38 | } 39 | 40 | struct RandomWalk { 41 | name: String, 42 | curr_value: f64, 43 | interval: u64, 44 | } 45 | 46 | 
#[tokio::main] 47 | async fn main() { 48 | env_logger::init(); 49 | let Opt { addr, interval, table_prefix, step_interval, noise } = Opt::from_args(); 50 | let mut log = locustdb::logging_client::LoggingClient::new( 51 | Duration::from_secs(1), 52 | &addr, 53 | 1 << 50, 54 | BufferFullPolicy::Block, 55 | None, 56 | ); 57 | let mut rng = rand::rng(); 58 | let mut random_walks = (0..5) 59 | .map(|i| RandomWalk { 60 | name: format!("{table_prefix}random_walk_{}", i), 61 | curr_value: 0.0, 62 | interval: rng.random_range(1..10), 63 | }) 64 | .collect::>(); 65 | let mut interval = time::interval(Duration::from_millis(interval)); 66 | let sys = System::new(); 67 | let mut cpu_watcher = sys.cpu_load_aggregate().unwrap(); 68 | for i in 0..u64::MAX { 69 | interval.tick().await; 70 | let cpu = mem::replace(&mut cpu_watcher, sys.cpu_load_aggregate().unwrap()) 71 | .done() 72 | .unwrap(); 73 | log.log( 74 | "system_stats", 75 | [("cpu".to_string(), vf64(cpu.user))].iter().cloned(), 76 | ); 77 | for walk in random_walks.iter_mut() { 78 | if i % walk.interval == 0 { 79 | walk.curr_value += rng.random_range(-1.0..1.0); 80 | log.log( 81 | &walk.name, 82 | [ 83 | ("value".to_string(), vf64(walk.curr_value + rng.random_range(-noise..noise))), 84 | ("step".to_string(), AnyVal::Int((i / walk.interval) as i64 * step_interval)), 85 | ] 86 | .iter() 87 | .cloned(), 88 | ); 89 | } 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/bin/profile.rs: -------------------------------------------------------------------------------- 1 | use locustdb::LocustDB; 2 | use futures::executor::block_on; 3 | 4 | fn main() { 5 | let locustdb = LocustDB::memory_only(); 6 | let mut loads = Vec::new(); 7 | for x in &["aa", "ab", "ac", "ad", "ae"] { 8 | let path = format!("test_data/nyc-taxi-data/trips_x{}.csv.gz", x); 9 | loads.push(locustdb.load_csv( 10 | locustdb::nyc_taxi_data::ingest_reduced_file(&path, "test") 11 | .with_partition_size(1 << 
20))); 12 | } 13 | for l in loads { 14 | let _ = block_on(l); 15 | } 16 | println!("Load completed"); 17 | loop { 18 | let _ = block_on(locustdb.run_query("select passenger_count, to_year(pickup_datetime), trip_distance / 1000, count(0) from test;", false, false, vec![])); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/bin/repl/fmt_table.rs: -------------------------------------------------------------------------------- 1 | pub fn fmt_table(headings: &[&str], rows: &[Vec<&str>]) -> String { 2 | let ncols = headings.len(); 3 | let mut col_width = Vec::::with_capacity(ncols); 4 | for heading in headings { 5 | col_width.push(super::unicode::display_width(heading) + 1); 6 | } 7 | for row in rows { 8 | assert_eq!(ncols, row.len()); 9 | for (i, entry) in row.iter().enumerate() { 10 | let width = super::unicode::display_width(entry) + 1; 11 | if col_width[i] < width { 12 | col_width[i] = width; 13 | } 14 | } 15 | } 16 | 17 | let mut result = String::new(); 18 | append_row(&mut result, headings, &col_width); 19 | 20 | result.push('\n'); 21 | for (i, width) in col_width.iter().enumerate() { 22 | result.push_str(&String::from_utf8(vec![b'-'; *width]).unwrap()); 23 | if i < ncols - 1 { 24 | result.push_str("+-"); 25 | } 26 | } 27 | 28 | for row in rows { 29 | result.push('\n'); 30 | append_row(&mut result, row, &col_width); 31 | } 32 | 33 | result 34 | } 35 | 36 | fn append_row(string: &mut String, row: &[&str], col_width: &[usize]) { 37 | let imax = col_width.len() - 1; 38 | for (i, entry) in row.iter().enumerate() { 39 | string.push_str(&format!("{:1$}", entry, col_width[i])); 40 | if i < imax { 41 | string.push_str("| "); 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/bin/repl/print_results.rs: -------------------------------------------------------------------------------- 1 | use crate::fmt_table::fmt_table; 2 | use locustdb::*; 3 | use 
locustdb::unit_fmt::*; 4 | 5 | pub fn print_query_result(results: &QueryOutput) { 6 | let rt = if results.stats.runtime_ns == 0 { 1 } else { results.stats.runtime_ns }; 7 | 8 | println!(); 9 | for (query_plan, count) in &results.query_plans { 10 | println!("Query plan in {} batches{}", count, query_plan) 11 | } 12 | println!("Scanned {} rows in {} ({:.2} rows/s)!", 13 | short_scale(results.stats.rows_scanned as f64), 14 | ns(rt as usize), 15 | billion(results.stats.rows_scanned as f64 / rt as f64)); 16 | println!("\n{}", format_results(&results.colnames, results.rows.as_ref().unwrap())); 17 | println!(); 18 | } 19 | 20 | fn format_results(colnames: &[String], rows: &[Vec]) -> String { 21 | let strcolnames: Vec<&str> = colnames.iter().map(|s| s as &str).collect(); 22 | let formattedrows: Vec> = rows.iter() 23 | .map(|row| { 24 | row.iter() 25 | .map(|val| format!("{}", val)) 26 | .collect() 27 | }) 28 | .collect(); 29 | let strrows = 30 | formattedrows.iter().map(|row| row.iter().map(|val| val as &str).collect()).collect::>(); 31 | 32 | fmt_table(&strcolnames, &strrows) 33 | } 34 | 35 | -------------------------------------------------------------------------------- /src/bin/repl/unicode.rs: -------------------------------------------------------------------------------- 1 | // original source: https://github.com/lintje/lintje/blob/501aab06e19008e787237438a69ac961f38bb4b7 2 | // https://tomdebruijn.com/posts/rust-string-length-width-calculations/ 3 | use unicode_segmentation::UnicodeSegmentation; 4 | use unicode_width::UnicodeWidthStr; 5 | use lazy_static::lazy_static; 6 | 7 | const ZERO_WIDTH_JOINER: &str = "\u{200d}"; 8 | const VARIATION_SELECTOR_16: &str = "\u{fe0f}"; 9 | const SKIN_TONES: [&str; 5] = [ 10 | "\u{1f3fb}", // Light Skin Tone 11 | "\u{1f3fc}", // Medium-Light Skin Tone 12 | "\u{1f3fd}", // Medium Skin Tone 13 | "\u{1f3fe}", // Medium-Dark Skin Tone 14 | "\u{1f3ff}", // Dark Skin Tone 15 | ]; 16 | 17 | lazy_static! 
{ 18 | static ref OTHER_PUNCTUATION: Vec = vec!['…', '⋯',]; 19 | } 20 | 21 | // Return String display width as rendered in a monospace font according to the Unicode 22 | // specification. 23 | // 24 | // This may return some odd results at times where some symbols are counted as more character width 25 | // than they actually are. 26 | // 27 | // This function has exceptions for skin tones and other emoji modifiers to determine a more 28 | // accurate display with. 29 | pub fn display_width(string: &str) -> usize { 30 | // String expressed as a vec of Unicode characters. Characters with accents and emoji may 31 | // be multiple characters combined. 32 | let unicode_chars = string.graphemes(true); 33 | let mut width = 0; 34 | for c in unicode_chars.into_iter() { 35 | width += display_width_char(c); 36 | } 37 | width 38 | } 39 | 40 | /// Calculate the render width of a single Unicode character. Unicode characters may consist of 41 | /// multiple String characters, which is why the function argument takes a string. 42 | fn display_width_char(string: &str) -> usize { 43 | // Characters that are used as modifiers on emoji. By themselves they have no width. 44 | if string == ZERO_WIDTH_JOINER || string == VARIATION_SELECTOR_16 { 45 | return 0; 46 | } 47 | // Emoji that are representations of combined emoji. They are normally calculated as the 48 | // combined width of the emoji, rather than the actual display width. This check fixes that and 49 | // returns a width of 2 instead. 50 | if string.contains(ZERO_WIDTH_JOINER) { 51 | return 2; 52 | } 53 | // Any character with a skin tone is most likely an emoji. 54 | // Normally it would be counted as as four or more characters, but these emoji should be 55 | // rendered as having a width of two. 56 | for skin_tone in SKIN_TONES { 57 | if string.contains(skin_tone) { 58 | return 2; 59 | } 60 | } 61 | 62 | match string { 63 | "\t" => { 64 | // unicode-width returns 0 for tab width, which is not how it's rendered. 
65 | // I choose 4 columns as that's what most applications render a tab as. 66 | 4 67 | } 68 | _ => UnicodeWidthStr::width(string), 69 | } 70 | } -------------------------------------------------------------------------------- /src/bin/show.rs: -------------------------------------------------------------------------------- 1 | use futures::executor::block_on; 2 | 3 | use locustdb::nyc_taxi_data; 4 | use locustdb::LocustDB; 5 | 6 | fn main() { 7 | let locustdb = LocustDB::memory_only(); 8 | let load = block_on( 9 | locustdb.load_csv( 10 | nyc_taxi_data::ingest_reduced_file("test_data/nyc-taxi.csv.gz", "default") 11 | .with_partition_size(2500), 12 | ), 13 | ); 14 | load.unwrap(); 15 | let query = "select pickup_ntaname, to_year(pickup_datetime), trip_distance / 1000, count(0), sum(total_amount) from default where cab_type = \"CMS\";"; 16 | // let query = "select payment_method, count(0), sum(total_amount) from default;"; 17 | block_on(locustdb.run_query(query, false, true, vec![0])).unwrap(); 18 | } 19 | -------------------------------------------------------------------------------- /src/bitvec.rs: -------------------------------------------------------------------------------- 1 | pub trait BitVecMut { 2 | fn set(&mut self, index: usize); 3 | fn unset(&mut self, index: usize); 4 | } 5 | 6 | pub trait BitVec { 7 | fn is_set(&self, index: usize) -> bool; 8 | } 9 | 10 | impl BitVecMut for Vec { 11 | fn set(&mut self, index: usize) { 12 | let slot = index >> 3; 13 | while slot >= self.len() { 14 | self.push(0); 15 | } 16 | self[slot] |= 1 << (index as u8 & 7) 17 | } 18 | 19 | fn unset(&mut self, index: usize) { 20 | let slot = index >> 3; 21 | if slot < self.len() { 22 | self[slot] &= 0xff ^ (1 << (index as u8 & 7)); 23 | } 24 | } 25 | } 26 | 27 | impl BitVec for Vec { 28 | fn is_set(&self, index: usize) -> bool { 29 | let slot = index >> 3; 30 | slot < self.len() && self[slot] & (1 << (index as u8 & 7)) > 0 31 | } 32 | } 33 | 34 | impl BitVec for [u8] { 35 | fn 
is_set(&self, index: usize) -> bool {
        let slot = index >> 3;
        slot < self.len() && self[slot] & (1 << (index as u8 & 7)) > 0
    }
}

// ============================================================================
// src/disk_store/mod.rs
// ============================================================================
mod azure_writer;
mod file_writer;
mod gcs_writer;
pub mod meta_store;
pub mod noop_storage;
mod partition_segment;
pub mod storage;
pub mod wal_segment;

lazy_static! {
    // Shared tokio runtime used by the disk-store backends.
    static ref RT: tokio::runtime::Runtime = tokio::runtime::Runtime::new().unwrap();
}

use crate::mem_store::column::Column;
use crate::observability::QueryPerfCounter;
use crate::scheduler::inner_locustdb::InnerLocustDB;

/// Backend abstraction for loading persisted column data.
pub trait ColumnLoader: Sync + Send + 'static {
    fn load_column(
        &self,
        table_name: &str,
        partition: PartitionID,
        column_name: &str,
        perf_counter: &QueryPerfCounter,
    ) -> Option<Vec<Column>>;
    fn load_column_range(
        &self,
        start: PartitionID,
        end: PartitionID,
        column_name: &str,
        ldb: &InnerLocustDB,
    );
    fn partition_has_been_loaded(&self, table: &str, partition: PartitionID, column: &str) -> bool;
    fn mark_subpartition_as_loaded(&self, table: &str, partition: PartitionID, column: &str);
}

pub type PartitionID = u64;

// ============================================================================
// src/disk_store/noop_storage.rs
// ============================================================================
use crate::disk_store::*;

/// `ColumnLoader` that never loads anything — used when persistence is disabled.
pub struct NoopStorage;

impl ColumnLoader for NoopStorage {
    fn load_column(
        &self,
        _: &str,
        _: PartitionID,
        _: &str,
        _: &QueryPerfCounter,
    ) -> Option<Vec<Column>> {
        None
    }
    fn load_column_range(&self, _: PartitionID, _: PartitionID, _: &str, _: &InnerLocustDB) {}
    fn partition_has_been_loaded(&self, _: &str, _: PartitionID, _: &str) -> bool {
        true
    }
    fn mark_subpartition_as_loaded(&self, _: &str, _: PartitionID, _: &str) {}
}

// ============================================================================
// src/disk_store/wal_segment.rs
// ============================================================================
use std::borrow::Cow;
use capnp::serialize_packed;
use locustdb_serialization::event_buffer::EventBuffer;
use locustdb_serialization::{default_reader_options, wal_segment_capnp};

/// One write-ahead-log segment: a monotonically increasing id plus the
/// buffered events it contains.
#[derive(Debug)]
pub struct WalSegment<'a> {
    pub id: u64,
    pub data: Cow<'a, EventBuffer>,
}

impl<'a> WalSegment<'a> {
    /// Encode this segment as a packed capnp message.
    pub fn serialize(&self) -> Vec<u8> {
        let mut builder = capnp::message::Builder::new_default();
        let mut wal_segment = builder.init_root::<wal_segment_capnp::wal_segment::Builder>();
        wal_segment.set_id(self.id);
        let mut table_segment_list = wal_segment.get_data().unwrap();
        self.data.serialize_builder(&mut table_segment_list);
        let mut buf = Vec::new();
        serialize_packed::write_message(&mut buf, &builder).unwrap();
        buf
    }

    /// Decode a packed capnp message back into an owned `WalSegment`.
    pub fn deserialize(data: &[u8]) -> capnp::Result<WalSegment<'static>> {
        let message_reader =
            serialize_packed::read_message(data, default_reader_options()).unwrap();
        let wal_segment = message_reader.get_root::<wal_segment_capnp::wal_segment::Reader>()?;
        let id = wal_segment.get_id();
        let data = EventBuffer::deserialize_reader(wal_segment.get_data()?)?;
        Ok(WalSegment {
            id,
            data: Cow::Owned(data),
        })
    }
}

// ============================================================================
// src/engine/data_types/mod.rs
// ============================================================================
mod byte_slices;
mod data;
mod nullable_vec_data;
mod scalar_data;
mod types;
mod val_rows;
mod vec_data;

use ordered_float::OrderedFloat;

pub use self::byte_slices::*;
pub use self::data::*;
pub use
self::nullable_vec_data::*;
pub use self::scalar_data::*;
pub use self::types::*;
pub use self::val_rows::*;
pub use self::vec_data::*;

#[allow(non_camel_case_types)]
pub type of64 = OrderedFloat<f64>;

/// Reinterpret a `Vec<f64>` as a `Vec<of64>` without copying.
/// Relies on `OrderedFloat<f64>` having the same layout as `f64`.
pub fn vec_f64_to_vec_of64(vec: Vec<f64>) -> Vec<of64> {
    unsafe { std::mem::transmute::<Vec<f64>, Vec<of64>>(vec) }
}

// ============================================================================
// src/engine/execution/mod.rs
// ============================================================================
pub mod query_task;
mod buffer;
mod executor;
mod batch_merging;
mod scratchpad;

pub use self::buffer::*;
pub use self::scratchpad::*;
pub use self::executor::*;
pub use self::batch_merging::{BatchResult, combine};

// ============================================================================
// src/engine/mod.rs
// ============================================================================
use crate::errors::QueryError;

pub use self::data_types::*;
pub use self::execution::*;
pub use self::operators::*;
pub use self::planning::*;

pub mod data_types;
pub mod execution;
pub mod operators;
pub mod planning;

// ============================================================================
// src/engine/operators/aggregator.rs
// ============================================================================
// TODO: would probably be better to have two types here, an UntypedAggregator emitted by parser which is then converted into the right TypedAggregator by query planner
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Aggregator {
    SumI64 = 0,
    SumF64 = 1,
    Count = 2,
    MaxI64 = 3,
    MaxF64 = 4,
    MinI64 = 5,
    MinF64 = 6,
}

// ============================================================================
// src/engine/operators/assemble_nullable.rs
// ============================================================================
use crate::engine::*;

/// Combines a data buffer and a present (null-map) buffer into one nullable buffer.
#[derive(Debug)]
pub struct AssembleNullable<T> {
    pub data: BufferRef<T>,
    pub present: BufferRef<u8>,
    pub nullable_data: BufferRef<Nullable<T>>,
}

impl<'a, T: VecData<T>> VecOperator<'a> for AssembleNullable<T> {
    fn execute(&mut self, _streaming: bool, _scratchpad: &mut Scratchpad<'a>)
        -> Result<(), QueryError> {
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        // This works even when streaming since it just creates an nullable_data->data alias
        // and sets the null map of nullable_data to present.
        // It would incorrect to perform this operation in the `execute` function since
        // otherwise it would results in incorrect ordering with potential
        // `PropagateNullability` operations.
        scratchpad.assemble_nullable(self.data, self.present, self.nullable_data);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.data.any(), self.present.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i, &mut self.present.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.nullable_data.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { false }
    fn display_op(&self, _: bool) -> String { format!("nullable({}, {})", self.data, self.present) }
}

// ============================================================================
// src/engine/operators/bit_unpack.rs
// ============================================================================
use crate::engine::*;

/// Extracts a bit field (`shift`/`width`) from each packed i64 input value.
#[derive(Debug)]
pub struct BitUnpackOperator {
    pub input: BufferRef<i64>,
    pub output: BufferRef<i64>,
    pub shift: u8,
    pub width: u8,
}

impl<'a> VecOperator<'a> for BitUnpackOperator {
    fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) ->
Result<(), QueryError> {
        let data = scratchpad.get(self.input);
        let mut unpacked = scratchpad.get_mut(self.output);
        // In streaming mode the output buffer is reused between batches.
        if stream { unpacked.clear(); }
        let mask = (1 << self.width) - 1;
        for d in data.iter() {
            unpacked.push((d >> self.shift) & mask);
        }
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.output, Vec::<i64>::with_capacity(batch_size));
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.input.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn can_block_output(&self) -> bool { true }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, alternate: bool) -> String {
        if alternate {
            let mask = (1 << self.width) - 1;
            format!("({} >> {}) & {:x}", self.input, self.shift, mask)
        } else {
            format!("({} >> $shift) & $mask", self.input)
        }
    }
}

// ============================================================================
// src/engine/operators/collect.rs
// ============================================================================
use crate::engine::*;

/// Names and forwards its input buffer to the output (pure alias, no copy).
pub struct Collect {
    pub input: BufferRef<Any>,
    pub output: BufferRef<Any>,
    pub name: String,
}

impl<'a> VecOperator<'a> for Collect {
    fn execute(&mut self, _: bool, _scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.alias(self.input, self.output);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.input] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output] }
    fn can_stream_input(&self,
execute(&mut self, _streaming: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let lhs = scratchpad.get_null_map(self.lhs);
        let rhs = scratchpad.get_null_map(self.rhs);
        let mut output = scratchpad.get_mut(self.output);
        // A value is present in the combined map only if present in both inputs.
        for (out, (l, r)) in output.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
            *out = l & r;
        }
        Ok(())
    }

    fn init(&mut self, total_count: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        // One bit per row, rounded up to whole bytes.
        let output = vec![0u8; batch_size.min(total_count).div_ceil(8)];
        scratchpad.set(self.output, output);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.lhs.any(), self.rhs.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.lhs.i, &mut self.rhs.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { true }
    fn display_op(&self, _: bool) -> String { format!("combine_null_maps({}, {})", self.lhs, self.rhs) }
}

// ============================================================================
// src/engine/operators/compact.rs
// ============================================================================
use crate::engine::*;

/// In-place filter: keeps `data[i]` only where `select[i] > 0`.
#[derive(Debug)]
pub struct Compact<T, U> {
    pub data: BufferRef<T>,
    pub select: BufferRef<U>,
    pub compacted: BufferRef<T>,
}

impl<'a, T: VecData<T> + 'a, U: GenericIntVec<U>> VecOperator<'a> for Compact<T, U> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let mut data = scratchpad.get_mut(self.data);
        let select = scratchpad.get(self.select);
        // Compact selected entries to the front, then truncate the rest.
        let mut j = 0;
        for (i, &s) in select.iter().take(data.len()).enumerate() {
            if s > U::zero() {
                data[j] = data[i];
                j += 1;
            }
        }
        data.truncate(j);
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.alias(self.data, self.compacted);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.data.any(), self.select.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i, &mut self.select.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.compacted.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn mutates(&self, i: usize) -> bool { i == self.data.i }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        format!("{}[{} > 0]", self.data, self.select)
    }
}

// ============================================================================
// src/engine/operators/compact_nullable.rs
// ============================================================================
use crate::engine::*;
use crate::bitvec::*;

/// Like `Compact`, but the data buffer is nullable: the null map is compacted
/// alongside the values.
#[derive(Debug)]
pub struct CompactNullable<T, U> {
    pub data: BufferRef<Nullable<T>>,
    pub select: BufferRef<U>,
    pub compacted: BufferRef<Nullable<T>>,
}

impl<'a, T: VecData<T> + 'a, U: GenericIntVec<U>> VecOperator<'a> for CompactNullable<T, U> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let (mut data, mut present) = scratchpad.get_mut_nullable(self.data);
        let select = scratchpad.get(self.select);
        // Compact selected entries (and their null bits) to the front.
        let mut j = 0;
        for (i, &s) in select.iter().take(data.len()).enumerate() {
            if s > U::zero() {
                data[j] = data[i];
                if present.is_set(i) {
                    present.set(j);
                } else {
                    present.unset(j);
                }
                j += 1;
            }
        }
        data.truncate(j);
        present.truncate(j.div_ceil(8));
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.alias(self.data, self.compacted);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> {
vec![self.data.any(), self.select.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i, &mut self.select.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.compacted.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn mutates(&self, i: usize) -> bool { i == self.data.i }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        format!("{}[{} > 0]", self.data, self.select)
    }
}

// ============================================================================
// src/engine/operators/compact_nullable_nullable.rs
// ============================================================================
use crate::engine::*;
use crate::bitvec::*;

/// Compacts a nullable data buffer by a nullable select buffer: an entry is
/// kept only if the select value is positive AND present.
#[derive(Debug)]
pub struct CompactNullableNullable<T, U> {
    pub data: BufferRef<Nullable<T>>,
    pub select: BufferRef<Nullable<U>>,
    pub compacted: BufferRef<Nullable<T>>,
}

impl<'a, T: VecData<T> + 'a, U: GenericIntVec<U>> VecOperator<'a> for CompactNullableNullable<T, U> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let (mut data, mut present) = scratchpad.get_mut_nullable(self.data);
        let (select, select_present) = scratchpad.get_nullable(self.select);
        // Compact entries selected by a present, positive select value.
        let mut j = 0;
        for (i, &s) in select.iter().take(data.len()).enumerate() {
            if s > U::zero() && (*select_present).is_set(i) {
                data[j] = data[i];
                if present.is_set(i) {
                    present.set(j);
                } else {
                    present.unset(j);
                }
                j += 1;
            }
        }
        data.truncate(j);
        present.truncate(j.div_ceil(8));
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.alias(self.data, self.compacted);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.data.any(), self.select.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i, &mut self.select.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.compacted.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn mutates(&self, i: usize) -> bool { i == self.data.i }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        format!("{}[{} > 0]", self.data, self.select)
    }
}

// ============================================================================
// src/engine/operators/compact_with_nullable.rs
// ============================================================================
use crate::engine::*;
use crate::bitvec::*;

/// Compacts a plain data buffer by a nullable select buffer.
#[derive(Debug)]
pub struct CompactWithNullable<T, U> {
    pub data: BufferRef<T>,
    pub select: BufferRef<Nullable<U>>,
    pub compacted: BufferRef<T>,
}

impl<'a, T: VecData<T> + 'a, U: GenericIntVec<U>> VecOperator<'a> for CompactWithNullable<T, U> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let mut data = scratchpad.get_mut(self.data);
        let (select, select_present) = scratchpad.get_nullable(self.select);
        // Compact entries selected by a present, positive select value.
        let mut j = 0;
        for (i, &s) in select.iter().take(data.len()).enumerate() {
            if s > U::zero() && (*select_present).is_set(i) {
                data[j] = data[i];
                j += 1;
            }
        }
        data.truncate(j);
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.alias(self.data, self.compacted);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.data.any(), self.select.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i, &mut self.select.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.compacted.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn mutates(&self, i: usize) ->
bool { i == self.data.i }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        format!("{}[{} > 0]", self.data, self.select)
    }
}

// ============================================================================
// src/engine/operators/constant.rs
// ============================================================================
use crate::engine::*;
use crate::ingest::raw_val::RawVal;

/// Produces a single constant value. `hide_value` controls whether the value
/// itself is shown in query plans (only its type is shown when hidden).
#[derive(Debug)]
pub struct Constant {
    pub val: RawVal,
    pub hide_value: bool,
    pub output: BufferRef<RawVal>,
}

impl<'a> VecOperator<'a> for Constant {
    fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        // The constant never changes, so it is materialized once during init.
        let result = constant_data(self.val.clone());
        scratchpad.set_any(self.output.any(), result);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, alternate: bool) -> String {
        if self.hide_value && !alternate {
            format!("Constant<{:?}>", self.val.get_type())
        } else {
            format!("{}", &self.val)
        }
    }
}

// ============================================================================
// src/engine/operators/constant_expand.rs
// ============================================================================
use crate::engine::*;

/// Streams `len` copies of a constant value in batches of `batch_size`.
#[derive(Debug)]
pub struct ConstantExpand<T> {
    pub val: T,
    pub output: BufferRef<T>,

    pub current_index: usize,
    pub len: usize,
    pub batch_size: usize,
}

impl<'a, T: GenericIntVec<T>> VecOperator<'a> for ConstantExpand<T> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        // The final batch may be shorter than batch_size; shrink the buffer.
        if self.current_index + self.batch_size > self.len {
            let mut output = scratchpad.get_mut(self.output);
            output.truncate(self.len - self.current_index);
        }
        self.current_index += self.batch_size;
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        self.batch_size = batch_size;
        scratchpad.set(self.output, vec![self.val; batch_size]);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { true }
    fn is_streaming_producer(&self) -> bool { true }
    fn has_more(&self) -> bool { self.current_index < self.len }

    fn display_op(&self, _: bool) -> String {
        "ConstantExpand".to_string()
    }
}

// ============================================================================
// src/engine/operators/constant_vec.rs
// ============================================================================
use crate::engine::*;
use std::fmt;
use std::mem;

/// Produces a pre-materialized vector of constant data.
pub struct ConstantVec<'a> {
    pub val: BoxedData<'a>,
    pub output: BufferRef<Any>,
}

impl<'a> VecOperator<'a> for ConstantVec<'a> {
    fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        // Move the owned data into the scratchpad, leaving an empty placeholder.
        let owned = mem::replace(&mut self.val, empty_data(0));
        scratchpad.set_any(self.output, owned);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] }
    fn outputs(&self) -> Vec<BufferRef<Any>> {
vec![self.output] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        "ConstantVec".to_string()
    }
}

impl<'a> fmt::Debug for ConstantVec<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.display_op(false))
    }
}

// ============================================================================
// src/engine/operators/delta_decode.rs
// ============================================================================
use crate::engine::*;

/// Decodes a delta-encoded column back into absolute i64 values.
/// `previous` carries the running value across streamed batches.
#[derive(Debug)]
pub struct DeltaDecode<T> {
    pub encoded: BufferRef<T>,
    pub decoded: BufferRef<i64>,
    pub previous: i64,
}

impl<'a, T: GenericIntVec<T>> VecOperator<'a> for DeltaDecode<T> {
    fn execute(&mut self, streaming: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let encoded = scratchpad.get(self.encoded);
        let mut decoded = scratchpad.get_mut(self.decoded);
        if streaming { decoded.clear(); }
        // Each output is the previous output plus the stored delta.
        let mut previous = self.previous;
        for e in encoded.iter() {
            let current = e.to_i64().unwrap() + previous;
            decoded.push(current);
            previous = current;
        }
        self.previous = previous;
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.decoded, Vec::with_capacity(batch_size));
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.encoded.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.encoded.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.decoded.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn can_block_output(&self) -> bool { true }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("delta_decode({})", self.encoded)
    }
}

// ============================================================================
// src/engine/operators/empty.rs
// ============================================================================
use crate::engine::*;
use std::fmt;

/// Produces an empty vector of the requested element type.
pub struct Empty<T> {
    pub output: BufferRef<T>,
}

impl<'a, T> VecOperator<'a> for Empty<T> where T: VecData<T> + 'a {
    fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.output, vec![]);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        "Empty".to_string()
    }
}

impl<T> fmt::Debug for Empty<T> where T: VecData<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.display_op(false))
    }
}

// ============================================================================
// src/engine/operators/encode_const.rs
// ============================================================================
use crate::engine::*;
use crate::mem_store::*;

/// Encodes a scalar integer constant with a column codec so it can be compared
/// against encoded column data.
#[derive(Debug)]
pub struct EncodeIntConstant {
    pub constant: BufferRef<Scalar<i64>>,
    pub output: BufferRef<Scalar<i64>>,
    pub codec: Codec,
}

impl<'a> VecOperator<'a> for EncodeIntConstant {
    fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad:
&mut Scratchpad<'a>) { 17 | let constant = scratchpad.get_scalar(&self.constant); 18 | let result = self.codec.encode_int(constant); 19 | scratchpad.set_any(self.output.any(), scalar_i64_data(result)); 20 | } 21 | 22 | fn inputs(&self) -> Vec> { 23 | vec![self.constant.any()] 24 | } 25 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.constant.i] } 26 | fn outputs(&self) -> Vec> { 27 | vec![self.output.any()] 28 | } 29 | fn can_stream_input(&self, _: usize) -> bool { 30 | true 31 | } 32 | fn can_stream_output(&self, _: usize) -> bool { 33 | true 34 | } 35 | fn allocates(&self) -> bool { 36 | false 37 | } 38 | 39 | fn display_op(&self, _: bool) -> String { 40 | format!("encode({}; {:?})", self.constant, self.codec) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/engine/operators/exists.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | 4 | #[derive(Debug)] 5 | pub struct Exists { 6 | pub input: BufferRef, 7 | pub max_index: BufferRef>, 8 | pub output: BufferRef, 9 | } 10 | 11 | impl<'a, T: GenericIntVec + CastUsize> VecOperator<'a> for Exists { 12 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError>{ 13 | let data = scratchpad.get(self.input); 14 | let mut exists = scratchpad.get_mut(self.output); 15 | 16 | let len = scratchpad.get_scalar(&self.max_index) as usize + 1; 17 | if len > exists.len() { 18 | exists.resize(len, 0); 19 | } 20 | 21 | for &i in data.iter() { 22 | let index = i.cast_usize(); 23 | exists[index] = 1; 24 | } 25 | Ok(()) 26 | } 27 | 28 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 29 | scratchpad.set(self.output, Vec::with_capacity(0)); 30 | } 31 | 32 | fn inputs(&self) -> Vec> { vec![self.input.any(), self.max_index.any()] } 33 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 34 | fn outputs(&self) -> Vec> { 
vec![self.output.any()] } 35 | fn can_stream_input(&self, _: usize) -> bool { true } 36 | fn can_stream_output(&self, _: usize) -> bool { false } 37 | fn allocates(&self) -> bool { true } 38 | 39 | fn display_output(&self) -> bool { false } 40 | fn display_op(&self, _: bool) -> String { 41 | format!("{}[{}] = 1 {}", self.output, self.input, self.max_index) 42 | } 43 | } 44 | 45 | -------------------------------------------------------------------------------- /src/engine/operators/functions.rs: -------------------------------------------------------------------------------- 1 | use chrono::{DateTime, Datelike}; 2 | 3 | use crate::engine::of64; 4 | 5 | use super::map_operator::MapOp; 6 | 7 | 8 | pub struct ToYear; 9 | 10 | impl MapOp for ToYear { 11 | fn apply(&self, unix_ts: i64) -> i64 { i64::from(DateTime::from_timestamp(unix_ts, 0).unwrap().year()) } 12 | fn name() -> &'static str { "to_year" } 13 | } 14 | 15 | pub struct Floor; 16 | 17 | impl MapOp for Floor { 18 | fn apply(&self, f: of64) -> i64 { f.floor() as i64 } 19 | fn name() -> &'static str { "floor" } 20 | } 21 | 22 | pub struct BooleanNot; 23 | 24 | impl MapOp for BooleanNot { 25 | fn apply(&self, boolean: u8) -> u8 { boolean ^ true as u8 } 26 | fn name() -> &'static str { "not" } 27 | } 28 | 29 | 30 | pub struct RegexMatch { 31 | pub r: regex::Regex 32 | } 33 | 34 | impl<'a> MapOp<&'a str, u8> for RegexMatch { 35 | fn apply(&self, s: &'a str) -> u8 { 36 | match self.r.find(s) { 37 | Some(_) => 1, 38 | None => 0, 39 | } 40 | } 41 | fn name() -> &'static str { "not" } 42 | } 43 | 44 | 45 | pub struct Length; 46 | 47 | impl<'a> MapOp<&'a str, i64> for Length { 48 | fn apply(&self, s: &'a str) -> i64 { s.len() as i64 } 49 | fn name() -> &'static str { "length" } 50 | } 51 | -------------------------------------------------------------------------------- /src/engine/operators/get_null_map.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | 
#[derive(Debug)] 4 | pub struct GetNullMap { 5 | pub from: BufferRef>, 6 | pub present: BufferRef, 7 | } 8 | 9 | impl<'a> VecOperator<'a> for GetNullMap { 10 | fn execute(&mut self, _streaming: bool, _scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) } 11 | 12 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 13 | scratchpad.alias_null_map(self.from, self.present); 14 | } 15 | 16 | fn inputs(&self) -> Vec> { vec![self.from.any()] } 17 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.from.i] } 18 | fn outputs(&self) -> Vec> { vec![self.present.any()] } 19 | fn can_stream_input(&self, _: usize) -> bool { true } 20 | fn can_stream_output(&self, _: usize) -> bool { true } 21 | fn allocates(&self) -> bool { true } 22 | fn display_op(&self, _: bool) -> String { format!("null_map({})", self.from) } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/engine/operators/hashmap_grouping.rs: -------------------------------------------------------------------------------- 1 | use fnv::FnvHashMap; 2 | 3 | use crate::engine::*; 4 | use crate::ingest::raw_val::RawVal; 5 | use std::hash::Hash; 6 | 7 | #[derive(Debug)] 8 | pub struct HashMapGrouping + Hash + Ord> { 9 | input: BufferRef, 10 | unique_out: BufferRef, 11 | grouping_key_out: BufferRef, 12 | cardinality_out: BufferRef>, 13 | map: FnvHashMap, 14 | } 15 | 16 | impl<'a, T: VecData + Hash + Ord + 'a> HashMapGrouping { 17 | pub fn boxed( 18 | input: BufferRef, 19 | unique_out: BufferRef, 20 | grouping_key_out: BufferRef, 21 | cardinality_out: BufferRef>, 22 | _max_index: usize, 23 | ) -> BoxedOperator<'a> { 24 | Box::new(HashMapGrouping:: { 25 | input, 26 | unique_out, 27 | grouping_key_out, 28 | cardinality_out, 29 | map: FnvHashMap::default(), 30 | }) 31 | } 32 | } 33 | 34 | impl<'a, T: VecData + Hash + Ord + 'a> VecOperator<'a> for HashMapGrouping { 35 | fn execute(&mut self, stream: bool, scratchpad: &mut 
Scratchpad<'a>) -> Result<(), QueryError> { 36 | let count = { 37 | let raw_grouping_key = scratchpad.get(self.input); 38 | let mut grouping = scratchpad.get_mut(self.grouping_key_out); 39 | let mut unique = scratchpad.get_mut(self.unique_out); 40 | if stream { 41 | grouping.clear() 42 | } 43 | for i in raw_grouping_key.iter() { 44 | grouping.push(*self.map.entry(*i).or_insert_with(|| { 45 | unique.push(*i); 46 | unique.len() as u32 - 1 47 | })); 48 | } 49 | RawVal::Int(unique.len() as i64) 50 | }; 51 | scratchpad.set_any(self.cardinality_out.any(), constant_data(count)); 52 | Ok(()) 53 | } 54 | 55 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 56 | scratchpad.set(self.unique_out, Vec::new()); 57 | scratchpad.set(self.grouping_key_out, Vec::with_capacity(batch_size)); 58 | } 59 | 60 | fn inputs(&self) -> Vec> { 61 | vec![self.input.any()] 62 | } 63 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 64 | fn outputs(&self) -> Vec> { 65 | vec![ 66 | self.unique_out.any(), 67 | self.grouping_key_out.any(), 68 | self.cardinality_out.any(), 69 | ] 70 | } 71 | fn can_stream_input(&self, _: usize) -> bool { 72 | true 73 | } 74 | fn can_stream_output(&self, output: usize) -> bool { 75 | output != self.unique_out.i 76 | } 77 | fn can_block_output(&self) -> bool { true } 78 | fn allocates(&self) -> bool { 79 | true 80 | } 81 | 82 | fn display_op(&self, _: bool) -> String { 83 | format!("hashmap_grouping({})", self.input) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/engine/operators/identity.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | pub struct Identity { 4 | pub input: BufferRef, 5 | pub output: BufferRef, 6 | } 7 | 8 | 9 | impl<'a> VecOperator<'a> for Identity { 10 | fn execute(&mut self, _: bool, _scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) } 11 | 12 | fn 
init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 13 | scratchpad.alias(self.input, self.output); 14 | } 15 | 16 | fn inputs(&self) -> Vec> { vec![self.input.any()] } 17 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 18 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 19 | fn can_stream_input(&self, _: usize) -> bool { true } 20 | fn can_stream_output(&self, _: usize) -> bool { true } 21 | fn allocates(&self) -> bool { false } 22 | fn display_op(&self, _: bool) -> String { format!("{}", self.input) } 23 | } 24 | -------------------------------------------------------------------------------- /src/engine/operators/indices.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | #[derive(Debug)] 4 | pub struct Indices { 5 | pub input: BufferRef, 6 | pub indices_out: BufferRef, 7 | } 8 | 9 | impl<'a> VecOperator<'a> for Indices { 10 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 11 | let len = scratchpad.get_any(self.input).len(); 12 | let indices = (0..len).collect::>(); 13 | scratchpad.set(self.indices_out, indices); 14 | Ok(()) 15 | } 16 | 17 | fn inputs(&self) -> Vec> { vec![self.input.any()] } 18 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 19 | fn outputs(&self) -> Vec> { vec![self.indices_out.any()] } 20 | // TODO: could make streaming? 
(need to set streaming_producer) 21 | fn can_stream_input(&self, _: usize) -> bool { false } 22 | fn can_stream_output(&self, _: usize) -> bool { false } 23 | fn allocates(&self) -> bool { true } 24 | 25 | fn display_op(&self, _: bool) -> String { 26 | format!("indices({})", self.input) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/engine/operators/is_null.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use crate::bitvec::*; 3 | 4 | 5 | pub struct IsNull { 6 | pub input: BufferRef>, 7 | pub is_null: BufferRef, 8 | } 9 | 10 | impl<'a> VecOperator<'a> for IsNull { 11 | fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError>{ 12 | let len = scratchpad.get_any(self.input.any()).len(); 13 | let present = scratchpad.get_null_map(self.input); 14 | let mut is_null = scratchpad.get_mut(self.is_null); 15 | if stream { is_null.clear(); } 16 | for i in 0..len { 17 | if (*present).is_set(i) { 18 | is_null.push(false as u8); 19 | } else { 20 | is_null.push(true as u8); 21 | } 22 | } 23 | Ok(()) 24 | } 25 | 26 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 27 | scratchpad.set(self.is_null, Vec::with_capacity(batch_size)); 28 | } 29 | 30 | fn inputs(&self) -> Vec> { vec![self.input.any()] } 31 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 32 | fn outputs(&self) -> Vec> { vec![self.is_null.any()] } 33 | fn can_stream_input(&self, _: usize) -> bool { true } 34 | fn can_stream_output(&self, _: usize) -> bool { true } 35 | fn can_block_output(&self) -> bool { true } 36 | fn allocates(&self) -> bool { true } 37 | 38 | fn display_op(&self, _: bool) -> String { 39 | format!("IsNull({})", self.input) 40 | } 41 | } 42 | 43 | pub struct IsNotNull { 44 | pub input: BufferRef>, 45 | pub is_not_null: BufferRef, 46 | } 47 | 48 | impl<'a> VecOperator<'a> for IsNotNull 
{
    fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError>{
        let len = scratchpad.get_any(self.input.any()).len();
        let present = scratchpad.get_null_map(self.input);
        let mut is_not_null = scratchpad.get_mut(self.is_not_null);
        if stream { is_not_null.clear(); }
        for i in 0..len {
            if (*present).is_set(i) {
                is_not_null.push(true as u8);
            } else {
                is_not_null.push(false as u8);
            }
        }
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.is_not_null, Vec::with_capacity(batch_size));
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.input.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.is_not_null.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn can_block_output(&self) -> bool { true }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("IsNotNull({})", self.input)
    }
}

// ===== /src/engine/operators/lz4_decode.rs =====
// NOTE(review): generic parameters restored from usage after the dump's
// `<ident...>`-stripping pass — confirm against upstream.
use crate::engine::*;
use crate::mem_store::lz4;
use std::fmt;
use std::io::Read;

/// Streaming producer that decompresses an LZ4-encoded byte buffer into
/// batches of `T`, keeping the decoder state in `reader` across batches.
pub struct LZ4Decode<'a, T> {
    pub encoded: BufferRef<u8>,
    pub decoded: BufferRef<T>,
    pub decoded_len: usize,
    pub reader: Box<dyn Read + 'a>,
    pub has_more: bool,
}

impl<'a, T: VecData<T> + Default + 'static> VecOperator<'a> for LZ4Decode<'a, T> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let mut decoded = scratchpad.get_mut(self.decoded);
        let len = lz4::decode(&mut self.reader, &mut decoded);
        // A short read signals the end of the compressed stream.
        if len < decoded.len() {
            decoded.truncate(len);
            self.has_more = false;
        }
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.decoded, vec![T::default(); batch_size]);
        let encoded = scratchpad.get_pinned(self.encoded);
        self.reader = Box::new(lz4::decoder(encoded));
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.encoded.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.encoded.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.decoded.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { true }
    fn is_streaming_producer(&self) -> bool { true }
    fn has_more(&self) -> bool { self.has_more }
    fn custom_output_len(&self) -> Option<usize> { Some(self.decoded_len) }

    fn display_op(&self, _: bool) -> String {
        format!("lz4_decode({})", self.encoded)
    }
}

impl<'a, T> fmt::Debug for LZ4Decode<'a, T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "LZ4Decode {{ encoded: {}, decoded: {} }}", self.encoded, self.decoded)
    }
}

// ===== /src/engine/operators/make_nullable.rs =====
use crate::engine::*;

/// Wraps a plain buffer into a nullable one with an all-present null map.
#[derive(Debug)]
pub struct MakeNullable<T> {
    pub data: BufferRef<T>,
    pub present: BufferRef<u8>,
    pub nullable_data: BufferRef<Nullable<T>>,
}

impl<'a, T: VecData<T> + 'a> VecOperator<'a> for MakeNullable<T> {
    fn execute(&mut self, _streaming: bool, _scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        // One bit per element, all set to "present".
        let present = vec![255u8; batch_size / 8 + 1];
        scratchpad.set(self.present, present);
        scratchpad.assemble_nullable(self.data, self.present, self.nullable_data);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.data.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.nullable_data.any(), self.present.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { true }
    fn display_op(&self, _: bool) -> String { format!("nullable({}, {})", self.data, self.present) }
}

// ===== /src/engine/operators/map_operator.rs =====
use crate::engine::*;


/// Applies an element-wise `MapOp` to every value of the input buffer.
#[derive(Debug)]
pub struct MapOperator<Input, Output, Map> {
    pub input: BufferRef<Input>,
    pub output: BufferRef<Output>,
    pub map: Map,
}

impl<'a, Input, Output, Map> VecOperator<'a> for MapOperator<Input, Output, Map>
    where Input: VecData<Input> + 'a,
          Output: VecData<Output> + 'a,
          Map: MapOp<Input, Output> {
    fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError>{
        let input = scratchpad.get(self.input);
        let mut output = scratchpad.get_mut(self.output);
        if stream { output.clear() }
        for i in input.iter() {
            output.push(self.map.apply(*i));
        }
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.output, Vec::with_capacity(batch_size));
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.input.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn can_block_output(&self) -> bool { true }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("{}({})", Map::name(), self.input)
    }
}

/// An element-wise function with a display name, used by `MapOperator`.
pub trait MapOp<Input, Output> {
    fn apply(&self, input: Input) -> Output;
    fn name() -> &'static str;
}

// ===== /src/engine/operators/merge_deduplicate.rs =====
use crate::engine::*;
use std::cmp::{max, min};
use std::marker::PhantomData;

/// Merges two sorted buffers, dropping duplicates, and records the merge
/// operations performed so parallel columns can be merged identically.
#[derive(Debug)]
pub struct MergeDeduplicate<T, C> {
    pub left: BufferRef<T>,
    pub right: BufferRef<T>,
    pub deduplicated: BufferRef<T>,
    pub merge_ops: BufferRef<MergeOp>,
    pub comparator: PhantomData<C>,
}

impl<'a, T: VecData<T> + 'a, C: Comparator<T>> VecOperator<'a> for MergeDeduplicate<T, C> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let (deduplicated, merge_ops) = {
            let left = scratchpad.get(self.left);
            let right = scratchpad.get(self.right);
            merge_deduplicate::<_, C>(&left, &right)
        };
        scratchpad.set(self.deduplicated, deduplicated);
        scratchpad.set(self.merge_ops, merge_ops);
        Ok(())
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.left.any(), self.right.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.left.i, &mut self.right.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.deduplicated.any(), self.merge_ops.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("merge_deduplicate({}, {})", self.left, self.right)
    }
}

fn merge_deduplicate<'a, T: VecData<T> + 'a, C: Comparator<T>>(left: &[T], right: &[T]) -> (Vec<T>, Vec<MergeOp>) {
    // Could figure out maths for more precise estimate + variance derived from how much
// grouping reduced cardinality
    let output_len_estimate = max(left.len(), right.len()) + min(left.len(), right.len()) / 2;
    let mut result = Vec::with_capacity(output_len_estimate);
    let mut ops = Vec::<MergeOp>::with_capacity(output_len_estimate);

    let mut i = 0;
    let mut j = 0;
    while i < left.len() && j < right.len() {
        if result.last() == Some(&right[j]) {
            // Right element equals the last emitted value: merge, don't emit.
            ops.push(MergeOp::MergeRight);
            j += 1;
        } else if C::cmp_eq(left[i], right[j]) {
            result.push(left[i]);
            ops.push(MergeOp::TakeLeft);
            i += 1;
        } else {
            result.push(right[j]);
            ops.push(MergeOp::TakeRight);
            j += 1;
        }
    }

    // Drain whichever side has elements remaining.
    for x in left[i..].iter() {
        result.push(*x);
        ops.push(MergeOp::TakeLeft);
    }
    if j < right.len() && result.last() == Some(&right[j]) {
        ops.push(MergeOp::MergeRight);
        j += 1;
    }
    for x in right[j..].iter() {
        result.push(*x);
        ops.push(MergeOp::TakeRight);
    }

    (result, ops)
}


// ===== /src/engine/operators/merge_drop.rs =====
use crate::engine::*;

/// Replays a recorded `MergeOp` sequence over two buffers, dropping the
/// elements that were merged away during deduplication.
#[derive(Debug)]
pub struct MergeDrop<T> {
    pub merge_ops: BufferRef<MergeOp>,
    pub left: BufferRef<T>,
    pub right: BufferRef<T>,
    pub deduplicated: BufferRef<T>,
}

impl<'a, T: VecData<T> + 'a> VecOperator<'a> for MergeDrop<T> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let deduplicated = {
            let ops = scratchpad.get(self.merge_ops);
            let left = scratchpad.get(self.left);
            let right = scratchpad.get(self.right);
            merge_drop(&ops, &left, &right)
        };
        scratchpad.set(self.deduplicated, deduplicated);
        Ok(())
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.merge_ops.any(), self.left.any(), self.right.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.merge_ops.i, &mut self.left.i, &mut self.right.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.deduplicated.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("merge_drop({}, {}, {})", self.merge_ops, self.left, self.right)
    }
}

fn merge_drop<'a, T: VecData<T> + 'a>(ops: &[MergeOp], left: &[T], right: &[T]) -> Vec<T> {
    // This is an overestimate
    let mut result = Vec::with_capacity(ops.len());
    let mut i = 0;
    let mut j = 0;
    for op in ops {
        match *op {
            MergeOp::TakeLeft => {
                result.push(left[i]);
                i += 1;
            }
            MergeOp::TakeRight => {
                result.push(right[j]);
                j += 1;
            }
            MergeOp::MergeRight => {
                j += 1;
            }
        }
    }
    result
}

// ===== /src/engine/operators/merge_partitioned.rs =====
use crate::engine::*;
use std::cmp;
use std::fmt::Debug;
use std::marker::PhantomData;

/// Merges two buffers partition by partition (as described by `partitioning`),
/// up to `limit` elements, recording which side each element was taken from.
#[derive(Debug)]
pub struct MergePartitioned<T, C> {
    pub partitioning: BufferRef<Premerge>,
    pub left: BufferRef<T>,
    pub right: BufferRef<T>,
    pub merged: BufferRef<T>,
    pub take_left: BufferRef<u8>,
    pub limit: usize,
    pub c: PhantomData<C>,
}

impl<'a, T: VecData<T> + 'a + Debug, C: Comparator<T>> VecOperator<'a> for MergePartitioned<T, C> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let (merged, merge_ops) = {
            let partitioning = scratchpad.get(self.partitioning);
            let left = scratchpad.get(self.left);
            let right = scratchpad.get(self.right);
            merge_partitioned::<_, C>(&partitioning, &left, &right, self.limit)
        };
        scratchpad.set(self.merged, merged);
        scratchpad.set(self.take_left, merge_ops);
        Ok(())
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.partitioning.any(), self.left.any(), self.right.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.partitioning.i, &mut self.left.i, &mut self.right.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.merged.any(), self.take_left.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("merge_partitioned({}, {}, {})", self.partitioning, self.left, self.right)
    }
}

pub fn merge_partitioned<'a, T, C>(partitioning: &[Premerge], left: &[T], right: &[T], limit: usize)
                                   -> (Vec<T>, Vec<u8>) where T: Debug + Copy + 'a, C: Comparator<T> {
    let len = cmp::min(left.len() + right.len(), limit);
    let mut result = Vec::with_capacity(len);
    let mut take_left = Vec::<u8>::with_capacity(len);

    let mut i = 0;
    let mut j = 0;
    'outer: for group in partitioning {
        let i_max = i + group.left as usize;
        let j_max = j + group.right as usize;
        for _ in 0..(group.left + group.right) {
            // Within a partition, prefer left while it compares <= right.
            if j == j_max || (i < i_max && C::cmp_eq(left[i], right[j])) {
                take_left.push(1);
                result.push(left[i]);
                i += 1;
            } else {
                take_left.push(0);
                result.push(right[j]);
                j += 1;
            }
            if i + j == limit {
                break 'outer;
            }
        }
    }
    (result, take_left)
}

// ===== /src/engine/operators/mod.rs =====
pub use self::aggregator::*;
pub use self::comparator::*;
pub use self::vector_operator::*;

pub mod vector_operator;
pub mod comparator;

mod aggregate;
mod assemble_nullable;
mod binary_operator;
mod bit_unpack;
mod bool_op;
mod buffer_stream;
mod collect;
mod column_ops;
mod combine_null_maps;
mod compact_nullable_nullable;
mod compact_nullable;
mod compact_with_nullable;
mod compact;
mod comparison_operators;
mod constant;
mod constant_expand;
mod constant_vec;
mod delta_decode;
mod dict_lookup;
mod empty;
mod encode_const;
mod exists;
mod filter;
mod filter_nullable;
mod functions;
mod fuse_nulls;
mod get_null_map;
mod hashmap_grouping;
mod hashmap_grouping_byte_slices;
mod hashmap_grouping_val_rows;
mod identity;
mod indices;
mod is_null;
mod lz4_decode;
mod make_nullable;
mod map_operator;
mod merge;
mod merge_aggregate;
mod merge_deduplicate;
mod merge_deduplicate_partitioned;
mod merge_drop;
mod merge_keep;
mod merge_partitioned;
mod nonzero_compact;
mod nonzero_indices;
mod null_to_i64;
mod null_to_val;
mod null_to_vec;
mod null_vec;
mod null_vec_like;
mod numeric_operators;
mod parameterized_vec_vec_int_op;
mod partition;
mod pco_decode;
mod propagate_nullability;
mod scalar_f64;
mod scalar_i64;
mod scalar_i64_to_scalar_f64;
mod scalar_str;
mod select;
mod slice_pack;
mod slice_unpack;
mod sort_by;
mod sort_by_slices;
mod sort_by_val_rows;
mod stream_buffer;
mod subpartition;
mod to_val;
mod top_n;
mod type_conversion;
mod unhexpack_strings;
mod unpack_strings;
mod val_rows_pack;
mod val_rows_unpack;

mod aggregator;

pub use null_vec_like::LengthSource;

// ===== /src/engine/operators/nonzero_compact.rs =====
-------------------------------------------------------------------------------- 1 | use crate::bitvec::BitVec; 2 | use crate::engine::*; 3 | 4 | #[derive(Debug)] 5 | pub struct NonzeroCompact { 6 | pub data: BufferRef, 7 | pub compacted: BufferRef, 8 | } 9 | 10 | impl<'a, T: GenericIntVec> VecOperator<'a> for NonzeroCompact { 11 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 12 | let mut data = scratchpad.get_mut(self.data); 13 | // Remove all unmodified entries 14 | let mut j = 0; 15 | for i in 0..data.len() { 16 | if data[i] > T::zero() { 17 | data[j] = data[i]; 18 | j += 1; 19 | } 20 | } 21 | data.truncate(j); 22 | Ok(()) 23 | } 24 | 25 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 26 | scratchpad.alias(self.data, self.compacted); 27 | } 28 | 29 | fn inputs(&self) -> Vec> { vec![self.data.any()] } 30 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i] } 31 | fn outputs(&self) -> Vec> { vec![self.compacted.any()] } 32 | fn can_stream_input(&self, _: usize) -> bool { false } 33 | fn can_stream_output(&self, _: usize) -> bool { false } 34 | fn mutates(&self, i: usize) -> bool { i == self.data.i } 35 | fn allocates(&self) -> bool { false } 36 | 37 | fn display_op(&self, _: bool) -> String { 38 | format!("{}[{} > 0]", self.data, self.data) 39 | } 40 | } 41 | 42 | 43 | #[derive(Debug)] 44 | pub struct NonzeroCompactNullable { 45 | pub data: BufferRef>, 46 | pub compacted: BufferRef, 47 | } 48 | 49 | impl<'a, T: GenericIntVec> VecOperator<'a> for NonzeroCompactNullable { 50 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 51 | let (mut data, data_present) = scratchpad.get_mut_nullable(self.data); 52 | // Remove all unmodified entries 53 | let mut j = 0; 54 | for i in 0..data.len() { 55 | if (*data_present).is_set(i) && data[i] > T::zero() { 56 | data[j] = data[i]; 57 | j += 1; 58 | } 59 | } 60 | data.truncate(j); 61 | Ok(()) 62 | 
} 63 | 64 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 65 | scratchpad.alias_data(self.data, self.compacted); 66 | } 67 | 68 | fn inputs(&self) -> Vec> { vec![self.data.any()] } 69 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i] } 70 | fn outputs(&self) -> Vec> { vec![self.compacted.any()] } 71 | fn can_stream_input(&self, _: usize) -> bool { false } 72 | fn can_stream_output(&self, _: usize) -> bool { false } 73 | fn mutates(&self, i: usize) -> bool { i == self.data.i } 74 | fn allocates(&self) -> bool { false } 75 | 76 | fn display_op(&self, _: bool) -> String { 77 | format!("{}[{} > 0]", self.data, self.data) 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /src/engine/operators/null_to_i64.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | // Take a null count and expands it into a nullable vec of the same length with arbitrary type and all values set to null 4 | #[derive(Debug)] 5 | pub struct NullToI64 { 6 | pub input: BufferRef, 7 | pub output: BufferRef, 8 | 9 | pub batch_size: usize, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for NullToI64 { 13 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | let len = scratchpad.get_any(self.input).len(); 15 | if self.batch_size > len { 16 | let mut output = scratchpad.get_mut(self.output); 17 | output.truncate(len); 18 | } 19 | Ok(()) 20 | } 21 | 22 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 23 | self.batch_size = batch_size; 24 | scratchpad.set(self.output, vec![I64_NULL; batch_size]); 25 | } 26 | 27 | fn inputs(&self) -> Vec> { 28 | vec![self.input.any()] 29 | } 30 | fn inputs_mut(&mut self) -> Vec<&mut usize> { 31 | vec![&mut self.input.i] 32 | } 33 | fn outputs(&self) -> Vec> { 34 | vec![self.output.any()] 35 | } 36 | fn can_stream_input(&self, _: 
usize) -> bool { 37 | true 38 | } 39 | fn can_stream_output(&self, _: usize) -> bool { 40 | true 41 | } 42 | fn allocates(&self) -> bool { 43 | true 44 | } 45 | 46 | fn display_op(&self, _: bool) -> String { 47 | format!("{} expand as ", self.input) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/engine/operators/null_to_val.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use crate::mem_store::Val; 3 | 4 | #[derive(Debug)] 5 | pub struct NullToVal { 6 | pub input: BufferRef, 7 | pub output: BufferRef>, 8 | 9 | pub batch_size: usize, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for NullToVal { 13 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | let len = scratchpad.get_any(self.input).len(); 15 | if self.batch_size > len { 16 | let mut output = scratchpad.get_mut(self.output); 17 | output.truncate(len); 18 | } 19 | Ok(()) 20 | } 21 | 22 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 23 | self.batch_size = batch_size; 24 | scratchpad.set(self.output, vec![Val::Null; batch_size]); 25 | } 26 | 27 | fn inputs(&self) -> Vec> { 28 | vec![self.input.any()] 29 | } 30 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 31 | fn outputs(&self) -> Vec> { 32 | vec![self.output.any()] 33 | } 34 | fn can_stream_input(&self, _: usize) -> bool { 35 | true 36 | } 37 | fn can_stream_output(&self, _: usize) -> bool { 38 | true 39 | } 40 | fn allocates(&self) -> bool { 41 | true 42 | } 43 | 44 | fn display_op(&self, _: bool) -> String { 45 | format!("{} expand as Val", self.input) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/engine/operators/null_to_vec.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | // Take a null count and 
expands it into a nullable vec of the same length with arbitrary type and all values set to null 4 | #[derive(Debug)] 5 | pub struct NullToVec { 6 | pub input: BufferRef, 7 | pub output: BufferRef>, 8 | 9 | pub batch_size: usize, 10 | } 11 | 12 | impl<'a, T: 'a> VecOperator<'a> for NullToVec 13 | where 14 | T: VecData + Copy + Default, 15 | { 16 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 17 | let len = scratchpad.get_any(self.input).len(); 18 | if self.batch_size > len { 19 | let (mut output, mut present) = scratchpad.get_mut_nullable(self.output); 20 | output.truncate(len); 21 | present.truncate(len.div_ceil(8)); 22 | } 23 | Ok(()) 24 | } 25 | 26 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 27 | self.batch_size = batch_size; 28 | scratchpad.set_nullable( 29 | self.output, 30 | vec![T::default(); batch_size], 31 | vec![0u8; batch_size.div_ceil(8)], 32 | ); 33 | } 34 | 35 | fn inputs(&self) -> Vec> { 36 | vec![self.input.any()] 37 | } 38 | fn inputs_mut(&mut self) -> Vec<&mut usize> { 39 | vec![&mut self.input.i] 40 | } 41 | fn outputs(&self) -> Vec> { 42 | vec![self.output.any()] 43 | } 44 | fn can_stream_input(&self, _: usize) -> bool { 45 | true 46 | } 47 | fn can_stream_output(&self, _: usize) -> bool { 48 | true 49 | } 50 | fn allocates(&self) -> bool { 51 | true 52 | } 53 | 54 | fn display_op(&self, _: bool) -> String { 55 | format!("{} expand as Nullable<{:?}>", self.input, T::t()) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/engine/operators/null_vec.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | #[derive(Debug)] 4 | pub struct NullVec { 5 | pub len: usize, 6 | pub output: BufferRef, 7 | } 8 | 9 | impl<'a> VecOperator<'a> for NullVec { 10 | fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> { 11 | Ok(()) 12 | 
} 13 | 14 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 15 | scratchpad.set_any(self.output, empty_data(self.len)); 16 | } 17 | 18 | fn inputs(&self) -> Vec> { 19 | vec![] 20 | } 21 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] } 22 | fn outputs(&self) -> Vec> { 23 | vec![self.output.any()] 24 | } 25 | fn can_stream_input(&self, _: usize) -> bool { 26 | true 27 | } 28 | fn can_stream_output(&self, _: usize) -> bool { 29 | true 30 | } 31 | fn allocates(&self) -> bool { 32 | false 33 | } 34 | fn display_op(&self, _: bool) -> String { 35 | "NullVec".to_string() 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/engine/operators/null_vec_like.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use crate::bitvec::BitVec; 3 | 4 | #[derive(Debug)] 5 | pub enum LengthSource { 6 | InputLength, 7 | NonZeroU8ElementCount, 8 | NonNullElementCount, 9 | } 10 | 11 | #[derive(Debug)] 12 | pub struct NullVecLike { 13 | pub input: BufferRef, 14 | pub output: BufferRef, 15 | pub source_type: LengthSource, 16 | pub count: usize, 17 | } 18 | 19 | impl<'a> VecOperator<'a> for NullVecLike { 20 | fn execute(&mut self, streaming: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 21 | if streaming { self.count = 0 }; 22 | self.count += match self.source_type { 23 | LengthSource::InputLength => scratchpad.get_any(self.input).len(), 24 | LengthSource::NonZeroU8ElementCount => scratchpad.get(self.input.u8()).iter().filter(|&&x| x != 0).count(), 25 | LengthSource::NonNullElementCount => { 26 | let mut count = 0; 27 | let (data, present) = scratchpad.get_nullable(self.input.nullable_u8()); 28 | for (i, d) in data.iter().enumerate() { 29 | if *d != 0 && BitVec::is_set(&*present, i) { 30 | count += 1; 31 | } 32 | } 33 | count 34 | }, 35 | }; 36 | let mut output = scratchpad.get_any_mut(self.output); 37 | 
*output.cast_ref_mut_null() = self.count; 38 | Ok(()) 39 | } 40 | 41 | fn init(&mut self, _: usize, _: usize, _: &mut Scratchpad<'a>) { } 42 | 43 | fn inputs(&self) -> Vec> { vec![self.input] } 44 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 45 | fn outputs(&self) -> Vec> { 46 | vec![self.output.any()] 47 | } 48 | fn can_stream_input(&self, _: usize) -> bool { true } 49 | fn can_stream_output(&self, _: usize) -> bool { true } 50 | fn can_block_output(&self) -> bool { true } 51 | fn allocates(&self) -> bool { false } 52 | fn display_op(&self, _: bool) -> String { 53 | format!("NullVecLike({})", self.input) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/engine/operators/parameterized_vec_vec_int_op.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::marker::PhantomData; 3 | 4 | use crate::engine::*; 5 | 6 | 7 | #[derive(Debug)] 8 | pub struct ParameterizedVecVecIntegerOperator { 9 | pub lhs: BufferRef, 10 | pub rhs: BufferRef, 11 | pub output: BufferRef, 12 | pub parameter: i64, 13 | pub op: PhantomData, 14 | } 15 | 16 | impl<'a, Op: ParameterizedIntegerOperation + fmt::Debug> VecOperator<'a> for ParameterizedVecVecIntegerOperator { 17 | fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError>{ 18 | let mut output = scratchpad.get_mut(self.output); 19 | let lhs = scratchpad.get(self.lhs); 20 | let rhs = scratchpad.get(self.rhs); 21 | if stream { output.clear(); } 22 | for (l, r) in lhs.iter().zip(rhs.iter()) { 23 | output.push(Op::perform(*l, *r, self.parameter)); 24 | } 25 | Ok(()) 26 | } 27 | 28 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 29 | scratchpad.set(self.output, Vec::with_capacity(batch_size)); 30 | } 31 | 32 | fn inputs(&self) -> Vec> { vec![self.lhs.any(), self.rhs.any()] } 33 | fn inputs_mut(&mut self) -> Vec<&mut usize> { 
vec![&mut self.lhs.i, &mut self.rhs.i] } 34 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 35 | fn can_stream_input(&self, _: usize) -> bool { true } 36 | fn can_stream_output(&self, _: usize) -> bool { true } 37 | fn can_block_output(&self) -> bool { true } 38 | fn allocates(&self) -> bool { true } 39 | 40 | fn display_op(&self, alternate: bool) -> String { 41 | Op::display(self.lhs, self.rhs, self.parameter, alternate) 42 | } 43 | } 44 | 45 | 46 | pub trait ParameterizedIntegerOperation { 47 | fn perform(lhs: i64, rhs: i64, param: i64) -> i64; 48 | fn display(lhs: BufferRef, rhs: BufferRef, param: i64, alternate: bool) -> String; 49 | } 50 | 51 | #[derive(Debug)] 52 | pub struct BitShiftLeftAdd; 53 | 54 | impl ParameterizedIntegerOperation for BitShiftLeftAdd { 55 | fn perform(lhs: i64, rhs: i64, param: i64) -> i64 { lhs + (rhs << param) } 56 | fn display(lhs: BufferRef, rhs: BufferRef, param: i64, alternate: bool) -> String { 57 | if alternate { 58 | format!("{} + ({} << {})", lhs, rhs, param) 59 | } else { 60 | format!("{} + ({} << $shift)", lhs, rhs) 61 | } 62 | } 63 | } 64 | 65 | -------------------------------------------------------------------------------- /src/engine/operators/propagate_nullability.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | /// Applies the null map of a nullable buffer to another (non-nullable) buffer. 4 | #[derive(Debug)] 5 | pub struct PropagateNullability { 6 | pub from: BufferRef>, 7 | pub to: BufferRef, 8 | pub output: BufferRef>, 9 | } 10 | 11 | impl<'a, T: VecData> VecOperator<'a> for PropagateNullability { 12 | fn execute(&mut self, _streaming: bool, _scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) } 13 | 14 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 15 | // This works even when streaming since it just creates an output->to alias and sets the null map of output to the null map of from. 
16 | // It would incorrect to perform this operation in the `execute` function since otherwise it would results in incorrect ordering with potential `AssembleNullable` operations. 17 | scratchpad.reassemble_nullable(self.from, self.to, self.output); 18 | } 19 | 20 | fn inputs(&self) -> Vec> { vec![self.from.any(), self.to.any()] } 21 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.from.i, &mut self.to.i] } 22 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 23 | fn can_stream_input(&self, _: usize) -> bool { true } 24 | fn can_stream_output(&self, _: usize) -> bool { true } 25 | fn allocates(&self) -> bool { false } 26 | fn display_op(&self, _: bool) -> String { format!("reassemble_nullable({}, {})", self.from, self.to) } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /src/engine/operators/scalar_f64.rs: -------------------------------------------------------------------------------- 1 | use ordered_float::OrderedFloat; 2 | 3 | use crate::engine::*; 4 | 5 | #[derive(Debug)] 6 | pub struct ScalarF64 { 7 | pub val: OrderedFloat, 8 | pub hide_value: bool, 9 | pub output: BufferRef>>, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for ScalarF64 { 13 | fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) } 14 | 15 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 16 | scratchpad.set_const(self.output, self.val); 17 | } 18 | 19 | fn inputs(&self) -> Vec> { vec![] } 20 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] } 21 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 22 | fn can_stream_input(&self, _: usize) -> bool { false } 23 | fn can_stream_output(&self, _: usize) -> bool { true } 24 | fn allocates(&self) -> bool { false } 25 | 26 | fn display_op(&self, alternate: bool) -> String { 27 | if self.hide_value && !alternate { 28 | "ScalarF64".to_string() 29 | } else { 30 | format!("{}", &self.val) 31 | } 32 | } 33 | } 34 | 
-------------------------------------------------------------------------------- /src/engine/operators/scalar_i64.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | #[derive(Debug)] 4 | pub struct ScalarI64 { 5 | pub val: i64, 6 | pub hide_value: bool, 7 | pub output: BufferRef>, 8 | } 9 | 10 | impl<'a> VecOperator<'a> for ScalarI64 { 11 | fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) } 12 | 13 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 14 | scratchpad.set_const(self.output, self.val); 15 | } 16 | 17 | fn inputs(&self) -> Vec> { vec![] } 18 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] } 19 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 20 | fn can_stream_input(&self, _: usize) -> bool { false } 21 | fn can_stream_output(&self, _: usize) -> bool { true } 22 | fn allocates(&self) -> bool { false } 23 | 24 | fn display_op(&self, alternate: bool) -> String { 25 | if self.hide_value && !alternate { 26 | "ScalarI64".to_string() 27 | } else { 28 | format!("{}", &self.val) 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/engine/operators/scalar_i64_to_scalar_f64.rs: -------------------------------------------------------------------------------- 1 | use ordered_float::OrderedFloat; 2 | 3 | use crate::engine::*; 4 | 5 | 6 | #[derive(Debug)] 7 | pub struct ScalarI64ToScalarF64 { 8 | pub input: BufferRef>, 9 | pub output: BufferRef>, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for ScalarI64ToScalarF64 { 13 | fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError>{ Ok(()) } 14 | 15 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 16 | let input = scratchpad.get_scalar(&self.input); 17 | scratchpad.set_const(self.output, OrderedFloat(input as f64)); 18 | } 19 | 20 | fn inputs(&self) -> Vec> { 
vec![self.input.any()] } 21 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 22 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 23 | fn can_stream_input(&self, _: usize) -> bool { true } 24 | fn can_stream_output(&self, _: usize) -> bool { true } 25 | fn can_block_output(&self) -> bool { true } 26 | fn allocates(&self) -> bool { false } 27 | 28 | fn display_op(&self, _: bool) -> String { 29 | format!("{} as f64", self.input) 30 | } 31 | } -------------------------------------------------------------------------------- /src/engine/operators/scalar_str.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | #[derive(Debug)] 4 | pub struct ScalarStr<'a> { 5 | pub val: String, 6 | pub pinned: BufferRef>, 7 | pub output: BufferRef>, 8 | } 9 | 10 | impl<'a> VecOperator<'a> for ScalarStr<'a> { 11 | fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> { 12 | Ok(()) 13 | } 14 | 15 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 16 | scratchpad.set_const(self.pinned, self.val.clone()); 17 | let output = scratchpad.get_scalar_string_pinned(&self.pinned); 18 | scratchpad.set_const(self.output, output); 19 | } 20 | 21 | fn inputs(&self) -> Vec> { 22 | vec![] 23 | } 24 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] } 25 | fn outputs(&self) -> Vec> { 26 | vec![self.output.any()] 27 | } 28 | fn can_stream_input(&self, _: usize) -> bool { 29 | false 30 | } 31 | fn can_stream_output(&self, _: usize) -> bool { 32 | true 33 | } 34 | fn allocates(&self) -> bool { 35 | true 36 | } 37 | fn display_op(&self, _: bool) -> String { 38 | format!("\"{}\"", &self.val) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/engine/operators/select.rs: -------------------------------------------------------------------------------- 1 | use crate::bitvec::*; 2 | use crate::engine::*; 3 | 4 | 
#[derive(Debug)] 5 | pub struct Select { 6 | pub input: BufferRef, 7 | pub indices: BufferRef, 8 | pub output: BufferRef, 9 | } 10 | 11 | impl<'a, T: 'a> VecOperator<'a> for Select where T: VecData { 12 | fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 13 | let data = scratchpad.get(self.input); 14 | let indices = scratchpad.get(self.indices); 15 | let mut output = scratchpad.get_mut(self.output); 16 | if stream { output.clear(); } 17 | for i in indices.iter() { 18 | output.push(data[*i]); 19 | } 20 | Ok(()) 21 | } 22 | 23 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 24 | scratchpad.set(self.output, Vec::with_capacity(batch_size)); 25 | } 26 | 27 | fn inputs(&self) -> Vec> { vec![self.input.any(), self.indices.any()] } 28 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i, &mut self.indices.i] } 29 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 30 | fn can_stream_input(&self, i: usize) -> bool { i == self.indices.i } 31 | fn can_stream_output(&self, _: usize) -> bool { true } 32 | fn can_block_output(&self) -> bool { true } 33 | fn allocates(&self) -> bool { true } 34 | 35 | fn display_op(&self, _: bool) -> String { 36 | format!("{}[{}]", self.input, self.indices) 37 | } 38 | } 39 | 40 | 41 | #[derive(Debug)] 42 | pub struct SelectNullable { 43 | pub input: BufferRef>, 44 | pub indices: BufferRef, 45 | pub output: BufferRef>, 46 | } 47 | 48 | impl<'a, T: 'a> VecOperator<'a> for SelectNullable where T: VecData { 49 | fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 50 | let (data, present) = scratchpad.get_nullable(self.input); 51 | let indices = scratchpad.get(self.indices); 52 | let (mut data_out, mut present_out) = scratchpad.get_mut_nullable(self.output); 53 | if stream { 54 | data_out.clear(); 55 | present_out.clear(); 56 | } 57 | for (i, &index) in indices.iter().enumerate() { 58 | 
data_out.push(data[index]); 59 | if (*present).is_set(index) { present_out.set(i) } 60 | } 61 | Ok(()) 62 | } 63 | 64 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 65 | scratchpad.set_nullable(self.output, Vec::with_capacity(batch_size), Vec::with_capacity(batch_size / 8)); 66 | } 67 | 68 | fn inputs(&self) -> Vec> { vec![self.input.any(), self.indices.any()] } 69 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i, &mut self.indices.i] } 70 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 71 | fn can_stream_input(&self, i: usize) -> bool { i == self.indices.i } 72 | fn can_stream_output(&self, _: usize) -> bool { true } 73 | fn can_block_output(&self) -> bool { true } 74 | fn allocates(&self) -> bool { true } 75 | 76 | fn display_op(&self, _: bool) -> String { 77 | format!("{}[{}]", self.input, self.indices) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/engine/operators/slice_unpack.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use std::str; 3 | 4 | #[derive(Debug)] 5 | pub struct SliceUnpackInt { 6 | pub input: BufferRef, 7 | pub output: BufferRef, 8 | pub stride: usize, 9 | pub offset: usize, 10 | } 11 | 12 | impl<'a, T: GenericIntVec> VecOperator<'a> for SliceUnpackInt { 13 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | let packed_any = scratchpad.get_any(self.input); 15 | let packed = packed_any.cast_ref_byte_slices(); 16 | let mut unpacked = scratchpad.get_mut(self.output); 17 | for datum in packed.data.iter().skip(self.offset).step_by(self.stride) { 18 | unpacked.push(T::from_bytes(datum)); 19 | } 20 | Ok(()) 21 | } 22 | 23 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 24 | scratchpad.set(self.output, Vec::with_capacity(batch_size)); 25 | } 26 | 27 | fn inputs(&self) -> Vec> { 
vec![self.input] } 28 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 29 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 30 | fn can_stream_input(&self, _: usize) -> bool { true } 31 | fn can_stream_output(&self, _: usize) -> bool { true } 32 | fn allocates(&self) -> bool { true } 33 | 34 | fn display_op(&self, _: bool) -> String { 35 | format!("{}[{}, {}, ...] = {}", self.output, self.offset, self.offset + self.stride, self.input) 36 | } 37 | fn display_output(&self) -> bool { false } 38 | } 39 | 40 | #[derive(Debug)] 41 | pub struct SliceUnpackString<'a> { 42 | pub input: BufferRef, 43 | pub output: BufferRef<&'a str>, 44 | pub stride: usize, 45 | pub offset: usize, 46 | } 47 | 48 | impl<'a> VecOperator<'a> for SliceUnpackString<'a> { 49 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 50 | let packed_any = scratchpad.get_any(self.input); 51 | let packed = packed_any.cast_ref_byte_slices(); 52 | let mut unpacked = scratchpad.get_mut(self.output); 53 | for datum in packed.data.iter().skip(self.offset).step_by(self.stride) { 54 | unpacked.push(unsafe { str::from_utf8_unchecked(datum) }); 55 | } 56 | Ok(()) 57 | } 58 | 59 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 60 | scratchpad.set(self.output, Vec::with_capacity(batch_size)); 61 | } 62 | 63 | fn inputs(&self) -> Vec> { vec![self.input] } 64 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 65 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 66 | fn can_stream_input(&self, _: usize) -> bool { true } 67 | fn can_stream_output(&self, _: usize) -> bool { true } 68 | fn allocates(&self) -> bool { true } 69 | 70 | fn display_op(&self, _: bool) -> String { 71 | format!("{}[{}, {}, ...] 
= {}", self.output, self.offset, self.offset + self.stride, self.input) 72 | } 73 | fn display_output(&self) -> bool { false } 74 | } 75 | -------------------------------------------------------------------------------- /src/engine/operators/sort_by_slices.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | #[derive(Debug)] 4 | pub struct SortBySlices { 5 | pub ranking: BufferRef, 6 | pub indices: BufferRef, 7 | pub output: BufferRef, 8 | pub descending: bool, 9 | pub stable: bool, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for SortBySlices { 13 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | scratchpad.alias(self.indices, self.output); 15 | let ranking_any = scratchpad.get_any(self.ranking); 16 | let ranking = ranking_any.cast_ref_byte_slices(); 17 | let mut result = scratchpad.get_mut(self.indices); 18 | if self.descending { 19 | if self.stable { 20 | result.sort_by(|i, j| ranking.row(*i).cmp(ranking.row(*j)).reverse()); 21 | } else { 22 | result.sort_unstable_by(|i, j| ranking.row(*i).cmp(ranking.row(*j)).reverse()); 23 | } 24 | } else if self.stable { 25 | result.sort_by_key(|i| ranking.row(*i)); 26 | } else { 27 | result.sort_unstable_by_key(|i| ranking.row(*i)); 28 | } 29 | Ok(()) 30 | } 31 | 32 | fn inputs(&self) -> Vec> { 33 | vec![self.ranking.any(), self.indices.any()] 34 | } 35 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.ranking.i, &mut self.indices.i] } 36 | fn outputs(&self) -> Vec> { 37 | vec![self.output.any()] 38 | } 39 | fn can_stream_input(&self, _: usize) -> bool { 40 | false 41 | } 42 | fn can_stream_output(&self, _: usize) -> bool { 43 | false 44 | } 45 | fn allocates(&self) -> bool { 46 | true 47 | } 48 | 49 | fn display_op(&self, _: bool) -> String { 50 | format!( 51 | "sort_by({}, {}, desc={}, stable={})", 52 | self.ranking, self.indices, self.descending, self.stable 53 | ) 54 | } 55 | } 56 | 
-------------------------------------------------------------------------------- /src/engine/operators/sort_by_val_rows.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | pub struct SortByValRows<'a> { 4 | pub ranking: BufferRef>, 5 | pub indices: BufferRef, 6 | pub output: BufferRef, 7 | pub descending: bool, 8 | pub stable: bool, 9 | } 10 | 11 | impl<'a> VecOperator<'a> for SortByValRows<'a> { 12 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 13 | scratchpad.alias(self.indices, self.output); 14 | let ranking = scratchpad.get_mut_val_rows(self.ranking); 15 | let mut result = scratchpad.get_mut(self.indices); 16 | if self.descending { 17 | if self.stable { 18 | result.sort_by(|i, j| ranking.row(*i).cmp(ranking.row(*j)).reverse()); 19 | } else { 20 | result.sort_unstable_by(|i, j| ranking.row(*i).cmp(ranking.row(*j)).reverse()); 21 | } 22 | } else if self.stable { 23 | result.sort_by_key(|i| ranking.row(*i)); 24 | } else { 25 | result.sort_unstable_by_key(|i| ranking.row(*i)); 26 | } 27 | Ok(()) 28 | } 29 | 30 | fn inputs(&self) -> Vec> { 31 | vec![self.ranking.any(), self.indices.any()] 32 | } 33 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.indices.i] } 34 | fn outputs(&self) -> Vec> { 35 | vec![self.output.any()] 36 | } 37 | fn can_stream_input(&self, _: usize) -> bool { 38 | false 39 | } 40 | fn can_stream_output(&self, _: usize) -> bool { 41 | false 42 | } 43 | fn allocates(&self) -> bool { 44 | true 45 | } 46 | 47 | fn display_op(&self, _: bool) -> String { 48 | format!( 49 | "sort_by({}, {}, desc={}, stable={})", 50 | self.ranking, self.indices, self.descending, self.stable 51 | ) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/engine/operators/unpack_strings.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 
use std::fmt; 3 | use crate::stringpack::StringPackerIterator; 4 | 5 | pub struct UnpackStrings<'a> { 6 | pub packed: BufferRef, 7 | pub unpacked: BufferRef<&'a str>, 8 | pub iterator: Option>, 9 | pub has_more: bool, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for UnpackStrings<'a> { 13 | fn execute(&mut self, streaming: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | let mut decoded = scratchpad.get_mut(self.unpacked); 15 | if streaming { 16 | decoded.clear(); 17 | } 18 | for elem in self.iterator.as_mut().unwrap() { 19 | decoded.push(elem); 20 | if decoded.capacity() == decoded.len() { return Ok(()); } 21 | } 22 | self.has_more = false; 23 | Ok(()) 24 | } 25 | 26 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 27 | scratchpad.set(self.unpacked, Vec::with_capacity(batch_size)); 28 | let encoded = scratchpad.get_pinned(self.packed); 29 | self.iterator = Some(unsafe { StringPackerIterator::from_slice(encoded) }); 30 | } 31 | 32 | fn inputs(&self) -> Vec> { vec![self.packed.any()] } 33 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.packed.i] } 34 | fn outputs(&self) -> Vec> { vec![self.unpacked.any()] } 35 | fn can_stream_input(&self, _: usize) -> bool { false } 36 | fn can_stream_output(&self, _: usize) -> bool { true } 37 | fn allocates(&self) -> bool { true } 38 | fn is_streaming_producer(&self) -> bool { true } 39 | fn has_more(&self) -> bool { self.has_more } 40 | 41 | fn display_op(&self, _: bool) -> String { 42 | format!("unpack_strings({})", self.packed) 43 | } 44 | } 45 | 46 | impl<'a> fmt::Debug for UnpackStrings<'a> { 47 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 48 | write!(f, "UnpackStrings {{ packed: {}, unpacked: {} }}", self.packed, self.unpacked) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/engine/operators/val_rows_pack.rs: 
-------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use crate::mem_store::value::Val; 3 | 4 | #[derive(Debug)] 5 | pub struct ValRowsPack<'a> { 6 | pub input: BufferRef>, 7 | pub output: BufferRef>, 8 | pub stride: usize, 9 | pub offset: usize, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for ValRowsPack<'a> { 13 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | let data = scratchpad.get(self.input); 15 | let mut val_rows = scratchpad.get_mut_val_rows(self.output); 16 | for (i, datum) in data.iter().enumerate() { 17 | val_rows.data[i * self.stride + self.offset] = *datum; 18 | } 19 | Ok(()) 20 | } 21 | 22 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 23 | if scratchpad.get_any(self.output.any()).len() == 0 { 24 | scratchpad.set_any( 25 | self.output.any(), 26 | Box::new(ValRows { 27 | row_len: self.stride, 28 | data: vec![Val::Null; batch_size * self.stride], 29 | }), 30 | ); 31 | } 32 | } 33 | 34 | fn inputs(&self) -> Vec> { 35 | vec![self.input.any()] 36 | } 37 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 38 | fn outputs(&self) -> Vec> { 39 | vec![self.output.any()] 40 | } 41 | fn can_stream_input(&self, _: usize) -> bool { 42 | false 43 | } 44 | fn can_stream_output(&self, _: usize) -> bool { 45 | false 46 | } 47 | fn allocates(&self) -> bool { 48 | true 49 | } 50 | 51 | fn display_op(&self, _: bool) -> String { 52 | format!( 53 | "{}[{}, {}, ...] 
= {}", 54 | self.output, 55 | self.offset, 56 | self.offset + self.stride, 57 | self.input 58 | ) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/engine/operators/val_rows_unpack.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use crate::mem_store::Val; 3 | 4 | #[derive(Debug)] 5 | pub struct ValRowsUnpack<'a> { 6 | pub input: BufferRef>, 7 | pub output: BufferRef>, 8 | pub stride: usize, 9 | pub offset: usize, 10 | 11 | pub batch_size: usize, 12 | pub curr_index: usize, 13 | pub has_more: bool, 14 | } 15 | 16 | impl<'a> VecOperator<'a> for ValRowsUnpack<'a> { 17 | fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 18 | let packed = scratchpad.get_mut_val_rows(self.input); 19 | let mut unpacked = scratchpad.get_mut(self.output); 20 | if stream { 21 | self.curr_index += unpacked.len(); 22 | unpacked.clear(); 23 | } 24 | for &datum in packed.data.iter().skip(self.offset).step_by(self.stride).skip(self.curr_index).take(self.batch_size) { 25 | unpacked.push(datum); 26 | } 27 | self.has_more = (packed.data.len() + self.stride - self.offset - 1) / self.stride > self.curr_index; 28 | Ok(()) 29 | } 30 | 31 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 32 | self.batch_size = batch_size; 33 | scratchpad.set(self.output, Vec::with_capacity(batch_size)); 34 | } 35 | 36 | fn inputs(&self) -> Vec> { vec![self.input.any()] } 37 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 38 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 39 | // TODO: make sliced/streamable version of val rows? 
but have to make ValRowsPack streaming first 40 | fn can_stream_input(&self, _: usize) -> bool { false } 41 | fn can_stream_output(&self, _: usize) -> bool { true } 42 | fn allocates(&self) -> bool { true } 43 | fn has_more(&self) -> bool { self.has_more } 44 | 45 | fn display_op(&self, _: bool) -> String { 46 | format!("{}[{}, {}, ...] = {}", self.output, self.offset, self.offset + self.stride, self.input) 47 | } 48 | fn display_output(&self) -> bool { false } 49 | } 50 | -------------------------------------------------------------------------------- /src/engine/planning/filter.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::{BufferRef, Nullable, QueryPlanner, TypedBufferRef}; 2 | 3 | #[derive(Clone, Copy, Default)] 4 | pub enum Filter { 5 | #[default] 6 | None, 7 | Null, 8 | U8(BufferRef), 9 | NullableU8(BufferRef>), 10 | Indices(BufferRef), 11 | } 12 | 13 | 14 | impl Filter { 15 | pub fn apply_filter(self, planner: &mut QueryPlanner, plan: TypedBufferRef) -> TypedBufferRef { 16 | match self { 17 | Filter::U8(filter) => planner.filter(plan, filter), 18 | Filter::NullableU8(filter) => planner.nullable_filter(plan, filter), 19 | Filter::Indices(indices) => planner.select(plan, indices), 20 | Filter::Null => planner.empty(plan.tag), 21 | Filter::None => plan, 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /src/engine/planning/mod.rs: -------------------------------------------------------------------------------- 1 | mod filter; 2 | pub mod planner; 3 | mod query; 4 | pub mod query_plan; 5 | 6 | pub use self::filter::Filter; 7 | pub use self::planner::QueryPlanner; 8 | pub use self::query::ColumnInfo; 9 | pub use self::query::NormalFormQuery; 10 | pub use self::query::Query; 11 | pub use self::query::ResultColumn; 12 | pub use self::query_plan::QueryPlan; 13 | -------------------------------------------------------------------------------- 
/src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::backtrace::Backtrace; 2 | use futures::channel::oneshot; 3 | use thiserror::Error; 4 | 5 | #[derive(Error, Debug)] 6 | pub enum QueryError { 7 | #[error("Failed to parse query. Chars remaining: {}", _0)] 8 | SytaxErrorCharsRemaining(String), 9 | #[error("Failed to parse query. Bytes remaining: {:?}", _0)] 10 | SyntaxErrorBytesRemaining(Vec), 11 | #[error("Failed to parse query: {}", _0)] 12 | ParseError(String), 13 | #[error("Some assumption was violated. This is a bug: {}", _0)] 14 | FatalError(String, Backtrace), 15 | #[error("Not implemented: {}", _0)] 16 | NotImplemented(String), 17 | #[error("Type error: {}", _0)] 18 | TypeError(String), 19 | #[error("Overflow or division by zero")] 20 | Overflow, 21 | #[error("Query execution was canceled")] 22 | Canceled { 23 | #[from] 24 | source: oneshot::Canceled, 25 | }, 26 | } 27 | 28 | #[macro_export] 29 | macro_rules! fatal { 30 | ($e:expr) => { 31 | QueryError::FatalError($e.to_owned(), std::backtrace::Backtrace::capture()) 32 | }; 33 | ($fmt:expr, $($arg:tt)+) => { 34 | QueryError::FatalError(format!($fmt, $($arg)+).to_string(), std::backtrace::Backtrace::capture()) 35 | }; 36 | } 37 | 38 | #[macro_export] 39 | macro_rules! bail { 40 | ($kind:expr, $e:expr) => { 41 | return Err($kind($e.to_owned())) 42 | }; 43 | ($kind:expr, $fmt:expr, $($arg:tt)+) => { 44 | return Err($kind(format!($fmt, $($arg)+).to_owned())) 45 | }; 46 | } 47 | 48 | #[macro_export] 49 | macro_rules! 
ensure { 50 | ($cond:expr, $e:expr) => { 51 | if !($cond) { 52 | return Err(QueryError::FatalError($e.to_string(), std::backtrace::Backtrace::capture())) 53 | } 54 | }; 55 | ($cond:expr, $fmt:expr, $($arg:tt)+) => { 56 | if !($cond) { 57 | return Err(QueryError::FatalError(format!($fmt, $($arg)+).to_string(), std::backtrace::Backtrace::capture())) 58 | } 59 | }; 60 | } 61 | -------------------------------------------------------------------------------- /src/ingest/extractor.rs: -------------------------------------------------------------------------------- 1 | use chrono::prelude::*; 2 | 3 | pub type Extractor = fn(&str) -> i64; 4 | 5 | pub fn multiply_by_100(field: &str) -> i64 { 6 | if let Ok(int) = field.parse::() { 7 | int * 100 8 | } else if let Ok(float) = field.parse::() { 9 | (float * 100.0) as i64 10 | } else if field.is_empty() { 11 | 0 12 | } else { 13 | panic!("invalid field {}", &field) 14 | } 15 | } 16 | 17 | pub fn multiply_by_1000(field: &str) -> i64 { 18 | if let Ok(int) = field.parse::() { 19 | int * 1000 20 | } else if let Ok(float) = field.parse::() { 21 | (float * 1000.0) as i64 22 | } else if field.is_empty() { 23 | 0 24 | } else { 25 | panic!("invalid field {}", &field) 26 | } 27 | } 28 | 29 | pub fn int(field: &str) -> i64 { 30 | if let Ok(int) = field.parse::() { 31 | int 32 | } else if field.is_empty() { 33 | 0 34 | } else { 35 | panic!("can't parse {} as integer", &field) 36 | } 37 | } 38 | 39 | pub fn date_time(field: &str) -> i64 { 40 | NaiveDateTime::parse_from_str(field, "%Y-%m-%d %H:%M:%S") 41 | .unwrap_or_else(|_| panic!("Failed to parse {} as date time", &field)) 42 | .and_utc() 43 | .timestamp() 44 | } 45 | -------------------------------------------------------------------------------- /src/ingest/input_column.rs: -------------------------------------------------------------------------------- 1 | use locustdb_serialization::event_buffer::ColumnData; 2 | 3 | use crate::Value; 4 | 5 | pub enum InputColumn { 6 | Int(Vec), 7 | 
Float(Vec), 8 | // (Length, [(Index, Value)]) 9 | NullableFloat(u64, Vec<(u64, f64)>), 10 | NullableInt(u64, Vec<(u64, i64)>), 11 | Str(Vec), 12 | Null(usize), 13 | Mixed(Vec), 14 | } 15 | 16 | impl InputColumn { 17 | pub fn from_column_data(column_data: ColumnData, rows: u64) -> Self { 18 | match column_data { 19 | ColumnData::Dense(data) => { 20 | if (data.len() as u64) < rows { 21 | InputColumn::NullableFloat( 22 | rows, 23 | data.into_iter() 24 | .enumerate() 25 | .map(|(i, v)| (i as u64, v)) 26 | .collect(), 27 | ) 28 | } else { 29 | InputColumn::Float(data) 30 | } 31 | } 32 | ColumnData::Sparse(data) => InputColumn::NullableFloat(rows, data), 33 | ColumnData::I64(data) => { 34 | if (data.len() as u64) < rows { 35 | InputColumn::NullableInt( 36 | rows, 37 | data.into_iter() 38 | .enumerate() 39 | .map(|(i, v)| (i as u64, v)) 40 | .collect(), 41 | ) 42 | } else { 43 | InputColumn::Int(data) 44 | } 45 | } 46 | ColumnData::String(data) => { 47 | assert!( 48 | (data.len() as u64) == rows, 49 | "rows: {}, data.len(): {}", 50 | rows, 51 | data.len() 52 | ); 53 | InputColumn::Str(data) 54 | } 55 | ColumnData::Empty => InputColumn::Null(rows as usize), 56 | ColumnData::SparseI64(data) => InputColumn::NullableInt(rows, data), 57 | ColumnData::Mixed(data) => { 58 | InputColumn::Mixed(data.into_iter().map(|v| v.into()).collect()) 59 | } 60 | } 61 | } 62 | 63 | pub fn len(&self) -> usize { 64 | match self { 65 | InputColumn::Int(data) => data.len(), 66 | InputColumn::Float(data) => data.len(), 67 | InputColumn::Str(data) => data.len(), 68 | InputColumn::NullableFloat(rows, _) => *rows as usize, 69 | InputColumn::NullableInt(rows, _) => *rows as usize, 70 | InputColumn::Mixed(data) => data.len(), 71 | InputColumn::Null(rows) => *rows, 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/ingest/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod csv_loader; 2 | pub 
mod raw_val; 3 | pub mod input_column; 4 | pub mod buffer; 5 | pub mod extractor; 6 | pub mod nyc_taxi_data; 7 | pub mod colgen; 8 | pub mod schema; 9 | mod alias_method_fork; -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // TODO: migrate off incomplete/unsound specialization feature 2 | // TODO: migrate off core_intrinsics? 3 | #![allow(incomplete_features)] 4 | #![allow(internal_features)] 5 | #![feature( 6 | fn_traits, 7 | specialization, 8 | trait_alias, 9 | core_intrinsics, 10 | box_patterns, 11 | proc_macro_hygiene, 12 | let_chains, 13 | duration_constructors, 14 | btree_cursors, 15 | error_generic_member_access, 16 | )] 17 | #[macro_use] 18 | extern crate lazy_static; 19 | #[macro_use] 20 | extern crate log; 21 | pub use crate::disk_store::noop_storage::NoopStorage; 22 | 23 | pub use crate::engine::query_task::{BasicTypeColumn, QueryOutput}; 24 | pub use crate::errors::QueryError; 25 | pub use crate::ingest::colgen; 26 | pub use crate::ingest::csv_loader::Options as LoadOptions; 27 | pub use crate::ingest::extractor; 28 | pub use crate::ingest::nyc_taxi_data; 29 | pub use crate::ingest::raw_val::syntax as value_syntax; 30 | pub use crate::ingest::raw_val::RawVal as Value; 31 | pub use crate::locustdb::LocustDB; 32 | pub use crate::locustdb::Options; 33 | pub use crate::mem_store::table::TableStats; 34 | 35 | #[macro_use] 36 | mod errors; 37 | mod bitvec; 38 | pub mod disk_store; 39 | mod engine; 40 | mod ingest; 41 | mod locustdb; 42 | pub mod logging_client; 43 | mod mem_store; 44 | pub mod observability; 45 | mod scheduler; 46 | pub mod server; 47 | mod stringpack; 48 | mod syntax; 49 | pub mod unit_fmt; 50 | 51 | #[cfg(feature = "python")] 52 | pub mod python; 53 | 54 | pub type QueryResult = Result; 55 | -------------------------------------------------------------------------------- /src/mem_store/floats.rs: 
-------------------------------------------------------------------------------- 1 | use ordered_float::OrderedFloat; 2 | 3 | use crate::mem_store::*; 4 | use std::sync::Arc; 5 | use crate::bitvec::BitVec; 6 | 7 | pub struct FloatColumn; 8 | 9 | impl FloatColumn { 10 | pub fn new_boxed(name: &str, mut values: Vec>, null: Option>) -> Arc { 11 | let null = null.map(|mut n| { 12 | n.shrink_to_fit(); 13 | n 14 | }); 15 | values.shrink_to_fit(); 16 | let mut column = match null { 17 | Some(present) => { 18 | // Values for null entries are arbitrary, replace them with values that give high compression 19 | let mut last_value = OrderedFloat(0.0); 20 | for (i, value) in values.iter_mut().enumerate() { 21 | if !present.is_set(i) { 22 | *value = last_value; 23 | } else { 24 | last_value = *value; 25 | } 26 | } 27 | Column::new( 28 | name, 29 | values.len(), 30 | None, 31 | vec![CodecOp::PushDataSection(1), CodecOp::Nullable], 32 | vec![values.into(), DataSection::Bitvec(present)], 33 | ) 34 | }, 35 | None => Column::new( 36 | name, 37 | values.len(), 38 | None, 39 | vec![], 40 | vec![DataSection::F64(values)], 41 | ), 42 | }; 43 | column.lz4_or_pco_encode(); 44 | Arc::new(column) 45 | } 46 | } -------------------------------------------------------------------------------- /src/mem_store/lru.rs: -------------------------------------------------------------------------------- 1 | use crate::mem_store::partition::ColumnLocator; 2 | use lru::LruCache; 3 | use std::sync::{Arc, Mutex}; 4 | 5 | #[derive(Clone, Debug)] 6 | pub struct Lru { 7 | cache: Arc>>, 8 | } 9 | 10 | impl Lru { 11 | pub fn touch(&self, column: &ColumnLocator) { 12 | let mut cache = self.cache.lock().unwrap(); 13 | cache.get(column); 14 | } 15 | 16 | pub fn put(&self, column: ColumnLocator) { 17 | let mut cache = self.cache.lock().unwrap(); 18 | cache.put(column, ()); 19 | } 20 | 21 | pub fn remove(&self, column: &ColumnLocator) { 22 | let mut cache = self.cache.lock().unwrap(); 23 | cache.pop(column); 24 | } 
25 | 26 | pub fn evict(&self) -> Option { 27 | let mut cache = self.cache.lock().unwrap(); 28 | cache.pop_lru().map(|x| x.0) 29 | } 30 | } 31 | 32 | impl Default for Lru { 33 | fn default() -> Lru { 34 | Lru { 35 | cache: Arc::new(Mutex::new(LruCache::unbounded())), 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/mem_store/lz4.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Read, Write}; 2 | use std::mem; 3 | use std::slice::{from_raw_parts, from_raw_parts_mut}; 4 | use std::fmt::Debug; 5 | 6 | 7 | pub fn decoder(data: &[u8]) -> lz4_flex::frame::FrameDecoder<&[u8]> { 8 | lz4_flex::frame::FrameDecoder::new(data) 9 | } 10 | 11 | pub fn encode(data: &[T]) -> Vec { 12 | let ptr_t = data.as_ptr(); 13 | // Endianness? Never heard of it... 14 | let data_u8: &[u8] = unsafe { 15 | let ptr_u8 = ptr_t as *const u8; 16 | from_raw_parts(ptr_u8, std::mem::size_of_val(data)) 17 | }; 18 | 19 | let mut result = Vec::new(); 20 | { 21 | let mut encoder = lz4_flex::frame::FrameEncoder::new(&mut result); 22 | encoder.write_all(data_u8).unwrap(); 23 | encoder.finish().unwrap(); 24 | } 25 | result 26 | } 27 | 28 | // TODO: unsafe 29 | #[allow(clippy::needless_pass_by_ref_mut)] 30 | pub fn decode(src: &mut dyn Read, dst: &mut [T]) -> usize { 31 | let ptr_t = dst.as_ptr(); 32 | let dst_u8: &mut [u8] = unsafe { 33 | let ptr_u8 = ptr_t as *mut u8; 34 | from_raw_parts_mut(ptr_u8, std::mem::size_of_val(dst)) 35 | }; 36 | 37 | let mut read = 0; 38 | // LZ4 decodes in blocks of at most 65536 elements, so might have to call multiple times to fill buffer 39 | while read < dst_u8.len() && 0 != { 40 | let len = src.read(&mut dst_u8[read..]).unwrap(); 41 | read += len; 42 | len 43 | } {} 44 | if read % mem::size_of::() != 0 { 45 | println!("{} {} {} {}", dst.len(), dst_u8.len(), read, mem::size_of::()); 46 | } 47 | assert_eq!(read % mem::size_of::(), 0); 48 | read / mem::size_of::() 
49 | } 50 | 51 | #[cfg(test)] 52 | mod tests { 53 | use super::*; 54 | 55 | #[test] 56 | fn test_encode_decode() { 57 | let data = vec![10i64, 12095, -51235, 3, 0, 0, 12353, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]; 58 | let encoded = encode(&data); 59 | let mut decoded = vec![0i64; data.len()]; 60 | let count = decode(&mut decoder(&encoded), &mut decoded); 61 | assert_eq!(count, data.len()); 62 | assert_eq!(decoded, data); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/mem_store/mixed_column.rs: -------------------------------------------------------------------------------- 1 | use crate::ingest::raw_val::RawVal; 2 | use crate::mem_store::value::Val; 3 | 4 | impl RawVal { 5 | pub fn to_val(&self) -> Val { 6 | match *self { 7 | RawVal::Null => Val::Null, 8 | RawVal::Int(i) => Val::Integer(i), 9 | RawVal::Str(ref string) => Val::Str(string), 10 | RawVal::Float(f) => Val::Float(f), 11 | } 12 | } 13 | 14 | pub fn to_static_val(&self) -> Val<'static> { 15 | match *self { 16 | RawVal::Null => Val::Null, 17 | RawVal::Int(i) => Val::Integer(i), 18 | RawVal::Float(f) => Val::Float(f), 19 | RawVal::Str(_) => panic!("Can't convert RawVal::Str to Val::Str + 'static"), 20 | } 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /src/mem_store/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod codec; 2 | pub mod column; 3 | pub mod column_buffer; 4 | pub mod floats; 5 | pub mod integers; 6 | pub(crate) mod lru; 7 | pub mod lz4; 8 | mod mixed_column; 9 | pub mod partition; 10 | pub mod strings; 11 | pub mod table; 12 | pub mod tree; 13 | pub mod value; 14 | 15 | pub use self::codec::{Codec, CodecOp}; 16 | pub use self::column::{Column, DataSection, DataSource}; 17 | pub use self::lru::Lru; 18 | pub use self::table::TableStats; 19 | pub use self::tree::*; 20 | pub use self::value::Val; 21 | 
--------------------------------------------------------------------------------
/src/mem_store/value.rs:
--------------------------------------------------------------------------------
use std::fmt;

use ordered_float::OrderedFloat;

use crate::ingest::raw_val::RawVal;

/// Borrowed, dynamically typed value used during query execution.
///
/// Unlike [`RawVal`], a `Val` can borrow its string data (`Str`) straight out
/// of a backing column, which is why the type carries a lifetime.
#[derive(Debug, PartialEq, Eq, Ord, PartialOrd, Clone, Copy, Hash)]
pub enum Val<'a> {
    Null,
    Bool(bool),
    Integer(i64),
    Str(&'a str),
    Float(OrderedFloat<f64>),
}


impl<'a> fmt::Display for Val<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Val::Null => write!(f, "null"),
            Val::Bool(b) => write!(f, "{}", b),
            Val::Integer(i) => write!(f, "{}", i),
            Val::Str(s) => write!(f, "\"{}\"", s),
            // NOTE(review): floats are rendered in quotes, same as strings.
            // Presumably intentional for result formatting — confirm before
            // changing.
            Val::Float(x) => write!(f, "\"{}\"", x),
        }
    }
}

/// Unit maps to SQL-style `null`.
impl<'a> From<()> for Val<'a> {
    fn from(_: ()) -> Val<'a> {
        Val::Null
    }
}

impl<'a> From<bool> for Val<'a> {
    fn from(b: bool) -> Val<'a> {
        Val::Bool(b)
    }
}

impl<'a> From<i64> for Val<'a> {
    fn from(t: i64) -> Val<'a> {
        Val::Integer(t)
    }
}

impl<'a> From<&'a str> for Val<'a> {
    fn from(s: &'a str) -> Val<'a> {
        Val::Str(s)
    }
}

impl<'a> From<f64> for Val<'a> {
    fn from(f: f64) -> Val<'a> {
        Val::Float(OrderedFloat(f))
    }
}

/// `None` maps to `Val::Null`; `Some(v)` defers to the inner conversion.
impl<'a, T> From<Option<T>> for Val<'a>
where Val<'a>: From<T>
{
    fn from(o: Option<T>) -> Val<'a> {
        o.map_or(Val::Null, Val::from)
    }
}

impl<'a, 'b> From<&'a Val<'b>> for RawVal {
    fn from(val: &Val) -> RawVal {
        match *val {
            Val::Integer(int) => RawVal::Int(int),
            Val::Float(float) => RawVal::Float(float),
            Val::Str(s) => RawVal::Str(s.to_string()),
            // `RawVal` has no boolean variant; booleans degrade to null.
            Val::Null | Val::Bool(_) => RawVal::Null,
        }
    }
}
--------------------------------------------------------------------------------
/src/observability/metrics.rs: -------------------------------------------------------------------------------- 1 | use prometheus::{register_counter, register_gauge}; 2 | use prometheus::{Counter, Gauge}; 3 | 4 | lazy_static! { 5 | pub static ref QUERY_COUNT: Counter = 6 | register_counter!("query_count", "Number of queries executed").unwrap(); 7 | pub static ref QUERY_OK_COUNT: Counter = 8 | register_counter!("query_ok_count", "Number of queries executed successfully").unwrap(); 9 | pub static ref QUERY_ERROR_COUNT: Counter = 10 | register_counter!("query_error_count", "Number of queries executed with errors").unwrap(); 11 | pub static ref INGESTION_EVENT_COUNT: Counter = 12 | register_counter!("ingestion_event_count", "Number of ingestion events").unwrap(); 13 | pub static ref WAL_SIZE_BYTES: Gauge = 14 | register_gauge!("wal_size_bytes", "Size of the WAL").unwrap(); 15 | pub static ref WAL_UTILIZATION: Gauge = 16 | register_gauge!("wal_utilization", "Utilization of the WAL").unwrap(); 17 | pub static ref COLUMN_CACHE_BYTES: Gauge = 18 | register_gauge!("column_cache_bytes", "In-memory size of columns loaded in-memory").unwrap(); 19 | pub static ref UNFLUSHED_BUFFER_CACHE_BYTES: Gauge = 20 | register_gauge!("unflushed_buffer_cache_bytes", "In-memory size of open table buffers").unwrap(); 21 | pub static ref COLUMN_CACHE_UTILIZATION: Gauge = 22 | register_gauge!("column_cache_utilization", "Utilization of the column cache").unwrap(); 23 | pub static ref META_STORE_BYTES: Gauge = 24 | register_gauge!("meta_store_bytes", "In-memory size of the meta store").unwrap(); 25 | pub static ref TABLE_COUNT: Gauge = 26 | register_gauge!("table_count", "Number of tables").unwrap(); 27 | pub static ref ROW_COUNT: Gauge = 28 | register_gauge!("row_count", "Number of rows in the database").unwrap(); 29 | pub static ref PARTITION_COUNT: Gauge = 30 | register_gauge!("partition_count", "Number of partitions in the database").unwrap(); 31 | pub static ref PARTITION_COLUMN_COUNT: 
Gauge = 32 | register_gauge!("partition_column_count", "Sum of columns over all partitions").unwrap(); 33 | pub static ref PARTITION_VALUES: Gauge = 34 | register_gauge!("value_count", "Number of values in the partitions in the database").unwrap(); 35 | pub static ref DATABASE_SIZE_BYTES: Gauge = 36 | register_gauge!("database_size_bytes", "Size of the database").unwrap(); 37 | } 38 | -------------------------------------------------------------------------------- /src/observability/mod.rs: -------------------------------------------------------------------------------- 1 | mod simple_trace; 2 | mod perf_counter; 3 | pub(crate) mod metrics; 4 | 5 | pub(crate) use simple_trace::SimpleTracer; 6 | pub use perf_counter::{PerfCounter, QueryPerfCounter}; -------------------------------------------------------------------------------- /src/scheduler/mod.rs: -------------------------------------------------------------------------------- 1 | mod shared_sender; 2 | mod task; 3 | pub(crate) mod disk_read_scheduler; 4 | pub(crate) mod inner_locustdb; 5 | 6 | pub use self::inner_locustdb::InnerLocustDB; 7 | pub use self::task::Task; 8 | pub use self::shared_sender::SharedSender; -------------------------------------------------------------------------------- /src/scheduler/shared_sender.rs: -------------------------------------------------------------------------------- 1 | use futures::channel::oneshot::Sender; 2 | use std::sync::Mutex; 3 | use std::mem; 4 | 5 | pub struct SharedSender { 6 | inner: Mutex>>, 7 | } 8 | 9 | impl SharedSender { 10 | pub fn new(sender: Sender) -> SharedSender { 11 | SharedSender { 12 | inner: Mutex::new(Some(sender)) 13 | } 14 | } 15 | 16 | pub fn send(&self, value: T) { 17 | let mut sender_opt = self.inner.lock().unwrap(); 18 | let mut owned = None; 19 | mem::swap(&mut *sender_opt, &mut owned); 20 | if let Some(sender) = owned { 21 | let _ = sender.send(value); 22 | } 23 | } 24 | } 
--------------------------------------------------------------------------------
/src/scheduler/task.rs:
--------------------------------------------------------------------------------
use super::SharedSender;
use futures::channel::oneshot;

/// A unit of work that can be scheduled for execution by worker threads.
pub trait Task: Sync + Send {
    /// Performs (a slice of) the work; may be invoked repeatedly until
    /// `completed` returns true.
    fn execute(&self);
    /// Returns true once no further calls to `execute` are required.
    fn completed(&self) -> bool;
    /// Maximum number of threads that may run `execute` concurrently.
    fn max_parallelism(&self) -> usize;
    /// Whether this task can make use of more than one thread.
    fn multithreaded(&self) -> bool {
        self.max_parallelism() > 1
    }
}

impl Task for dyn Fn() + Send + Sync + 'static {
    fn execute(&self) {
        // Plain call syntax works on `&dyn Fn()` on stable Rust; no need for
        // the unstable `fn_traits` `call` method here.
        self()
    }

    fn completed(&self) -> bool {
        false
    }
    fn max_parallelism(&self) -> usize {
        1
    }
}

/// Adapts a closure into a [`Task`], forwarding its return value through a
/// [`SharedSender`] to the receiver handed out by [`Task::from_fn`].
struct FnTask<F, T>
where
    F: Fn() -> T + Sync + Send + 'static,
    T: Send,
{
    fun: F,
    sender: SharedSender<T>,
}

impl<F, T> Task for FnTask<F, T>
where
    F: Fn() -> T + Sync + Send + 'static,
    T: Send,
{
    fn execute(&self) {
        // Direct call syntax instead of `self.fun.call(())` — avoids the
        // unstable `fn_traits` feature at this call site.
        let result = (self.fun)();
        self.sender.send(result);
    }

    fn completed(&self) -> bool {
        false
    }
    fn max_parallelism(&self) -> usize {
        1
    }
}

impl dyn Task {
    /// Wraps `fun` in a task and returns it together with a receiver that
    /// yields the closure's result after the first execution.
    pub fn from_fn<F, T>(fun: F) -> (impl Task, oneshot::Receiver<T>)
    where
        F: Fn() -> T + Sync + Send + 'static,
        T: Send,
    {
        let (sender, receiver) = oneshot::channel();
        (
            FnTask {
                fun,
                sender: SharedSender::new(sender),
            },
            receiver,
        )
    }
}
--------------------------------------------------------------------------------
/src/syntax/expression.rs:
--------------------------------------------------------------------------------
use self::Expr::*;
use crate::engine::*;
use crate::ingest::raw_val::RawVal;
use std::collections::HashSet;

#[derive(Debug, Clone)]
pub enum Expr {
    ColName(String),
    Const(RawVal),
    Func1(Func1Type, Box<Expr>),
    Func2(Func2Type, Box<Expr>, Box<Expr>),
    Aggregate(Aggregator,
Box), 13 | } 14 | 15 | #[allow(clippy::upper_case_acronyms)] 16 | #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] 17 | pub enum Func2Type { 18 | Equals, 19 | NotEquals, 20 | LT, 21 | LTE, 22 | GT, 23 | GTE, 24 | And, 25 | Or, 26 | Add, 27 | Subtract, 28 | Multiply, 29 | Divide, 30 | Modulo, 31 | RegexMatch, 32 | Like, 33 | NotLike, 34 | } 35 | 36 | #[derive(Debug, Copy, Clone)] 37 | pub enum Func1Type { 38 | Negate, 39 | ToYear, 40 | Not, 41 | IsNull, 42 | IsNotNull, 43 | Length, 44 | Floor, 45 | } 46 | 47 | impl Expr { 48 | pub fn add_colnames(&self, result: &mut HashSet) { 49 | match *self { 50 | ColName(ref name) => { 51 | result.insert(name.to_string()); 52 | } 53 | Func2(_, ref expr1, ref expr2) => { 54 | expr1.add_colnames(result); 55 | expr2.add_colnames(result); 56 | } 57 | Func1(_, ref expr) => expr.add_colnames(result), 58 | Aggregate(_, ref expr) => expr.add_colnames(result), 59 | Const(_) => {} 60 | } 61 | } 62 | 63 | pub fn func(ftype: Func2Type, expr1: Expr, expr2: Expr) -> Expr { 64 | Func2(ftype, Box::new(expr1), Box::new(expr2)) 65 | } 66 | 67 | pub fn func1(ftype: Func1Type, expr: Expr) -> Expr { 68 | Func1(ftype, Box::new(expr)) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/syntax/limit.rs: -------------------------------------------------------------------------------- 1 | #[derive(Clone, Debug, Hash, PartialEq)] 2 | pub struct LimitClause { 3 | pub limit: u64, 4 | pub offset: u64, 5 | } 6 | -------------------------------------------------------------------------------- /src/syntax/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod expression; 2 | pub mod limit; 3 | pub mod parser; -------------------------------------------------------------------------------- /system_dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euxo pipefail 4 | 5 | 6 | if [ "$(uname)" 
== "Darwin" ]; then 7 | ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" < /dev/null 2> /dev/null 8 | brew install lz4 9 | brew install capnp 10 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 11 | sudo apt-get install -y g++ 12 | sudo apt-get install -y liblz4-dev 13 | sudo apt-get install -y dpkg 14 | #sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu bionic universe" -y 15 | #sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu bionic main" -y 16 | #sudo apt-get update -q 17 | sudo apt-get install -y capnproto 18 | else 19 | echo ERROR: Platform not supported 20 | exit 1 21 | fi 22 | 23 | 24 | -------------------------------------------------------------------------------- /templates/table.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A Basic HTML5 Template 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 |

{{ table }}

29 | 30 |

Columns

31 | {{ columns }} 32 | 33 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /test_data/edge_cases.csv: -------------------------------------------------------------------------------- 1 | u8_offset_encoded,non_dense_ints,enum,string_packed,constant0,constant0_2,negative,id,nullable_int,nullable_int2,country,largenum,float,nullable_float,float01,mixed_float_int_null 2 | 256,0,aa,xyz,0,0,-199,0,-1,,Germany,-9223372036854775808,0.123412,,0.3,1 3 | 258,2,aa,abc,0,0,39,1,-40,-40,USA,9223372036854775806,3e-4,,-0.4,10 4 | 259,3,aa,axz,0,0,-100,2,,,France,9223372036854775806,-124.0,0.4,0.421231,3 5 | 257,1,bb,AXY,0,0,34,3,,0,,9223372036854775806,3.15159,,0.9482,0.21 6 | 275,4,bb,azy,0,0,4031,4,10,9,France,-9223372036854775808,0.1234e30,,0.1,0.12 7 | 500,0,aa,$sss,0,0,32,5,,6,,9223372036854775806,1e-6,,0.2, 8 | 343,2,cc,asd,0,0,-130,6,,,Turkey,-9223372036854775808,0.0,1e-32,0.5, 9 | 432,1,aa,_f,0,0,-120,7,20,,,9223372036854775806,0.000001,,0.23,0.1 10 | 511,2,cc,t,0,0,4010,8,,1,,-9223372036854775808,-1.0,,0.742,0.1 11 | 500,3,bb,😈,0,0,-40,9,13,14,Germany,9223372036854775806,1234124.51325,1.123124e30,-0.2,0.5 -------------------------------------------------------------------------------- /test_data/nyc-taxi.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cswinter/LocustDB/016efd84bbae9781c93ecffda63422e5fefb8e93/test_data/nyc-taxi.csv.gz -------------------------------------------------------------------------------- /wandb_data_import.py: -------------------------------------------------------------------------------- 1 | import locustdb 2 | import time 3 | import wandb 4 | import requests 5 | 6 | entity = "entity-neural-network" 7 | project = "enn-ppo" 8 | run_id = "220511-055353-xor-num_envs=256-ent_coef=0.003-dmodel=64-anneal_ent=false-bs=1024-lr=0.0003" 9 | 10 | api = wandb.Api(timeout=300) 11 | runs = api.runs(f"{entity}/{project}", { 12 | 
'config.name': {"$regex": '220511-055353-.*'}, 13 | # { 14 | # "$text": '220511-055353', 15 | # } 16 | }) 17 | 18 | print("Starting...") 19 | i = 0 20 | logger = locustdb.Client(url="http://localhost:8080") 21 | while True: 22 | try: 23 | run = next(runs) 24 | except requests.exceptions.HTTPError as e: 25 | print(e) 26 | continue 27 | except requests.exceptions.ReadTimeout as e: 28 | print(e) 29 | continue 30 | except StopIteration: 31 | break 32 | print(i, run.name) 33 | rows = 0 34 | for row in run.history(pandas=False): 35 | clean_row = {k: v or 0.0 for k, v in row.items() if not isinstance(v, dict) and not isinstance(v, str)} 36 | # print(clean_row) 37 | logger.log(table="gb_9a43be3e-"+run.name, metrics=clean_row) 38 | rows += 1 39 | print(f"Logged {rows} rows") 40 | i += 1 41 | 42 | print("done") 43 | 44 | # run = api.run(f"{entity}/{project}/{run_id}") 45 | 46 | # # random walk 47 | # print("starting logging...") 48 | # value = 0 49 | # for i in range(10000): 50 | # value += np.random.normal() 51 | # locustdb.log(table="test_metrics", metrics={"step": i, "cpu": value}) 52 | 53 | time.sleep(2) 54 | print("done!") 55 | --------------------------------------------------------------------------------