├── .cargo └── config.toml ├── .github └── workflows │ ├── release.yml │ └── rust.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── bench_results ├── anomaly0 ├── anomaly1 ├── anomaly2 ├── baseline0 ├── baseline1 ├── baseline10 ├── baseline11 ├── baseline12 ├── baseline2 ├── baseline3 ├── baseline4 ├── baseline5 ├── baseline6 ├── baseline7 ├── baseline8 ├── baseline9 └── latest ├── benches └── basic.rs ├── export.py ├── float_values.bin ├── float_values.txt ├── git_hooks ├── README.md └── pre-push ├── locustdb-client ├── .appveyor.yml ├── .github │ └── dependabot.yml ├── .gitignore ├── .travis.yml ├── Cargo.toml ├── README.md ├── release.sh ├── src │ ├── client.rs │ └── lib.rs └── tests │ └── web.rs ├── locustdb-compression-utils ├── Cargo.lock ├── Cargo.toml ├── README.md ├── examples │ └── gorilla_time.rs └── src │ ├── lib.rs │ ├── test_data.rs │ └── xor_float │ ├── double.rs │ ├── mod.rs │ └── single.rs ├── locustdb-derive ├── Cargo.lock ├── Cargo.toml └── src │ ├── ast_builder.rs │ ├── enum_syntax.rs │ ├── lib.rs │ └── reify_types.rs ├── locustdb-serialization ├── Cargo.lock ├── Cargo.toml ├── README.md ├── schemas │ ├── api.capnp │ ├── dbmeta.capnp │ ├── partition_segment.capnp │ ├── rust.capnp │ └── wal_segment.capnp └── src │ ├── api.rs │ ├── api_capnp.rs │ ├── dbmeta_capnp.rs │ ├── event_buffer.rs │ ├── lib.rs │ ├── partition_segment_capnp.rs │ └── wal_segment_capnp.rs ├── memsize_results ├── baseline0 ├── baseline1 ├── baseline2 └── baseline3 ├── rust-toolchain ├── samples ├── example_row ├── example_row_sparse └── headers ├── src ├── bin │ ├── db_bench.rs │ ├── db_inspector.rs │ ├── load_generator.rs │ ├── log.rs │ ├── profile.rs │ ├── repl │ │ ├── fmt_table.rs │ │ ├── main.rs │ │ ├── print_results.rs │ │ └── unicode.rs │ └── show.rs ├── bitvec.rs ├── disk_store │ ├── azure_writer.rs │ ├── file_writer.rs │ ├── gcs_writer.rs │ ├── meta_store.rs │ ├── mod.rs │ ├── noop_storage.rs │ ├── 
partition_segment.rs │ ├── storage.rs │ └── wal_segment.rs ├── engine │ ├── data_types │ │ ├── byte_slices.rs │ │ ├── data.rs │ │ ├── mod.rs │ │ ├── nullable_vec_data.rs │ │ ├── scalar_data.rs │ │ ├── types.rs │ │ ├── val_rows.rs │ │ └── vec_data.rs │ ├── execution │ │ ├── batch_merging.rs │ │ ├── buffer.rs │ │ ├── executor.rs │ │ ├── mod.rs │ │ ├── query_task.rs │ │ └── scratchpad.rs │ ├── mod.rs │ ├── operators │ │ ├── aggregate.rs │ │ ├── aggregator.rs │ │ ├── assemble_nullable.rs │ │ ├── binary_operator.rs │ │ ├── bit_unpack.rs │ │ ├── bool_op.rs │ │ ├── buffer_stream.rs │ │ ├── collect.rs │ │ ├── column_ops.rs │ │ ├── combine_null_maps.rs │ │ ├── compact.rs │ │ ├── compact_nullable.rs │ │ ├── compact_nullable_nullable.rs │ │ ├── compact_with_nullable.rs │ │ ├── comparator.rs │ │ ├── comparison_operators.rs │ │ ├── constant.rs │ │ ├── constant_expand.rs │ │ ├── constant_vec.rs │ │ ├── delta_decode.rs │ │ ├── dict_lookup.rs │ │ ├── empty.rs │ │ ├── encode_const.rs │ │ ├── exists.rs │ │ ├── filter.rs │ │ ├── filter_nullable.rs │ │ ├── functions.rs │ │ ├── fuse_nulls.rs │ │ ├── get_null_map.rs │ │ ├── hashmap_grouping.rs │ │ ├── hashmap_grouping_byte_slices.rs │ │ ├── hashmap_grouping_val_rows.rs │ │ ├── identity.rs │ │ ├── indices.rs │ │ ├── is_null.rs │ │ ├── lz4_decode.rs │ │ ├── make_nullable.rs │ │ ├── map_operator.rs │ │ ├── merge.rs │ │ ├── merge_aggregate.rs │ │ ├── merge_deduplicate.rs │ │ ├── merge_deduplicate_partitioned.rs │ │ ├── merge_drop.rs │ │ ├── merge_keep.rs │ │ ├── merge_partitioned.rs │ │ ├── mod.rs │ │ ├── nonzero_compact.rs │ │ ├── nonzero_indices.rs │ │ ├── null_to_i64.rs │ │ ├── null_to_val.rs │ │ ├── null_to_vec.rs │ │ ├── null_vec.rs │ │ ├── null_vec_like.rs │ │ ├── numeric_operators.rs │ │ ├── parameterized_vec_vec_int_op.rs │ │ ├── partition.rs │ │ ├── pco_decode.rs │ │ ├── propagate_nullability.rs │ │ ├── scalar_f64.rs │ │ ├── scalar_i64.rs │ │ ├── scalar_i64_to_scalar_f64.rs │ │ ├── scalar_str.rs │ │ ├── select.rs │ │ ├── 
slice_pack.rs │ │ ├── slice_unpack.rs │ │ ├── sort_by.rs │ │ ├── sort_by_slices.rs │ │ ├── sort_by_val_rows.rs │ │ ├── stream_buffer.rs │ │ ├── subpartition.rs │ │ ├── to_val.rs │ │ ├── top_n.rs │ │ ├── type_conversion.rs │ │ ├── unhexpack_strings.rs │ │ ├── unpack_strings.rs │ │ ├── val_rows_pack.rs │ │ ├── val_rows_unpack.rs │ │ └── vector_operator.rs │ └── planning │ │ ├── filter.rs │ │ ├── mod.rs │ │ ├── planner.rs │ │ ├── query.rs │ │ └── query_plan.rs ├── errors.rs ├── ingest │ ├── alias_method_fork.rs │ ├── buffer.rs │ ├── colgen.rs │ ├── csv_loader.rs │ ├── extractor.rs │ ├── input_column.rs │ ├── mod.rs │ ├── nyc_taxi_data.rs │ ├── raw_val.rs │ └── schema.rs ├── lib.rs ├── locustdb.rs ├── logging_client │ └── mod.rs ├── mem_store │ ├── codec.rs │ ├── column.rs │ ├── column_buffer.rs │ ├── floats.rs │ ├── integers.rs │ ├── lru.rs │ ├── lz4.rs │ ├── mixed_column.rs │ ├── mod.rs │ ├── partition.rs │ ├── raw_col.rs │ ├── strings.rs │ ├── table.rs │ ├── tree.rs │ └── value.rs ├── observability │ ├── metrics.rs │ ├── mod.rs │ ├── perf_counter.rs │ └── simple_trace.rs ├── python.rs ├── scheduler │ ├── disk_read_scheduler.rs │ ├── inner_locustdb.rs │ ├── mod.rs │ ├── shared_sender.rs │ └── task.rs ├── server │ └── mod.rs ├── stringpack.rs ├── syntax │ ├── expression.rs │ ├── limit.rs │ ├── mod.rs │ └── parser.rs └── unit_fmt.rs ├── system_dependencies.sh ├── templates ├── index.html ├── plot.html └── table.html ├── test_data ├── edge_cases.csv ├── nyc-taxi.csv.gz ├── small.csv └── tiny.csv ├── tests ├── ingestion_test.rs └── query_tests.rs └── wandb_data_import.py /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.x86_64-apple-darwin] 2 | rustflags = [ 3 | "-C", "link-arg=-undefined", 4 | "-C", "link-arg=dynamic_lookup", 5 | ] -------------------------------------------------------------------------------- /.github/workflows/rust.yml: 
-------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | name: Test 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | matrix: 12 | include: 13 | - os: ubuntu-latest 14 | - os: macos-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - uses: actions-rs/toolchain@v1 19 | with: 20 | components: rustfmt, clippy 21 | 22 | - name: Cache cargo registry 23 | uses: actions/cache@v3 24 | with: 25 | path: ~/.cargo/registry 26 | key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} 27 | - name: Cache cargo index 28 | uses: actions/cache@v3 29 | with: 30 | path: ~/.cargo/git 31 | key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }} 32 | - name: Cache cargo build 33 | uses: actions/cache@v3 34 | with: 35 | path: target 36 | key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }} 37 | 38 | - name: Install Dependencies 39 | run: ./system_dependencies.sh 40 | 41 | - name: Check 42 | run: cargo check --all-features --all-targets 43 | - name: Clippy 44 | run: cargo clippy --all-features --all-targets -- --deny clippy::all 45 | - name: Run tests 46 | run: cargo test 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | locustdb-derive/target/ 5 | locustdb-compression-utils/target/ 6 | locustdb-serialization/target/ 7 | /data/ 8 | **/node_modules/ 9 | **/.locustDB_history 10 | 11 | # Large benchmarking datasets 12 | test_data/yellow_tripdata_2009-01.csv 13 | test_data/nyc-taxi-data 14 | 15 | # IntelliJ 16 | .idea 17 | locustdb.iml 18 | 19 | # readline 20 | .locustdb_history 21 | 22 | # vim 23 | rusty-tags.vi 24 | 25 | # default rocksdb data dir 26 | rocksdb 27 | 28 | # OS X 29 | .DS_Store 30 | 31 | # VSCode 32 
| .vscode 33 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 6 | 7 | ## [Unreleased] 8 | 9 | ## [0.2.1] - 2019-02-17 10 | ### Added 11 | - Unary `NOT` 12 | - `NOT LIKE` 13 | 14 | ### Fixed 15 | - Fix LIKE operator not matching entire expression 16 | 17 | ## [0.2.0] - 2018-12-31 18 | ### Added 19 | - Reliable parser and full set of basic SQL functionality except joins 20 | 21 | [Unreleased]: https://github.com/cswinter/LocustDB/compare/v0.2.1...HEAD 22 | [0.2.1]: https://github.com/cswinter/LocustDB/compare/v0.2.0...v0.2.1 23 | [0.2.0]: https://github.com/cswinter/LocustDB/compare/v0.1.0-alpha...v0.2.0 24 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to LocustDB 2 | 3 | Thank you for your interest in contributing to LocustDB! Good places to start are [this blog post][blogpost], the [README.md][readme] and the [issue tracker][issues]. I can also recommend [this blogpost][diving-into], which describes a general strategy for making changes to a codebase you've never worked with before. 4 | 5 | If you have any questions about LocustDB, feel free to ask on [gitter][gitter]. 6 | 7 | ## Working on issues 8 | If you're looking for somewhere to start, check out the [good first issue tag][good-first-issue]. 9 | 10 | Feel free to ask for guidelines on how to tackle a problem on [gitter][gitter] or open a [new issue][new-issue]. 
11 | This is especially important if you want to add new features to LocustDB or make large changes to the already existing code base. LocustDB's core developers will do their best to provide help. 12 | 13 | Various issues have a corresponding TODO(#ISSUE\_NUMBER) in the relevant section of the code. 14 | 15 | If you start working on an already-filed issue, post a comment on this issue to let people know that somebody is working on it. Feel free to ask for comments if you are unsure about the solution you would like to submit. 16 | 17 | We use the "fork and pull" model [described here][development-models], where contributors push changes to their personal fork and create pull requests to bring those changes into the source repository. 18 | 19 | Your basic steps to get going: 20 | 21 | * Fork LocustDB and create a branch from master for the issue you are working on. 22 | * Please adhere to the code style that you see around the location you are working on. 23 | * [Commit as you go][githelp]. 24 | * Include tests that cover all non-trivial code. Usually the easiest way of doing that is to add a new integration test to `tests/query_tests.rs`. 25 | * If you are adding a performance optimisation, make sure there is a benchmark case in `benches/basic.rs` that covers the optimisation. 26 | * Make sure `cargo test` passes and the benchmarks can still be compiled (`cargo check --bench basic`). Running clippy and rustfmt is encouraged, but the existing code is not currently compliant. 27 | * Push your commits to GitHub and create a pull request against LocustDB's `master` branch. 
28 | 29 | [githelp]: https://dont-be-afraid-to-commit.readthedocs.io/en/latest/git/commandlinegit.html 30 | [development-models]: https://help.github.com/articles/about-collaborative-development-models/ 31 | [gitter]: https://gitter.im/LocustDB/Lobby 32 | [issues]: https://github.com/cswinter/LocustDB/issues 33 | [new-issue]: https://github.com/cswinter/LocustDB/issues/new 34 | [good-first-issue]: https://github.com/cswinter/LocustDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22 35 | [blogpost]: https://clemenswinter.com/2018/07/09/how-to-analyze-billions-of-records-per-second-on-a-single-desktop-pc/ 36 | [readme]: https://github.com/cswinter/LocustDB/blob/master/README.md 37 | [diving-into]: http://www.lihaoyi.com/post/DivingIntoOtherPeoplesCode.html 38 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Clemens Winter "] 3 | description = "Embeddable high-performance analytics database." 
4 | edition = "2021" 5 | license-file = "LICENSE" 6 | name = "locustdb" 7 | version = "0.5.6" 8 | repository = "https://github.com/cswinter/LocustDB" 9 | readme = "README.md" 10 | 11 | [lib] 12 | crate-type = ["cdylib", "rlib"] 13 | 14 | [dependencies] 15 | actix-cors = "0.7" 16 | actix-web = "4" 17 | aliasmethod = "0.4" 18 | azure_core = { version = "0.19.0", features = ["enable_reqwest_rustls"], default-features = false } 19 | azure_identity = { version = "0.19.0", features = ["enable_reqwest_rustls"], default-features = false } 20 | azure_storage = { version = "0.19.0", features = ["enable_reqwest_rustls"], default-features = false } 21 | azure_storage_blobs = { version = "0.19.0", features = ["enable_reqwest_rustls"], default-features = false } 22 | bit-vec = "0.8" 23 | blake2 = "0.10" 24 | byteorder = "1.5" 25 | capnp = "0.21" 26 | chrono = "0.4" 27 | clap = "4.5" 28 | csv = "1" 29 | datasize = "0.2.15" 30 | env_logger = "0.11" 31 | flate2 = "1.1" 32 | fnv = "1.0" 33 | futures = "0.3" 34 | google-cloud-storage = { version = "0.24", features = ["rustls-tls", "auth"], default-features = false } 35 | hex = "0.4" 36 | itertools = "0.14" 37 | lazy_static = "1.5.0" 38 | locustdb-compression-utils = {path = "./locustdb-compression-utils", version = "0.2.0"} 39 | locustdb-derive = {path = "./locustdb-derive", version = "0.2.2"} 40 | locustdb-serialization = {path = "./locustdb-serialization", version = "0.2.2"} 41 | log = {features = ["max_level_trace", "release_max_level_debug"], version = "0.4"} 42 | lru = "0.14" 43 | lz4_flex = { version = "0.11" } 44 | num = "0.4" 45 | num_cpus = "1.16" 46 | ordered-float = { version = "5", features = ["serde"] } 47 | pco = "0.4.2" 48 | prometheus = "0.14.0" 49 | pyo3 = {features = ["extension-module"], version = "0.24.2", optional = true} 50 | rand = "0.9" 51 | rand_xorshift = "0.4.0" 52 | random_word = { version = "0.5", features = ["en", "fr", "de"] } 53 | regex = "1" 54 | reqwest = { version = "0.12", default-features = false, 
features = ["json", "rustls-tls"] } 55 | rustyline = "15.0" 56 | scoped_threadpool = "0.1" 57 | seahash = "4.1" 58 | serde = { version = "1.0", features = ["derive"] } 59 | serde_json = "1.0" 60 | sha2 = "0.10" 61 | sqlparser = "0.56" 62 | std-semaphore = "0.1" 63 | structopt = "0.3" 64 | systemstat = "0.2.4" 65 | tempfile = "3" 66 | tera = "1" 67 | thiserror = "2.0.12" 68 | threadpool = "1.8.1" 69 | time = "0.3" 70 | tokio = { version = "1", features = ["full"] } 71 | tokio-util = "0.7" 72 | unicode-segmentation = "1" 73 | unicode-width = "0.2" 74 | walkdir = "2.5.0" 75 | 76 | [dev-dependencies] 77 | pretty_assertions = "1" 78 | 79 | [features] 80 | default = [] 81 | python = ["pyo3"] 82 | 83 | 84 | [profile.release] 85 | codegen-units = 1 86 | debug = true 87 | lto = true 88 | opt-level = 3 89 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Dropbox, Inc. 2 | Copyright (c) 2018 Clemens Winter 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | -------------------------------------------------------------------------------- /bench_results/anomaly0: -------------------------------------------------------------------------------- 1 | batchsize 2^16 2 | 3 | running 8 tests 4 | test count_by_passenger_count ... bench: 11,674,768 ns/iter (+/- 2,082,851) 5 | test count_group_by_vendor_id_and_passenger_count ... 
bench: 878,825,549 ns/iter (+/- 11,190,933) 6 | test q1_count_cab_type ... bench: 13,311,756 ns/iter (+/- 3,939,847) 7 | test q2_avg_total_amount_by_passenger_count ... bench: 247,310,599 ns/iter (+/- 10,812,396) 8 | test q3_count_passenger_count_pickup_year ... bench: 1,491,088,185 ns/iter (+/- 42,808,753) 9 | test select_passenger_count_sparse_filter ... bench: 1,258,719,035 ns/iter (+/- 11,086,208) 10 | test select_star_limit_10000 ... bench: 374,562,778 ns/iter (+/- 25,321,861) 11 | test sum_total_amt_group_by_passenger_count ... bench: 245,749,678 ns/iter (+/- 775,610) 12 | -------------------------------------------------------------------------------- /bench_results/anomaly1: -------------------------------------------------------------------------------- 1 | batchsize 2^10 2 | 3 | running 8 tests 4 | test count_by_passenger_count ... bench: 66,711,448 ns/iter (+/- 8,744,116) 5 | test count_by_vendor_id_and_passenger_count ... bench: 118,967,104 ns/iter (+/- 6,259,125) 6 | test q1_count_cab_type ... bench: 67,642,275 ns/iter (+/- 4,876,545) 7 | test q2_avg_total_amount_by_passenger_count ... bench: 89,427,755 ns/iter (+/- 6,275,577) 8 | test q3_count_by_passenger_count_pickup_year ... bench: 6,359,979,005 ns/iter (+/- 310,230,539) 9 | test select_passenger_count_sparse_filter ... bench: 203,733,119 ns/iter (+/- 6,924,933) 10 | test select_star_limit_10000 ... bench: 889,329,736 ns/iter (+/- 16,377,040) 11 | test sum_total_amt_by_passenger_count ... bench: 78,117,183 ns/iter (+/- 11,485,842) 12 | -------------------------------------------------------------------------------- /bench_results/anomaly2: -------------------------------------------------------------------------------- 1 | batchsize 2^20 2 | 3 | running 8 tests 4 | test count_by_passenger_count ... bench: 12,416,697 ns/iter (+/- 1,290,632) 5 | test count_by_vendor_id_and_passenger_count ... bench: 233,845,898 ns/iter (+/- 12,554,651) 6 | test q1_count_cab_type ... 
bench: 13,473,580 ns/iter (+/- 909,745) 7 | test q2_avg_total_amount_by_passenger_count ... bench: 74,571,471 ns/iter (+/- 4,250,941) 8 | test q3_count_by_passenger_count_pickup_year ... bench: 342,046,676 ns/iter (+/- 25,501,960) 9 | test select_passenger_count_sparse_filter ... bench: 242,676,708 ns/iter (+/- 11,081,106) 10 | test select_star_limit_10000 ... bench: 1,595,645,667 ns/iter (+/- 114,784,124) 11 | test sum_total_amt_by_passenger_count ... bench: 71,744,061 ns/iter (+/- 3,311,822) 12 | -------------------------------------------------------------------------------- /bench_results/baseline0: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 8 tests 22 | test count_by_passenger_count ... bench: 26,938,870 ns/iter (+/- 2,675,577) 23 | test count_by_vendor_id_and_passenger_count ... bench: 550,722,493 ns/iter (+/- 16,909,930) 24 | test q1_count_cab_type ... bench: 30,656,683 ns/iter (+/- 2,475,807) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 190,362,991 ns/iter (+/- 4,582,829) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 828,067,692 ns/iter (+/- 29,823,670) 27 | test select_passenger_count_sparse_filter ... bench: 555,553,199 ns/iter (+/- 59,422,977) 28 | test select_star_limit_10000 ... bench: 972,255,085 ns/iter (+/- 25,239,661) 29 | test sum_total_amt_by_passenger_count ... 
bench: 184,066,713 ns/iter (+/- 6,177,480) 30 | 31 | test result: ok. 0 passed; 0 failed; 0 ignored; 8 measured; 0 filtered out 32 | 33 | -------------------------------------------------------------------------------- /bench_results/baseline1: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 8 tests 22 | test count_by_passenger_count ... bench: 26,070,934 ns/iter (+/- 1,594,412) 23 | test count_by_vendor_id_and_passenger_count ... bench: 499,509,746 ns/iter (+/- 17,562,185) 24 | test q1_count_cab_type ... bench: 29,879,445 ns/iter (+/- 1,041,851) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 175,384,548 ns/iter (+/- 4,727,845) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 783,307,490 ns/iter (+/- 24,737,787) 27 | test select_passenger_count_sparse_filter ... bench: 538,106,785 ns/iter (+/- 10,699,233) 28 | test select_star_limit_10000 ... bench: 927,526,976 ns/iter (+/- 20,292,403) 29 | test sum_total_amt_by_passenger_count ... bench: 168,620,796 ns/iter (+/- 5,159,333) 30 | 31 | test result: ok. 
0 passed; 0 failed; 0 ignored; 8 measured; 0 filtered out 32 | 33 | -------------------------------------------------------------------------------- /bench_results/baseline10: -------------------------------------------------------------------------------- 1 | 2 | running 5 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test engine::vector_op::top_n::tests::test_heap_replace ... ignored 5 | test syntax::parser::tests::test_last_hour ... ignored 6 | test syntax::parser::tests::test_select_star ... ignored 7 | test syntax::parser::tests::test_to_year ... ignored 8 | 9 | test result: ok. 0 passed; 0 failed; 5 ignored; 0 measured; 0 filtered out 10 | 11 | 12 | running 0 tests 13 | 14 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 15 | 16 | 17 | running 0 tests 18 | 19 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 20 | 21 | 22 | running 13 tests 23 | test count_by_passenger_count ... bench: 15,627,532 ns/iter (+/- 655,985) 24 | test count_by_vendor_id_and_passenger_count ... bench: 71,846,441 ns/iter (+/- 2,994,324) 25 | test q1_count_cab_type ... bench: 18,137,258 ns/iter (+/- 682,612) 26 | test q2_avg_total_amount_by_passenger_count ... bench: 44,917,954 ns/iter (+/- 3,062,919) 27 | test q3_count_by_passenger_count_pickup_year ... bench: 60,057,811 ns/iter (+/- 1,042,005) 28 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 203,147,333 ns/iter (+/- 4,938,887) 29 | test q5_sparse_filter ... bench: 89,310,448 ns/iter (+/- 3,107,625) 30 | test q6_top_n ... bench: 28,955,535 ns/iter (+/- 2,397,557) 31 | test q7_hashmap_grouping ... bench: 387,756,233 ns/iter (+/- 7,447,014) 32 | test q8_group_by_trip_id ... ignored 33 | test select_passenger_count_sparse_filter ... bench: 247,671,658 ns/iter (+/- 3,402,559) 34 | test select_star_limit_10000 ... ignored 35 | test sum_total_amt_by_passenger_count ... 
bench: 40,696,847 ns/iter (+/- 1,339,187) 36 | 37 | test result: ok. 0 passed; 0 failed; 2 ignored; 11 measured; 0 filtered out 38 | 39 | -------------------------------------------------------------------------------- /bench_results/baseline11: -------------------------------------------------------------------------------- 1 | 2 | running 5 tests 3 | test engine::vector_op::subpartition::tests::test_multipass_grouping ... ignored 4 | test engine::vector_op::top_n::tests::test_heap_replace ... ignored 5 | test syntax::parser::tests::test_last_hour ... ignored 6 | test syntax::parser::tests::test_select_star ... ignored 7 | test syntax::parser::tests::test_to_year ... ignored 8 | 9 | test result: ok. 0 passed; 0 failed; 5 ignored; 0 measured; 0 filtered out 10 | 11 | 12 | running 0 tests 13 | 14 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 15 | 16 | 17 | running 0 tests 18 | 19 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 20 | 21 | 22 | running 0 tests 23 | 24 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 25 | 26 | 27 | running 13 tests 28 | test count_by_passenger_count ... bench: 18,294,162 ns/iter (+/- 2,496,582) 29 | test count_by_vendor_id_and_passenger_count ... bench: 76,762,635 ns/iter (+/- 3,339,295) 30 | test q1_count_cab_type ... bench: 20,776,009 ns/iter (+/- 3,198,444) 31 | test q2_avg_total_amount_by_passenger_count ... bench: 49,578,845 ns/iter (+/- 3,293,207) 32 | test q3_count_by_passenger_count_pickup_year ... bench: 63,038,754 ns/iter (+/- 3,402,668) 33 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 208,468,489 ns/iter (+/- 3,078,098) 34 | test q5_sparse_filter ... bench: 99,581,002 ns/iter (+/- 5,713,491) 35 | test q6_top_n ... bench: 31,527,822 ns/iter (+/- 3,030,209) 36 | test q7_hashmap_grouping ... bench: 311,869,086 ns/iter (+/- 6,209,690) 37 | test q8_group_by_trip_id ... 
ignored 38 | test select_passenger_count_sparse_filter ... bench: 255,659,998 ns/iter (+/- 6,248,932) 39 | test select_star_limit_10000 ... ignored 40 | test sum_total_amt_by_passenger_count ... bench: 44,787,624 ns/iter (+/- 3,711,254) 41 | 42 | test result: ok. 0 passed; 0 failed; 2 ignored; 11 measured; 0 filtered out 43 | 44 | -------------------------------------------------------------------------------- /bench_results/baseline12: -------------------------------------------------------------------------------- 1 | 2 | running 5 tests 3 | test engine::vector_op::subpartition::tests::test_multipass_grouping ... ignored 4 | test engine::vector_op::top_n::tests::test_heap_replace ... ignored 5 | test syntax::parser::tests::test_last_hour ... ignored 6 | test syntax::parser::tests::test_select_star ... ignored 7 | test syntax::parser::tests::test_to_year ... ignored 8 | 9 | test result: ok. 0 passed; 0 failed; 5 ignored; 0 measured; 0 filtered out 10 | 11 | 12 | running 0 tests 13 | 14 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 15 | 16 | 17 | running 0 tests 18 | 19 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 20 | 21 | 22 | running 0 tests 23 | 24 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 25 | 26 | 27 | running 13 tests 28 | test count_by_passenger_count ... bench: 16,129,780 ns/iter (+/- 739,944) 29 | test count_by_vendor_id_and_passenger_count ... bench: 71,512,490 ns/iter (+/- 1,099,241) 30 | test q1_count_cab_type ... bench: 18,421,078 ns/iter (+/- 609,591) 31 | test q2_avg_total_amount_by_passenger_count ... bench: 45,844,627 ns/iter (+/- 792,661) 32 | test q3_count_by_passenger_count_pickup_year ... bench: 60,440,313 ns/iter (+/- 1,171,763) 33 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 199,199,110 ns/iter (+/- 2,615,830) 34 | test q5_sparse_filter ... bench: 81,682,932 ns/iter (+/- 1,575,449) 35 | test q6_top_n ... 
bench: 22,215,360 ns/iter (+/- 984,098) 36 | test q7_hashmap_grouping ... bench: 299,474,974 ns/iter (+/- 9,163,364) 37 | test q8_group_by_trip_id ... ignored 38 | test select_passenger_count_sparse_filter ... bench: 229,560,195 ns/iter (+/- 7,530,226) 39 | test select_star_limit_10000 ... ignored 40 | test sum_total_amt_by_passenger_count ... bench: 41,712,664 ns/iter (+/- 765,522) 41 | 42 | test result: ok. 0 passed; 0 failed; 2 ignored; 11 measured; 0 filtered out 43 | 44 | -------------------------------------------------------------------------------- /bench_results/baseline2: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 8 tests 22 | test count_by_passenger_count ... bench: 27,406,000 ns/iter (+/- 6,282,199) 23 | test count_by_vendor_id_and_passenger_count ... bench: 496,478,007 ns/iter (+/- 16,991,732) 24 | test q1_count_cab_type ... bench: 29,601,765 ns/iter (+/- 1,532,514) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 126,378,488 ns/iter (+/- 4,177,675) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 782,609,520 ns/iter (+/- 30,886,586) 27 | test select_passenger_count_sparse_filter ... bench: 535,309,369 ns/iter (+/- 11,938,554) 28 | test select_star_limit_10000 ... bench: 918,701,430 ns/iter (+/- 20,417,984) 29 | test sum_total_amt_by_passenger_count ... 
bench: 116,583,969 ns/iter (+/- 2,898,219) 30 | 31 | test result: ok. 0 passed; 0 failed; 0 ignored; 8 measured; 0 filtered out 32 | 33 | -------------------------------------------------------------------------------- /bench_results/baseline3: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 8 tests 22 | test count_by_passenger_count ... bench: 25,243,860 ns/iter (+/- 1,027,409) 23 | test count_by_vendor_id_and_passenger_count ... bench: 500,672,878 ns/iter (+/- 17,897,606) 24 | test q1_count_cab_type ... bench: 28,203,723 ns/iter (+/- 2,043,693) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 129,087,699 ns/iter (+/- 4,483,727) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 786,015,826 ns/iter (+/- 33,239,775) 27 | test select_passenger_count_sparse_filter ... bench: 529,497,008 ns/iter (+/- 11,370,622) 28 | test select_star_limit_10000 ... bench: 920,571,002 ns/iter (+/- 25,253,936) 29 | test sum_total_amt_by_passenger_count ... bench: 120,091,337 ns/iter (+/- 3,841,109) 30 | 31 | test result: ok. 
0 passed; 0 failed; 0 ignored; 8 measured; 0 filtered out 32 | 33 | -------------------------------------------------------------------------------- /bench_results/baseline4: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 8 tests 22 | test count_by_passenger_count ... bench: 27,511,249 ns/iter (+/- 2,073,721) 23 | test count_by_vendor_id_and_passenger_count ... bench: 83,954,577 ns/iter (+/- 3,050,063) 24 | test q1_count_cab_type ... bench: 30,058,291 ns/iter (+/- 1,549,527) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 63,421,217 ns/iter (+/- 1,623,546) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 305,009,611 ns/iter (+/- 9,044,703) 27 | test select_passenger_count_sparse_filter ... bench: 375,427,738 ns/iter (+/- 9,443,649) 28 | test select_star_limit_10000 ... bench: 961,663,022 ns/iter (+/- 24,624,939) 29 | test sum_total_amt_by_passenger_count ... bench: 50,217,733 ns/iter (+/- 3,677,470) 30 | 31 | test result: ok. 0 passed; 0 failed; 0 ignored; 8 measured; 0 filtered out 32 | 33 | -------------------------------------------------------------------------------- /bench_results/baseline5: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... 
ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 9 tests 22 | test count_by_passenger_count ... bench: 29,199,064 ns/iter (+/- 1,034,507) 23 | test count_by_vendor_id_and_passenger_count ... bench: 89,481,240 ns/iter (+/- 3,335,758) 24 | test q1_count_cab_type ... bench: 31,903,802 ns/iter (+/- 1,176,215) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 67,294,496 ns/iter (+/- 1,043,722) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 318,842,838 ns/iter (+/- 6,398,757) 27 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 760,724,705 ns/iter (+/- 34,517,497) 28 | test select_passenger_count_sparse_filter ... bench: 390,120,439 ns/iter (+/- 7,071,757) 29 | test select_star_limit_10000 ... bench: 977,810,685 ns/iter (+/- 24,288,318) 30 | test sum_total_amt_by_passenger_count ... bench: 51,671,627 ns/iter (+/- 1,322,935) 31 | 32 | test result: ok. 0 passed; 0 failed; 0 ignored; 9 measured; 0 filtered out 33 | 34 | -------------------------------------------------------------------------------- /bench_results/baseline6: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 
0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 9 tests 22 | test count_by_passenger_count ... bench: 15,724,438 ns/iter (+/- 1,072,153) 23 | test count_by_vendor_id_and_passenger_count ... bench: 72,069,173 ns/iter (+/- 2,089,003) 24 | test q1_count_cab_type ... bench: 18,288,624 ns/iter (+/- 1,001,521) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 44,159,055 ns/iter (+/- 2,239,541) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 291,800,859 ns/iter (+/- 6,616,003) 27 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 717,384,932 ns/iter (+/- 17,664,140) 28 | test select_passenger_count_sparse_filter ... bench: 367,080,147 ns/iter (+/- 9,438,398) 29 | test select_star_limit_10000 ... bench: 939,382,492 ns/iter (+/- 28,662,138) 30 | test sum_total_amt_by_passenger_count ... bench: 39,902,086 ns/iter (+/- 1,238,907) 31 | 32 | test result: ok. 0 passed; 0 failed; 0 ignored; 9 measured; 0 filtered out 33 | 34 | -------------------------------------------------------------------------------- /bench_results/baseline7: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 
0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 9 tests 22 | test count_by_passenger_count ... bench: 15,633,172 ns/iter (+/- 1,052,019) 23 | test count_by_vendor_id_and_passenger_count ... bench: 71,932,129 ns/iter (+/- 1,938,891) 24 | test q1_count_cab_type ... bench: 18,194,617 ns/iter (+/- 689,133) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 45,237,507 ns/iter (+/- 1,201,930) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 59,444,430 ns/iter (+/- 1,278,476) 27 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 202,641,239 ns/iter (+/- 5,968,827) 28 | test select_passenger_count_sparse_filter ... bench: 366,709,186 ns/iter (+/- 4,443,679) 29 | test select_star_limit_10000 ... ignored 30 | test sum_total_amt_by_passenger_count ... bench: 41,048,125 ns/iter (+/- 2,398,789) 31 | 32 | test result: ok. 0 passed; 0 failed; 1 ignored; 8 measured; 0 filtered out 33 | 34 | -------------------------------------------------------------------------------- /bench_results/baseline8: -------------------------------------------------------------------------------- 1 | 2 | running 4 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test syntax::parser::tests::test_last_hour ... ignored 5 | test syntax::parser::tests::test_select_star ... ignored 6 | test syntax::parser::tests::test_to_year ... ignored 7 | 8 | test result: ok. 0 passed; 0 failed; 4 ignored; 0 measured; 0 filtered out 9 | 10 | 11 | running 0 tests 12 | 13 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 14 | 15 | 16 | running 0 tests 17 | 18 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 19 | 20 | 21 | running 12 tests 22 | test count_by_passenger_count ... bench: 15,705,040 ns/iter (+/- 784,971) 23 | test count_by_vendor_id_and_passenger_count ... bench: 72,046,240 ns/iter (+/- 2,547,455) 24 | test q1_count_cab_type ... 
bench: 18,197,313 ns/iter (+/- 927,398) 25 | test q2_avg_total_amount_by_passenger_count ... bench: 46,106,829 ns/iter (+/- 4,138,488) 26 | test q3_count_by_passenger_count_pickup_year ... bench: 60,239,773 ns/iter (+/- 1,136,396) 27 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 203,058,966 ns/iter (+/- 3,650,031) 28 | test q5_sparse_filter ... bench: 405,593,372 ns/iter (+/- 5,204,887) 29 | test q6_top_n ... ignored 30 | test q7_group_by_trip_id ... ignored 31 | test select_passenger_count_sparse_filter ... bench: 367,930,764 ns/iter (+/- 9,318,326) 32 | test select_star_limit_10000 ... ignored 33 | test sum_total_amt_by_passenger_count ... bench: 41,696,022 ns/iter (+/- 10,565,047) 34 | 35 | test result: ok. 0 passed; 0 failed; 3 ignored; 9 measured; 0 filtered out 36 | 37 | -------------------------------------------------------------------------------- /bench_results/baseline9: -------------------------------------------------------------------------------- 1 | 2 | running 5 tests 3 | test engine::batch_merging::tests::test_multipass_grouping ... ignored 4 | test engine::vector_op::top_n::tests::test_heap_replace ... ignored 5 | test syntax::parser::tests::test_last_hour ... ignored 6 | test syntax::parser::tests::test_select_star ... ignored 7 | test syntax::parser::tests::test_to_year ... ignored 8 | 9 | test result: ok. 0 passed; 0 failed; 5 ignored; 0 measured; 0 filtered out 10 | 11 | 12 | running 0 tests 13 | 14 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 15 | 16 | 17 | running 0 tests 18 | 19 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 20 | 21 | 22 | running 12 tests 23 | test count_by_passenger_count ... bench: 16,737,115 ns/iter (+/- 978,516) 24 | test count_by_vendor_id_and_passenger_count ... bench: 77,106,421 ns/iter (+/- 8,522,187) 25 | test q1_count_cab_type ... bench: 19,302,662 ns/iter (+/- 1,208,816) 26 | test q2_avg_total_amount_by_passenger_count ... 
bench: 47,660,235 ns/iter (+/- 1,403,601) 27 | test q3_count_by_passenger_count_pickup_year ... bench: 53,953,279 ns/iter (+/- 2,222,464) 28 | test q4_count_by_passenger_count_pickup_year_trip_distance ... bench: 215,077,493 ns/iter (+/- 5,359,731) 29 | test q5_sparse_filter ... bench: 95,569,850 ns/iter (+/- 3,434,476) 30 | test q6_top_n ... bench: 30,493,132 ns/iter (+/- 2,212,544) 31 | test q7_group_by_trip_id ... ignored 32 | test select_passenger_count_sparse_filter ... bench: 266,626,179 ns/iter (+/- 7,785,819) 33 | test select_star_limit_10000 ... ignored 34 | test sum_total_amt_by_passenger_count ... bench: 43,133,618 ns/iter (+/- 2,111,243) 35 | 36 | test result: ok. 0 passed; 0 failed; 2 ignored; 10 measured; 0 filtered out 37 | 38 | -------------------------------------------------------------------------------- /bench_results/latest: -------------------------------------------------------------------------------- 1 | 2 | running 7 tests 3 | test engine::vector_op::subpartition::tests::test_multipass_grouping ... ignored 4 | test engine::vector_op::top_n::tests::test_heap_replace ... ignored 5 | test ingest::alias_method_fork::test_new_alias_table ... ignored 6 | test mem_store::codec::tests::test_ensure_property ... ignored 7 | test syntax::parser::tests::test_select_star ... ignored 8 | test syntax::parser::tests::test_to_year ... ignored 9 | test unit_fmt::tests::test_format ... ignored 10 | 11 | test result: ok. 0 passed; 0 failed; 7 ignored; 0 measured; 0 filtered out 12 | 13 | 14 | running 0 tests 15 | 16 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 17 | 18 | 19 | running 0 tests 20 | 21 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 22 | 23 | 24 | running 0 tests 25 | 26 | test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out 27 | 28 | 29 | running 10 tests 30 | test avg_total_amount_by_passenger_count ... 
bench: 53,240,800 ns/iter (+/- 1,049,080) 31 | test count_by_passenger_count ... bench: 16,645,900 ns/iter (+/- 768,500) 32 | test count_by_passenger_count_pickup_year_trip_distance ... bench: 22,535,800 ns/iter (+/- 626,580) 33 | test count_by_vendor_id_and_passenger_count ... bench: 77,903,100 ns/iter (+/- 1,577,070) 34 | test count_cab_type ... bench: 19,905,100 ns/iter (+/- 476,580) 35 | test group_by_trip_id ... bench: 11,085,700 ns/iter (+/- 1,029,360) 36 | test hashmap_grouping ... bench: 38,412,200 ns/iter (+/- 1,348,840) 37 | test sparse_filter ... bench: 86,380,900 ns/iter (+/- 1,570,090) 38 | test sum_total_amt_by_passenger_count ... bench: 50,006,600 ns/iter (+/- 1,502,090) 39 | test top_n ... bench: 17,454,800 ns/iter (+/- 622,920) 40 | 41 | test result: ok. 0 passed; 0 failed; 0 ignored; 10 measured; 0 filtered out 42 | 43 | -------------------------------------------------------------------------------- /export.py: -------------------------------------------------------------------------------- 1 | from locustdb import Client 2 | import numpy as np 3 | import argparse 4 | 5 | # parse argument (single positinal arge for column name) 6 | parser = argparse.ArgumentParser(description='Query locustDB') 7 | parser.add_argument('column_name', type=str, help='column name to query') 8 | args = parser.parse_args() 9 | colname = args.column_name 10 | 11 | client = Client("http://localhost:8080") 12 | results = client.query(f'SELECT "{colname}" FROM "avid-wildflower-3446"') 13 | 14 | # replace brakets with underscores 15 | scolname = colname.replace('[', '_').replace(']', '_') 16 | 17 | with open(scolname + ".txt", "w") as f: 18 | np.savetxt(f, [(r if r is not None else np.nan) for r in results[colname]]) 19 | -------------------------------------------------------------------------------- /float_values.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cswinter/LocustDB/016efd84bbae9781c93ecffda63422e5fefb8e93/float_values.bin -------------------------------------------------------------------------------- /git_hooks/README.md: -------------------------------------------------------------------------------- 1 | Git hook for running tests and compiling benchmark before pushing. 2 | 3 | Activate by linking to the `pre-push` script from `.git/hooks`: 4 | 5 | ```bash 6 | ln -s git_hooks/pre-push .git/hooks/pre-push 7 | ``` 8 | 9 | If you are using git bash on Windows and can't use symbolic links, and you may want to just copy the file instead: 10 | 11 | ```bash 12 | cp git_hooks/pre-push .git/hooks/pre-push 13 | ``` 14 | -------------------------------------------------------------------------------- /git_hooks/pre-push: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu 3 | 4 | check_char='\xE2\x9C\x93' 5 | cross_char='\xE2\x9D\x8C' 6 | green='\033[0;32m' 7 | red='\033[0;31m' 8 | nc='\033[0m' 9 | check="$green$check_char$nc" 10 | cross="$red$cross_char$nc" 11 | errors=0 12 | 13 | 14 | ## Require_clean_work_tree ## 15 | # Update the index 16 | git update-index -q --ignore-submodules --refresh 17 | err=0 18 | 19 | # Disallow unstaged changes in the working tree 20 | if ! git diff-files --quiet --ignore-submodules -- 21 | then 22 | echo -e "you have unstaged changes. $cross" 23 | git diff-files --name-status -r --ignore-submodules -- >&2 24 | err=1 25 | fi 26 | 27 | # Disallow uncommitted changes in the index 28 | if ! git diff-index --cached --quiet HEAD --ignore-submodules -- 29 | then 30 | echo -e "your index contains uncommitted changes. $cross" 31 | git diff-index --cached --name-status -r --ignore-submodules HEAD -- >&2 32 | err=1 33 | fi 34 | 35 | if [ $err = 1 ] 36 | then 37 | echo -e "Please commit or stash them." 38 | exit 1 39 | fi 40 | 41 | : ' 42 | echo -n "Checking formatting... 
" 43 | diff=$(cargo fmt -- --write-mode diff) 44 | stripped_diff=$(echo "$diff" | sed -e '/^Diff of/d' -e '/^$/d') 45 | 46 | if [ -z "$stripped_diff" ]; then 47 | echo -e "$check" 48 | else 49 | echo -e "$cross" 50 | echo "$diff" 51 | errors=1 52 | fi 53 | ' 54 | 55 | echo -n "Running tests... " 56 | if result=$(cargo +nightly test --color always 2>&1); then 57 | echo -e "$check" 58 | else 59 | echo -e "$cross" 60 | echo "$result" 61 | errors=1 62 | fi 63 | 64 | echo -n "Compiling benchmarks... " 65 | if result=$(cargo +nightly check --bench basic --color always 2>&1); then 66 | echo -e "$check" 67 | else 68 | echo -e "$cross" 69 | echo "$result" 70 | errors=1 71 | fi 72 | 73 | 74 | if [ "$errors" != 0 ]; then 75 | echo "Failed" 76 | exit 1 77 | else 78 | echo "OK" 79 | fi 80 | -------------------------------------------------------------------------------- /locustdb-client/.appveyor.yml: -------------------------------------------------------------------------------- 1 | install: 2 | - appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe 3 | - if not defined RUSTFLAGS rustup-init.exe -y --default-host x86_64-pc-windows-msvc --default-toolchain nightly 4 | - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin 5 | - rustc -V 6 | - cargo -V 7 | 8 | build: false 9 | 10 | test_script: 11 | - cargo test --locked 12 | -------------------------------------------------------------------------------- /locustdb-client/.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "08:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /locustdb-client/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | bin/ 5 | pkg/ 6 | wasm-pack.log 
7 | -------------------------------------------------------------------------------- /locustdb-client/.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | sudo: false 3 | 4 | cache: cargo 5 | 6 | matrix: 7 | include: 8 | 9 | # Builds with wasm-pack. 10 | - rust: beta 11 | env: RUST_BACKTRACE=1 12 | addons: 13 | firefox: latest 14 | chrome: stable 15 | before_script: 16 | - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) 17 | - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) 18 | - cargo install-update -a 19 | - curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh -s -- -f 20 | script: 21 | - cargo generate --git . --name testing 22 | # Having a broken Cargo.toml (in that it has curlies in fields) anywhere 23 | # in any of our parent dirs is problematic. 24 | - mv Cargo.toml Cargo.toml.tmpl 25 | - cd testing 26 | - wasm-pack build 27 | - wasm-pack test --chrome --firefox --headless 28 | 29 | # Builds on nightly. 30 | - rust: nightly 31 | env: RUST_BACKTRACE=1 32 | before_script: 33 | - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) 34 | - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) 35 | - cargo install-update -a 36 | - rustup target add wasm32-unknown-unknown 37 | script: 38 | - cargo generate --git . 
--name testing 39 | - mv Cargo.toml Cargo.toml.tmpl 40 | - cd testing 41 | - cargo check 42 | - cargo check --target wasm32-unknown-unknown 43 | - cargo check --no-default-features 44 | - cargo check --target wasm32-unknown-unknown --no-default-features 45 | - cargo check --no-default-features --features console_error_panic_hook 46 | - cargo check --target wasm32-unknown-unknown --no-default-features --features console_error_panic_hook 47 | - cargo check --no-default-features --features "console_error_panic_hook wee_alloc" 48 | - cargo check --target wasm32-unknown-unknown --no-default-features --features "console_error_panic_hook wee_alloc" 49 | 50 | # Builds on beta. 51 | - rust: beta 52 | env: RUST_BACKTRACE=1 53 | before_script: 54 | - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) 55 | - (test -x $HOME/.cargo/bin/cargo-generate || cargo install --vers "^0.2" cargo-generate) 56 | - cargo install-update -a 57 | - rustup target add wasm32-unknown-unknown 58 | script: 59 | - cargo generate --git . --name testing 60 | - mv Cargo.toml Cargo.toml.tmpl 61 | - cd testing 62 | - cargo check 63 | - cargo check --target wasm32-unknown-unknown 64 | - cargo check --no-default-features 65 | - cargo check --target wasm32-unknown-unknown --no-default-features 66 | - cargo check --no-default-features --features console_error_panic_hook 67 | - cargo check --target wasm32-unknown-unknown --no-default-features --features console_error_panic_hook 68 | # Note: no enabling the `wee_alloc` feature here because it requires 69 | # nightly for now. 
70 | -------------------------------------------------------------------------------- /locustdb-client/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "locustdb-client" 3 | version = "0.5.5" 4 | authors = ["Clemens Winter "] 5 | edition = "2018" 6 | 7 | [lib] 8 | crate-type = ["cdylib", "rlib"] 9 | 10 | [features] 11 | default = ["console_error_panic_hook"] 12 | 13 | [dependencies] 14 | reqwest = { version = "0.12", default_features = false, features = ["json", "rustls-tls"] } 15 | serde = { version = "1.0", features = ["derive"] } 16 | serde-wasm-bindgen = "0.6.5" 17 | serde_json = "1.0" 18 | wasm-bindgen = "0.2.84" 19 | wasm-bindgen-futures = "0.4.42" 20 | locustdb-compression-utils = {path = "../locustdb-compression-utils" } 21 | locustdb-serialization = {path = "../locustdb-serialization", version = "0.2.1" } 22 | wasm-logger = "0.2.0" 23 | log = "0.4" 24 | 25 | # The `console_error_panic_hook` crate provides better debugging of panics by 26 | # logging them with `console.error`. This is great for development, but requires 27 | # all the `std::fmt` and `std::panicking` infrastructure, so isn't great for 28 | # code size when deploying. 29 | console_error_panic_hook = { version = "0.1.7", optional = true } 30 | web-sys = { version = "0.3.69", features = ["Performance", "Window"] } 31 | 32 | [dev-dependencies] 33 | wasm-bindgen-test = "0.3.34" 34 | 35 | [profile.release] 36 | # Tell `rustc` to optimize for small code size. 37 | opt-level = "s" 38 | debug = true 39 | -------------------------------------------------------------------------------- /locustdb-client/README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |

wasm-pack-template

4 | 5 | A template for kick starting a Rust and WebAssembly project using wasm-pack. 6 | 7 |

8 | Build Status 9 |

10 | 11 |

12 | Tutorial 13 | | 14 | Chat 15 |

16 | 17 | Built with 🦀🕸 by The Rust and WebAssembly Working Group 18 |
19 | 20 | ## About 21 | 22 | [**📚 Read this template tutorial! 📚**][template-docs] 23 | 24 | This template is designed for compiling Rust libraries into WebAssembly and 25 | publishing the resulting package to NPM. 26 | 27 | Be sure to check out [other `wasm-pack` tutorials online][tutorials] for other 28 | templates and usages of `wasm-pack`. 29 | 30 | [tutorials]: https://rustwasm.github.io/docs/wasm-pack/tutorials/index.html 31 | [template-docs]: https://rustwasm.github.io/docs/wasm-pack/tutorials/npm-browser-packages/index.html 32 | 33 | ## 🚴 Usage 34 | 35 | ### 🐑 Use `cargo generate` to Clone this Template 36 | 37 | [Learn more about `cargo generate` here.](https://github.com/ashleygwilliams/cargo-generate) 38 | 39 | ``` 40 | cargo generate --git https://github.com/rustwasm/wasm-pack-template.git --name my-project 41 | cd my-project 42 | ``` 43 | 44 | ### 🛠️ Build with `wasm-pack build` 45 | 46 | ``` 47 | wasm-pack build 48 | ``` 49 | 50 | ### 🔬 Test in Headless Browsers with `wasm-pack test` 51 | 52 | ``` 53 | wasm-pack test --headless --firefox 54 | ``` 55 | 56 | ### 🎁 Publish to NPM with `wasm-pack publish` 57 | 58 | ``` 59 | wasm-pack build --scope cswinter 60 | wasm-pack publish 61 | ``` 62 | 63 | ## 🔋 Batteries Included 64 | 65 | * [`wasm-bindgen`](https://github.com/rustwasm/wasm-bindgen) for communicating 66 | between WebAssembly and JavaScript. 67 | * [`console_error_panic_hook`](https://github.com/rustwasm/console_error_panic_hook) 68 | for logging panic messages to the developer console. 69 | * `LICENSE-APACHE` and `LICENSE-MIT`: most Rust projects are licensed this way, so these are included for you 70 | 71 | ## License 72 | 73 | Licensed under either of 74 | 75 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 76 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 77 | 78 | at your option. 
79 | 80 | ### Contribution 81 | 82 | Unless you explicitly state otherwise, any contribution intentionally 83 | submitted for inclusion in the work by you, as defined in the Apache-2.0 84 | license, shall be dual licensed as above, without any additional terms or 85 | conditions. 86 | -------------------------------------------------------------------------------- /locustdb-client/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wasm-pack build --scope=cswinter 4 | cd pkg 5 | npm publish --access-public 6 | -------------------------------------------------------------------------------- /locustdb-client/src/lib.rs: -------------------------------------------------------------------------------- 1 | use wasm_bindgen::prelude::*; 2 | mod client; 3 | 4 | #[wasm_bindgen] 5 | extern "C" { 6 | fn alert(s: &str); 7 | 8 | #[wasm_bindgen(js_namespace = console)] 9 | fn log(s: &str); 10 | } 11 | 12 | #[wasm_bindgen] 13 | pub fn greet() { 14 | alert("Hello, locustdb-client!"); 15 | } 16 | 17 | pub use client::Client; -------------------------------------------------------------------------------- /locustdb-client/tests/web.rs: -------------------------------------------------------------------------------- 1 | //! Test suite for the Web and headless browsers. 2 | 3 | #![cfg(target_arch = "wasm32")] 4 | 5 | extern crate wasm_bindgen_test; 6 | use wasm_bindgen_test::*; 7 | 8 | wasm_bindgen_test_configure!(run_in_browser); 9 | 10 | #[wasm_bindgen_test] 11 | fn pass() { 12 | assert_eq!(1 + 1, 2); 13 | } 14 | -------------------------------------------------------------------------------- /locustdb-compression-utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "locustdb-compression-utils" 3 | version = "0.2.0" 4 | edition = "2021" 5 | description = "Utilities for compressing and decompressing sequences used in LocustDB." 
6 | license-file = "../LICENSE" 7 | 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 9 | 10 | [dependencies] 11 | serde = { version = "1.0", features = ["derive"] } 12 | serde_json = "1.0" 13 | log = "0.4" 14 | bitbuffer = "0.10" 15 | pco = "0.2.3" 16 | 17 | [dev-dependencies] 18 | clap = { version = "4", features = ["derive"] } 19 | rand = { version = "0.8.5", features = ["small_rng"] } 20 | pretty_assertions = "1" 21 | 22 | [profile.release] 23 | debug = true 24 | 25 | [profile.dev] 26 | debug = true 27 | -------------------------------------------------------------------------------- /locustdb-compression-utils/README.md: -------------------------------------------------------------------------------- 1 | # LocustDB Compression Utils 2 | 3 | Collection of utils for compressing a series of values. 4 | 5 | ## XOR float compression 6 | 7 | One of compression algorithms implemented is a variant of the XOR float compression algorithm described in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf). The "gorilla_time" example program creates visualizations of the compression algorithm. 
You can run the visualization with: 8 | 9 | ```bash 10 | cargo run --example gorilla_time -- --verbose 11 | ``` 12 | 13 | Run the following command to see more options: 14 | 15 | ```bash 16 | cargo run --example gorilla_time -- --help 17 | ``` -------------------------------------------------------------------------------- /locustdb-compression-utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | 2 | pub mod xor_float; 3 | pub mod test_data; -------------------------------------------------------------------------------- /locustdb-compression-utils/src/xor_float/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod double; 2 | pub mod single; 3 | 4 | #[derive(Debug, PartialEq)] 5 | pub enum Error { 6 | Eof, 7 | } 8 | 9 | // Special NaN value that we use to represent NULLs in the data. 10 | // Can't use f64::from_bits because it is not a canonical NaN value. 11 | #[allow(clippy::transmute_int_to_float)] 12 | pub const NULL: f64 = unsafe { std::mem::transmute::(0x7ffa_aaaa_aaaa_aaaau64) }; -------------------------------------------------------------------------------- /locustdb-derive/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 4 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "0.7.18" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "lazy_static" 16 | version = "1.2.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" 19 | 20 | [[package]] 21 | name = "locustdb-derive" 22 | version = "0.2.2" 23 | dependencies = [ 24 | "lazy_static", 25 | "proc-macro2", 26 | "quote", 27 | "regex", 28 | "syn", 29 | ] 30 | 31 | [[package]] 32 | name = "memchr" 33 | version = "2.5.0" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 36 | 37 | [[package]] 38 | name = "proc-macro2" 39 | version = "1.0.39" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" 42 | dependencies = [ 43 | "unicode-ident", 44 | ] 45 | 46 | [[package]] 47 | name = "quote" 48 | version = "1.0.18" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" 51 | dependencies = [ 52 | "proc-macro2", 53 | ] 54 | 55 | [[package]] 56 | name = "regex" 57 | version = "1.5.5" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" 60 | dependencies = [ 61 | "aho-corasick", 62 | "memchr", 63 | "regex-syntax", 64 | ] 65 | 66 | [[package]] 67 | name = "regex-syntax" 68 | version = "0.6.26" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" 71 | 72 | 
[[package]] 73 | name = "syn" 74 | version = "1.0.96" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" 77 | dependencies = [ 78 | "proc-macro2", 79 | "quote", 80 | "unicode-ident", 81 | ] 82 | 83 | [[package]] 84 | name = "unicode-ident" 85 | version = "1.0.0" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" 88 | -------------------------------------------------------------------------------- /locustdb-derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Clemens Winter "] 3 | description = "Macros used internally by locustdb crate." 4 | edition = "2018" 5 | license = "Apache-2.0" 6 | name = "locustdb-derive" 7 | version = "0.2.2" 8 | repository = "https://github.com/cswinter/locustdb" 9 | 10 | [dependencies] 11 | lazy_static = "1.2" 12 | proc-macro2 = {version = "1.0"} 13 | quote = "1.0" 14 | regex = "1.5" 15 | syn = {features = ["full", "fold", "extra-traits"], version = "1.0"} 16 | 17 | [lib] 18 | proc-macro = true 19 | -------------------------------------------------------------------------------- /locustdb-derive/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(proc_macro_hygiene, proc_macro_diagnostic)] 2 | #![recursion_limit = "128"] 3 | extern crate proc_macro; 4 | #[macro_use] 5 | extern crate quote; 6 | extern crate syn; 7 | #[macro_use] 8 | extern crate lazy_static; 9 | extern crate regex; 10 | 11 | mod reify_types; 12 | mod enum_syntax; 13 | mod ast_builder; 14 | 15 | use self::proc_macro::TokenStream; 16 | 17 | #[proc_macro] 18 | pub fn reify_types(input: TokenStream) -> TokenStream { 19 | reify_types::reify_types(input) 20 | } 21 | 22 | #[proc_macro_derive(EnumSyntax)] 23 | pub fn enum_syntax(input: TokenStream) 
-> TokenStream { 24 | enum_syntax::enum_syntax(input) 25 | } 26 | 27 | #[proc_macro_derive(ASTBuilder, attributes(newstyle, input, internal, output, nohash))] 28 | pub fn ast_builder(input: TokenStream) -> TokenStream { ast_builder::ast_builder(input) } 29 | -------------------------------------------------------------------------------- /locustdb-serialization/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 4 4 | 5 | [[package]] 6 | name = "capnp" 7 | version = "0.21.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "b1d1b4a00e80b7c4b1a49e845365f25c9d8fd0a19c9cd8d66f68afea47b1f020" 10 | dependencies = [ 11 | "embedded-io", 12 | ] 13 | 14 | [[package]] 15 | name = "diff" 16 | version = "0.1.13" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" 19 | 20 | [[package]] 21 | name = "embedded-io" 22 | version = "0.6.1" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" 25 | 26 | [[package]] 27 | name = "locustdb-serialization" 28 | version = "0.2.2" 29 | dependencies = [ 30 | "capnp", 31 | "pretty_assertions", 32 | "serde", 33 | ] 34 | 35 | [[package]] 36 | name = "pretty_assertions" 37 | version = "1.4.0" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" 40 | dependencies = [ 41 | "diff", 42 | "yansi", 43 | ] 44 | 45 | [[package]] 46 | name = "proc-macro2" 47 | version = "1.0.81" 48 | source = "registry+https://github.com/rust-lang/crates.io-index" 49 | checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" 50 | dependencies = [ 51 | "unicode-ident", 52 
| ] 53 | 54 | [[package]] 55 | name = "quote" 56 | version = "1.0.36" 57 | source = "registry+https://github.com/rust-lang/crates.io-index" 58 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 59 | dependencies = [ 60 | "proc-macro2", 61 | ] 62 | 63 | [[package]] 64 | name = "serde" 65 | version = "1.0.198" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc" 68 | dependencies = [ 69 | "serde_derive", 70 | ] 71 | 72 | [[package]] 73 | name = "serde_derive" 74 | version = "1.0.198" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9" 77 | dependencies = [ 78 | "proc-macro2", 79 | "quote", 80 | "syn", 81 | ] 82 | 83 | [[package]] 84 | name = "syn" 85 | version = "2.0.60" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" 88 | dependencies = [ 89 | "proc-macro2", 90 | "quote", 91 | "unicode-ident", 92 | ] 93 | 94 | [[package]] 95 | name = "unicode-ident" 96 | version = "1.0.12" 97 | source = "registry+https://github.com/rust-lang/crates.io-index" 98 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 99 | 100 | [[package]] 101 | name = "yansi" 102 | version = "0.5.1" 103 | source = "registry+https://github.com/rust-lang/crates.io-index" 104 | checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" 105 | -------------------------------------------------------------------------------- /locustdb-serialization/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "locustdb-serialization" 3 | version = "0.2.2" 4 | edition = "2021" 5 | description = "Serialization formats used by LocustDB for peristent storage and client/server 
communication." 6 | license-file = "../LICENSE" 7 | repository = "https://github.com/cswinter/locustdb" 8 | 9 | [dependencies] 10 | capnp = "0.21" 11 | serde = { version = "1.0", features = ["derive"] } 12 | 13 | [dev-dependencies] 14 | pretty_assertions = "1" 15 | -------------------------------------------------------------------------------- /locustdb-serialization/README.md: -------------------------------------------------------------------------------- 1 | # LocustDB Serialization 2 | 3 | Util crate that defines Cap'n Proto schema and serialization/deserialization logic for data structures used for persistent storage and client-server communication in LocustDB. 4 | 5 | To regenerate the Cap'n Proto definitions, follow this process: 6 | 7 | 1. [Install the Cap'n Proto CLI tool][install-capnproto] 8 | 2. `cargo install capnpc` 9 | 3. `capnp compile -orust:src --src-prefix=schemas schemas/{dbmeta,partition_segment,wal_segment,api}.capnp` 10 | -------------------------------------------------------------------------------- /locustdb-serialization/schemas/api.capnp: -------------------------------------------------------------------------------- 1 | @0x88a093a148e409e4; 2 | 3 | 4 | struct QueryResponse { 5 | columns @0 :List(Column); 6 | } 7 | 8 | struct MultiQueryResponse { 9 | responses @0 :List(QueryResponse); 10 | } 11 | 12 | struct Column { 13 | name @0 :Text; 14 | 15 | data :union { 16 | f64 @1 :List(Float64); 17 | i64 @2 :List(Int64); 18 | string @3 :List(Text); 19 | mixed @4 :List(AnyVal); 20 | null @5 :UInt64; 21 | xorF64 @6 :Data; 22 | deltaEncodedI8 :group { 23 | first @7 :Int64; 24 | data @8 :List(Int8); 25 | } 26 | deltaEncodedI16 :group { 27 | first @9 :Int64; 28 | data @10 :List(Int16); 29 | } 30 | deltaEncodedI32 :group { 31 | first @11 :Int64; 32 | data @12 :List(Int32); 33 | } 34 | doubleDeltaEncodedI8 :group { 35 | first @13 :Int64; 36 | second @14 :Int64; 37 | data @15 :List(Int8); 38 | } 39 | doubleDeltaEncodedI16 :group { 40 | first @16 
:Int64; 41 | second @17 :Int64; 42 | data @18 :List(Int16); 43 | } 44 | doubleDeltaEncodedI32 :group { 45 | first @19 :Int64; 46 | second @20 :Int64; 47 | data @21 :List(Int32); 48 | } 49 | range :group { 50 | start @22 :Int64; 51 | len @23 :UInt64; 52 | step @24 :Int64; 53 | } 54 | } 55 | } 56 | 57 | struct AnyVal { 58 | union { 59 | f64 @0 :Float64; 60 | i64 @1 :Int64; 61 | string @2 :Text; 62 | null @3 :Void; 63 | } 64 | } -------------------------------------------------------------------------------- /locustdb-serialization/schemas/dbmeta.capnp: -------------------------------------------------------------------------------- 1 | @0xafa9b81d5e8e2ef5; 2 | struct DBMeta { 3 | nextWalId @0 :UInt64; 4 | partitions @1 :List(PartitionMetadata); 5 | 6 | compressedStrings @3 :Data; # [v2], deprecated in v3 in favor of column range 7 | lengthsCompressedStrings @4 :List(UInt16); # [v2], deprecated in v3 in favor of column range 8 | strings @2 :List(Text); # [v1] unused in legacy format and deprecated in new format 9 | } 10 | 11 | struct PartitionMetadata { 12 | id @0 :UInt64; 13 | tablename @1 :Text; 14 | offset @2 :UInt64; 15 | len @3 :UInt64; 16 | subpartitions @4 :List(SubpartitionMetadata); 17 | } 18 | 19 | struct SubpartitionMetadata { 20 | sizeBytes @0 :UInt64; 21 | subpartitionKey @1 :Text; 22 | # Name of the largest column in the subpartition 23 | lastColumn @5 :Text; # [v3] 24 | 25 | columns @2 :List(Text); # [v0] deprecated in favor of internedColumns 26 | internedColumns @3 :List(UInt64); # [v1] deprecated in favor of compressedInternedColumns 27 | compressedInternedColumns @4 :Data; # [v2..] 
deprecated in favor of lastColumn 28 | } -------------------------------------------------------------------------------- /locustdb-serialization/schemas/partition_segment.capnp: -------------------------------------------------------------------------------- 1 | @0xc2e3685626e6e832; 2 | 3 | struct PartitionSegment { 4 | columns @0 :List(Column); 5 | } 6 | 7 | struct Column { 8 | name @0 :Text; 9 | len @1 :UInt64; 10 | range: union { 11 | range @2 :Range; 12 | empty @3 :Void; 13 | } 14 | codec @4 :List(CodecOp); 15 | data @5 :List(DataSection); 16 | } 17 | 18 | struct Range { 19 | start @0 :Int64; 20 | end @1 :Int64; 21 | } 22 | 23 | struct CodecOp { 24 | union { 25 | add @0 :Add; 26 | delta @1 :EncodingType; 27 | toI64 @2 :EncodingType; 28 | pushDataSection @3 :UInt64; 29 | dictLookup @4 :EncodingType; 30 | lz4 @5 :LZ4; 31 | unpackStrings @6 :Void; 32 | unhexpackStrings @7 :UnhexpackStrings; 33 | nullable @8 :Void; 34 | pco @9 :Pco; 35 | } 36 | } 37 | 38 | struct DataSection { 39 | union { 40 | u8 @0 :List(UInt8); 41 | u16 @1 :List(UInt16); 42 | u32 @2 :List(UInt32); 43 | u64 @3 :List(UInt64); 44 | i64 @4 :List(Int64); 45 | null @5 :UInt64; 46 | f64 @6 :List(Float64); 47 | bitvec @7 :List(UInt8); 48 | lz4 :group { 49 | decodedBytes @8 :UInt64; 50 | bytesPerElement @9 :UInt64; 51 | data @10 :List(UInt8); 52 | } 53 | pco :group { 54 | decodedBytes @11 :UInt64; 55 | bytesPerElement @12 :UInt64; 56 | data @13 :List(UInt8); 57 | isFp32 @14 :Bool; 58 | } 59 | } 60 | } 61 | 62 | struct Add { 63 | type @0 :EncodingType; 64 | amount @1 :Int64; 65 | } 66 | 67 | struct LZ4 { 68 | type @0 :EncodingType; 69 | lenDecoded @1 :UInt64; 70 | } 71 | 72 | struct Pco { 73 | type @0 :EncodingType; 74 | lenDecoded @1 :UInt64; 75 | isFp32 @2 :Bool; 76 | } 77 | 78 | struct UnhexpackStrings { 79 | uppercase @0 :Bool; 80 | totalBytes @1 :UInt64; 81 | } 82 | 83 | enum EncodingType { 84 | u8 @0; 85 | u16 @1; 86 | u32 @2; 87 | u64 @3; 88 | i64 @4; 89 | null @5; 90 | f64 @6; 91 | bitvec @7; 92 
| } -------------------------------------------------------------------------------- /locustdb-serialization/schemas/rust.capnp: -------------------------------------------------------------------------------- 1 | # This file contains annotations that are recognized by the capnpc-rust code generator. 2 | 3 | @0x83b3c14c3c8dd083; 4 | 5 | annotation name @0xc2fe4c6d100166d0 (field, struct, enum, enumerant, union, group) :Text; 6 | # Rename something in the generated code. The value that you specify in this 7 | # annotation should follow capnp capitalization conventions. So, for example, 8 | # a struct should use CamelCase capitalization like `StructFoo`, even though 9 | # that will get translated to a `struct_foo` module in the generated Rust code. 10 | # 11 | # TODO: support annotating more kinds of things with this. 12 | 13 | annotation parentModule @0xabee386cd1450364 (file) :Text; 14 | # A Rust module path indicating where the generated code will be included. 15 | # For example, if this is set to "foo::bar" and the schema file is named 16 | # "baz.capnp", then you could include the generated code like this: 17 | # 18 | # pub mod foo { 19 | # pub mod bar { 20 | # pub mod baz_capnp { 21 | # include!(concat!(env!("OUT_DIR"), "/baz_capnp.rs")); 22 | # } 23 | # } 24 | # } -------------------------------------------------------------------------------- /locustdb-serialization/schemas/wal_segment.capnp: -------------------------------------------------------------------------------- 1 | @0xdb2bd6b471f245ca; 2 | 3 | struct WalSegment { 4 | id @0 :UInt64; 5 | data @1 :TableSegmentList; 6 | } 7 | 8 | struct TableSegmentList { 9 | data @0 :List(TableSegment); 10 | } 11 | 12 | struct TableSegment { 13 | name @0 :Text; 14 | len @1 :UInt64; 15 | columns @2 :List(Column); 16 | } 17 | 18 | struct Column { 19 | name @0 :Text; 20 | 21 | data :union { 22 | f64 @1 :List(Float64); 23 | sparseF64 :group { 24 | indices @2 :List(UInt64); 25 | values @3 :List(Float64); 26 | } 27 | i64 @4 
:List(Int64); 28 | string @5 :List(Text); 29 | empty @6 :Void; 30 | sparseI64 :group { 31 | indices @7 :List(UInt64); 32 | values @8 :List(Int64); 33 | } 34 | mixed @9 :List(AnyVal); 35 | } 36 | } 37 | 38 | struct AnyVal { 39 | value :union { 40 | f64 @0 :Float64; 41 | i64 @1 :Int64; 42 | string @2 :Text; 43 | null @3: Void; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /locustdb-serialization/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod dbmeta_capnp; 2 | pub mod partition_segment_capnp; 3 | pub mod wal_segment_capnp; 4 | pub mod api_capnp; 5 | pub mod api; 6 | pub mod event_buffer; 7 | 8 | 9 | pub fn default_reader_options() -> capnp::message::ReaderOptions { 10 | let mut options = capnp::message::ReaderOptions::new(); 11 | // Allow messages up to 8 GiB 12 | options.traversal_limit_in_words(Some(1024 * 1024 * 1024)); 13 | options 14 | } -------------------------------------------------------------------------------- /memsize_results/baseline0: -------------------------------------------------------------------------------- 1 | # Table `test` (20000000 rows, 3951.96 MiB) # 2 | rate_code_id: 19.10MiB 3 | pickup_ntaname: 40.27MiB 4 | dropoff_boroct2010: 76.32MiB 5 | dropoff_latitude: 38.17MiB 6 | store_and_fwd_flag: 38.19MiB 7 | pickup_boroct2010: 76.32MiB 8 | vendor_id: 19.10MiB 9 | pickup_ntacode: 39.56MiB 10 | total_amount: 55.42MiB 11 | pickup_boroname: 38.24MiB 12 | payment_type: 19.10MiB 13 | average_wind_speed: 38.18MiB 14 | dropoff_boroname: 38.24MiB 15 | precipitation: 34.92MiB 16 | tolls_amount: 39.80MiB 17 | dropoff_ct2010: 76.32MiB 18 | dropoff_ntacode: 39.70MiB 19 | pickup_ct2010: 76.32MiB 20 | fare_amount: 52.42MiB 21 | snowfall: 25.99MiB 22 | dropoff_borocode: 19.10MiB 23 | tip_amount: 39.17MiB 24 | pickup_latitude: 38.30MiB 25 | trip_type: 18.22MiB 26 | extra: 25.54MiB 27 | trip_id: 76.32MiB 28 | pickup_datetime: 76.32MiB 29 | 
dropoff_cdeligibil: 38.20MiB 30 | passenger_count: 19.10MiB 31 | trip_distance: 60.55MiB 32 | cab_type: 38.19MiB 33 | ehail_fee: 19.10MiB 34 | pickup_longitude: 76.32MiB 35 | dropoff_ntaname: 40.51MiB 36 | pickup_cdeligibil: 38.20MiB 37 | dropoff_nyct2010_gid: 38.18MiB 38 | dropoff_datetime: 76.32MiB 39 | pickup: 971.11MiB 40 | pickup_ctlabel: 38.17MiB 41 | dropoff_puma: 38.17MiB 42 | dropoff: 971.14MiB 43 | mta_tax: 19.35MiB 44 | dropoff_ctlabel: 38.17MiB 45 | snow_depth: 27.11MiB 46 | pickup_puma: 38.17MiB 47 | max_temperature: 19.10MiB 48 | pickup_nyct2010_gid: 38.18MiB 49 | pickup_borocode: 19.10MiB 50 | improvement_surcharge: 19.10MiB 51 | dropoff_longitude: 76.32MiB 52 | min_temperature: 19.10MiB 53 | -------------------------------------------------------------------------------- /memsize_results/baseline1: -------------------------------------------------------------------------------- 1 | # Table `test` (20000000 rows, 2009.72 MiB) # 2 | tip_amount: 39.17MiB 3 | dropoff_nyct2010_gid: 38.18MiB 4 | dropoff_datetime: 76.32MiB 5 | dropoff_borocode: 19.10MiB 6 | pickup_ntacode: 39.56MiB 7 | pickup_borocode: 19.10MiB 8 | dropoff_ntacode: 39.70MiB 9 | snowfall: 25.99MiB 10 | trip_id: 76.32MiB 11 | pickup_latitude: 38.30MiB 12 | extra: 25.54MiB 13 | dropoff_boroct2010: 76.32MiB 14 | store_and_fwd_flag: 38.19MiB 15 | dropoff_longitude: 76.32MiB 16 | dropoff_ntaname: 40.51MiB 17 | min_temperature: 19.10MiB 18 | trip_distance: 60.55MiB 19 | pickup_boroname: 38.24MiB 20 | dropoff_ct2010: 76.32MiB 21 | pickup_nyct2010_gid: 38.18MiB 22 | pickup_ntaname: 40.27MiB 23 | max_temperature: 19.10MiB 24 | dropoff_latitude: 38.17MiB 25 | vendor_id: 19.10MiB 26 | total_amount: 55.42MiB 27 | pickup_puma: 38.17MiB 28 | pickup_boroct2010: 76.32MiB 29 | precipitation: 34.92MiB 30 | payment_type: 19.10MiB 31 | pickup_datetime: 76.32MiB 32 | average_wind_speed: 38.18MiB 33 | pickup_longitude: 76.32MiB 34 | fare_amount: 52.42MiB 35 | improvement_surcharge: 19.10MiB 36 | snow_depth: 
27.11MiB 37 | pickup_ctlabel: 38.17MiB 38 | dropoff_puma: 38.17MiB 39 | rate_code_id: 19.10MiB 40 | passenger_count: 19.10MiB 41 | pickup_cdeligibil: 38.20MiB 42 | dropoff_cdeligibil: 38.20MiB 43 | mta_tax: 19.35MiB 44 | dropoff_ctlabel: 38.17MiB 45 | trip_type: 18.22MiB 46 | pickup_ct2010: 76.32MiB 47 | tolls_amount: 39.80MiB 48 | ehail_fee: 19.10MiB 49 | dropoff_boroname: 38.24MiB 50 | cab_type: 38.19MiB 51 | -------------------------------------------------------------------------------- /memsize_results/baseline2: -------------------------------------------------------------------------------- 1 | # Table `test` (100000000 rows, 9336.90 MiB) # 2 | average_wind_speed: 190.74MiB 3 | cab_type: 190.75MiB 4 | dropoff_borocode: 78.30MiB 5 | dropoff_boroct2010: 313.18MiB 6 | dropoff_boroname: 156.62MiB 7 | dropoff_cdeligibil: 156.60MiB 8 | dropoff_ct2010: 313.18MiB 9 | dropoff_ctlabel: 156.60MiB 10 | dropoff_datetime: 381.48MiB 11 | dropoff_latitude: 199.67MiB 12 | dropoff_longitude: 330.26MiB 13 | dropoff_ntacode: 157.02MiB 14 | dropoff_ntaname: 157.25MiB 15 | dropoff_nyct2010_gid: 156.60MiB 16 | dropoff_puma: 156.60MiB 17 | ehail_fee: 95.38MiB 18 | extra: 150.52MiB 19 | fare_amount: 315.04MiB 20 | improvement_surcharge: 95.38MiB 21 | max_temperature: 95.38MiB 22 | min_temperature: 95.38MiB 23 | mta_tax: 110.38MiB 24 | passenger_count: 95.38MiB 25 | payment_type: 129.53MiB 26 | pickup_borocode: 78.30MiB 27 | pickup_boroct2010: 313.18MiB 28 | pickup_boroname: 156.61MiB 29 | pickup_cdeligibil: 156.60MiB 30 | pickup_ct2010: 313.18MiB 31 | pickup_ctlabel: 156.60MiB 32 | pickup_datetime: 381.48MiB 33 | pickup_latitude: 209.67MiB 34 | pickup_longitude: 330.26MiB 35 | pickup_ntacode: 157.01MiB 36 | pickup_ntaname: 157.23MiB 37 | pickup_nyct2010_gid: 156.60MiB 38 | pickup_puma: 156.60MiB 39 | precipitation: 160.45MiB 40 | rate_code_id: 62.23MiB 41 | snow_depth: 151.60MiB 42 | snowfall: 149.60MiB 43 | store_and_fwd_flag: 157.60MiB 44 | tip_amount: 216.74MiB 45 | tolls_amount: 
234.74MiB 46 | total_amount: 315.04MiB 47 | trip_distance: 315.04MiB 48 | trip_id: 381.48MiB 49 | trip_type: 62.23MiB 50 | vendor_id: 129.53MiB -------------------------------------------------------------------------------- /memsize_results/baseline3: -------------------------------------------------------------------------------- 1 | # Table `test` (100000000 rows, 8484.65 MiB) # 2 | average_wind_speed: 190.74MiB 3 | cab_type: 95.38MiB 4 | dropoff_borocode: 78.30MiB 5 | dropoff_boroct2010: 313.18MiB 6 | dropoff_boroname: 78.32MiB 7 | dropoff_cdeligibil: 78.31MiB 8 | dropoff_ct2010: 313.18MiB 9 | dropoff_ctlabel: 156.60MiB 10 | dropoff_datetime: 381.48MiB 11 | dropoff_latitude: 199.67MiB 12 | dropoff_longitude: 330.26MiB 13 | dropoff_ntacode: 78.73MiB 14 | dropoff_ntaname: 78.96MiB 15 | dropoff_nyct2010_gid: 156.60MiB 16 | dropoff_puma: 156.60MiB 17 | ehail_fee: 95.38MiB 18 | extra: 150.52MiB 19 | fare_amount: 315.04MiB 20 | improvement_surcharge: 95.38MiB 21 | max_temperature: 95.38MiB 22 | min_temperature: 95.38MiB 23 | mta_tax: 110.37MiB 24 | passenger_count: 95.38MiB 25 | payment_type: 95.38MiB 26 | pickup_borocode: 78.30MiB 27 | pickup_boroct2010: 313.18MiB 28 | pickup_boroname: 78.32MiB 29 | pickup_cdeligibil: 78.31MiB 30 | pickup_ct2010: 313.18MiB 31 | pickup_ctlabel: 156.60MiB 32 | pickup_datetime: 381.48MiB 33 | pickup_latitude: 209.67MiB 34 | pickup_longitude: 330.26MiB 35 | pickup_ntacode: 78.72MiB 36 | pickup_ntaname: 78.94MiB 37 | pickup_nyct2010_gid: 156.60MiB 38 | pickup_puma: 156.60MiB 39 | precipitation: 160.45MiB 40 | rate_code_id: 62.23MiB 41 | snow_depth: 151.60MiB 42 | snowfall: 149.60MiB 43 | store_and_fwd_flag: 95.38MiB 44 | tip_amount: 216.74MiB 45 | tolls_amount: 234.74MiB 46 | total_amount: 315.04MiB 47 | trip_distance: 315.04MiB 48 | trip_id: 381.48MiB 49 | trip_type: 62.23MiB 50 | vendor_id: 95.38MiB -------------------------------------------------------------------------------- /rust-toolchain: 
-------------------------------------------------------------------------------- 1 | nightly-2025-03-28 2 | -------------------------------------------------------------------------------- /samples/example_row: -------------------------------------------------------------------------------- 1 | trip_id 39999994 2 | vendor_id 2 3 | pickup_datetime 2016-03-13 14:58:31 4 | dropoff_datetime 2016-03-13 15:04:05 5 | store_and_fwd_flag N 6 | rate_code_id 1 7 | pickup_longitude -73.924003601074219 8 | pickup_latitude 40.743988037109375 9 | dropoff_longitude -73.93267822265625 10 | dropoff_latitude 40.752838134765625 11 | passenger_count 1 12 | trip_distance 0.93 13 | fare_amount 6 14 | extra 0 15 | mta_tax 0.5 16 | tip_amount 0.5 17 | tolls_amount 0 18 | ehail_fee 19 | improvement_surcharge 0.3 20 | total_amount 7.3 21 | payment_type 1 22 | trip_type 1 23 | pickup 0101000020E6100000000000E0227B52C0000000003B5F4440 24 | dropoff 0101000020E610000000000000B17B52C0000000005D604440 25 | cab_type green 26 | precipitation 0.00 27 | snow_depth 0 28 | snowfall 0.0 29 | max_temperature 62 30 | min_temperature 50 31 | average_wind_speed 3.58 32 | pickup_nyct2010_gid 2064 33 | pickup_ctlabel 181.01 34 | pickup_borocode 4 35 | pickup_boroname Queens 36 | pickup_ct2010 018101 37 | pickup_boroct2010 4018101 38 | pickup_cdeligibil E 39 | pickup_ntacode QN31 40 | pickup_ntaname Hunters Point-Sunnyside-West Maspeth 41 | pickup_puma 4109 42 | dropoff_nyct2010_gid 542 43 | dropoff_ctlabel 31 44 | dropoff_borocode 4 45 | dropoff_boroname Queens 46 | dropoff_ct2010 003100 47 | dropoff_boroct2010 4003100 48 | dropoff_cdeligibil E 49 | dropoff_ntacode QN68 50 | dropoff_ntaname Queensbridge-Ravenswood-Long Island City 51 | dropoff_puma 4101 52 | -------------------------------------------------------------------------------- /samples/example_row_sparse: -------------------------------------------------------------------------------- 1 | trip_id 1 2 | vendor_id 2 3 | pickup_datetime 2013-08-01 
08:14:37 4 | dropoff_datetime 2013-08-01 09:09:06 5 | store_and_fwd_flag N 6 | rate_code_id 1 7 | pickup_longitude 8 | pickup_latitude 9 | dropoff_longitude 10 | dropoff_latitude 11 | passenger_count 1 12 | trip_distance 0.00 13 | fare_amount 21.25 14 | extra 0 15 | mta_tax 0 16 | tip_amount 0 17 | tolls_amount 0 18 | ehail_fee 19 | improvement_surcharge 20 | total_amount 21.25 21 | payment_type 2 22 | trip_type 23 | pickup 24 | dropoff 25 | cab_type green 26 | precipitation 0.65 27 | snow_depth 0 28 | snowfall 0.0 29 | max_temperature 76 30 | min_temperature 66 31 | average_wind_speed 2.91 32 | pickup_nyct2010_gid 33 | pickup_ctlabel 34 | pickup_borocode 35 | pickup_boroname 36 | pickup_ct2010 37 | pickup_boroct2010 38 | pickup_cdeligibil 39 | pickup_ntacode 40 | pickup_ntaname 41 | pickup_puma 42 | dropoff_nyct2010_gid 43 | dropoff_ctlabel 44 | dropoff_borocode 45 | dropoff_boroname 46 | dropoff_ct2010 47 | dropoff_boroct2010 48 | dropoff_cdeligibil 49 | dropoff_ntacode 50 | dropoff_ntaname 51 | dropoff_puma 52 | -------------------------------------------------------------------------------- /samples/headers: -------------------------------------------------------------------------------- 1 | trip_id 2 | vendor_id 3 | pickup_datetime 4 | dropoff_datetime 5 | store_and_fwd_flag 6 | rate_code_id 7 | pickup_longitude 8 | pickup_latitude 9 | dropoff_longitude 10 | dropoff_latitude 11 | passenger_count 12 | trip_distance 13 | fare_amount 14 | extra 15 | mta_tax 16 | tip_amount 17 | tolls_amount 18 | ehail_fee 19 | improvement_surcharge 20 | total_amount 21 | payment_type 22 | trip_type 23 | pickup 24 | dropoff 25 | cab_type 26 | precipitation 27 | snow_depth 28 | snowfall 29 | max_temperature 30 | min_temperature 31 | average_wind_speed 32 | pickup_nyct2010_gid 33 | pickup_ctlabel 34 | pickup_borocode 35 | pickup_boroname 36 | pickup_ct2010 37 | pickup_boroct2010 38 | pickup_cdeligibil 39 | pickup_ntacode 40 | pickup_ntaname 41 | pickup_puma 42 | dropoff_nyct2010_gid 
43 | dropoff_ctlabel 44 | dropoff_borocode 45 | dropoff_boroname 46 | dropoff_ct2010 47 | dropoff_boroct2010 48 | dropoff_cdeligibil 49 | dropoff_ntacode 50 | dropoff_ntaname 51 | dropoff_puma 52 | -------------------------------------------------------------------------------- /src/bin/load_generator.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | 3 | use locustdb::logging_client::BufferFullPolicy; 4 | use locustdb_serialization::api::any_val_syntax::vf64; 5 | use structopt::StructOpt; 6 | use tokio::time; 7 | 8 | #[derive(StructOpt, Debug)] 9 | #[structopt( 10 | name = "LocustDB Logger Test", 11 | about = "Log basic system stats to LocustDB.", 12 | author = "Clemens Winter " 13 | )] 14 | struct Opt { 15 | /// Address of LocustDB server 16 | #[structopt(long, name = "ADDR", default_value = "http://localhost:8080")] 17 | addr: String, 18 | 19 | /// Logging interval in milliseconds 20 | #[structopt(long, name = "INTERVAL", default_value = "100")] 21 | interval: u64, 22 | 23 | /// Number of active tables 24 | #[structopt(long, name = "TABLES", default_value = "10")] 25 | tables: u64, 26 | 27 | /// Number of rows logged per table per interval 28 | #[structopt(long, name = "ROWCOUNT")] 29 | rowcount: Option>, 30 | 31 | /// Number of columns logged per row 32 | #[structopt(long, name = "COLUMNS", default_value = "20")] 33 | columns: u64, 34 | 35 | /// Prefix for table names 36 | #[structopt(long, name = "PREFIX", default_value = "")] 37 | table_prefix: String, 38 | } 39 | 40 | #[tokio::main] 41 | async fn main() { 42 | env_logger::init(); 43 | let Opt { 44 | addr, 45 | interval, 46 | tables: n_tables, 47 | rowcount, 48 | columns, 49 | table_prefix, 50 | } = Opt::from_args(); 51 | let rowcount = rowcount.unwrap_or_else(Vec::new); 52 | let tables: Vec<_> = (0..n_tables) 53 | .map(|i| format!("{table_prefix}{}_{i}", random_word::get(random_word::Lang::En),)) 54 | .collect(); 55 | let mut log = 
locustdb::logging_client::LoggingClient::new( 56 | Duration::from_secs(1), 57 | &addr, 58 | 1 << 28, 59 | BufferFullPolicy::Block, 60 | None, 61 | ); 62 | let mut interval = time::interval(Duration::from_millis(interval)); 63 | 64 | loop { 65 | interval.tick().await; 66 | for (i, table) in tables.iter().enumerate() { 67 | for _ in 0..(rowcount.get(i).cloned().unwrap_or(1)) { 68 | log.log( 69 | table, 70 | (0..columns).map(|c| (format!("col_{c}"), vf64(rand::random::()))), 71 | ); 72 | } 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/bin/log.rs: -------------------------------------------------------------------------------- 1 | use locustdb_serialization::api::any_val_syntax::vf64; 2 | use locustdb_serialization::api::AnyVal; 3 | use rand::Rng; 4 | use std::mem; 5 | use std::time::Duration; 6 | 7 | use locustdb::logging_client::BufferFullPolicy; 8 | use structopt::StructOpt; 9 | use systemstat::{Platform, System}; 10 | use tokio::time; 11 | 12 | #[derive(StructOpt, Debug)] 13 | #[structopt( 14 | name = "LocustDB Logger Test", 15 | about = "Log basic system stats to LocustDB.", 16 | author = "Clemens Winter " 17 | )] 18 | struct Opt { 19 | /// Address of LocustDB server 20 | #[structopt(long, name = "ADDR", default_value = "http://localhost:8080")] 21 | addr: String, 22 | 23 | /// Logging interval in milliseconds 24 | #[structopt(long, name = "INTERVAL", default_value = "100")] 25 | interval: u64, 26 | 27 | /// Prefix for table names 28 | #[structopt(long, name = "PREFIX", default_value = "")] 29 | table_prefix: String, 30 | 31 | /// Interval multiplier for step 32 | #[structopt(long, name = "STEP_MULTIPLIER", default_value = "1")] 33 | step_interval: i64, 34 | 35 | /// Additional noise added to each value 36 | #[structopt(long, name = "NOISE", default_value = "0.0")] 37 | noise: f64, 38 | } 39 | 40 | struct RandomWalk { 41 | name: String, 42 | curr_value: f64, 43 | interval: u64, 44 | } 45 | 46 | 
#[tokio::main] 47 | async fn main() { 48 | env_logger::init(); 49 | let Opt { addr, interval, table_prefix, step_interval, noise } = Opt::from_args(); 50 | let mut log = locustdb::logging_client::LoggingClient::new( 51 | Duration::from_secs(1), 52 | &addr, 53 | 1 << 50, 54 | BufferFullPolicy::Block, 55 | None, 56 | ); 57 | let mut rng = rand::rng(); 58 | let mut random_walks = (0..5) 59 | .map(|i| RandomWalk { 60 | name: format!("{table_prefix}random_walk_{}", i), 61 | curr_value: 0.0, 62 | interval: rng.random_range(1..10), 63 | }) 64 | .collect::>(); 65 | let mut interval = time::interval(Duration::from_millis(interval)); 66 | let sys = System::new(); 67 | let mut cpu_watcher = sys.cpu_load_aggregate().unwrap(); 68 | for i in 0..u64::MAX { 69 | interval.tick().await; 70 | let cpu = mem::replace(&mut cpu_watcher, sys.cpu_load_aggregate().unwrap()) 71 | .done() 72 | .unwrap(); 73 | log.log( 74 | "system_stats", 75 | [("cpu".to_string(), vf64(cpu.user))].iter().cloned(), 76 | ); 77 | for walk in random_walks.iter_mut() { 78 | if i % walk.interval == 0 { 79 | walk.curr_value += rng.random_range(-1.0..1.0); 80 | log.log( 81 | &walk.name, 82 | [ 83 | ("value".to_string(), vf64(walk.curr_value + rng.random_range(-noise..noise))), 84 | ("step".to_string(), AnyVal::Int((i / walk.interval) as i64 * step_interval)), 85 | ] 86 | .iter() 87 | .cloned(), 88 | ); 89 | } 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/bin/profile.rs: -------------------------------------------------------------------------------- 1 | use locustdb::LocustDB; 2 | use futures::executor::block_on; 3 | 4 | fn main() { 5 | let locustdb = LocustDB::memory_only(); 6 | let mut loads = Vec::new(); 7 | for x in &["aa", "ab", "ac", "ad", "ae"] { 8 | let path = format!("test_data/nyc-taxi-data/trips_x{}.csv.gz", x); 9 | loads.push(locustdb.load_csv( 10 | locustdb::nyc_taxi_data::ingest_reduced_file(&path, "test") 11 | .with_partition_size(1 << 
20))); 12 | } 13 | for l in loads { 14 | let _ = block_on(l); 15 | } 16 | println!("Load completed"); 17 | loop { 18 | let _ = block_on(locustdb.run_query("select passenger_count, to_year(pickup_datetime), trip_distance / 1000, count(0) from test;", false, false, vec![])); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/bin/repl/fmt_table.rs: -------------------------------------------------------------------------------- 1 | pub fn fmt_table(headings: &[&str], rows: &[Vec<&str>]) -> String { 2 | let ncols = headings.len(); 3 | let mut col_width = Vec::::with_capacity(ncols); 4 | for heading in headings { 5 | col_width.push(super::unicode::display_width(heading) + 1); 6 | } 7 | for row in rows { 8 | assert_eq!(ncols, row.len()); 9 | for (i, entry) in row.iter().enumerate() { 10 | let width = super::unicode::display_width(entry) + 1; 11 | if col_width[i] < width { 12 | col_width[i] = width; 13 | } 14 | } 15 | } 16 | 17 | let mut result = String::new(); 18 | append_row(&mut result, headings, &col_width); 19 | 20 | result.push('\n'); 21 | for (i, width) in col_width.iter().enumerate() { 22 | result.push_str(&String::from_utf8(vec![b'-'; *width]).unwrap()); 23 | if i < ncols - 1 { 24 | result.push_str("+-"); 25 | } 26 | } 27 | 28 | for row in rows { 29 | result.push('\n'); 30 | append_row(&mut result, row, &col_width); 31 | } 32 | 33 | result 34 | } 35 | 36 | fn append_row(string: &mut String, row: &[&str], col_width: &[usize]) { 37 | let imax = col_width.len() - 1; 38 | for (i, entry) in row.iter().enumerate() { 39 | string.push_str(&format!("{:1$}", entry, col_width[i])); 40 | if i < imax { 41 | string.push_str("| "); 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/bin/repl/print_results.rs: -------------------------------------------------------------------------------- 1 | use crate::fmt_table::fmt_table; 2 | use locustdb::*; 3 | use 
locustdb::unit_fmt::*; 4 | 5 | pub fn print_query_result(results: &QueryOutput) { 6 | let rt = if results.stats.runtime_ns == 0 { 1 } else { results.stats.runtime_ns }; 7 | 8 | println!(); 9 | for (query_plan, count) in &results.query_plans { 10 | println!("Query plan in {} batches{}", count, query_plan) 11 | } 12 | println!("Scanned {} rows in {} ({:.2} rows/s)!", 13 | short_scale(results.stats.rows_scanned as f64), 14 | ns(rt as usize), 15 | billion(results.stats.rows_scanned as f64 / rt as f64)); 16 | println!("\n{}", format_results(&results.colnames, results.rows.as_ref().unwrap())); 17 | println!(); 18 | } 19 | 20 | fn format_results(colnames: &[String], rows: &[Vec]) -> String { 21 | let strcolnames: Vec<&str> = colnames.iter().map(|s| s as &str).collect(); 22 | let formattedrows: Vec> = rows.iter() 23 | .map(|row| { 24 | row.iter() 25 | .map(|val| format!("{}", val)) 26 | .collect() 27 | }) 28 | .collect(); 29 | let strrows = 30 | formattedrows.iter().map(|row| row.iter().map(|val| val as &str).collect()).collect::>(); 31 | 32 | fmt_table(&strcolnames, &strrows) 33 | } 34 | 35 | -------------------------------------------------------------------------------- /src/bin/repl/unicode.rs: -------------------------------------------------------------------------------- 1 | // original source: https://github.com/lintje/lintje/blob/501aab06e19008e787237438a69ac961f38bb4b7 2 | // https://tomdebruijn.com/posts/rust-string-length-width-calculations/ 3 | use unicode_segmentation::UnicodeSegmentation; 4 | use unicode_width::UnicodeWidthStr; 5 | use lazy_static::lazy_static; 6 | 7 | const ZERO_WIDTH_JOINER: &str = "\u{200d}"; 8 | const VARIATION_SELECTOR_16: &str = "\u{fe0f}"; 9 | const SKIN_TONES: [&str; 5] = [ 10 | "\u{1f3fb}", // Light Skin Tone 11 | "\u{1f3fc}", // Medium-Light Skin Tone 12 | "\u{1f3fd}", // Medium Skin Tone 13 | "\u{1f3fe}", // Medium-Dark Skin Tone 14 | "\u{1f3ff}", // Dark Skin Tone 15 | ]; 16 | 17 | lazy_static! 
{ 18 | static ref OTHER_PUNCTUATION: Vec = vec!['…', '⋯',]; 19 | } 20 | 21 | // Return String display width as rendered in a monospace font according to the Unicode 22 | // specification. 23 | // 24 | // This may return some odd results at times where some symbols are counted as more character width 25 | // than they actually are. 26 | // 27 | // This function has exceptions for skin tones and other emoji modifiers to determine a more 28 | // accurate display with. 29 | pub fn display_width(string: &str) -> usize { 30 | // String expressed as a vec of Unicode characters. Characters with accents and emoji may 31 | // be multiple characters combined. 32 | let unicode_chars = string.graphemes(true); 33 | let mut width = 0; 34 | for c in unicode_chars.into_iter() { 35 | width += display_width_char(c); 36 | } 37 | width 38 | } 39 | 40 | /// Calculate the render width of a single Unicode character. Unicode characters may consist of 41 | /// multiple String characters, which is why the function argument takes a string. 42 | fn display_width_char(string: &str) -> usize { 43 | // Characters that are used as modifiers on emoji. By themselves they have no width. 44 | if string == ZERO_WIDTH_JOINER || string == VARIATION_SELECTOR_16 { 45 | return 0; 46 | } 47 | // Emoji that are representations of combined emoji. They are normally calculated as the 48 | // combined width of the emoji, rather than the actual display width. This check fixes that and 49 | // returns a width of 2 instead. 50 | if string.contains(ZERO_WIDTH_JOINER) { 51 | return 2; 52 | } 53 | // Any character with a skin tone is most likely an emoji. 54 | // Normally it would be counted as as four or more characters, but these emoji should be 55 | // rendered as having a width of two. 56 | for skin_tone in SKIN_TONES { 57 | if string.contains(skin_tone) { 58 | return 2; 59 | } 60 | } 61 | 62 | match string { 63 | "\t" => { 64 | // unicode-width returns 0 for tab width, which is not how it's rendered. 
65 | // I choose 4 columns as that's what most applications render a tab as. 66 | 4 67 | } 68 | _ => UnicodeWidthStr::width(string), 69 | } 70 | } -------------------------------------------------------------------------------- /src/bin/show.rs: -------------------------------------------------------------------------------- 1 | use futures::executor::block_on; 2 | 3 | use locustdb::nyc_taxi_data; 4 | use locustdb::LocustDB; 5 | 6 | fn main() { 7 | let locustdb = LocustDB::memory_only(); 8 | let load = block_on( 9 | locustdb.load_csv( 10 | nyc_taxi_data::ingest_reduced_file("test_data/nyc-taxi.csv.gz", "default") 11 | .with_partition_size(2500), 12 | ), 13 | ); 14 | load.unwrap(); 15 | let query = "select pickup_ntaname, to_year(pickup_datetime), trip_distance / 1000, count(0), sum(total_amount) from default where cab_type = \"CMS\";"; 16 | // let query = "select payment_method, count(0), sum(total_amount) from default;"; 17 | block_on(locustdb.run_query(query, false, true, vec![0])).unwrap(); 18 | } 19 | -------------------------------------------------------------------------------- /src/bitvec.rs: -------------------------------------------------------------------------------- 1 | pub trait BitVecMut { 2 | fn set(&mut self, index: usize); 3 | fn unset(&mut self, index: usize); 4 | } 5 | 6 | pub trait BitVec { 7 | fn is_set(&self, index: usize) -> bool; 8 | } 9 | 10 | impl BitVecMut for Vec { 11 | fn set(&mut self, index: usize) { 12 | let slot = index >> 3; 13 | while slot >= self.len() { 14 | self.push(0); 15 | } 16 | self[slot] |= 1 << (index as u8 & 7) 17 | } 18 | 19 | fn unset(&mut self, index: usize) { 20 | let slot = index >> 3; 21 | if slot < self.len() { 22 | self[slot] &= 0xff ^ (1 << (index as u8 & 7)); 23 | } 24 | } 25 | } 26 | 27 | impl BitVec for Vec { 28 | fn is_set(&self, index: usize) -> bool { 29 | let slot = index >> 3; 30 | slot < self.len() && self[slot] & (1 << (index as u8 & 7)) > 0 31 | } 32 | } 33 | 34 | impl BitVec for [u8] { 35 | fn 
is_set(&self, index: usize) -> bool {
        let slot = index >> 3;
        slot < self.len() && self[slot] & (1 << (index as u8 & 7)) > 0
    }
}

// ============================================================================
// src/disk_store/mod.rs
// ============================================================================
mod azure_writer;
mod file_writer;
mod gcs_writer;
pub mod meta_store;
pub mod noop_storage;
mod partition_segment;
pub mod storage;
pub mod wal_segment;

lazy_static! {
    // Shared tokio runtime used by the disk-store backends.
    static ref RT: tokio::runtime::Runtime = tokio::runtime::Runtime::new().unwrap();
}

use crate::mem_store::column::Column;
use crate::observability::QueryPerfCounter;
use crate::scheduler::inner_locustdb::InnerLocustDB;

/// Backend abstraction for loading persisted column data.
pub trait ColumnLoader: Sync + Send + 'static {
    fn load_column(
        &self,
        table_name: &str,
        partition: PartitionID,
        column_name: &str,
        perf_counter: &QueryPerfCounter,
    ) -> Option<Vec<Column>>;
    fn load_column_range(
        &self,
        start: PartitionID,
        end: PartitionID,
        column_name: &str,
        ldb: &InnerLocustDB,
    );
    fn partition_has_been_loaded(&self, table: &str, partition: PartitionID, column: &str) -> bool;
    fn mark_subpartition_as_loaded(&self, table: &str, partition: PartitionID, column: &str);
}

pub type PartitionID = u64;

// ============================================================================
// src/disk_store/noop_storage.rs
// ============================================================================
use crate::disk_store::*;

/// `ColumnLoader` that never loads anything — used when persistence is disabled.
pub struct NoopStorage;

impl ColumnLoader for NoopStorage {
    fn load_column(
        &self,
        _: &str,
        _: PartitionID,
        _: &str,
        _: &QueryPerfCounter,
    ) -> Option<Vec<Column>> {
        None
    }
    fn load_column_range(&self, _: PartitionID, _: PartitionID, _: &str, _: &InnerLocustDB) {}
    fn partition_has_been_loaded(&self, _: &str, _: PartitionID, _: &str) -> bool {
        true
    }
    fn mark_subpartition_as_loaded(&self, _: &str, _: PartitionID, _: &str) {}
}

// ============================================================================
// src/disk_store/wal_segment.rs
// ============================================================================
use std::borrow::Cow;
use capnp::serialize_packed;
use locustdb_serialization::event_buffer::EventBuffer;
use locustdb_serialization::{default_reader_options, wal_segment_capnp};

/// One write-ahead-log segment: a monotonically increasing id plus the
/// buffered events it contains.
#[derive(Debug)]
pub struct WalSegment<'a> {
    pub id: u64,
    pub data: Cow<'a, EventBuffer>,
}

impl<'a> WalSegment<'a> {
    /// Encode this segment as a packed capnp message.
    pub fn serialize(&self) -> Vec<u8> {
        let mut builder = capnp::message::Builder::new_default();
        let mut wal_segment = builder.init_root::<wal_segment_capnp::wal_segment::Builder>();
        wal_segment.set_id(self.id);
        let mut table_segment_list = wal_segment.get_data().unwrap();
        self.data.serialize_builder(&mut table_segment_list);
        let mut buf = Vec::new();
        serialize_packed::write_message(&mut buf, &builder).unwrap();
        buf
    }

    /// Decode a packed capnp message back into an owned `WalSegment`.
    pub fn deserialize(data: &[u8]) -> capnp::Result<WalSegment<'static>> {
        let message_reader =
            serialize_packed::read_message(data, default_reader_options()).unwrap();
        let wal_segment = message_reader.get_root::<wal_segment_capnp::wal_segment::Reader>()?;
        let id = wal_segment.get_id();
        let data = EventBuffer::deserialize_reader(wal_segment.get_data()?)?;
        Ok(WalSegment {
            id,
            data: Cow::Owned(data),
        })
    }
}

// ============================================================================
// src/engine/data_types/mod.rs
// ============================================================================
mod byte_slices;
mod data;
mod nullable_vec_data;
mod scalar_data;
mod types;
mod val_rows;
mod vec_data;

use ordered_float::OrderedFloat;

pub use self::byte_slices::*;
pub use self::data::*;
pub use
self::nullable_vec_data::*;
pub use self::scalar_data::*;
pub use self::types::*;
pub use self::val_rows::*;
pub use self::vec_data::*;

#[allow(non_camel_case_types)]
pub type of64 = OrderedFloat<f64>;

/// Reinterpret a `Vec<f64>` as a `Vec<of64>` without copying.
/// Relies on `OrderedFloat<f64>` having the same layout as `f64`.
pub fn vec_f64_to_vec_of64(vec: Vec<f64>) -> Vec<of64> {
    unsafe { std::mem::transmute::<Vec<f64>, Vec<of64>>(vec) }
}

// ============================================================================
// src/engine/execution/mod.rs
// ============================================================================
pub mod query_task;
mod buffer;
mod executor;
mod batch_merging;
mod scratchpad;

pub use self::buffer::*;
pub use self::scratchpad::*;
pub use self::executor::*;
pub use self::batch_merging::{BatchResult, combine};

// ============================================================================
// src/engine/mod.rs
// ============================================================================
use crate::errors::QueryError;

pub use self::data_types::*;
pub use self::execution::*;
pub use self::operators::*;
pub use self::planning::*;

pub mod data_types;
pub mod execution;
pub mod operators;
pub mod planning;

// ============================================================================
// src/engine/operators/aggregator.rs
// ============================================================================
// TODO: would probably be better to have two types here, an UntypedAggregator emitted by parser which is then converted into the right TypedAggregator by query planner
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Aggregator {
    SumI64 = 0,
    SumF64 = 1,
    Count = 2,
    MaxI64 = 3,
    MaxF64 = 4,
    MinI64 = 5,
    MinF64 = 6,
}

// ============================================================================
// src/engine/operators/assemble_nullable.rs
// ============================================================================
use crate::engine::*;

/// Combines a data buffer and a present (null-map) buffer into one nullable buffer.
#[derive(Debug)]
pub struct AssembleNullable<T> {
    pub data: BufferRef<T>,
    pub present: BufferRef<u8>,
    pub nullable_data: BufferRef<Nullable<T>>,
}

impl<'a, T: VecData<T>> VecOperator<'a> for AssembleNullable<T> {
    fn execute(&mut self, _streaming: bool, _scratchpad: &mut Scratchpad<'a>)
        -> Result<(), QueryError> {
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        // This works even when streaming since it just creates an nullable_data->data alias
        // and sets the null map of nullable_data to present.
        // It would incorrect to perform this operation in the `execute` function since
        // otherwise it would results in incorrect ordering with potential
        // `PropagateNullability` operations.
        scratchpad.assemble_nullable(self.data, self.present, self.nullable_data);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.data.any(), self.present.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i, &mut self.present.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.nullable_data.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { false }
    fn display_op(&self, _: bool) -> String { format!("nullable({}, {})", self.data, self.present) }
}

// ============================================================================
// src/engine/operators/bit_unpack.rs
// ============================================================================
use crate::engine::*;

/// Extracts a bit field (`shift`/`width`) from each packed i64 input value.
#[derive(Debug)]
pub struct BitUnpackOperator {
    pub input: BufferRef<i64>,
    pub output: BufferRef<i64>,
    pub shift: u8,
    pub width: u8,
}

impl<'a> VecOperator<'a> for BitUnpackOperator {
    fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) ->
Result<(), QueryError> {
        let data = scratchpad.get(self.input);
        let mut unpacked = scratchpad.get_mut(self.output);
        // In streaming mode the output buffer is reused between batches.
        if stream { unpacked.clear(); }
        let mask = (1 << self.width) - 1;
        for d in data.iter() {
            unpacked.push((d >> self.shift) & mask);
        }
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.output, Vec::<i64>::with_capacity(batch_size));
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.input.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn can_block_output(&self) -> bool { true }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, alternate: bool) -> String {
        if alternate {
            let mask = (1 << self.width) - 1;
            format!("({} >> {}) & {:x}", self.input, self.shift, mask)
        } else {
            format!("({} >> $shift) & $mask", self.input)
        }
    }
}

// ============================================================================
// src/engine/operators/collect.rs
// ============================================================================
use crate::engine::*;

/// Names and forwards its input buffer to the output (pure alias, no copy).
pub struct Collect {
    pub input: BufferRef<Any>,
    pub output: BufferRef<Any>,
    pub name: String,
}

impl<'a> VecOperator<'a> for Collect {
    fn execute(&mut self, _: bool, _scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.alias(self.input, self.output);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.input] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output] }
    fn can_stream_input(&self,
execute(&mut self, _streaming: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let lhs = scratchpad.get_null_map(self.lhs);
        let rhs = scratchpad.get_null_map(self.rhs);
        let mut output = scratchpad.get_mut(self.output);
        // A value is present in the combined map only if present in both inputs.
        for (out, (l, r)) in output.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
            *out = l & r;
        }
        Ok(())
    }

    fn init(&mut self, total_count: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        // One bit per row, rounded up to whole bytes.
        let output = vec![0u8; batch_size.min(total_count).div_ceil(8)];
        scratchpad.set(self.output, output);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.lhs.any(), self.rhs.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.lhs.i, &mut self.rhs.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { true }
    fn display_op(&self, _: bool) -> String { format!("combine_null_maps({}, {})", self.lhs, self.rhs) }
}

// ============================================================================
// src/engine/operators/compact.rs
// ============================================================================
use crate::engine::*;

/// In-place filter: keeps `data[i]` only where `select[i] > 0`.
#[derive(Debug)]
pub struct Compact<T, U> {
    pub data: BufferRef<T>,
    pub select: BufferRef<U>,
    pub compacted: BufferRef<T>,
}

impl<'a, T: VecData<T> + 'a, U: GenericIntVec<U>> VecOperator<'a> for Compact<T, U> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let mut data = scratchpad.get_mut(self.data);
        let select = scratchpad.get(self.select);
        // Compact selected entries to the front, then truncate the rest.
        let mut j = 0;
        for (i, &s) in select.iter().take(data.len()).enumerate() {
            if s > U::zero() {
                data[j] = data[i];
                j += 1;
            }
        }
        data.truncate(j);
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.alias(self.data, self.compacted);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.data.any(), self.select.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i, &mut self.select.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.compacted.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn mutates(&self, i: usize) -> bool { i == self.data.i }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        format!("{}[{} > 0]", self.data, self.select)
    }
}

// ============================================================================
// src/engine/operators/compact_nullable.rs
// ============================================================================
use crate::engine::*;
use crate::bitvec::*;

/// Like `Compact`, but the data buffer is nullable: the null map is compacted
/// alongside the values.
#[derive(Debug)]
pub struct CompactNullable<T, U> {
    pub data: BufferRef<Nullable<T>>,
    pub select: BufferRef<U>,
    pub compacted: BufferRef<Nullable<T>>,
}

impl<'a, T: VecData<T> + 'a, U: GenericIntVec<U>> VecOperator<'a> for CompactNullable<T, U> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let (mut data, mut present) = scratchpad.get_mut_nullable(self.data);
        let select = scratchpad.get(self.select);
        // Compact selected entries (and their null bits) to the front.
        let mut j = 0;
        for (i, &s) in select.iter().take(data.len()).enumerate() {
            if s > U::zero() {
                data[j] = data[i];
                if present.is_set(i) {
                    present.set(j);
                } else {
                    present.unset(j);
                }
                j += 1;
            }
        }
        data.truncate(j);
        present.truncate(j.div_ceil(8));
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.alias(self.data, self.compacted);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> {
vec![self.data.any(), self.select.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i, &mut self.select.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.compacted.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn mutates(&self, i: usize) -> bool { i == self.data.i }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        format!("{}[{} > 0]", self.data, self.select)
    }
}

// ============================================================================
// src/engine/operators/compact_nullable_nullable.rs
// ============================================================================
use crate::engine::*;
use crate::bitvec::*;

/// Compacts a nullable data buffer by a nullable select buffer: an entry is
/// kept only if the select value is positive AND present.
#[derive(Debug)]
pub struct CompactNullableNullable<T, U> {
    pub data: BufferRef<Nullable<T>>,
    pub select: BufferRef<Nullable<U>>,
    pub compacted: BufferRef<Nullable<T>>,
}

impl<'a, T: VecData<T> + 'a, U: GenericIntVec<U>> VecOperator<'a> for CompactNullableNullable<T, U> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let (mut data, mut present) = scratchpad.get_mut_nullable(self.data);
        let (select, select_present) = scratchpad.get_nullable(self.select);
        // Compact entries selected by a present, positive select value.
        let mut j = 0;
        for (i, &s) in select.iter().take(data.len()).enumerate() {
            if s > U::zero() && (*select_present).is_set(i) {
                data[j] = data[i];
                if present.is_set(i) {
                    present.set(j);
                } else {
                    present.unset(j);
                }
                j += 1;
            }
        }
        data.truncate(j);
        present.truncate(j.div_ceil(8));
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.alias(self.data, self.compacted);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.data.any(), self.select.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i, &mut self.select.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.compacted.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn mutates(&self, i: usize) -> bool { i == self.data.i }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        format!("{}[{} > 0]", self.data, self.select)
    }
}

// ============================================================================
// src/engine/operators/compact_with_nullable.rs
// ============================================================================
use crate::engine::*;
use crate::bitvec::*;

/// Compacts a plain data buffer by a nullable select buffer.
#[derive(Debug)]
pub struct CompactWithNullable<T, U> {
    pub data: BufferRef<T>,
    pub select: BufferRef<Nullable<U>>,
    pub compacted: BufferRef<T>,
}

impl<'a, T: VecData<T> + 'a, U: GenericIntVec<U>> VecOperator<'a> for CompactWithNullable<T, U> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let mut data = scratchpad.get_mut(self.data);
        let (select, select_present) = scratchpad.get_nullable(self.select);
        // Compact entries selected by a present, positive select value.
        let mut j = 0;
        for (i, &s) in select.iter().take(data.len()).enumerate() {
            if s > U::zero() && (*select_present).is_set(i) {
                data[j] = data[i];
                j += 1;
            }
        }
        data.truncate(j);
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.alias(self.data, self.compacted);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.data.any(), self.select.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i, &mut self.select.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.compacted.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn mutates(&self, i: usize) ->
bool { i == self.data.i }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        format!("{}[{} > 0]", self.data, self.select)
    }
}

// ============================================================================
// src/engine/operators/constant.rs
// ============================================================================
use crate::engine::*;
use crate::ingest::raw_val::RawVal;

/// Produces a single constant value. `hide_value` controls whether the value
/// itself is shown in query plans (only its type is shown when hidden).
#[derive(Debug)]
pub struct Constant {
    pub val: RawVal,
    pub hide_value: bool,
    pub output: BufferRef<RawVal>,
}

impl<'a> VecOperator<'a> for Constant {
    fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        // The constant never changes, so it is materialized once during init.
        let result = constant_data(self.val.clone());
        scratchpad.set_any(self.output.any(), result);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, alternate: bool) -> String {
        if self.hide_value && !alternate {
            format!("Constant<{:?}>", self.val.get_type())
        } else {
            format!("{}", &self.val)
        }
    }
}

// ============================================================================
// src/engine/operators/constant_expand.rs
// ============================================================================
use crate::engine::*;

/// Streams `len` copies of a constant value in batches of `batch_size`.
#[derive(Debug)]
pub struct ConstantExpand<T> {
    pub val: T,
    pub output: BufferRef<T>,

    pub current_index: usize,
    pub len: usize,
    pub batch_size: usize,
}

impl<'a, T: GenericIntVec<T>> VecOperator<'a> for ConstantExpand<T> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        // The final batch may be shorter than batch_size; shrink the buffer.
        if self.current_index + self.batch_size > self.len {
            let mut output = scratchpad.get_mut(self.output);
            output.truncate(self.len - self.current_index);
        }
        self.current_index += self.batch_size;
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        self.batch_size = batch_size;
        scratchpad.set(self.output, vec![self.val; batch_size]);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { true }
    fn is_streaming_producer(&self) -> bool { true }
    fn has_more(&self) -> bool { self.current_index < self.len }

    fn display_op(&self, _: bool) -> String {
        "ConstantExpand".to_string()
    }
}

// ============================================================================
// src/engine/operators/constant_vec.rs
// ============================================================================
use crate::engine::*;
use std::fmt;
use std::mem;

/// Produces a pre-materialized vector of constant data.
pub struct ConstantVec<'a> {
    pub val: BoxedData<'a>,
    pub output: BufferRef<Any>,
}

impl<'a> VecOperator<'a> for ConstantVec<'a> {
    fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        // Move the owned data into the scratchpad, leaving an empty placeholder.
        let owned = mem::replace(&mut self.val, empty_data(0));
        scratchpad.set_any(self.output, owned);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] }
    fn outputs(&self) -> Vec<BufferRef<Any>> {
vec![self.output] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        "ConstantVec".to_string()
    }
}

impl<'a> fmt::Debug for ConstantVec<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.display_op(false))
    }
}

// ============================================================================
// src/engine/operators/delta_decode.rs
// ============================================================================
use crate::engine::*;

/// Decodes a delta-encoded column back into absolute i64 values.
/// `previous` carries the running value across streamed batches.
#[derive(Debug)]
pub struct DeltaDecode<T> {
    pub encoded: BufferRef<T>,
    pub decoded: BufferRef<i64>,
    pub previous: i64,
}

impl<'a, T: GenericIntVec<T>> VecOperator<'a> for DeltaDecode<T> {
    fn execute(&mut self, streaming: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let encoded = scratchpad.get(self.encoded);
        let mut decoded = scratchpad.get_mut(self.decoded);
        if streaming { decoded.clear(); }
        // Each output is the previous output plus the stored delta.
        let mut previous = self.previous;
        for e in encoded.iter() {
            let current = e.to_i64().unwrap() + previous;
            decoded.push(current);
            previous = current;
        }
        self.previous = previous;
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.decoded, Vec::with_capacity(batch_size));
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.encoded.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.encoded.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.decoded.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn can_block_output(&self) -> bool { true }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("delta_decode({})", self.encoded)
    }
}

// ============================================================================
// src/engine/operators/empty.rs
// ============================================================================
use crate::engine::*;
use std::fmt;

/// Produces an empty vector of the requested element type.
pub struct Empty<T> {
    pub output: BufferRef<T>,
}

impl<'a, T> VecOperator<'a> for Empty<T> where T: VecData<T> + 'a {
    fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.output, vec![]);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn allocates(&self) -> bool { false }

    fn display_op(&self, _: bool) -> String {
        "Empty".to_string()
    }
}

impl<T> fmt::Debug for Empty<T> where T: VecData<T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.display_op(false))
    }
}

// ============================================================================
// src/engine/operators/encode_const.rs
// ============================================================================
use crate::engine::*;
use crate::mem_store::*;

/// Encodes a scalar integer constant with a column codec so it can be compared
/// against encoded column data.
#[derive(Debug)]
pub struct EncodeIntConstant {
    pub constant: BufferRef<Scalar<i64>>,
    pub output: BufferRef<Scalar<i64>>,
    pub codec: Codec,
}

impl<'a> VecOperator<'a> for EncodeIntConstant {
    fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        Ok(())
    }

    fn init(&mut self, _: usize, _: usize, scratchpad:
&mut Scratchpad<'a>) { 17 | let constant = scratchpad.get_scalar(&self.constant); 18 | let result = self.codec.encode_int(constant); 19 | scratchpad.set_any(self.output.any(), scalar_i64_data(result)); 20 | } 21 | 22 | fn inputs(&self) -> Vec> { 23 | vec![self.constant.any()] 24 | } 25 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.constant.i] } 26 | fn outputs(&self) -> Vec> { 27 | vec![self.output.any()] 28 | } 29 | fn can_stream_input(&self, _: usize) -> bool { 30 | true 31 | } 32 | fn can_stream_output(&self, _: usize) -> bool { 33 | true 34 | } 35 | fn allocates(&self) -> bool { 36 | false 37 | } 38 | 39 | fn display_op(&self, _: bool) -> String { 40 | format!("encode({}; {:?})", self.constant, self.codec) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/engine/operators/exists.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | 4 | #[derive(Debug)] 5 | pub struct Exists { 6 | pub input: BufferRef, 7 | pub max_index: BufferRef>, 8 | pub output: BufferRef, 9 | } 10 | 11 | impl<'a, T: GenericIntVec + CastUsize> VecOperator<'a> for Exists { 12 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError>{ 13 | let data = scratchpad.get(self.input); 14 | let mut exists = scratchpad.get_mut(self.output); 15 | 16 | let len = scratchpad.get_scalar(&self.max_index) as usize + 1; 17 | if len > exists.len() { 18 | exists.resize(len, 0); 19 | } 20 | 21 | for &i in data.iter() { 22 | let index = i.cast_usize(); 23 | exists[index] = 1; 24 | } 25 | Ok(()) 26 | } 27 | 28 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 29 | scratchpad.set(self.output, Vec::with_capacity(0)); 30 | } 31 | 32 | fn inputs(&self) -> Vec> { vec![self.input.any(), self.max_index.any()] } 33 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 34 | fn outputs(&self) -> Vec> { 
vec![self.output.any()] } 35 | fn can_stream_input(&self, _: usize) -> bool { true } 36 | fn can_stream_output(&self, _: usize) -> bool { false } 37 | fn allocates(&self) -> bool { true } 38 | 39 | fn display_output(&self) -> bool { false } 40 | fn display_op(&self, _: bool) -> String { 41 | format!("{}[{}] = 1 {}", self.output, self.input, self.max_index) 42 | } 43 | } 44 | 45 | -------------------------------------------------------------------------------- /src/engine/operators/functions.rs: -------------------------------------------------------------------------------- 1 | use chrono::{DateTime, Datelike}; 2 | 3 | use crate::engine::of64; 4 | 5 | use super::map_operator::MapOp; 6 | 7 | 8 | pub struct ToYear; 9 | 10 | impl MapOp for ToYear { 11 | fn apply(&self, unix_ts: i64) -> i64 { i64::from(DateTime::from_timestamp(unix_ts, 0).unwrap().year()) } 12 | fn name() -> &'static str { "to_year" } 13 | } 14 | 15 | pub struct Floor; 16 | 17 | impl MapOp for Floor { 18 | fn apply(&self, f: of64) -> i64 { f.floor() as i64 } 19 | fn name() -> &'static str { "floor" } 20 | } 21 | 22 | pub struct BooleanNot; 23 | 24 | impl MapOp for BooleanNot { 25 | fn apply(&self, boolean: u8) -> u8 { boolean ^ true as u8 } 26 | fn name() -> &'static str { "not" } 27 | } 28 | 29 | 30 | pub struct RegexMatch { 31 | pub r: regex::Regex 32 | } 33 | 34 | impl<'a> MapOp<&'a str, u8> for RegexMatch { 35 | fn apply(&self, s: &'a str) -> u8 { 36 | match self.r.find(s) { 37 | Some(_) => 1, 38 | None => 0, 39 | } 40 | } 41 | fn name() -> &'static str { "not" } 42 | } 43 | 44 | 45 | pub struct Length; 46 | 47 | impl<'a> MapOp<&'a str, i64> for Length { 48 | fn apply(&self, s: &'a str) -> i64 { s.len() as i64 } 49 | fn name() -> &'static str { "length" } 50 | } 51 | -------------------------------------------------------------------------------- /src/engine/operators/get_null_map.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | 
#[derive(Debug)] 4 | pub struct GetNullMap { 5 | pub from: BufferRef>, 6 | pub present: BufferRef, 7 | } 8 | 9 | impl<'a> VecOperator<'a> for GetNullMap { 10 | fn execute(&mut self, _streaming: bool, _scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) } 11 | 12 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 13 | scratchpad.alias_null_map(self.from, self.present); 14 | } 15 | 16 | fn inputs(&self) -> Vec> { vec![self.from.any()] } 17 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.from.i] } 18 | fn outputs(&self) -> Vec> { vec![self.present.any()] } 19 | fn can_stream_input(&self, _: usize) -> bool { true } 20 | fn can_stream_output(&self, _: usize) -> bool { true } 21 | fn allocates(&self) -> bool { true } 22 | fn display_op(&self, _: bool) -> String { format!("null_map({})", self.from) } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/engine/operators/hashmap_grouping.rs: -------------------------------------------------------------------------------- 1 | use fnv::FnvHashMap; 2 | 3 | use crate::engine::*; 4 | use crate::ingest::raw_val::RawVal; 5 | use std::hash::Hash; 6 | 7 | #[derive(Debug)] 8 | pub struct HashMapGrouping + Hash + Ord> { 9 | input: BufferRef, 10 | unique_out: BufferRef, 11 | grouping_key_out: BufferRef, 12 | cardinality_out: BufferRef>, 13 | map: FnvHashMap, 14 | } 15 | 16 | impl<'a, T: VecData + Hash + Ord + 'a> HashMapGrouping { 17 | pub fn boxed( 18 | input: BufferRef, 19 | unique_out: BufferRef, 20 | grouping_key_out: BufferRef, 21 | cardinality_out: BufferRef>, 22 | _max_index: usize, 23 | ) -> BoxedOperator<'a> { 24 | Box::new(HashMapGrouping:: { 25 | input, 26 | unique_out, 27 | grouping_key_out, 28 | cardinality_out, 29 | map: FnvHashMap::default(), 30 | }) 31 | } 32 | } 33 | 34 | impl<'a, T: VecData + Hash + Ord + 'a> VecOperator<'a> for HashMapGrouping { 35 | fn execute(&mut self, stream: bool, scratchpad: &mut 
Scratchpad<'a>) -> Result<(), QueryError> { 36 | let count = { 37 | let raw_grouping_key = scratchpad.get(self.input); 38 | let mut grouping = scratchpad.get_mut(self.grouping_key_out); 39 | let mut unique = scratchpad.get_mut(self.unique_out); 40 | if stream { 41 | grouping.clear() 42 | } 43 | for i in raw_grouping_key.iter() { 44 | grouping.push(*self.map.entry(*i).or_insert_with(|| { 45 | unique.push(*i); 46 | unique.len() as u32 - 1 47 | })); 48 | } 49 | RawVal::Int(unique.len() as i64) 50 | }; 51 | scratchpad.set_any(self.cardinality_out.any(), constant_data(count)); 52 | Ok(()) 53 | } 54 | 55 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 56 | scratchpad.set(self.unique_out, Vec::new()); 57 | scratchpad.set(self.grouping_key_out, Vec::with_capacity(batch_size)); 58 | } 59 | 60 | fn inputs(&self) -> Vec> { 61 | vec![self.input.any()] 62 | } 63 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 64 | fn outputs(&self) -> Vec> { 65 | vec![ 66 | self.unique_out.any(), 67 | self.grouping_key_out.any(), 68 | self.cardinality_out.any(), 69 | ] 70 | } 71 | fn can_stream_input(&self, _: usize) -> bool { 72 | true 73 | } 74 | fn can_stream_output(&self, output: usize) -> bool { 75 | output != self.unique_out.i 76 | } 77 | fn can_block_output(&self) -> bool { true } 78 | fn allocates(&self) -> bool { 79 | true 80 | } 81 | 82 | fn display_op(&self, _: bool) -> String { 83 | format!("hashmap_grouping({})", self.input) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/engine/operators/identity.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | pub struct Identity { 4 | pub input: BufferRef, 5 | pub output: BufferRef, 6 | } 7 | 8 | 9 | impl<'a> VecOperator<'a> for Identity { 10 | fn execute(&mut self, _: bool, _scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) } 11 | 12 | fn 
init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 13 | scratchpad.alias(self.input, self.output); 14 | } 15 | 16 | fn inputs(&self) -> Vec> { vec![self.input.any()] } 17 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 18 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 19 | fn can_stream_input(&self, _: usize) -> bool { true } 20 | fn can_stream_output(&self, _: usize) -> bool { true } 21 | fn allocates(&self) -> bool { false } 22 | fn display_op(&self, _: bool) -> String { format!("{}", self.input) } 23 | } 24 | -------------------------------------------------------------------------------- /src/engine/operators/indices.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | #[derive(Debug)] 4 | pub struct Indices { 5 | pub input: BufferRef, 6 | pub indices_out: BufferRef, 7 | } 8 | 9 | impl<'a> VecOperator<'a> for Indices { 10 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 11 | let len = scratchpad.get_any(self.input).len(); 12 | let indices = (0..len).collect::>(); 13 | scratchpad.set(self.indices_out, indices); 14 | Ok(()) 15 | } 16 | 17 | fn inputs(&self) -> Vec> { vec![self.input.any()] } 18 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 19 | fn outputs(&self) -> Vec> { vec![self.indices_out.any()] } 20 | // TODO: could make streaming? 
(need to set streaming_producer) 21 | fn can_stream_input(&self, _: usize) -> bool { false } 22 | fn can_stream_output(&self, _: usize) -> bool { false } 23 | fn allocates(&self) -> bool { true } 24 | 25 | fn display_op(&self, _: bool) -> String { 26 | format!("indices({})", self.input) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/engine/operators/is_null.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use crate::bitvec::*; 3 | 4 | 5 | pub struct IsNull { 6 | pub input: BufferRef>, 7 | pub is_null: BufferRef, 8 | } 9 | 10 | impl<'a> VecOperator<'a> for IsNull { 11 | fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError>{ 12 | let len = scratchpad.get_any(self.input.any()).len(); 13 | let present = scratchpad.get_null_map(self.input); 14 | let mut is_null = scratchpad.get_mut(self.is_null); 15 | if stream { is_null.clear(); } 16 | for i in 0..len { 17 | if (*present).is_set(i) { 18 | is_null.push(false as u8); 19 | } else { 20 | is_null.push(true as u8); 21 | } 22 | } 23 | Ok(()) 24 | } 25 | 26 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 27 | scratchpad.set(self.is_null, Vec::with_capacity(batch_size)); 28 | } 29 | 30 | fn inputs(&self) -> Vec> { vec![self.input.any()] } 31 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 32 | fn outputs(&self) -> Vec> { vec![self.is_null.any()] } 33 | fn can_stream_input(&self, _: usize) -> bool { true } 34 | fn can_stream_output(&self, _: usize) -> bool { true } 35 | fn can_block_output(&self) -> bool { true } 36 | fn allocates(&self) -> bool { true } 37 | 38 | fn display_op(&self, _: bool) -> String { 39 | format!("IsNull({})", self.input) 40 | } 41 | } 42 | 43 | pub struct IsNotNull { 44 | pub input: BufferRef>, 45 | pub is_not_null: BufferRef, 46 | } 47 | 48 | impl<'a> VecOperator<'a> for IsNotNull 
{
    fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError>{
        let len = scratchpad.get_any(self.input.any()).len();
        let present = scratchpad.get_null_map(self.input);
        let mut is_not_null = scratchpad.get_mut(self.is_not_null);
        if stream { is_not_null.clear(); }
        for i in 0..len {
            if (*present).is_set(i) {
                is_not_null.push(true as u8);
            } else {
                is_not_null.push(false as u8);
            }
        }
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.is_not_null, Vec::with_capacity(batch_size));
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.input.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.is_not_null.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn can_block_output(&self) -> bool { true }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("IsNotNull({})", self.input)
    }
}

// ===== /src/engine/operators/lz4_decode.rs =====
// NOTE(review): generic parameters restored from usage after the dump's
// `<ident...>`-stripping pass — confirm against upstream.
use crate::engine::*;
use crate::mem_store::lz4;
use std::fmt;
use std::io::Read;

/// Streaming producer that decompresses an LZ4-encoded byte buffer into
/// batches of `T`, keeping the decoder state in `reader` across batches.
pub struct LZ4Decode<'a, T> {
    pub encoded: BufferRef<u8>,
    pub decoded: BufferRef<T>,
    pub decoded_len: usize,
    pub reader: Box<dyn Read + 'a>,
    pub has_more: bool,
}

impl<'a, T: VecData<T> + Default + 'static> VecOperator<'a> for LZ4Decode<'a, T> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let mut decoded = scratchpad.get_mut(self.decoded);
        let len = lz4::decode(&mut self.reader, &mut decoded);
        // A short read signals the end of the compressed stream.
        if len < decoded.len() {
            decoded.truncate(len);
            self.has_more = false;
        }
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.decoded, vec![T::default(); batch_size]);
        let encoded = scratchpad.get_pinned(self.encoded);
        self.reader = Box::new(lz4::decoder(encoded));
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.encoded.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.encoded.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.decoded.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { true }
    fn is_streaming_producer(&self) -> bool { true }
    fn has_more(&self) -> bool { self.has_more }
    fn custom_output_len(&self) -> Option<usize> { Some(self.decoded_len) }

    fn display_op(&self, _: bool) -> String {
        format!("lz4_decode({})", self.encoded)
    }
}

impl<'a, T> fmt::Debug for LZ4Decode<'a, T> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "LZ4Decode {{ encoded: {}, decoded: {} }}", self.encoded, self.decoded)
    }
}

// ===== /src/engine/operators/make_nullable.rs =====
use crate::engine::*;

/// Wraps a plain buffer into a nullable one with an all-present null map.
#[derive(Debug)]
pub struct MakeNullable<T> {
    pub data: BufferRef<T>,
    pub present: BufferRef<u8>,
    pub nullable_data: BufferRef<Nullable<T>>,
}

impl<'a, T: VecData<T> + 'a> VecOperator<'a> for MakeNullable<T> {
    fn execute(&mut self, _streaming: bool, _scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        // One bit per element, all set to "present".
        let present = vec![255u8; batch_size / 8 + 1];
        scratchpad.set(self.present, present);
        scratchpad.assemble_nullable(self.data, self.present, self.nullable_data);
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.data.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.nullable_data.any(), self.present.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn allocates(&self) -> bool { true }
    fn display_op(&self, _: bool) -> String { format!("nullable({}, {})", self.data, self.present) }
}

// ===== /src/engine/operators/map_operator.rs =====
use crate::engine::*;


/// Applies an element-wise `MapOp` to every value of the input buffer.
#[derive(Debug)]
pub struct MapOperator<Input, Output, Map> {
    pub input: BufferRef<Input>,
    pub output: BufferRef<Output>,
    pub map: Map,
}

impl<'a, Input, Output, Map> VecOperator<'a> for MapOperator<Input, Output, Map>
    where Input: VecData<Input> + 'a,
          Output: VecData<Output> + 'a,
          Map: MapOp<Input, Output> {
    fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError>{
        let input = scratchpad.get(self.input);
        let mut output = scratchpad.get_mut(self.output);
        if stream { output.clear() }
        for i in input.iter() {
            output.push(self.map.apply(*i));
        }
        Ok(())
    }

    fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) {
        scratchpad.set(self.output, Vec::with_capacity(batch_size));
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.input.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.output.any()] }
    fn can_stream_input(&self, _: usize) -> bool { true }
    fn can_stream_output(&self, _: usize) -> bool { true }
    fn can_block_output(&self) -> bool { true }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("{}({})", Map::name(), self.input)
    }
}

/// An element-wise function with a display name, used by `MapOperator`.
pub trait MapOp<Input, Output> {
    fn apply(&self, input: Input) -> Output;
    fn name() -> &'static str;
}

// ===== /src/engine/operators/merge_deduplicate.rs =====
use crate::engine::*;
use std::cmp::{max, min};
use std::marker::PhantomData;

/// Merges two sorted buffers, dropping duplicates, and records the merge
/// operations performed so parallel columns can be merged identically.
#[derive(Debug)]
pub struct MergeDeduplicate<T, C> {
    pub left: BufferRef<T>,
    pub right: BufferRef<T>,
    pub deduplicated: BufferRef<T>,
    pub merge_ops: BufferRef<MergeOp>,
    pub comparator: PhantomData<C>,
}

impl<'a, T: VecData<T> + 'a, C: Comparator<T>> VecOperator<'a> for MergeDeduplicate<T, C> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let (deduplicated, merge_ops) = {
            let left = scratchpad.get(self.left);
            let right = scratchpad.get(self.right);
            merge_deduplicate::<_, C>(&left, &right)
        };
        scratchpad.set(self.deduplicated, deduplicated);
        scratchpad.set(self.merge_ops, merge_ops);
        Ok(())
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.left.any(), self.right.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.left.i, &mut self.right.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.deduplicated.any(), self.merge_ops.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("merge_deduplicate({}, {})", self.left, self.right)
    }
}

fn merge_deduplicate<'a, T: VecData<T> + 'a, C: Comparator<T>>(left: &[T], right: &[T]) -> (Vec<T>, Vec<MergeOp>) {
    // Could figure out maths for more precise estimate + variance derived from how much
// grouping reduced cardinality
    let output_len_estimate = max(left.len(), right.len()) + min(left.len(), right.len()) / 2;
    let mut result = Vec::with_capacity(output_len_estimate);
    let mut ops = Vec::<MergeOp>::with_capacity(output_len_estimate);

    let mut i = 0;
    let mut j = 0;
    while i < left.len() && j < right.len() {
        if result.last() == Some(&right[j]) {
            // Right element equals the last emitted value: merge, don't emit.
            ops.push(MergeOp::MergeRight);
            j += 1;
        } else if C::cmp_eq(left[i], right[j]) {
            result.push(left[i]);
            ops.push(MergeOp::TakeLeft);
            i += 1;
        } else {
            result.push(right[j]);
            ops.push(MergeOp::TakeRight);
            j += 1;
        }
    }

    // Drain whichever side has elements remaining.
    for x in left[i..].iter() {
        result.push(*x);
        ops.push(MergeOp::TakeLeft);
    }
    if j < right.len() && result.last() == Some(&right[j]) {
        ops.push(MergeOp::MergeRight);
        j += 1;
    }
    for x in right[j..].iter() {
        result.push(*x);
        ops.push(MergeOp::TakeRight);
    }

    (result, ops)
}


// ===== /src/engine/operators/merge_drop.rs =====
use crate::engine::*;

/// Replays a recorded `MergeOp` sequence over two buffers, dropping the
/// elements that were merged away during deduplication.
#[derive(Debug)]
pub struct MergeDrop<T> {
    pub merge_ops: BufferRef<MergeOp>,
    pub left: BufferRef<T>,
    pub right: BufferRef<T>,
    pub deduplicated: BufferRef<T>,
}

impl<'a, T: VecData<T> + 'a> VecOperator<'a> for MergeDrop<T> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let deduplicated = {
            let ops = scratchpad.get(self.merge_ops);
            let left = scratchpad.get(self.left);
            let right = scratchpad.get(self.right);
            merge_drop(&ops, &left, &right)
        };
        scratchpad.set(self.deduplicated, deduplicated);
        Ok(())
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.merge_ops.any(), self.left.any(), self.right.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.merge_ops.i, &mut self.left.i, &mut self.right.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.deduplicated.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("merge_drop({}, {}, {})", self.merge_ops, self.left, self.right)
    }
}

fn merge_drop<'a, T: VecData<T> + 'a>(ops: &[MergeOp], left: &[T], right: &[T]) -> Vec<T> {
    // This is an overestimate
    let mut result = Vec::with_capacity(ops.len());
    let mut i = 0;
    let mut j = 0;
    for op in ops {
        match *op {
            MergeOp::TakeLeft => {
                result.push(left[i]);
                i += 1;
            }
            MergeOp::TakeRight => {
                result.push(right[j]);
                j += 1;
            }
            MergeOp::MergeRight => {
                j += 1;
            }
        }
    }
    result
}

// ===== /src/engine/operators/merge_partitioned.rs =====
use crate::engine::*;
use std::cmp;
use std::fmt::Debug;
use std::marker::PhantomData;

/// Merges two buffers partition by partition (as described by `partitioning`),
/// up to `limit` elements, recording which side each element was taken from.
#[derive(Debug)]
pub struct MergePartitioned<T, C> {
    pub partitioning: BufferRef<Premerge>,
    pub left: BufferRef<T>,
    pub right: BufferRef<T>,
    pub merged: BufferRef<T>,
    pub take_left: BufferRef<u8>,
    pub limit: usize,
    pub c: PhantomData<C>,
}

impl<'a, T: VecData<T> + 'a + Debug, C: Comparator<T>> VecOperator<'a> for MergePartitioned<T, C> {
    fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> {
        let (merged, merge_ops) = {
            let partitioning = scratchpad.get(self.partitioning);
            let left = scratchpad.get(self.left);
            let right = scratchpad.get(self.right);
            merge_partitioned::<_, C>(&partitioning, &left, &right, self.limit)
        };
        scratchpad.set(self.merged, merged);
        scratchpad.set(self.take_left, merge_ops);
        Ok(())
    }

    fn inputs(&self) -> Vec<BufferRef<Any>> { vec![self.partitioning.any(), self.left.any(), self.right.any()] }
    fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.partitioning.i, &mut self.left.i, &mut self.right.i] }
    fn outputs(&self) -> Vec<BufferRef<Any>> { vec![self.merged.any(), self.take_left.any()] }
    fn can_stream_input(&self, _: usize) -> bool { false }
    fn can_stream_output(&self, _: usize) -> bool { false }
    fn allocates(&self) -> bool { true }

    fn display_op(&self, _: bool) -> String {
        format!("merge_partitioned({}, {}, {})", self.partitioning, self.left, self.right)
    }
}

pub fn merge_partitioned<'a, T, C>(partitioning: &[Premerge], left: &[T], right: &[T], limit: usize)
                                   -> (Vec<T>, Vec<u8>) where T: Debug + Copy + 'a, C: Comparator<T> {
    let len = cmp::min(left.len() + right.len(), limit);
    let mut result = Vec::with_capacity(len);
    let mut take_left = Vec::<u8>::with_capacity(len);

    let mut i = 0;
    let mut j = 0;
    'outer: for group in partitioning {
        let i_max = i + group.left as usize;
        let j_max = j + group.right as usize;
        for _ in 0..(group.left + group.right) {
            // Within a partition, prefer left while it compares <= right.
            if j == j_max || (i < i_max && C::cmp_eq(left[i], right[j])) {
                take_left.push(1);
                result.push(left[i]);
                i += 1;
            } else {
                take_left.push(0);
                result.push(right[j]);
                j += 1;
            }
            if i + j == limit {
                break 'outer;
            }
        }
    }
    (result, take_left)
}

// ===== /src/engine/operators/mod.rs =====
pub use self::aggregator::*;
pub use self::comparator::*;
pub use self::vector_operator::*;

pub mod vector_operator;
pub mod comparator;

mod aggregate;
mod assemble_nullable;
mod binary_operator;
mod bit_unpack;
mod bool_op;
mod buffer_stream;
mod collect;
mod column_ops;
mod combine_null_maps;
mod compact_nullable_nullable;
mod compact_nullable;
mod compact_with_nullable;
mod compact;
mod comparison_operators;
mod constant;
mod constant_expand;
mod constant_vec;
mod delta_decode;
mod dict_lookup;
mod empty;
mod encode_const;
mod exists;
mod filter;
mod filter_nullable;
mod functions;
mod fuse_nulls;
mod get_null_map;
mod hashmap_grouping;
mod hashmap_grouping_byte_slices;
mod hashmap_grouping_val_rows;
mod identity;
mod indices;
mod is_null;
mod lz4_decode;
mod make_nullable;
mod map_operator;
mod merge;
mod merge_aggregate;
mod merge_deduplicate;
mod merge_deduplicate_partitioned;
mod merge_drop;
mod merge_keep;
mod merge_partitioned;
mod nonzero_compact;
mod nonzero_indices;
mod null_to_i64;
mod null_to_val;
mod null_to_vec;
mod null_vec;
mod null_vec_like;
mod numeric_operators;
mod parameterized_vec_vec_int_op;
mod partition;
mod pco_decode;
mod propagate_nullability;
mod scalar_f64;
mod scalar_i64;
mod scalar_i64_to_scalar_f64;
mod scalar_str;
mod select;
mod slice_pack;
mod slice_unpack;
mod sort_by;
mod sort_by_slices;
mod sort_by_val_rows;
mod stream_buffer;
mod subpartition;
mod to_val;
mod top_n;
mod type_conversion;
mod unhexpack_strings;
mod unpack_strings;
mod val_rows_pack;
mod val_rows_unpack;

mod aggregator;

pub use null_vec_like::LengthSource;

// ===== /src/engine/operators/nonzero_compact.rs =====
-------------------------------------------------------------------------------- 1 | use crate::bitvec::BitVec; 2 | use crate::engine::*; 3 | 4 | #[derive(Debug)] 5 | pub struct NonzeroCompact { 6 | pub data: BufferRef, 7 | pub compacted: BufferRef, 8 | } 9 | 10 | impl<'a, T: GenericIntVec> VecOperator<'a> for NonzeroCompact { 11 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 12 | let mut data = scratchpad.get_mut(self.data); 13 | // Remove all unmodified entries 14 | let mut j = 0; 15 | for i in 0..data.len() { 16 | if data[i] > T::zero() { 17 | data[j] = data[i]; 18 | j += 1; 19 | } 20 | } 21 | data.truncate(j); 22 | Ok(()) 23 | } 24 | 25 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 26 | scratchpad.alias(self.data, self.compacted); 27 | } 28 | 29 | fn inputs(&self) -> Vec> { vec![self.data.any()] } 30 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i] } 31 | fn outputs(&self) -> Vec> { vec![self.compacted.any()] } 32 | fn can_stream_input(&self, _: usize) -> bool { false } 33 | fn can_stream_output(&self, _: usize) -> bool { false } 34 | fn mutates(&self, i: usize) -> bool { i == self.data.i } 35 | fn allocates(&self) -> bool { false } 36 | 37 | fn display_op(&self, _: bool) -> String { 38 | format!("{}[{} > 0]", self.data, self.data) 39 | } 40 | } 41 | 42 | 43 | #[derive(Debug)] 44 | pub struct NonzeroCompactNullable { 45 | pub data: BufferRef>, 46 | pub compacted: BufferRef, 47 | } 48 | 49 | impl<'a, T: GenericIntVec> VecOperator<'a> for NonzeroCompactNullable { 50 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 51 | let (mut data, data_present) = scratchpad.get_mut_nullable(self.data); 52 | // Remove all unmodified entries 53 | let mut j = 0; 54 | for i in 0..data.len() { 55 | if (*data_present).is_set(i) && data[i] > T::zero() { 56 | data[j] = data[i]; 57 | j += 1; 58 | } 59 | } 60 | data.truncate(j); 61 | Ok(()) 62 | 
} 63 | 64 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 65 | scratchpad.alias_data(self.data, self.compacted); 66 | } 67 | 68 | fn inputs(&self) -> Vec> { vec![self.data.any()] } 69 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.data.i] } 70 | fn outputs(&self) -> Vec> { vec![self.compacted.any()] } 71 | fn can_stream_input(&self, _: usize) -> bool { false } 72 | fn can_stream_output(&self, _: usize) -> bool { false } 73 | fn mutates(&self, i: usize) -> bool { i == self.data.i } 74 | fn allocates(&self) -> bool { false } 75 | 76 | fn display_op(&self, _: bool) -> String { 77 | format!("{}[{} > 0]", self.data, self.data) 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /src/engine/operators/null_to_i64.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | // Take a null count and expands it into a nullable vec of the same length with arbitrary type and all values set to null 4 | #[derive(Debug)] 5 | pub struct NullToI64 { 6 | pub input: BufferRef, 7 | pub output: BufferRef, 8 | 9 | pub batch_size: usize, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for NullToI64 { 13 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | let len = scratchpad.get_any(self.input).len(); 15 | if self.batch_size > len { 16 | let mut output = scratchpad.get_mut(self.output); 17 | output.truncate(len); 18 | } 19 | Ok(()) 20 | } 21 | 22 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 23 | self.batch_size = batch_size; 24 | scratchpad.set(self.output, vec![I64_NULL; batch_size]); 25 | } 26 | 27 | fn inputs(&self) -> Vec> { 28 | vec![self.input.any()] 29 | } 30 | fn inputs_mut(&mut self) -> Vec<&mut usize> { 31 | vec![&mut self.input.i] 32 | } 33 | fn outputs(&self) -> Vec> { 34 | vec![self.output.any()] 35 | } 36 | fn can_stream_input(&self, _: 
usize) -> bool { 37 | true 38 | } 39 | fn can_stream_output(&self, _: usize) -> bool { 40 | true 41 | } 42 | fn allocates(&self) -> bool { 43 | true 44 | } 45 | 46 | fn display_op(&self, _: bool) -> String { 47 | format!("{} expand as ", self.input) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/engine/operators/null_to_val.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use crate::mem_store::Val; 3 | 4 | #[derive(Debug)] 5 | pub struct NullToVal { 6 | pub input: BufferRef, 7 | pub output: BufferRef>, 8 | 9 | pub batch_size: usize, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for NullToVal { 13 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | let len = scratchpad.get_any(self.input).len(); 15 | if self.batch_size > len { 16 | let mut output = scratchpad.get_mut(self.output); 17 | output.truncate(len); 18 | } 19 | Ok(()) 20 | } 21 | 22 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 23 | self.batch_size = batch_size; 24 | scratchpad.set(self.output, vec![Val::Null; batch_size]); 25 | } 26 | 27 | fn inputs(&self) -> Vec> { 28 | vec![self.input.any()] 29 | } 30 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 31 | fn outputs(&self) -> Vec> { 32 | vec![self.output.any()] 33 | } 34 | fn can_stream_input(&self, _: usize) -> bool { 35 | true 36 | } 37 | fn can_stream_output(&self, _: usize) -> bool { 38 | true 39 | } 40 | fn allocates(&self) -> bool { 41 | true 42 | } 43 | 44 | fn display_op(&self, _: bool) -> String { 45 | format!("{} expand as Val", self.input) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/engine/operators/null_to_vec.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | // Take a null count and 
expands it into a nullable vec of the same length with arbitrary type and all values set to null 4 | #[derive(Debug)] 5 | pub struct NullToVec { 6 | pub input: BufferRef, 7 | pub output: BufferRef>, 8 | 9 | pub batch_size: usize, 10 | } 11 | 12 | impl<'a, T: 'a> VecOperator<'a> for NullToVec 13 | where 14 | T: VecData + Copy + Default, 15 | { 16 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 17 | let len = scratchpad.get_any(self.input).len(); 18 | if self.batch_size > len { 19 | let (mut output, mut present) = scratchpad.get_mut_nullable(self.output); 20 | output.truncate(len); 21 | present.truncate(len.div_ceil(8)); 22 | } 23 | Ok(()) 24 | } 25 | 26 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 27 | self.batch_size = batch_size; 28 | scratchpad.set_nullable( 29 | self.output, 30 | vec![T::default(); batch_size], 31 | vec![0u8; batch_size.div_ceil(8)], 32 | ); 33 | } 34 | 35 | fn inputs(&self) -> Vec> { 36 | vec![self.input.any()] 37 | } 38 | fn inputs_mut(&mut self) -> Vec<&mut usize> { 39 | vec![&mut self.input.i] 40 | } 41 | fn outputs(&self) -> Vec> { 42 | vec![self.output.any()] 43 | } 44 | fn can_stream_input(&self, _: usize) -> bool { 45 | true 46 | } 47 | fn can_stream_output(&self, _: usize) -> bool { 48 | true 49 | } 50 | fn allocates(&self) -> bool { 51 | true 52 | } 53 | 54 | fn display_op(&self, _: bool) -> String { 55 | format!("{} expand as Nullable<{:?}>", self.input, T::t()) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/engine/operators/null_vec.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | #[derive(Debug)] 4 | pub struct NullVec { 5 | pub len: usize, 6 | pub output: BufferRef, 7 | } 8 | 9 | impl<'a> VecOperator<'a> for NullVec { 10 | fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> { 11 | Ok(()) 12 | 
} 13 | 14 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 15 | scratchpad.set_any(self.output, empty_data(self.len)); 16 | } 17 | 18 | fn inputs(&self) -> Vec> { 19 | vec![] 20 | } 21 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] } 22 | fn outputs(&self) -> Vec> { 23 | vec![self.output.any()] 24 | } 25 | fn can_stream_input(&self, _: usize) -> bool { 26 | true 27 | } 28 | fn can_stream_output(&self, _: usize) -> bool { 29 | true 30 | } 31 | fn allocates(&self) -> bool { 32 | false 33 | } 34 | fn display_op(&self, _: bool) -> String { 35 | "NullVec".to_string() 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/engine/operators/null_vec_like.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use crate::bitvec::BitVec; 3 | 4 | #[derive(Debug)] 5 | pub enum LengthSource { 6 | InputLength, 7 | NonZeroU8ElementCount, 8 | NonNullElementCount, 9 | } 10 | 11 | #[derive(Debug)] 12 | pub struct NullVecLike { 13 | pub input: BufferRef, 14 | pub output: BufferRef, 15 | pub source_type: LengthSource, 16 | pub count: usize, 17 | } 18 | 19 | impl<'a> VecOperator<'a> for NullVecLike { 20 | fn execute(&mut self, streaming: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 21 | if streaming { self.count = 0 }; 22 | self.count += match self.source_type { 23 | LengthSource::InputLength => scratchpad.get_any(self.input).len(), 24 | LengthSource::NonZeroU8ElementCount => scratchpad.get(self.input.u8()).iter().filter(|&&x| x != 0).count(), 25 | LengthSource::NonNullElementCount => { 26 | let mut count = 0; 27 | let (data, present) = scratchpad.get_nullable(self.input.nullable_u8()); 28 | for (i, d) in data.iter().enumerate() { 29 | if *d != 0 && BitVec::is_set(&*present, i) { 30 | count += 1; 31 | } 32 | } 33 | count 34 | }, 35 | }; 36 | let mut output = scratchpad.get_any_mut(self.output); 37 | 
*output.cast_ref_mut_null() = self.count; 38 | Ok(()) 39 | } 40 | 41 | fn init(&mut self, _: usize, _: usize, _: &mut Scratchpad<'a>) { } 42 | 43 | fn inputs(&self) -> Vec> { vec![self.input] } 44 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 45 | fn outputs(&self) -> Vec> { 46 | vec![self.output.any()] 47 | } 48 | fn can_stream_input(&self, _: usize) -> bool { true } 49 | fn can_stream_output(&self, _: usize) -> bool { true } 50 | fn can_block_output(&self) -> bool { true } 51 | fn allocates(&self) -> bool { false } 52 | fn display_op(&self, _: bool) -> String { 53 | format!("NullVecLike({})", self.input) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/engine/operators/parameterized_vec_vec_int_op.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::marker::PhantomData; 3 | 4 | use crate::engine::*; 5 | 6 | 7 | #[derive(Debug)] 8 | pub struct ParameterizedVecVecIntegerOperator { 9 | pub lhs: BufferRef, 10 | pub rhs: BufferRef, 11 | pub output: BufferRef, 12 | pub parameter: i64, 13 | pub op: PhantomData, 14 | } 15 | 16 | impl<'a, Op: ParameterizedIntegerOperation + fmt::Debug> VecOperator<'a> for ParameterizedVecVecIntegerOperator { 17 | fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError>{ 18 | let mut output = scratchpad.get_mut(self.output); 19 | let lhs = scratchpad.get(self.lhs); 20 | let rhs = scratchpad.get(self.rhs); 21 | if stream { output.clear(); } 22 | for (l, r) in lhs.iter().zip(rhs.iter()) { 23 | output.push(Op::perform(*l, *r, self.parameter)); 24 | } 25 | Ok(()) 26 | } 27 | 28 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 29 | scratchpad.set(self.output, Vec::with_capacity(batch_size)); 30 | } 31 | 32 | fn inputs(&self) -> Vec> { vec![self.lhs.any(), self.rhs.any()] } 33 | fn inputs_mut(&mut self) -> Vec<&mut usize> { 
vec![&mut self.lhs.i, &mut self.rhs.i] } 34 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 35 | fn can_stream_input(&self, _: usize) -> bool { true } 36 | fn can_stream_output(&self, _: usize) -> bool { true } 37 | fn can_block_output(&self) -> bool { true } 38 | fn allocates(&self) -> bool { true } 39 | 40 | fn display_op(&self, alternate: bool) -> String { 41 | Op::display(self.lhs, self.rhs, self.parameter, alternate) 42 | } 43 | } 44 | 45 | 46 | pub trait ParameterizedIntegerOperation { 47 | fn perform(lhs: i64, rhs: i64, param: i64) -> i64; 48 | fn display(lhs: BufferRef, rhs: BufferRef, param: i64, alternate: bool) -> String; 49 | } 50 | 51 | #[derive(Debug)] 52 | pub struct BitShiftLeftAdd; 53 | 54 | impl ParameterizedIntegerOperation for BitShiftLeftAdd { 55 | fn perform(lhs: i64, rhs: i64, param: i64) -> i64 { lhs + (rhs << param) } 56 | fn display(lhs: BufferRef, rhs: BufferRef, param: i64, alternate: bool) -> String { 57 | if alternate { 58 | format!("{} + ({} << {})", lhs, rhs, param) 59 | } else { 60 | format!("{} + ({} << $shift)", lhs, rhs) 61 | } 62 | } 63 | } 64 | 65 | -------------------------------------------------------------------------------- /src/engine/operators/propagate_nullability.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | /// Applies the null map of a nullable buffer to another (non-nullable) buffer. 4 | #[derive(Debug)] 5 | pub struct PropagateNullability { 6 | pub from: BufferRef>, 7 | pub to: BufferRef, 8 | pub output: BufferRef>, 9 | } 10 | 11 | impl<'a, T: VecData> VecOperator<'a> for PropagateNullability { 12 | fn execute(&mut self, _streaming: bool, _scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) } 13 | 14 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 15 | // This works even when streaming since it just creates an output->to alias and sets the null map of output to the null map of from. 
16 | // It would incorrect to perform this operation in the `execute` function since otherwise it would results in incorrect ordering with potential `AssembleNullable` operations. 17 | scratchpad.reassemble_nullable(self.from, self.to, self.output); 18 | } 19 | 20 | fn inputs(&self) -> Vec> { vec![self.from.any(), self.to.any()] } 21 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.from.i, &mut self.to.i] } 22 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 23 | fn can_stream_input(&self, _: usize) -> bool { true } 24 | fn can_stream_output(&self, _: usize) -> bool { true } 25 | fn allocates(&self) -> bool { false } 26 | fn display_op(&self, _: bool) -> String { format!("reassemble_nullable({}, {})", self.from, self.to) } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /src/engine/operators/scalar_f64.rs: -------------------------------------------------------------------------------- 1 | use ordered_float::OrderedFloat; 2 | 3 | use crate::engine::*; 4 | 5 | #[derive(Debug)] 6 | pub struct ScalarF64 { 7 | pub val: OrderedFloat, 8 | pub hide_value: bool, 9 | pub output: BufferRef>>, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for ScalarF64 { 13 | fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) } 14 | 15 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 16 | scratchpad.set_const(self.output, self.val); 17 | } 18 | 19 | fn inputs(&self) -> Vec> { vec![] } 20 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] } 21 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 22 | fn can_stream_input(&self, _: usize) -> bool { false } 23 | fn can_stream_output(&self, _: usize) -> bool { true } 24 | fn allocates(&self) -> bool { false } 25 | 26 | fn display_op(&self, alternate: bool) -> String { 27 | if self.hide_value && !alternate { 28 | "ScalarF64".to_string() 29 | } else { 30 | format!("{}", &self.val) 31 | } 32 | } 33 | } 34 | 
-------------------------------------------------------------------------------- /src/engine/operators/scalar_i64.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | #[derive(Debug)] 4 | pub struct ScalarI64 { 5 | pub val: i64, 6 | pub hide_value: bool, 7 | pub output: BufferRef>, 8 | } 9 | 10 | impl<'a> VecOperator<'a> for ScalarI64 { 11 | fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> { Ok(()) } 12 | 13 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 14 | scratchpad.set_const(self.output, self.val); 15 | } 16 | 17 | fn inputs(&self) -> Vec> { vec![] } 18 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] } 19 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 20 | fn can_stream_input(&self, _: usize) -> bool { false } 21 | fn can_stream_output(&self, _: usize) -> bool { true } 22 | fn allocates(&self) -> bool { false } 23 | 24 | fn display_op(&self, alternate: bool) -> String { 25 | if self.hide_value && !alternate { 26 | "ScalarI64".to_string() 27 | } else { 28 | format!("{}", &self.val) 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/engine/operators/scalar_i64_to_scalar_f64.rs: -------------------------------------------------------------------------------- 1 | use ordered_float::OrderedFloat; 2 | 3 | use crate::engine::*; 4 | 5 | 6 | #[derive(Debug)] 7 | pub struct ScalarI64ToScalarF64 { 8 | pub input: BufferRef>, 9 | pub output: BufferRef>, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for ScalarI64ToScalarF64 { 13 | fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError>{ Ok(()) } 14 | 15 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 16 | let input = scratchpad.get_scalar(&self.input); 17 | scratchpad.set_const(self.output, OrderedFloat(input as f64)); 18 | } 19 | 20 | fn inputs(&self) -> Vec> { 
vec![self.input.any()] } 21 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 22 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 23 | fn can_stream_input(&self, _: usize) -> bool { true } 24 | fn can_stream_output(&self, _: usize) -> bool { true } 25 | fn can_block_output(&self) -> bool { true } 26 | fn allocates(&self) -> bool { false } 27 | 28 | fn display_op(&self, _: bool) -> String { 29 | format!("{} as f64", self.input) 30 | } 31 | } -------------------------------------------------------------------------------- /src/engine/operators/scalar_str.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | #[derive(Debug)] 4 | pub struct ScalarStr<'a> { 5 | pub val: String, 6 | pub pinned: BufferRef>, 7 | pub output: BufferRef>, 8 | } 9 | 10 | impl<'a> VecOperator<'a> for ScalarStr<'a> { 11 | fn execute(&mut self, _: bool, _: &mut Scratchpad<'a>) -> Result<(), QueryError> { 12 | Ok(()) 13 | } 14 | 15 | fn init(&mut self, _: usize, _: usize, scratchpad: &mut Scratchpad<'a>) { 16 | scratchpad.set_const(self.pinned, self.val.clone()); 17 | let output = scratchpad.get_scalar_string_pinned(&self.pinned); 18 | scratchpad.set_const(self.output, output); 19 | } 20 | 21 | fn inputs(&self) -> Vec> { 22 | vec![] 23 | } 24 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![] } 25 | fn outputs(&self) -> Vec> { 26 | vec![self.output.any()] 27 | } 28 | fn can_stream_input(&self, _: usize) -> bool { 29 | false 30 | } 31 | fn can_stream_output(&self, _: usize) -> bool { 32 | true 33 | } 34 | fn allocates(&self) -> bool { 35 | true 36 | } 37 | fn display_op(&self, _: bool) -> String { 38 | format!("\"{}\"", &self.val) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/engine/operators/select.rs: -------------------------------------------------------------------------------- 1 | use crate::bitvec::*; 2 | use crate::engine::*; 3 | 4 | 
#[derive(Debug)] 5 | pub struct Select { 6 | pub input: BufferRef, 7 | pub indices: BufferRef, 8 | pub output: BufferRef, 9 | } 10 | 11 | impl<'a, T: 'a> VecOperator<'a> for Select where T: VecData { 12 | fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 13 | let data = scratchpad.get(self.input); 14 | let indices = scratchpad.get(self.indices); 15 | let mut output = scratchpad.get_mut(self.output); 16 | if stream { output.clear(); } 17 | for i in indices.iter() { 18 | output.push(data[*i]); 19 | } 20 | Ok(()) 21 | } 22 | 23 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 24 | scratchpad.set(self.output, Vec::with_capacity(batch_size)); 25 | } 26 | 27 | fn inputs(&self) -> Vec> { vec![self.input.any(), self.indices.any()] } 28 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i, &mut self.indices.i] } 29 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 30 | fn can_stream_input(&self, i: usize) -> bool { i == self.indices.i } 31 | fn can_stream_output(&self, _: usize) -> bool { true } 32 | fn can_block_output(&self) -> bool { true } 33 | fn allocates(&self) -> bool { true } 34 | 35 | fn display_op(&self, _: bool) -> String { 36 | format!("{}[{}]", self.input, self.indices) 37 | } 38 | } 39 | 40 | 41 | #[derive(Debug)] 42 | pub struct SelectNullable { 43 | pub input: BufferRef>, 44 | pub indices: BufferRef, 45 | pub output: BufferRef>, 46 | } 47 | 48 | impl<'a, T: 'a> VecOperator<'a> for SelectNullable where T: VecData { 49 | fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 50 | let (data, present) = scratchpad.get_nullable(self.input); 51 | let indices = scratchpad.get(self.indices); 52 | let (mut data_out, mut present_out) = scratchpad.get_mut_nullable(self.output); 53 | if stream { 54 | data_out.clear(); 55 | present_out.clear(); 56 | } 57 | for (i, &index) in indices.iter().enumerate() { 58 | 
data_out.push(data[index]); 59 | if (*present).is_set(index) { present_out.set(i) } 60 | } 61 | Ok(()) 62 | } 63 | 64 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 65 | scratchpad.set_nullable(self.output, Vec::with_capacity(batch_size), Vec::with_capacity(batch_size / 8)); 66 | } 67 | 68 | fn inputs(&self) -> Vec> { vec![self.input.any(), self.indices.any()] } 69 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i, &mut self.indices.i] } 70 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 71 | fn can_stream_input(&self, i: usize) -> bool { i == self.indices.i } 72 | fn can_stream_output(&self, _: usize) -> bool { true } 73 | fn can_block_output(&self) -> bool { true } 74 | fn allocates(&self) -> bool { true } 75 | 76 | fn display_op(&self, _: bool) -> String { 77 | format!("{}[{}]", self.input, self.indices) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/engine/operators/slice_unpack.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use std::str; 3 | 4 | #[derive(Debug)] 5 | pub struct SliceUnpackInt { 6 | pub input: BufferRef, 7 | pub output: BufferRef, 8 | pub stride: usize, 9 | pub offset: usize, 10 | } 11 | 12 | impl<'a, T: GenericIntVec> VecOperator<'a> for SliceUnpackInt { 13 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | let packed_any = scratchpad.get_any(self.input); 15 | let packed = packed_any.cast_ref_byte_slices(); 16 | let mut unpacked = scratchpad.get_mut(self.output); 17 | for datum in packed.data.iter().skip(self.offset).step_by(self.stride) { 18 | unpacked.push(T::from_bytes(datum)); 19 | } 20 | Ok(()) 21 | } 22 | 23 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 24 | scratchpad.set(self.output, Vec::with_capacity(batch_size)); 25 | } 26 | 27 | fn inputs(&self) -> Vec> { 
vec![self.input] } 28 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 29 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 30 | fn can_stream_input(&self, _: usize) -> bool { true } 31 | fn can_stream_output(&self, _: usize) -> bool { true } 32 | fn allocates(&self) -> bool { true } 33 | 34 | fn display_op(&self, _: bool) -> String { 35 | format!("{}[{}, {}, ...] = {}", self.output, self.offset, self.offset + self.stride, self.input) 36 | } 37 | fn display_output(&self) -> bool { false } 38 | } 39 | 40 | #[derive(Debug)] 41 | pub struct SliceUnpackString<'a> { 42 | pub input: BufferRef, 43 | pub output: BufferRef<&'a str>, 44 | pub stride: usize, 45 | pub offset: usize, 46 | } 47 | 48 | impl<'a> VecOperator<'a> for SliceUnpackString<'a> { 49 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 50 | let packed_any = scratchpad.get_any(self.input); 51 | let packed = packed_any.cast_ref_byte_slices(); 52 | let mut unpacked = scratchpad.get_mut(self.output); 53 | for datum in packed.data.iter().skip(self.offset).step_by(self.stride) { 54 | unpacked.push(unsafe { str::from_utf8_unchecked(datum) }); 55 | } 56 | Ok(()) 57 | } 58 | 59 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 60 | scratchpad.set(self.output, Vec::with_capacity(batch_size)); 61 | } 62 | 63 | fn inputs(&self) -> Vec> { vec![self.input] } 64 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 65 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 66 | fn can_stream_input(&self, _: usize) -> bool { true } 67 | fn can_stream_output(&self, _: usize) -> bool { true } 68 | fn allocates(&self) -> bool { true } 69 | 70 | fn display_op(&self, _: bool) -> String { 71 | format!("{}[{}, {}, ...] 
= {}", self.output, self.offset, self.offset + self.stride, self.input) 72 | } 73 | fn display_output(&self) -> bool { false } 74 | } 75 | -------------------------------------------------------------------------------- /src/engine/operators/sort_by_slices.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | #[derive(Debug)] 4 | pub struct SortBySlices { 5 | pub ranking: BufferRef, 6 | pub indices: BufferRef, 7 | pub output: BufferRef, 8 | pub descending: bool, 9 | pub stable: bool, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for SortBySlices { 13 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | scratchpad.alias(self.indices, self.output); 15 | let ranking_any = scratchpad.get_any(self.ranking); 16 | let ranking = ranking_any.cast_ref_byte_slices(); 17 | let mut result = scratchpad.get_mut(self.indices); 18 | if self.descending { 19 | if self.stable { 20 | result.sort_by(|i, j| ranking.row(*i).cmp(ranking.row(*j)).reverse()); 21 | } else { 22 | result.sort_unstable_by(|i, j| ranking.row(*i).cmp(ranking.row(*j)).reverse()); 23 | } 24 | } else if self.stable { 25 | result.sort_by_key(|i| ranking.row(*i)); 26 | } else { 27 | result.sort_unstable_by_key(|i| ranking.row(*i)); 28 | } 29 | Ok(()) 30 | } 31 | 32 | fn inputs(&self) -> Vec> { 33 | vec![self.ranking.any(), self.indices.any()] 34 | } 35 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.ranking.i, &mut self.indices.i] } 36 | fn outputs(&self) -> Vec> { 37 | vec![self.output.any()] 38 | } 39 | fn can_stream_input(&self, _: usize) -> bool { 40 | false 41 | } 42 | fn can_stream_output(&self, _: usize) -> bool { 43 | false 44 | } 45 | fn allocates(&self) -> bool { 46 | true 47 | } 48 | 49 | fn display_op(&self, _: bool) -> String { 50 | format!( 51 | "sort_by({}, {}, desc={}, stable={})", 52 | self.ranking, self.indices, self.descending, self.stable 53 | ) 54 | } 55 | } 56 | 
-------------------------------------------------------------------------------- /src/engine/operators/sort_by_val_rows.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 3 | pub struct SortByValRows<'a> { 4 | pub ranking: BufferRef>, 5 | pub indices: BufferRef, 6 | pub output: BufferRef, 7 | pub descending: bool, 8 | pub stable: bool, 9 | } 10 | 11 | impl<'a> VecOperator<'a> for SortByValRows<'a> { 12 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 13 | scratchpad.alias(self.indices, self.output); 14 | let ranking = scratchpad.get_mut_val_rows(self.ranking); 15 | let mut result = scratchpad.get_mut(self.indices); 16 | if self.descending { 17 | if self.stable { 18 | result.sort_by(|i, j| ranking.row(*i).cmp(ranking.row(*j)).reverse()); 19 | } else { 20 | result.sort_unstable_by(|i, j| ranking.row(*i).cmp(ranking.row(*j)).reverse()); 21 | } 22 | } else if self.stable { 23 | result.sort_by_key(|i| ranking.row(*i)); 24 | } else { 25 | result.sort_unstable_by_key(|i| ranking.row(*i)); 26 | } 27 | Ok(()) 28 | } 29 | 30 | fn inputs(&self) -> Vec> { 31 | vec![self.ranking.any(), self.indices.any()] 32 | } 33 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.indices.i] } 34 | fn outputs(&self) -> Vec> { 35 | vec![self.output.any()] 36 | } 37 | fn can_stream_input(&self, _: usize) -> bool { 38 | false 39 | } 40 | fn can_stream_output(&self, _: usize) -> bool { 41 | false 42 | } 43 | fn allocates(&self) -> bool { 44 | true 45 | } 46 | 47 | fn display_op(&self, _: bool) -> String { 48 | format!( 49 | "sort_by({}, {}, desc={}, stable={})", 50 | self.ranking, self.indices, self.descending, self.stable 51 | ) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/engine/operators/unpack_strings.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | 
use std::fmt; 3 | use crate::stringpack::StringPackerIterator; 4 | 5 | pub struct UnpackStrings<'a> { 6 | pub packed: BufferRef, 7 | pub unpacked: BufferRef<&'a str>, 8 | pub iterator: Option>, 9 | pub has_more: bool, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for UnpackStrings<'a> { 13 | fn execute(&mut self, streaming: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | let mut decoded = scratchpad.get_mut(self.unpacked); 15 | if streaming { 16 | decoded.clear(); 17 | } 18 | for elem in self.iterator.as_mut().unwrap() { 19 | decoded.push(elem); 20 | if decoded.capacity() == decoded.len() { return Ok(()); } 21 | } 22 | self.has_more = false; 23 | Ok(()) 24 | } 25 | 26 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 27 | scratchpad.set(self.unpacked, Vec::with_capacity(batch_size)); 28 | let encoded = scratchpad.get_pinned(self.packed); 29 | self.iterator = Some(unsafe { StringPackerIterator::from_slice(encoded) }); 30 | } 31 | 32 | fn inputs(&self) -> Vec> { vec![self.packed.any()] } 33 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.packed.i] } 34 | fn outputs(&self) -> Vec> { vec![self.unpacked.any()] } 35 | fn can_stream_input(&self, _: usize) -> bool { false } 36 | fn can_stream_output(&self, _: usize) -> bool { true } 37 | fn allocates(&self) -> bool { true } 38 | fn is_streaming_producer(&self) -> bool { true } 39 | fn has_more(&self) -> bool { self.has_more } 40 | 41 | fn display_op(&self, _: bool) -> String { 42 | format!("unpack_strings({})", self.packed) 43 | } 44 | } 45 | 46 | impl<'a> fmt::Debug for UnpackStrings<'a> { 47 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 48 | write!(f, "UnpackStrings {{ packed: {}, unpacked: {} }}", self.packed, self.unpacked) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/engine/operators/val_rows_pack.rs: 
-------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use crate::mem_store::value::Val; 3 | 4 | #[derive(Debug)] 5 | pub struct ValRowsPack<'a> { 6 | pub input: BufferRef>, 7 | pub output: BufferRef>, 8 | pub stride: usize, 9 | pub offset: usize, 10 | } 11 | 12 | impl<'a> VecOperator<'a> for ValRowsPack<'a> { 13 | fn execute(&mut self, _: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 14 | let data = scratchpad.get(self.input); 15 | let mut val_rows = scratchpad.get_mut_val_rows(self.output); 16 | for (i, datum) in data.iter().enumerate() { 17 | val_rows.data[i * self.stride + self.offset] = *datum; 18 | } 19 | Ok(()) 20 | } 21 | 22 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 23 | if scratchpad.get_any(self.output.any()).len() == 0 { 24 | scratchpad.set_any( 25 | self.output.any(), 26 | Box::new(ValRows { 27 | row_len: self.stride, 28 | data: vec![Val::Null; batch_size * self.stride], 29 | }), 30 | ); 31 | } 32 | } 33 | 34 | fn inputs(&self) -> Vec> { 35 | vec![self.input.any()] 36 | } 37 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 38 | fn outputs(&self) -> Vec> { 39 | vec![self.output.any()] 40 | } 41 | fn can_stream_input(&self, _: usize) -> bool { 42 | false 43 | } 44 | fn can_stream_output(&self, _: usize) -> bool { 45 | false 46 | } 47 | fn allocates(&self) -> bool { 48 | true 49 | } 50 | 51 | fn display_op(&self, _: bool) -> String { 52 | format!( 53 | "{}[{}, {}, ...] 
= {}", 54 | self.output, 55 | self.offset, 56 | self.offset + self.stride, 57 | self.input 58 | ) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/engine/operators/val_rows_unpack.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::*; 2 | use crate::mem_store::Val; 3 | 4 | #[derive(Debug)] 5 | pub struct ValRowsUnpack<'a> { 6 | pub input: BufferRef>, 7 | pub output: BufferRef>, 8 | pub stride: usize, 9 | pub offset: usize, 10 | 11 | pub batch_size: usize, 12 | pub curr_index: usize, 13 | pub has_more: bool, 14 | } 15 | 16 | impl<'a> VecOperator<'a> for ValRowsUnpack<'a> { 17 | fn execute(&mut self, stream: bool, scratchpad: &mut Scratchpad<'a>) -> Result<(), QueryError> { 18 | let packed = scratchpad.get_mut_val_rows(self.input); 19 | let mut unpacked = scratchpad.get_mut(self.output); 20 | if stream { 21 | self.curr_index += unpacked.len(); 22 | unpacked.clear(); 23 | } 24 | for &datum in packed.data.iter().skip(self.offset).step_by(self.stride).skip(self.curr_index).take(self.batch_size) { 25 | unpacked.push(datum); 26 | } 27 | self.has_more = (packed.data.len() + self.stride - self.offset - 1) / self.stride > self.curr_index; 28 | Ok(()) 29 | } 30 | 31 | fn init(&mut self, _: usize, batch_size: usize, scratchpad: &mut Scratchpad<'a>) { 32 | self.batch_size = batch_size; 33 | scratchpad.set(self.output, Vec::with_capacity(batch_size)); 34 | } 35 | 36 | fn inputs(&self) -> Vec> { vec![self.input.any()] } 37 | fn inputs_mut(&mut self) -> Vec<&mut usize> { vec![&mut self.input.i] } 38 | fn outputs(&self) -> Vec> { vec![self.output.any()] } 39 | // TODO: make sliced/streamable version of val rows? 
but have to make ValRowsPack streaming first 40 | fn can_stream_input(&self, _: usize) -> bool { false } 41 | fn can_stream_output(&self, _: usize) -> bool { true } 42 | fn allocates(&self) -> bool { true } 43 | fn has_more(&self) -> bool { self.has_more } 44 | 45 | fn display_op(&self, _: bool) -> String { 46 | format!("{}[{}, {}, ...] = {}", self.output, self.offset, self.offset + self.stride, self.input) 47 | } 48 | fn display_output(&self) -> bool { false } 49 | } 50 | -------------------------------------------------------------------------------- /src/engine/planning/filter.rs: -------------------------------------------------------------------------------- 1 | use crate::engine::{BufferRef, Nullable, QueryPlanner, TypedBufferRef}; 2 | 3 | #[derive(Clone, Copy, Default)] 4 | pub enum Filter { 5 | #[default] 6 | None, 7 | Null, 8 | U8(BufferRef), 9 | NullableU8(BufferRef>), 10 | Indices(BufferRef), 11 | } 12 | 13 | 14 | impl Filter { 15 | pub fn apply_filter(self, planner: &mut QueryPlanner, plan: TypedBufferRef) -> TypedBufferRef { 16 | match self { 17 | Filter::U8(filter) => planner.filter(plan, filter), 18 | Filter::NullableU8(filter) => planner.nullable_filter(plan, filter), 19 | Filter::Indices(indices) => planner.select(plan, indices), 20 | Filter::Null => planner.empty(plan.tag), 21 | Filter::None => plan, 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /src/engine/planning/mod.rs: -------------------------------------------------------------------------------- 1 | mod filter; 2 | pub mod planner; 3 | mod query; 4 | pub mod query_plan; 5 | 6 | pub use self::filter::Filter; 7 | pub use self::planner::QueryPlanner; 8 | pub use self::query::ColumnInfo; 9 | pub use self::query::NormalFormQuery; 10 | pub use self::query::Query; 11 | pub use self::query::ResultColumn; 12 | pub use self::query_plan::QueryPlan; 13 | -------------------------------------------------------------------------------- 
/src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::backtrace::Backtrace; 2 | use futures::channel::oneshot; 3 | use thiserror::Error; 4 | 5 | #[derive(Error, Debug)] 6 | pub enum QueryError { 7 | #[error("Failed to parse query. Chars remaining: {}", _0)] 8 | SytaxErrorCharsRemaining(String), 9 | #[error("Failed to parse query. Bytes remaining: {:?}", _0)] 10 | SyntaxErrorBytesRemaining(Vec), 11 | #[error("Failed to parse query: {}", _0)] 12 | ParseError(String), 13 | #[error("Some assumption was violated. This is a bug: {}", _0)] 14 | FatalError(String, Backtrace), 15 | #[error("Not implemented: {}", _0)] 16 | NotImplemented(String), 17 | #[error("Type error: {}", _0)] 18 | TypeError(String), 19 | #[error("Overflow or division by zero")] 20 | Overflow, 21 | #[error("Query execution was canceled")] 22 | Canceled { 23 | #[from] 24 | source: oneshot::Canceled, 25 | }, 26 | } 27 | 28 | #[macro_export] 29 | macro_rules! fatal { 30 | ($e:expr) => { 31 | QueryError::FatalError($e.to_owned(), std::backtrace::Backtrace::capture()) 32 | }; 33 | ($fmt:expr, $($arg:tt)+) => { 34 | QueryError::FatalError(format!($fmt, $($arg)+).to_string(), std::backtrace::Backtrace::capture()) 35 | }; 36 | } 37 | 38 | #[macro_export] 39 | macro_rules! bail { 40 | ($kind:expr, $e:expr) => { 41 | return Err($kind($e.to_owned())) 42 | }; 43 | ($kind:expr, $fmt:expr, $($arg:tt)+) => { 44 | return Err($kind(format!($fmt, $($arg)+).to_owned())) 45 | }; 46 | } 47 | 48 | #[macro_export] 49 | macro_rules! 
ensure { 50 | ($cond:expr, $e:expr) => { 51 | if !($cond) { 52 | return Err(QueryError::FatalError($e.to_string(), std::backtrace::Backtrace::capture())) 53 | } 54 | }; 55 | ($cond:expr, $fmt:expr, $($arg:tt)+) => { 56 | if !($cond) { 57 | return Err(QueryError::FatalError(format!($fmt, $($arg)+).to_string(), std::backtrace::Backtrace::capture())) 58 | } 59 | }; 60 | } 61 | -------------------------------------------------------------------------------- /src/ingest/extractor.rs: -------------------------------------------------------------------------------- 1 | use chrono::prelude::*; 2 | 3 | pub type Extractor = fn(&str) -> i64; 4 | 5 | pub fn multiply_by_100(field: &str) -> i64 { 6 | if let Ok(int) = field.parse::() { 7 | int * 100 8 | } else if let Ok(float) = field.parse::() { 9 | (float * 100.0) as i64 10 | } else if field.is_empty() { 11 | 0 12 | } else { 13 | panic!("invalid field {}", &field) 14 | } 15 | } 16 | 17 | pub fn multiply_by_1000(field: &str) -> i64 { 18 | if let Ok(int) = field.parse::() { 19 | int * 1000 20 | } else if let Ok(float) = field.parse::() { 21 | (float * 1000.0) as i64 22 | } else if field.is_empty() { 23 | 0 24 | } else { 25 | panic!("invalid field {}", &field) 26 | } 27 | } 28 | 29 | pub fn int(field: &str) -> i64 { 30 | if let Ok(int) = field.parse::() { 31 | int 32 | } else if field.is_empty() { 33 | 0 34 | } else { 35 | panic!("can't parse {} as integer", &field) 36 | } 37 | } 38 | 39 | pub fn date_time(field: &str) -> i64 { 40 | NaiveDateTime::parse_from_str(field, "%Y-%m-%d %H:%M:%S") 41 | .unwrap_or_else(|_| panic!("Failed to parse {} as date time", &field)) 42 | .and_utc() 43 | .timestamp() 44 | } 45 | -------------------------------------------------------------------------------- /src/ingest/input_column.rs: -------------------------------------------------------------------------------- 1 | use locustdb_serialization::event_buffer::ColumnData; 2 | 3 | use crate::Value; 4 | 5 | pub enum InputColumn { 6 | Int(Vec), 7 | 
Float(Vec), 8 | // (Length, [(Index, Value)]) 9 | NullableFloat(u64, Vec<(u64, f64)>), 10 | NullableInt(u64, Vec<(u64, i64)>), 11 | Str(Vec), 12 | Null(usize), 13 | Mixed(Vec), 14 | } 15 | 16 | impl InputColumn { 17 | pub fn from_column_data(column_data: ColumnData, rows: u64) -> Self { 18 | match column_data { 19 | ColumnData::Dense(data) => { 20 | if (data.len() as u64) < rows { 21 | InputColumn::NullableFloat( 22 | rows, 23 | data.into_iter() 24 | .enumerate() 25 | .map(|(i, v)| (i as u64, v)) 26 | .collect(), 27 | ) 28 | } else { 29 | InputColumn::Float(data) 30 | } 31 | } 32 | ColumnData::Sparse(data) => InputColumn::NullableFloat(rows, data), 33 | ColumnData::I64(data) => { 34 | if (data.len() as u64) < rows { 35 | InputColumn::NullableInt( 36 | rows, 37 | data.into_iter() 38 | .enumerate() 39 | .map(|(i, v)| (i as u64, v)) 40 | .collect(), 41 | ) 42 | } else { 43 | InputColumn::Int(data) 44 | } 45 | } 46 | ColumnData::String(data) => { 47 | assert!( 48 | (data.len() as u64) == rows, 49 | "rows: {}, data.len(): {}", 50 | rows, 51 | data.len() 52 | ); 53 | InputColumn::Str(data) 54 | } 55 | ColumnData::Empty => InputColumn::Null(rows as usize), 56 | ColumnData::SparseI64(data) => InputColumn::NullableInt(rows, data), 57 | ColumnData::Mixed(data) => { 58 | InputColumn::Mixed(data.into_iter().map(|v| v.into()).collect()) 59 | } 60 | } 61 | } 62 | 63 | pub fn len(&self) -> usize { 64 | match self { 65 | InputColumn::Int(data) => data.len(), 66 | InputColumn::Float(data) => data.len(), 67 | InputColumn::Str(data) => data.len(), 68 | InputColumn::NullableFloat(rows, _) => *rows as usize, 69 | InputColumn::NullableInt(rows, _) => *rows as usize, 70 | InputColumn::Mixed(data) => data.len(), 71 | InputColumn::Null(rows) => *rows, 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/ingest/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod csv_loader; 2 | pub 
mod raw_val; 3 | pub mod input_column; 4 | pub mod buffer; 5 | pub mod extractor; 6 | pub mod nyc_taxi_data; 7 | pub mod colgen; 8 | pub mod schema; 9 | mod alias_method_fork; -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // TODO: migrate off incomplete/unsound specialization feature 2 | // TODO: migrate off core_intrinsics? 3 | #![allow(incomplete_features)] 4 | #![allow(internal_features)] 5 | #![feature( 6 | fn_traits, 7 | specialization, 8 | trait_alias, 9 | core_intrinsics, 10 | box_patterns, 11 | proc_macro_hygiene, 12 | let_chains, 13 | duration_constructors, 14 | btree_cursors, 15 | error_generic_member_access, 16 | )] 17 | #[macro_use] 18 | extern crate lazy_static; 19 | #[macro_use] 20 | extern crate log; 21 | pub use crate::disk_store::noop_storage::NoopStorage; 22 | 23 | pub use crate::engine::query_task::{BasicTypeColumn, QueryOutput}; 24 | pub use crate::errors::QueryError; 25 | pub use crate::ingest::colgen; 26 | pub use crate::ingest::csv_loader::Options as LoadOptions; 27 | pub use crate::ingest::extractor; 28 | pub use crate::ingest::nyc_taxi_data; 29 | pub use crate::ingest::raw_val::syntax as value_syntax; 30 | pub use crate::ingest::raw_val::RawVal as Value; 31 | pub use crate::locustdb::LocustDB; 32 | pub use crate::locustdb::Options; 33 | pub use crate::mem_store::table::TableStats; 34 | 35 | #[macro_use] 36 | mod errors; 37 | mod bitvec; 38 | pub mod disk_store; 39 | mod engine; 40 | mod ingest; 41 | mod locustdb; 42 | pub mod logging_client; 43 | mod mem_store; 44 | pub mod observability; 45 | mod scheduler; 46 | pub mod server; 47 | mod stringpack; 48 | mod syntax; 49 | pub mod unit_fmt; 50 | 51 | #[cfg(feature = "python")] 52 | pub mod python; 53 | 54 | pub type QueryResult = Result; 55 | -------------------------------------------------------------------------------- /src/mem_store/floats.rs: 
-------------------------------------------------------------------------------- 1 | use ordered_float::OrderedFloat; 2 | 3 | use crate::mem_store::*; 4 | use std::sync::Arc; 5 | use crate::bitvec::BitVec; 6 | 7 | pub struct FloatColumn; 8 | 9 | impl FloatColumn { 10 | pub fn new_boxed(name: &str, mut values: Vec>, null: Option>) -> Arc { 11 | let null = null.map(|mut n| { 12 | n.shrink_to_fit(); 13 | n 14 | }); 15 | values.shrink_to_fit(); 16 | let mut column = match null { 17 | Some(present) => { 18 | // Values for null entries are arbitrary, replace them with values that give high compression 19 | let mut last_value = OrderedFloat(0.0); 20 | for (i, value) in values.iter_mut().enumerate() { 21 | if !present.is_set(i) { 22 | *value = last_value; 23 | } else { 24 | last_value = *value; 25 | } 26 | } 27 | Column::new( 28 | name, 29 | values.len(), 30 | None, 31 | vec![CodecOp::PushDataSection(1), CodecOp::Nullable], 32 | vec![values.into(), DataSection::Bitvec(present)], 33 | ) 34 | }, 35 | None => Column::new( 36 | name, 37 | values.len(), 38 | None, 39 | vec![], 40 | vec![DataSection::F64(values)], 41 | ), 42 | }; 43 | column.lz4_or_pco_encode(); 44 | Arc::new(column) 45 | } 46 | } -------------------------------------------------------------------------------- /src/mem_store/lru.rs: -------------------------------------------------------------------------------- 1 | use crate::mem_store::partition::ColumnLocator; 2 | use lru::LruCache; 3 | use std::sync::{Arc, Mutex}; 4 | 5 | #[derive(Clone, Debug)] 6 | pub struct Lru { 7 | cache: Arc>>, 8 | } 9 | 10 | impl Lru { 11 | pub fn touch(&self, column: &ColumnLocator) { 12 | let mut cache = self.cache.lock().unwrap(); 13 | cache.get(column); 14 | } 15 | 16 | pub fn put(&self, column: ColumnLocator) { 17 | let mut cache = self.cache.lock().unwrap(); 18 | cache.put(column, ()); 19 | } 20 | 21 | pub fn remove(&self, column: &ColumnLocator) { 22 | let mut cache = self.cache.lock().unwrap(); 23 | cache.pop(column); 24 | } 
25 | 26 | pub fn evict(&self) -> Option { 27 | let mut cache = self.cache.lock().unwrap(); 28 | cache.pop_lru().map(|x| x.0) 29 | } 30 | } 31 | 32 | impl Default for Lru { 33 | fn default() -> Lru { 34 | Lru { 35 | cache: Arc::new(Mutex::new(LruCache::unbounded())), 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/mem_store/lz4.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Read, Write}; 2 | use std::mem; 3 | use std::slice::{from_raw_parts, from_raw_parts_mut}; 4 | use std::fmt::Debug; 5 | 6 | 7 | pub fn decoder(data: &[u8]) -> lz4_flex::frame::FrameDecoder<&[u8]> { 8 | lz4_flex::frame::FrameDecoder::new(data) 9 | } 10 | 11 | pub fn encode(data: &[T]) -> Vec { 12 | let ptr_t = data.as_ptr(); 13 | // Endianness? Never heard of it... 14 | let data_u8: &[u8] = unsafe { 15 | let ptr_u8 = ptr_t as *const u8; 16 | from_raw_parts(ptr_u8, std::mem::size_of_val(data)) 17 | }; 18 | 19 | let mut result = Vec::new(); 20 | { 21 | let mut encoder = lz4_flex::frame::FrameEncoder::new(&mut result); 22 | encoder.write_all(data_u8).unwrap(); 23 | encoder.finish().unwrap(); 24 | } 25 | result 26 | } 27 | 28 | // TODO: unsafe 29 | #[allow(clippy::needless_pass_by_ref_mut)] 30 | pub fn decode(src: &mut dyn Read, dst: &mut [T]) -> usize { 31 | let ptr_t = dst.as_ptr(); 32 | let dst_u8: &mut [u8] = unsafe { 33 | let ptr_u8 = ptr_t as *mut u8; 34 | from_raw_parts_mut(ptr_u8, std::mem::size_of_val(dst)) 35 | }; 36 | 37 | let mut read = 0; 38 | // LZ4 decodes in blocks of at most 65536 elements, so might have to call multiple times to fill buffer 39 | while read < dst_u8.len() && 0 != { 40 | let len = src.read(&mut dst_u8[read..]).unwrap(); 41 | read += len; 42 | len 43 | } {} 44 | if read % mem::size_of::() != 0 { 45 | println!("{} {} {} {}", dst.len(), dst_u8.len(), read, mem::size_of::()); 46 | } 47 | assert_eq!(read % mem::size_of::(), 0); 48 | read / mem::size_of::() 
49 | } 50 | 51 | #[cfg(test)] 52 | mod tests { 53 | use super::*; 54 | 55 | #[test] 56 | fn test_encode_decode() { 57 | let data = vec![10i64, 12095, -51235, 3, 0, 0, 12353, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]; 58 | let encoded = encode(&data); 59 | let mut decoded = vec![0i64; data.len()]; 60 | let count = decode(&mut decoder(&encoded), &mut decoded); 61 | assert_eq!(count, data.len()); 62 | assert_eq!(decoded, data); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/mem_store/mixed_column.rs: -------------------------------------------------------------------------------- 1 | use crate::ingest::raw_val::RawVal; 2 | use crate::mem_store::value::Val; 3 | 4 | impl RawVal { 5 | pub fn to_val(&self) -> Val { 6 | match *self { 7 | RawVal::Null => Val::Null, 8 | RawVal::Int(i) => Val::Integer(i), 9 | RawVal::Str(ref string) => Val::Str(string), 10 | RawVal::Float(f) => Val::Float(f), 11 | } 12 | } 13 | 14 | pub fn to_static_val(&self) -> Val<'static> { 15 | match *self { 16 | RawVal::Null => Val::Null, 17 | RawVal::Int(i) => Val::Integer(i), 18 | RawVal::Float(f) => Val::Float(f), 19 | RawVal::Str(_) => panic!("Can't convert RawVal::Str to Val::Str + 'static"), 20 | } 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /src/mem_store/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod codec; 2 | pub mod column; 3 | pub mod column_buffer; 4 | pub mod floats; 5 | pub mod integers; 6 | pub(crate) mod lru; 7 | pub mod lz4; 8 | mod mixed_column; 9 | pub mod partition; 10 | pub mod strings; 11 | pub mod table; 12 | pub mod tree; 13 | pub mod value; 14 | 15 | pub use self::codec::{Codec, CodecOp}; 16 | pub use self::column::{Column, DataSection, DataSource}; 17 | pub use self::lru::Lru; 18 | pub use self::table::TableStats; 19 | pub use self::tree::*; 20 | pub use self::value::Val; 21 | 
--------------------------------------------------------------------------------
/src/mem_store/value.rs:
--------------------------------------------------------------------------------
use std::fmt;

use ordered_float::OrderedFloat;

use crate::ingest::raw_val::RawVal;

/// Borrowed, dynamically typed value used during query execution.
///
/// Unlike [`RawVal`], a `Val` can borrow its string data (`Str`) straight out
/// of a backing column, which is why the type carries a lifetime.
#[derive(Debug, PartialEq, Eq, Ord, PartialOrd, Clone, Copy, Hash)]
pub enum Val<'a> {
    Null,
    Bool(bool),
    Integer(i64),
    Str(&'a str),
    Float(OrderedFloat<f64>),
}


impl<'a> fmt::Display for Val<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Val::Null => write!(f, "null"),
            Val::Bool(b) => write!(f, "{}", b),
            Val::Integer(i) => write!(f, "{}", i),
            Val::Str(s) => write!(f, "\"{}\"", s),
            // NOTE(review): floats are rendered in quotes, same as strings.
            // Presumably intentional for result formatting — confirm before
            // changing.
            Val::Float(x) => write!(f, "\"{}\"", x),
        }
    }
}

/// Unit maps to SQL-style `null`.
impl<'a> From<()> for Val<'a> {
    fn from(_: ()) -> Val<'a> {
        Val::Null
    }
}

impl<'a> From<bool> for Val<'a> {
    fn from(b: bool) -> Val<'a> {
        Val::Bool(b)
    }
}

impl<'a> From<i64> for Val<'a> {
    fn from(t: i64) -> Val<'a> {
        Val::Integer(t)
    }
}

impl<'a> From<&'a str> for Val<'a> {
    fn from(s: &'a str) -> Val<'a> {
        Val::Str(s)
    }
}

impl<'a> From<f64> for Val<'a> {
    fn from(f: f64) -> Val<'a> {
        Val::Float(OrderedFloat(f))
    }
}

/// `None` maps to `Val::Null`; `Some(v)` defers to the inner conversion.
impl<'a, T> From<Option<T>> for Val<'a>
where Val<'a>: From<T>
{
    fn from(o: Option<T>) -> Val<'a> {
        o.map_or(Val::Null, Val::from)
    }
}

impl<'a, 'b> From<&'a Val<'b>> for RawVal {
    fn from(val: &Val) -> RawVal {
        match *val {
            Val::Integer(int) => RawVal::Int(int),
            Val::Float(float) => RawVal::Float(float),
            Val::Str(s) => RawVal::Str(s.to_string()),
            // `RawVal` has no boolean variant; booleans degrade to null.
            Val::Null | Val::Bool(_) => RawVal::Null,
        }
    }
}
--------------------------------------------------------------------------------
/src/observability/metrics.rs: -------------------------------------------------------------------------------- 1 | use prometheus::{register_counter, register_gauge}; 2 | use prometheus::{Counter, Gauge}; 3 | 4 | lazy_static! { 5 | pub static ref QUERY_COUNT: Counter = 6 | register_counter!("query_count", "Number of queries executed").unwrap(); 7 | pub static ref QUERY_OK_COUNT: Counter = 8 | register_counter!("query_ok_count", "Number of queries executed successfully").unwrap(); 9 | pub static ref QUERY_ERROR_COUNT: Counter = 10 | register_counter!("query_error_count", "Number of queries executed with errors").unwrap(); 11 | pub static ref INGESTION_EVENT_COUNT: Counter = 12 | register_counter!("ingestion_event_count", "Number of ingestion events").unwrap(); 13 | pub static ref WAL_SIZE_BYTES: Gauge = 14 | register_gauge!("wal_size_bytes", "Size of the WAL").unwrap(); 15 | pub static ref WAL_UTILIZATION: Gauge = 16 | register_gauge!("wal_utilization", "Utilization of the WAL").unwrap(); 17 | pub static ref COLUMN_CACHE_BYTES: Gauge = 18 | register_gauge!("column_cache_bytes", "In-memory size of columns loaded in-memory").unwrap(); 19 | pub static ref UNFLUSHED_BUFFER_CACHE_BYTES: Gauge = 20 | register_gauge!("unflushed_buffer_cache_bytes", "In-memory size of open table buffers").unwrap(); 21 | pub static ref COLUMN_CACHE_UTILIZATION: Gauge = 22 | register_gauge!("column_cache_utilization", "Utilization of the column cache").unwrap(); 23 | pub static ref META_STORE_BYTES: Gauge = 24 | register_gauge!("meta_store_bytes", "In-memory size of the meta store").unwrap(); 25 | pub static ref TABLE_COUNT: Gauge = 26 | register_gauge!("table_count", "Number of tables").unwrap(); 27 | pub static ref ROW_COUNT: Gauge = 28 | register_gauge!("row_count", "Number of rows in the database").unwrap(); 29 | pub static ref PARTITION_COUNT: Gauge = 30 | register_gauge!("partition_count", "Number of partitions in the database").unwrap(); 31 | pub static ref PARTITION_COLUMN_COUNT: 
Gauge = 32 | register_gauge!("partition_column_count", "Sum of columns over all partitions").unwrap(); 33 | pub static ref PARTITION_VALUES: Gauge = 34 | register_gauge!("value_count", "Number of values in the partitions in the database").unwrap(); 35 | pub static ref DATABASE_SIZE_BYTES: Gauge = 36 | register_gauge!("database_size_bytes", "Size of the database").unwrap(); 37 | } 38 | -------------------------------------------------------------------------------- /src/observability/mod.rs: -------------------------------------------------------------------------------- 1 | mod simple_trace; 2 | mod perf_counter; 3 | pub(crate) mod metrics; 4 | 5 | pub(crate) use simple_trace::SimpleTracer; 6 | pub use perf_counter::{PerfCounter, QueryPerfCounter}; -------------------------------------------------------------------------------- /src/scheduler/mod.rs: -------------------------------------------------------------------------------- 1 | mod shared_sender; 2 | mod task; 3 | pub(crate) mod disk_read_scheduler; 4 | pub(crate) mod inner_locustdb; 5 | 6 | pub use self::inner_locustdb::InnerLocustDB; 7 | pub use self::task::Task; 8 | pub use self::shared_sender::SharedSender; -------------------------------------------------------------------------------- /src/scheduler/shared_sender.rs: -------------------------------------------------------------------------------- 1 | use futures::channel::oneshot::Sender; 2 | use std::sync::Mutex; 3 | use std::mem; 4 | 5 | pub struct SharedSender { 6 | inner: Mutex>>, 7 | } 8 | 9 | impl SharedSender { 10 | pub fn new(sender: Sender) -> SharedSender { 11 | SharedSender { 12 | inner: Mutex::new(Some(sender)) 13 | } 14 | } 15 | 16 | pub fn send(&self, value: T) { 17 | let mut sender_opt = self.inner.lock().unwrap(); 18 | let mut owned = None; 19 | mem::swap(&mut *sender_opt, &mut owned); 20 | if let Some(sender) = owned { 21 | let _ = sender.send(value); 22 | } 23 | } 24 | } 
--------------------------------------------------------------------------------
/src/scheduler/task.rs:
--------------------------------------------------------------------------------
use super::SharedSender;
use futures::channel::oneshot;

/// A unit of work that can be scheduled for execution by worker threads.
pub trait Task: Sync + Send {
    /// Performs (a slice of) the work; may be invoked repeatedly until
    /// `completed` returns true.
    fn execute(&self);
    /// Returns true once no further calls to `execute` are required.
    fn completed(&self) -> bool;
    /// Maximum number of threads that may run `execute` concurrently.
    fn max_parallelism(&self) -> usize;
    /// Whether this task can make use of more than one thread.
    fn multithreaded(&self) -> bool {
        self.max_parallelism() > 1
    }
}

impl Task for dyn Fn() + Send + Sync + 'static {
    fn execute(&self) {
        // Plain call syntax works on `&dyn Fn()` on stable Rust; no need for
        // the unstable `fn_traits` `call` method here.
        self()
    }

    fn completed(&self) -> bool {
        false
    }
    fn max_parallelism(&self) -> usize {
        1
    }
}

/// Adapts a closure into a [`Task`], forwarding its return value through a
/// [`SharedSender`] to the receiver handed out by [`Task::from_fn`].
struct FnTask<F, T>
where
    F: Fn() -> T + Sync + Send + 'static,
    T: Send,
{
    fun: F,
    sender: SharedSender<T>,
}

impl<F, T> Task for FnTask<F, T>
where
    F: Fn() -> T + Sync + Send + 'static,
    T: Send,
{
    fn execute(&self) {
        // Direct call syntax instead of `self.fun.call(())` — avoids the
        // unstable `fn_traits` feature at this call site.
        let result = (self.fun)();
        self.sender.send(result);
    }

    fn completed(&self) -> bool {
        false
    }
    fn max_parallelism(&self) -> usize {
        1
    }
}

impl dyn Task {
    /// Wraps `fun` in a task and returns it together with a receiver that
    /// yields the closure's result after the first execution.
    pub fn from_fn<F, T>(fun: F) -> (impl Task, oneshot::Receiver<T>)
    where
        F: Fn() -> T + Sync + Send + 'static,
        T: Send,
    {
        let (sender, receiver) = oneshot::channel();
        (
            FnTask {
                fun,
                sender: SharedSender::new(sender),
            },
            receiver,
        )
    }
}
--------------------------------------------------------------------------------
/src/syntax/expression.rs:
--------------------------------------------------------------------------------
use self::Expr::*;
use crate::engine::*;
use crate::ingest::raw_val::RawVal;
use std::collections::HashSet;

#[derive(Debug, Clone)]
pub enum Expr {
    ColName(String),
    Const(RawVal),
    Func1(Func1Type, Box<Expr>),
    Func2(Func2Type, Box<Expr>, Box<Expr>),
    Aggregate(Aggregator,
Box), 13 | } 14 | 15 | #[allow(clippy::upper_case_acronyms)] 16 | #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] 17 | pub enum Func2Type { 18 | Equals, 19 | NotEquals, 20 | LT, 21 | LTE, 22 | GT, 23 | GTE, 24 | And, 25 | Or, 26 | Add, 27 | Subtract, 28 | Multiply, 29 | Divide, 30 | Modulo, 31 | RegexMatch, 32 | Like, 33 | NotLike, 34 | } 35 | 36 | #[derive(Debug, Copy, Clone)] 37 | pub enum Func1Type { 38 | Negate, 39 | ToYear, 40 | Not, 41 | IsNull, 42 | IsNotNull, 43 | Length, 44 | Floor, 45 | } 46 | 47 | impl Expr { 48 | pub fn add_colnames(&self, result: &mut HashSet) { 49 | match *self { 50 | ColName(ref name) => { 51 | result.insert(name.to_string()); 52 | } 53 | Func2(_, ref expr1, ref expr2) => { 54 | expr1.add_colnames(result); 55 | expr2.add_colnames(result); 56 | } 57 | Func1(_, ref expr) => expr.add_colnames(result), 58 | Aggregate(_, ref expr) => expr.add_colnames(result), 59 | Const(_) => {} 60 | } 61 | } 62 | 63 | pub fn func(ftype: Func2Type, expr1: Expr, expr2: Expr) -> Expr { 64 | Func2(ftype, Box::new(expr1), Box::new(expr2)) 65 | } 66 | 67 | pub fn func1(ftype: Func1Type, expr: Expr) -> Expr { 68 | Func1(ftype, Box::new(expr)) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/syntax/limit.rs: -------------------------------------------------------------------------------- 1 | #[derive(Clone, Debug, Hash, PartialEq)] 2 | pub struct LimitClause { 3 | pub limit: u64, 4 | pub offset: u64, 5 | } 6 | -------------------------------------------------------------------------------- /src/syntax/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod expression; 2 | pub mod limit; 3 | pub mod parser; -------------------------------------------------------------------------------- /system_dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euxo pipefail 4 | 5 | 6 | if [ "$(uname)" 
== "Darwin" ]; then 7 | ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" < /dev/null 2> /dev/null 8 | brew install lz4 9 | brew install capnp 10 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 11 | sudo apt-get install -y g++ 12 | sudo apt-get install -y liblz4-dev 13 | sudo apt-get install -y dpkg 14 | #sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu bionic universe" -y 15 | #sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu bionic main" -y 16 | #sudo apt-get update -q 17 | sudo apt-get install -y capnproto 18 | else 19 | echo ERROR: Platform not supported 20 | exit 1 21 | fi 22 | 23 | 24 | -------------------------------------------------------------------------------- /templates/table.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A Basic HTML5 Template 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 |

{{ table }}

29 | 30 |

Columns

31 | {{ columns }} 32 | 33 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /test_data/edge_cases.csv: -------------------------------------------------------------------------------- 1 | u8_offset_encoded,non_dense_ints,enum,string_packed,constant0,constant0_2,negative,id,nullable_int,nullable_int2,country,largenum,float,nullable_float,float01,mixed_float_int_null 2 | 256,0,aa,xyz,0,0,-199,0,-1,,Germany,-9223372036854775808,0.123412,,0.3,1 3 | 258,2,aa,abc,0,0,39,1,-40,-40,USA,9223372036854775806,3e-4,,-0.4,10 4 | 259,3,aa,axz,0,0,-100,2,,,France,9223372036854775806,-124.0,0.4,0.421231,3 5 | 257,1,bb,AXY,0,0,34,3,,0,,9223372036854775806,3.15159,,0.9482,0.21 6 | 275,4,bb,azy,0,0,4031,4,10,9,France,-9223372036854775808,0.1234e30,,0.1,0.12 7 | 500,0,aa,$sss,0,0,32,5,,6,,9223372036854775806,1e-6,,0.2, 8 | 343,2,cc,asd,0,0,-130,6,,,Turkey,-9223372036854775808,0.0,1e-32,0.5, 9 | 432,1,aa,_f,0,0,-120,7,20,,,9223372036854775806,0.000001,,0.23,0.1 10 | 511,2,cc,t,0,0,4010,8,,1,,-9223372036854775808,-1.0,,0.742,0.1 11 | 500,3,bb,😈,0,0,-40,9,13,14,Germany,9223372036854775806,1234124.51325,1.123124e30,-0.2,0.5 -------------------------------------------------------------------------------- /test_data/nyc-taxi.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cswinter/LocustDB/016efd84bbae9781c93ecffda63422e5fefb8e93/test_data/nyc-taxi.csv.gz -------------------------------------------------------------------------------- /wandb_data_import.py: -------------------------------------------------------------------------------- 1 | import locustdb 2 | import time 3 | import wandb 4 | import requests 5 | 6 | entity = "entity-neural-network" 7 | project = "enn-ppo" 8 | run_id = "220511-055353-xor-num_envs=256-ent_coef=0.003-dmodel=64-anneal_ent=false-bs=1024-lr=0.0003" 9 | 10 | api = wandb.Api(timeout=300) 11 | runs = api.runs(f"{entity}/{project}", { 12 | 
'config.name': {"$regex": '220511-055353-.*'}, 13 | # { 14 | # "$text": '220511-055353', 15 | # } 16 | }) 17 | 18 | print("Starting...") 19 | i = 0 20 | logger = locustdb.Client(url="http://localhost:8080") 21 | while True: 22 | try: 23 | run = next(runs) 24 | except requests.exceptions.HTTPError as e: 25 | print(e) 26 | continue 27 | except requests.exceptions.ReadTimeout as e: 28 | print(e) 29 | continue 30 | except StopIteration: 31 | break 32 | print(i, run.name) 33 | rows = 0 34 | for row in run.history(pandas=False): 35 | clean_row = {k: v or 0.0 for k, v in row.items() if not isinstance(v, dict) and not isinstance(v, str)} 36 | # print(clean_row) 37 | logger.log(table="gb_9a43be3e-"+run.name, metrics=clean_row) 38 | rows += 1 39 | print(f"Logged {rows} rows") 40 | i += 1 41 | 42 | print("done") 43 | 44 | # run = api.run(f"{entity}/{project}/{run_id}") 45 | 46 | # # random walk 47 | # print("starting logging...") 48 | # value = 0 49 | # for i in range(10000): 50 | # value += np.random.normal() 51 | # locustdb.log(table="test_metrics", metrics={"step": i, "cpu": value}) 52 | 53 | time.sleep(2) 54 | print("done!") 55 | --------------------------------------------------------------------------------