├── .github
└── workflows
│ └── benchmarks.yml
├── .gitignore
├── README.md
├── benchmark.py
├── poetry.lock
├── pyproject.toml
├── queries
├── count.chdb.sql
├── count.databend.sql
├── count.datafusion.sql
├── count.duckdb.sql
├── count.glaredb.sql
├── groupby-local.chdb.sql
├── groupby-local.databend.sql
├── groupby-local.duckdb.sql
├── groupby-local.glaredb.sql
├── groupby.chdb.sql
├── groupby.databend.sql
├── groupby.datafusion.sql
├── groupby.duckdb.sql
├── groupby.glaredb.sql
├── version.chdb.sql
├── version.databend.sql
├── version.datafusion.sql
├── version.duckdb.sql
└── version.glaredb.sql
├── requirements.txt
└── run.sh
/.github/workflows/benchmarks.yml:
--------------------------------------------------------------------------------
# Runs benchmark.py once per embedded OLAP engine (one matrix job each) and
# appends every engine's report to the job summary. Triggered manually.
name: Benchmark Matrix

on:
  workflow_dispatch:
    inputs:
      ITERATIONS:
        description: 'Test iterations, default 3'
        required: false

jobs:
  bench:
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining engines finish even if one of them fails.
      fail-fast: false
      matrix:
        DBNAME: [ "chdb", "duckdb", "glaredb", "databend", "datafusion"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Pinned below 2.0: Poetry 2 removed the `lock --no-update` flag used here.
          pip install "poetry<2" pytest
          poetry lock --no-update
          poetry install
          wget -q https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet
      # - name: Workflow Telemetry
      #   uses: runforesight/workflow-telemetry-action@v1.8.7
      - name: Test with poetry
        env:
          # Falls back to 3 when the dispatch input is left empty.
          ITERATIONS: ${{ github.event.inputs.ITERATIONS || 3 }}
          DBNAME: ${{ matrix.DBNAME }}
        run: |
          poetry run python3 benchmark.py > /tmp/report.txt
      - name: Summary Report
        # Publish whatever was written even when the benchmark step failed.
        if: always()
        run: |
          echo "### ${{ matrix.DBNAME }}" >> "$GITHUB_STEP_SUMMARY"
          if [ -f /tmp/report.txt ]; then
            cat /tmp/report.txt >> "$GITHUB_STEP_SUMMARY"
          fi
43 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.parquet
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Embedded OLAP benchmarks
4 |
5 | This project benchmarks embedded OLAP engines using Python 3.x.
6 | Benchmark queries for supported databases are executed within GitHub Actions.
7 |
8 | :warning: _Focus on free, low-resource runners. NOT intended as a rigorous benchmark!_
9 |
10 | ### OLAP Racers 🏁
11 |
12 | - [chdb](https://doc.chdb.io)
13 | - [duckdb](https://duckdb.org)
14 | - [glaredb](https://glaredb.com)
15 | - [databend](https://databend.com)
16 | - [datafusion](https://arrow.apache.org/datafusion-python/)
17 |
18 |
19 | ## Results
20 |
21 | For the latest results, check the latest Action reports.
22 |
23 |
24 | ## Instructions
25 |
26 | 1. Clone this repo and `cd` into it
27 |
28 | 2. Install Test Requirements with `poetry`
29 | ```shell
30 | poetry install
31 | ```
32 |
33 | 3. Run the benchmark
34 | ```shell
35 | ./run.sh
36 | ```
37 |
--------------------------------------------------------------------------------
/benchmark.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import psutil
4 | from datetime import datetime
5 | from contextlib import contextmanager
6 |
7 | # Import Everything for equal memory conditions
8 | import duckdb
9 | import chdb
10 | from chdb import session as chs
11 | import glaredb
12 | from databend import SessionContext
13 | import datafusion
14 |
# Runtime configuration; DBNAME and ITERATIONS can be overridden through
# environment variables of the same name.
DBNAME = os.environ.get('DBNAME', '*')
ITERATIONS = int(os.environ.get('ITERATIONS', 3))
# Query names; each maps to a file queries/<name>.<db>.sql.
BENCHMARKS = ["version", "count", "groupby"]
18 |
@contextmanager
def suppress_stdout():
    """Context manager that sends Python-level stdout to os.devnull.

    ``sys.stdout`` is pointed at a write handle on the null device for the
    duration of the ``with`` body and restored afterwards, including when
    the body raises.
    """
    saved_stream = sys.stdout
    sink = open(os.devnull, "w")
    sys.stdout = sink
    try:
        yield
    finally:
        # Restore first, then release the null-device handle.
        sys.stdout = saved_stream
        sink.close()
28 |
def get_memory_usage():
    """Report the resident set size (RSS) of this Python process in megabytes."""
    bytes_per_mb = 1024 * 1024
    current_process = psutil.Process(os.getpid())
    rss_bytes = current_process.memory_info().rss
    return rss_bytes / bytes_per_mb
33 |
def load_query(db: str, name: str) -> str:
    """Return the SQL text stored in ``queries/<name>.<db>.sql``.

    The path is relative to the current working directory. When the file
    does not exist, a notice is printed and the process exits with status 1.
    """
    query_path = f"queries/{name}.{db}.sql"
    try:
        handle = open(query_path)
    except FileNotFoundError:
        print(f"Query file for {name} not found.")
        sys.exit(1)
    with handle:
        return handle.read()
42 |
def benchmark_db(db: str, execute_fn):
    """Benchmark every query listed in BENCHMARKS against one datastore.

    For each benchmark name the matching query file is loaded and executed
    ITERATIONS times through ``execute_fn`` with Python-level stdout
    suppressed. One summary line per benchmark (avg/min/max time of the
    successful runs and the change in process RSS) is printed to stdout.

    Args:
        db: Engine name; selects ``queries/<name>.<db>.sql`` and labels output.
        execute_fn: Callable that takes the SQL text and runs it on the engine.
    """
    # Imported locally because the module does not import ``time`` at file level.
    from time import perf_counter

    for name in BENCHMARKS:
        query = load_query(db, name)
        deltas = []
        mem_usage_before = get_memory_usage()
        for _ in range(ITERATIONS):
            try:
                with suppress_stdout():
                    # Monotonic timer wrapped tightly around the call so that
                    # setting up the stdout redirection is not measured.
                    start = perf_counter()
                    execute_fn(query)
                    elapsed = perf_counter() - start
            except Exception as e:
                # Handled outside the suppression block: stdout is restored
                # here, so the message is actually visible in the report.
                print(f"Error executing query on {db}: {e}")
                continue
            deltas.append(elapsed)
        mem_usage_after = get_memory_usage()

        if deltas:
            avg = sum(deltas) / len(deltas)
            mem_used = mem_usage_after - mem_usage_before
            # Report the number of runs that succeeded, not the number requested.
            print(f"{db}:{name}: avg={avg:.3f}s min={min(deltas):.3f}s max={max(deltas):.3f}s ({len(deltas)} runs) | Memory used: {mem_used:.2f} MB")
65 |
66 | def main():
67 | match DBNAME:
68 | case "chdb":
69 | print("Testing chdb " + str(chdb.engine_version))
70 | chdbs = chs.Session()
71 | benchmark_db("chdb", lambda query: chdb.query(query))
72 | case "duckdb":
73 | print("Testing duckdb " + str(duckdb.__version__))
74 | ddb = duckdb.connect()
75 | benchmark_db("duckdb", lambda query: ddb.execute(query))
76 | case "glaredb":
77 | print("Testing glaredb")
78 | gdb = glaredb.connect()
79 | benchmark_db("glaredb", lambda query: gdb.sql(query).show())
80 | case "databend":
81 | print("Testing databend")
82 | databendx = SessionContext()
83 | benchmark_db("databend", lambda query: databendx.sql(query).collect())
84 | case "datafusion":
85 | print("Testing datafusion")
86 | datafusionx = datafusion.SessionContext()
87 | benchmark_db("datafusion", lambda query: datafusionx.sql(query).collect())
88 |
89 | if __name__ == "__main__":
90 | main()
91 |
--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
2 |
3 | [[package]]
4 | name = "chdb"
5 | version = "0.16.0rc2"
6 | description = "chDB is an in-process SQL OLAP Engine powered by ClickHouse"
7 | optional = false
8 | python-versions = ">=3.8"
9 | files = [
10 | {file = "chdb-0.16.0rc2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f2ac75c7410931f82b959b25e717109e7516f32427da0ecb8822b93edfc21a45"},
11 | {file = "chdb-0.16.0rc2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5da9fcce45ef10475bb23ca16e71819deec1a99a49092d9dffd67b6b960510c5"},
12 | {file = "chdb-0.16.0rc2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015d87b479477af30ba21e16718f492bb12c53228df15ba58ae4edab497fd2bb"},
13 | {file = "chdb-0.16.0rc2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:6ed6b877a3714de0f5df044a855d8deeac774a87e2cd38af4066058d0b10ff18"},
14 | {file = "chdb-0.16.0rc2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a8613e76c5eb1d492fd6db1b948a2ec298705368f2c1b70dd5b6a5635a7a8bae"},
15 | {file = "chdb-0.16.0rc2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814b851a0d35dec04830d564dd1c1d8d759d3c0d233f66da64c2f0a28095c804"},
16 | {file = "chdb-0.16.0rc2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c4a17b105ebe5e51fdde380fd055e5a89692653de5092eaa3218821d01e8da0"},
17 | {file = "chdb-0.16.0rc2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8bc0117c98ffd887040bc8b7e878d0c3f310de6f74dfc71fae6fa84da8337f24"},
18 | {file = "chdb-0.16.0rc2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e67a1f01cc0a696f29e5366c11911917c91f557b4d89d7a1a3d123935779f463"},
19 | {file = "chdb-0.16.0rc2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12e23a23747b7a3106e88bfa12ae206f41ba74e0406e091b9f6ea5256942bb53"},
20 | {file = "chdb-0.16.0rc2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c93848db911bfcbbb628ca5a0dbb7565602f4c6c465efa910954d72874c1a94"},
21 | {file = "chdb-0.16.0rc2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:371b8d468f9be35beed126224d8e2aa1c327917a4d0a64989b7dadeb6088fca5"},
22 | ]
23 |
24 | [[package]]
25 | name = "databend"
26 | version = "1.2.207"
27 | description = "Databend Python Binding"
28 | optional = false
29 | python-versions = ">=3.7"
30 | files = [
31 | {file = "databend-1.2.207-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a809608d338ad471b38253e434b9c27932f3ec0ff306f17d89a02ec9e5de6e87"},
32 | {file = "databend-1.2.207-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79d784381c74a43c21c17bf50770a5c379bedb9e7d23dbc449c7fcd8ba3ad7a7"},
33 | ]
34 |
35 | [package.extras]
36 | docs = ["pdoc"]
37 | test = ["pandas", "polars", "pytest"]
38 |
39 | [[package]]
40 | name = "datafusion"
41 | version = "32.0.0"
42 | description = "Build and run queries against data"
43 | optional = false
44 | python-versions = ">=3.6"
45 | files = [
46 | {file = "datafusion-32.0.0-cp38-abi3-macosx_10_7_x86_64.whl", hash = "sha256:a74ef7f95798385f0fbdf14c233f9126159101f6ffd9d8bdff177d1070f8a824"},
47 | {file = "datafusion-32.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:39ee5b5cc64c978ae725b4486d2500027eeb0f5071f9242d314d44f0fd32e8a9"},
48 | {file = "datafusion-32.0.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e35f996bdce5966b07aa303b475895d1cff5f0d35b4a12f8c902a470d2b9388f"},
49 | {file = "datafusion-32.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0ba147342d50e76242b1c5d4465e9c40decdfe0958ec03a4f360aeba1bd95e38"},
50 | {file = "datafusion-32.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:f2d598d993bfd1055a69ad5bb3e6d7f149b8ddd1fc32a5603ca7bc809ee2188d"},
51 | ]
52 |
53 | [package.dependencies]
54 | pyarrow = ">=11.0.0"
55 |
56 | [[package]]
57 | name = "duckdb"
58 | version = "0.9.1"
59 | description = "DuckDB embedded database"
60 | optional = false
61 | python-versions = ">=3.7.0"
62 | files = [
63 | {file = "duckdb-0.9.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6c724e105ecd78c8d86b3c03639b24e1df982392fc836705eb007e4b1b488864"},
64 | {file = "duckdb-0.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:75f12c5a3086079fb6440122565f1762ef1a610a954f2d8081014c1dd0646e1a"},
65 | {file = "duckdb-0.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:151f5410c32f8f8fe03bf23462b9604349bc0b4bd3a51049bbf5e6a482a435e8"},
66 | {file = "duckdb-0.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c1d066fdae22b9b711b1603541651a378017645f9fbc4adc9764b2f3c9e9e4a"},
67 | {file = "duckdb-0.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1de56d8b7bd7a7653428c1bd4b8948316df488626d27e9c388194f2e0d1428d4"},
68 | {file = "duckdb-0.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1fb6cd590b1bb4e31fde8efd25fedfbfa19a86fa72789fa5b31a71da0d95bce4"},
69 | {file = "duckdb-0.9.1-cp310-cp310-win32.whl", hash = "sha256:1039e073714d668cef9069bb02c2a6756c7969cedda0bff1332520c4462951c8"},
70 | {file = "duckdb-0.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:7e6ac4c28918e1d278a89ff26fd528882aa823868ed530df69d6c8a193ae4e41"},
71 | {file = "duckdb-0.9.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5eb750f2ee44397a61343f32ee9d9e8c8b5d053fa27ba4185d0e31507157f130"},
72 | {file = "duckdb-0.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aea2a46881d75dc069a242cb164642d7a4f792889010fb98210953ab7ff48849"},
73 | {file = "duckdb-0.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed3dcedfc7a9449b6d73f9a2715c730180056e0ba837123e7967be1cd3935081"},
74 | {file = "duckdb-0.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c55397bed0087ec4445b96f8d55f924680f6d40fbaa7f2e35468c54367214a5"},
75 | {file = "duckdb-0.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3261696130f1cfb955735647c93297b4a6241753fb0de26c05d96d50986c6347"},
76 | {file = "duckdb-0.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:64c04b1728e3e37cf93748829b5d1e028227deea75115bb5ead01c608ece44b1"},
77 | {file = "duckdb-0.9.1-cp311-cp311-win32.whl", hash = "sha256:12cf9fb441a32702e31534330a7b4d569083d46a91bf185e0c9415000a978789"},
78 | {file = "duckdb-0.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:fdfd85575ce9540e593d5d25c9d32050bd636c27786afd7b776aae0f6432b55e"},
79 | {file = "duckdb-0.9.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:704700a4b469e3bb1a7e85ac12e58037daaf2b555ef64a3fe2913ffef7bd585b"},
80 | {file = "duckdb-0.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf55b303b7b1a8c2165a96e609eb30484bc47481d94a5fb1e23123e728df0a74"},
81 | {file = "duckdb-0.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b70e23c14746904ca5de316436e43a685eb769c67fe3dbfaacbd3cce996c5045"},
82 | {file = "duckdb-0.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:77379f7f1f8b4dc98e01f8f6f8f15a0858cf456e2385e22507f3cb93348a88f9"},
83 | {file = "duckdb-0.9.1-cp37-cp37m-win32.whl", hash = "sha256:92c8f738489838666cae9ef41703f8b16f660bb146970d1eba8b2c06cb3afa39"},
84 | {file = "duckdb-0.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08c5484ac06ab714f745526d791141f547e2f5ac92f97a0a1b37dfbb3ea1bd13"},
85 | {file = "duckdb-0.9.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f66d3c07c7f6938d3277294677eb7dad75165e7c57c8dd505503fc5ef10f67ad"},
86 | {file = "duckdb-0.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c38044e5f78c0c7b58e9f937dcc6c34de17e9ca6be42f9f8f1a5a239f7a847a5"},
87 | {file = "duckdb-0.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73bc0d715b79566b3ede00c367235cfcce67be0eddda06e17665c7a233d6854a"},
88 | {file = "duckdb-0.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d26622c3b4ea6a8328d95882059e3cc646cdc62d267d48d09e55988a3bba0165"},
89 | {file = "duckdb-0.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3367d10096ff2b7919cedddcf60d308d22d6e53e72ee2702f6e6ca03d361004a"},
90 | {file = "duckdb-0.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d88a119f1cb41911a22f08a6f084d061a8c864e28b9433435beb50a56b0d06bb"},
91 | {file = "duckdb-0.9.1-cp38-cp38-win32.whl", hash = "sha256:99567496e45b55c67427133dc916013e8eb20a811fc7079213f5f03b2a4f5fc0"},
92 | {file = "duckdb-0.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:5b3da4da73422a3235c3500b3fb541ac546adb3e35642ef1119dbcd9cc7f68b8"},
93 | {file = "duckdb-0.9.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eca00c0c2062c0265c6c0e78ca2f6a30611b28f3afef062036610e9fc9d4a67d"},
94 | {file = "duckdb-0.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb5af8e89d40fc4baab1515787ea1520a6c6cf6aa40ab9f107df6c3a75686ce1"},
95 | {file = "duckdb-0.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fae3d4f83ebcb47995f6acad7c6d57d003a9b6f0e1b31f79a3edd6feb377443"},
96 | {file = "duckdb-0.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16b9a7efc745bc3c5d1018c3a2f58d9e6ce49c0446819a9600fdba5f78e54c47"},
97 | {file = "duckdb-0.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b0b60167f5537772e9f5af940e69dcf50e66f5247732b8bb84a493a9af6055"},
98 | {file = "duckdb-0.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4f27f5e94c47df6c4ccddf18e3277b7464eea3db07356d2c4bf033b5c88359b8"},
99 | {file = "duckdb-0.9.1-cp39-cp39-win32.whl", hash = "sha256:d43cd7e6f783006b59dcc5e40fcf157d21ee3d0c8dfced35278091209e9974d7"},
100 | {file = "duckdb-0.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:e666795887d9cf1d6b6f6cbb9d487270680e5ff6205ebc54b2308151f13b8cff"},
101 | {file = "duckdb-0.9.1.tar.gz", hash = "sha256:603a878746015a3f2363a65eb48bcbec816261b6ee8d71eee53061117f6eef9d"},
102 | ]
103 |
104 | [[package]]
105 | name = "glaredb"
106 | version = "0.5.1"
107 | description = "GlareDB is a fast SQL database for querying and analyzing distributed data."
108 | optional = false
109 | python-versions = ">=3.7"
110 | files = [
111 | {file = "glaredb-0.5.1-cp37-abi3-macosx_10_7_x86_64.whl", hash = "sha256:d129be577931223b44faefa30f5a8b90093aeb623d87af37db46f1e583f23ee0"},
112 | {file = "glaredb-0.5.1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:aaada762fa8845d8f2fb106c2663b4aa53780d99e8d847a8add5cc0274da0bbe"},
113 | {file = "glaredb-0.5.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2f6b5d5507376c2fc9f609544e02d79a76f430e32ccdc1322f6095d548620d8"},
114 | {file = "glaredb-0.5.1-cp37-abi3-win_amd64.whl", hash = "sha256:812e5a4ddbe94da7d0ba31bcb63b571866e9eda6dc0e0c60bcdc96d1b09a9ec5"},
115 | ]
116 |
117 | [[package]]
118 | name = "numpy"
119 | version = "1.26.2"
120 | description = "Fundamental package for array computing in Python"
121 | optional = false
122 | python-versions = ">=3.9"
123 | files = [
124 | {file = "numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f"},
125 | {file = "numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440"},
126 | {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75"},
127 | {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00"},
128 | {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe"},
129 | {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523"},
130 | {file = "numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9"},
131 | {file = "numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919"},
132 | {file = "numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841"},
133 | {file = "numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1"},
134 | {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a"},
135 | {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b"},
136 | {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7"},
137 | {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8"},
138 | {file = "numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186"},
139 | {file = "numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d"},
140 | {file = "numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0"},
141 | {file = "numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75"},
142 | {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7"},
143 | {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6"},
144 | {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6"},
145 | {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec"},
146 | {file = "numpy-1.26.2-cp312-cp312-win32.whl", hash = "sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167"},
147 | {file = "numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e"},
148 | {file = "numpy-1.26.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4eb8df4bf8d3d90d091e0146f6c28492b0be84da3e409ebef54349f71ed271ef"},
149 | {file = "numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a13860fdcd95de7cf58bd6f8bc5a5ef81c0b0625eb2c9a783948847abbef2c2"},
150 | {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64308ebc366a8ed63fd0bf426b6a9468060962f1a4339ab1074c228fa6ade8e3"},
151 | {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf8aab04a2c0e859da118f0b38617e5ee65d75b83795055fb66c0d5e9e9b818"},
152 | {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d73a3abcac238250091b11caef9ad12413dab01669511779bc9b29261dd50210"},
153 | {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b361d369fc7e5e1714cf827b731ca32bff8d411212fccd29ad98ad622449cc36"},
154 | {file = "numpy-1.26.2-cp39-cp39-win32.whl", hash = "sha256:bd3f0091e845164a20bd5a326860c840fe2af79fa12e0469a12768a3ec578d80"},
155 | {file = "numpy-1.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2beef57fb031dcc0dc8fa4fe297a742027b954949cabb52a2a376c144e5e6060"},
156 | {file = "numpy-1.26.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1cc3d5029a30fb5f06704ad6b23b35e11309491c999838c31f124fee32107c79"},
157 | {file = "numpy-1.26.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94cc3c222bb9fb5a12e334d0479b97bb2df446fbe622b470928f5284ffca3f8d"},
158 | {file = "numpy-1.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe6b44fb8fcdf7eda4ef4461b97b3f63c466b27ab151bec2366db8b197387841"},
159 | {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"},
160 | ]
161 |
162 | [[package]]
163 | name = "psutil"
164 | version = "5.9.6"
165 | description = "Cross-platform lib for process and system monitoring in Python."
166 | optional = false
167 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
168 | files = [
169 | {file = "psutil-5.9.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:fb8a697f11b0f5994550555fcfe3e69799e5b060c8ecf9e2f75c69302cc35c0d"},
170 | {file = "psutil-5.9.6-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:91ecd2d9c00db9817a4b4192107cf6954addb5d9d67a969a4f436dbc9200f88c"},
171 | {file = "psutil-5.9.6-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:10e8c17b4f898d64b121149afb136c53ea8b68c7531155147867b7b1ac9e7e28"},
172 | {file = "psutil-5.9.6-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:18cd22c5db486f33998f37e2bb054cc62fd06646995285e02a51b1e08da97017"},
173 | {file = "psutil-5.9.6-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:ca2780f5e038379e520281e4c032dddd086906ddff9ef0d1b9dcf00710e5071c"},
174 | {file = "psutil-5.9.6-cp27-none-win32.whl", hash = "sha256:70cb3beb98bc3fd5ac9ac617a327af7e7f826373ee64c80efd4eb2856e5051e9"},
175 | {file = "psutil-5.9.6-cp27-none-win_amd64.whl", hash = "sha256:51dc3d54607c73148f63732c727856f5febec1c7c336f8f41fcbd6315cce76ac"},
176 | {file = "psutil-5.9.6-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c69596f9fc2f8acd574a12d5f8b7b1ba3765a641ea5d60fb4736bf3c08a8214a"},
177 | {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92e0cc43c524834af53e9d3369245e6cc3b130e78e26100d1f63cdb0abeb3d3c"},
178 | {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:748c9dd2583ed86347ed65d0035f45fa8c851e8d90354c122ab72319b5f366f4"},
179 | {file = "psutil-5.9.6-cp36-cp36m-win32.whl", hash = "sha256:3ebf2158c16cc69db777e3c7decb3c0f43a7af94a60d72e87b2823aebac3d602"},
180 | {file = "psutil-5.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:ff18b8d1a784b810df0b0fff3bcb50ab941c3b8e2c8de5726f9c71c601c611aa"},
181 | {file = "psutil-5.9.6-cp37-abi3-win32.whl", hash = "sha256:a6f01f03bf1843280f4ad16f4bde26b817847b4c1a0db59bf6419807bc5ce05c"},
182 | {file = "psutil-5.9.6-cp37-abi3-win_amd64.whl", hash = "sha256:6e5fb8dc711a514da83098bc5234264e551ad980cec5f85dabf4d38ed6f15e9a"},
183 | {file = "psutil-5.9.6-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:daecbcbd29b289aac14ece28eca6a3e60aa361754cf6da3dfb20d4d32b6c7f57"},
184 | {file = "psutil-5.9.6.tar.gz", hash = "sha256:e4b92ddcd7dd4cdd3f900180ea1e104932c7bce234fb88976e2a3b296441225a"},
185 | ]
186 |
187 | [package.extras]
188 | test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
189 |
190 | [[package]]
191 | name = "pyarrow"
192 | version = "14.0.1"
193 | description = "Python library for Apache Arrow"
194 | optional = false
195 | python-versions = ">=3.8"
196 | files = [
197 | {file = "pyarrow-14.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:96d64e5ba7dceb519a955e5eeb5c9adcfd63f73a56aea4722e2cc81364fc567a"},
198 | {file = "pyarrow-14.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a8ae88c0038d1bc362a682320112ee6774f006134cd5afc291591ee4bc06505"},
199 | {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f6f053cb66dc24091f5511e5920e45c83107f954a21032feadc7b9e3a8e7851"},
200 | {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:906b0dc25f2be12e95975722f1e60e162437023f490dbd80d0deb7375baf3171"},
201 | {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:78d4a77a46a7de9388b653af1c4ce539350726cd9af62e0831e4f2bd0c95a2f4"},
202 | {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06ca79080ef89d6529bb8e5074d4b4f6086143b2520494fcb7cf8a99079cde93"},
203 | {file = "pyarrow-14.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:32542164d905002c42dff896efdac79b3bdd7291b1b74aa292fac8450d0e4dcd"},
204 | {file = "pyarrow-14.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:c7331b4ed3401b7ee56f22c980608cf273f0380f77d0f73dd3c185f78f5a6220"},
205 | {file = "pyarrow-14.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:922e8b49b88da8633d6cac0e1b5a690311b6758d6f5d7c2be71acb0f1e14cd61"},
206 | {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58c889851ca33f992ea916b48b8540735055201b177cb0dcf0596a495a667b00"},
207 | {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30d8494870d9916bb53b2a4384948491444741cb9a38253c590e21f836b01222"},
208 | {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:be28e1a07f20391bb0b15ea03dcac3aade29fc773c5eb4bee2838e9b2cdde0cb"},
209 | {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:981670b4ce0110d8dcb3246410a4aabf5714db5d8ea63b15686bce1c914b1f83"},
210 | {file = "pyarrow-14.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:4756a2b373a28f6166c42711240643fb8bd6322467e9aacabd26b488fa41ec23"},
211 | {file = "pyarrow-14.0.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:cf87e2cec65dd5cf1aa4aba918d523ef56ef95597b545bbaad01e6433851aa10"},
212 | {file = "pyarrow-14.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:470ae0194fbfdfbf4a6b65b4f9e0f6e1fa0ea5b90c1ee6b65b38aecee53508c8"},
213 | {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6263cffd0c3721c1e348062997babdf0151301f7353010c9c9a8ed47448f82ab"},
214 | {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8089d7e77d1455d529dbd7cff08898bbb2666ee48bc4085203af1d826a33cc"},
215 | {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fada8396bc739d958d0b81d291cfd201126ed5e7913cb73de6bc606befc30226"},
216 | {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2a145dab9ed7849fc1101bf03bcdc69913547f10513fdf70fc3ab6c0a50c7eee"},
217 | {file = "pyarrow-14.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:05fe7994745b634c5fb16ce5717e39a1ac1fac3e2b0795232841660aa76647cd"},
218 | {file = "pyarrow-14.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:a8eeef015ae69d104c4c3117a6011e7e3ecd1abec79dc87fd2fac6e442f666ee"},
219 | {file = "pyarrow-14.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c76807540989fe8fcd02285dd15e4f2a3da0b09d27781abec3adc265ddbeba1"},
220 | {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:450e4605e3c20e558485f9161a79280a61c55efe585d51513c014de9ae8d393f"},
221 | {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:323cbe60210173ffd7db78bfd50b80bdd792c4c9daca8843ef3cd70b186649db"},
222 | {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0140c7e2b740e08c5a459439d87acd26b747fc408bde0a8806096ee0baaa0c15"},
223 | {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:e592e482edd9f1ab32f18cd6a716c45b2c0f2403dc2af782f4e9674952e6dd27"},
224 | {file = "pyarrow-14.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d264ad13605b61959f2ae7c1d25b1a5b8505b112715c961418c8396433f213ad"},
225 | {file = "pyarrow-14.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:01e44de9749cddc486169cb632f3c99962318e9dacac7778315a110f4bf8a450"},
226 | {file = "pyarrow-14.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0351fecf0e26e152542bc164c22ea2a8e8c682726fce160ce4d459ea802d69c"},
227 | {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33c1f6110c386464fd2e5e4ea3624466055bbe681ff185fd6c9daa98f30a3f9a"},
228 | {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11e045dfa09855b6d3e7705a37c42e2dc2c71d608fab34d3c23df2e02df9aec3"},
229 | {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:097828b55321897db0e1dbfc606e3ff8101ae5725673498cbfa7754ee0da80e4"},
230 | {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1daab52050a1c48506c029e6fa0944a7b2436334d7e44221c16f6f1b2cc9c510"},
231 | {file = "pyarrow-14.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:3f6d5faf4f1b0d5a7f97be987cf9e9f8cd39902611e818fe134588ee99bf0283"},
232 | {file = "pyarrow-14.0.1.tar.gz", hash = "sha256:b8b3f4fe8d4ec15e1ef9b599b94683c5216adaed78d5cb4c606180546d1e2ee1"},
233 | ]
234 |
235 | [package.dependencies]
236 | numpy = ">=1.16.6"
237 |
238 | [metadata]
239 | lock-version = "2.0"
240 | python-versions = "^3.9"
241 | content-hash = "010516149c58ff64ef1df866141f9531026f2eb294ac9bf7902d0e99a2d4e404"
242 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]  # Poetry project metadata
2 | name = "embedded-olap-benchmarks"
3 | version = "0.1.4"
4 | description = ""
5 | authors = ["Lorenzo Mangani "]
6 |
7 | [tool.poetry.dependencies]  # runtime dependencies installed by `poetry install`
8 | psutil = "^5.9.6"
9 | python = "^3.9"  # the CI workflow (.github/workflows/benchmarks.yml) runs on Python 3.10
10 | chdb = "^2.0.0b1"  # engines below: one per DBNAME entry in the CI workflow matrix
11 | duckdb = "^1.0.0"
12 | glaredb = "^0.9.4"
13 | databend = "^1.2.453"
14 | datafusion = "^39.0.0"
15 |
16 | [tool.poetry.dev-dependencies]  # no development-only dependencies are declared
17 |
18 | [build-system]  # build backend is provided by poetry-core
19 | requires = ["poetry-core>=1.0.0"]
20 | build-backend = "poetry.core.masonry.api"
21 |
--------------------------------------------------------------------------------
/queries/count.chdb.sql:
--------------------------------------------------------------------------------
1 | /* Row count of the remote lineitem Parquet file, read through the url() table function. */ SELECT count(*) FROM url('https://shell.duckdb.org/data/tpch/0_01/parquet/lineitem.parquet');
2 |
--------------------------------------------------------------------------------
/queries/count.databend.sql:
--------------------------------------------------------------------------------
1 | /* Row count of the remote lineitem Parquet file, addressed directly by its URL. */ SELECT count(*) FROM 'https://shell.duckdb.org/data/tpch/0_01/parquet/lineitem.parquet';
2 |
--------------------------------------------------------------------------------
/queries/count.datafusion.sql:
--------------------------------------------------------------------------------
1 | /* Row count of the remote lineitem Parquet file, addressed directly by its URL. */ SELECT count(*) FROM 'https://shell.duckdb.org/data/tpch/0_01/parquet/lineitem.parquet';
2 |
--------------------------------------------------------------------------------
/queries/count.duckdb.sql:
--------------------------------------------------------------------------------
1 | /* Row count of the remote lineitem Parquet file. The URL is written as a double-quoted identifier here, whereas groupby.duckdb.sql uses a single-quoted string. */ SELECT count(*) FROM "https://shell.duckdb.org/data/tpch/0_01/parquet/lineitem.parquet";
2 |
--------------------------------------------------------------------------------
/queries/count.glaredb.sql:
--------------------------------------------------------------------------------
1 | /* Row count of the remote lineitem Parquet file, addressed directly by its URL. */ SELECT count(*) FROM 'https://shell.duckdb.org/data/tpch/0_01/parquet/lineitem.parquet';
2 |
--------------------------------------------------------------------------------
/queries/groupby-local.chdb.sql:
--------------------------------------------------------------------------------
1 | /*
2 |  * Per-day summary of trips with trip_distance > 5, read from the local
3 |  * Parquet file: approximate distinct PULocationID count (uniqHLL12),
4 |  * trip count, and the sum of the fare, tax, tolls and tip totals.
5 |  */
6 | SELECT
7 |     toYYYYMMDD(tpep_pickup_datetime) AS day,
8 |     uniqHLL12(PULocationID) AS locations,
9 |     count(*) AS trips,
10 |     sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) AS revenue
11 | FROM file('yellow_tripdata_2023-01.parquet', Parquet)
12 | WHERE trip_distance > 5
13 | GROUP BY toYYYYMMDD(tpep_pickup_datetime)
14 | ORDER BY day
15 |
--------------------------------------------------------------------------------
/queries/groupby-local.databend.sql:
--------------------------------------------------------------------------------
1 | /*
2 |  * Per-day, per-PULocationID summary of trips with trip_distance > 5:
3 |  * trip count and the sum of the fare, tax, tolls and tip totals.
4 |  *
5 |  * Grouping uses the derived date, so trips are aggregated per day and not
6 |  * per distinct pickup timestamp.
7 |  *
8 |  * NOTE(review): this file is the local variant, yet it still reads the
9 |  * remote URL, unlike the chdb, duckdb and glaredb local variants which read
10 |  * yellow_tripdata_2023-01.parquet from the working directory. Switch to the
11 |  * local file once the way the harness exposes local files to Databend is
12 |  * confirmed.
13 |  */
14 | SELECT TO_DATE(tpep_pickup_datetime::date) AS day,
15 |        PULocationID AS location,
16 |        count(*) AS trips,
17 |        sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) AS revenue
18 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet'
19 | WHERE trip_distance > 5
20 | GROUP BY TO_DATE(tpep_pickup_datetime::date), PULocationID
21 | ORDER BY day
22 |
--------------------------------------------------------------------------------
/queries/groupby-local.duckdb.sql:
--------------------------------------------------------------------------------
1 | /*
2 |  * Per-day summary of trips with trip_distance > 5, read from the local
3 |  * Parquet file: approximate distinct PULocationID count, trip count, and
4 |  * the sum of the fare, tax, tolls and tip totals.
5 |  */
6 | SELECT
7 |     CAST(tpep_pickup_datetime AS DATE) AS day,
8 |     approx_count_distinct(PULocationID) AS locations,
9 |     count(*) AS trips,
10 |     sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) AS revenue
11 | FROM read_parquet('yellow_tripdata_2023-01.parquet')
12 | WHERE trip_distance > 5
13 | GROUP BY CAST(tpep_pickup_datetime AS DATE)
14 | ORDER BY day
15 |
--------------------------------------------------------------------------------
/queries/groupby-local.glaredb.sql:
--------------------------------------------------------------------------------
1 | /*
2 |  * Per-day, per-PULocationID summary of trips with trip_distance > 5, read
3 |  * from the local Parquet file: trip count and the sum of the fare, tax,
4 |  * tolls and tip totals.
5 |  *
6 |  * The day column is the pickup timestamp cast to a date. It previously went
7 |  * through arrow_typeof, which yields the name of the Arrow type and not the
8 |  * date value. Grouping uses the same derived date, so trips are aggregated
9 |  * per day and not per distinct pickup timestamp.
10 |  *
11 |  * NOTE: the chdb and duckdb variants group by day only and report an
12 |  * approximate distinct location count. This variant keeps its
13 |  * (day, location, trips, revenue) result columns.
14 |  */
15 | SELECT tpep_pickup_datetime::date AS day,
16 |        "PULocationID" AS location,
17 |        count(*) AS trips,
18 |        sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) AS revenue
19 | FROM 'yellow_tripdata_2023-01.parquet'
20 | WHERE trip_distance > 5
21 | GROUP BY tpep_pickup_datetime::date, "PULocationID"
22 | ORDER BY day
23 |
--------------------------------------------------------------------------------
/queries/groupby.chdb.sql:
--------------------------------------------------------------------------------
1 | /*
2 |  * Per-day summary of trips with trip_distance > 5, read from the remote
3 |  * Parquet file through url(): approximate distinct PULocationID count
4 |  * (uniqHLL12), trip count, and the sum of the fare, tax, tolls and tip totals.
5 |  */
6 | SELECT
7 |     toYYYYMMDD(tpep_pickup_datetime) AS day,
8 |     uniqHLL12(PULocationID) AS locations,
9 |     count(*) AS trips,
10 |     sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) AS revenue
11 | FROM url('https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet')
12 | WHERE trip_distance > 5
13 | GROUP BY toYYYYMMDD(tpep_pickup_datetime)
14 | ORDER BY day
15 |
--------------------------------------------------------------------------------
/queries/groupby.databend.sql:
--------------------------------------------------------------------------------
1 | /*
2 |  * Per-day, per-PULocationID summary of trips with trip_distance > 5, read
3 |  * from the remote Parquet file: trip count and the sum of the fare, tax,
4 |  * tolls and tip totals.
5 |  *
6 |  * Grouping uses the derived date, so trips are aggregated per day and not
7 |  * per distinct pickup timestamp.
8 |  *
9 |  * NOTE: the chdb and duckdb variants group by day only and report an
10 |  * approximate distinct location count. This variant keeps its
11 |  * (day, location, trips, revenue) result columns.
12 |  */
13 | SELECT TO_DATE(tpep_pickup_datetime::date) AS day,
14 |        PULocationID AS location,
15 |        count(*) AS trips,
16 |        sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) AS revenue
17 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet'
18 | WHERE trip_distance > 5
19 | GROUP BY TO_DATE(tpep_pickup_datetime::date), PULocationID
20 | ORDER BY day
21 |
--------------------------------------------------------------------------------
/queries/groupby.datafusion.sql:
--------------------------------------------------------------------------------
1 | /*
2 |  * Per-day, per-PULocationID summary of trips with trip_distance > 5, read
3 |  * from the remote Parquet file: trip count and the sum of the fare, tax,
4 |  * tolls and tip totals.
5 |  *
6 |  * PULocationID is double-quoted because DataFusion lower-cases unquoted
7 |  * identifiers, which would not match the mixed-case Parquet column name.
8 |  * Grouping uses the derived date, so trips are aggregated per day and not
9 |  * per distinct pickup timestamp.
10 |  *
11 |  * NOTE: the chdb and duckdb variants group by day only and report an
12 |  * approximate distinct location count. This variant keeps its
13 |  * (day, location, trips, revenue) result columns.
14 |  */
15 | SELECT TO_DATE(tpep_pickup_datetime::date) AS day,
16 |        "PULocationID" AS location,
17 |        count(*) AS trips,
18 |        sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) AS revenue
19 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet'
20 | WHERE trip_distance > 5
21 | GROUP BY TO_DATE(tpep_pickup_datetime::date), "PULocationID"
22 | ORDER BY day
23 |
--------------------------------------------------------------------------------
/queries/groupby.duckdb.sql:
--------------------------------------------------------------------------------
1 | /*
2 |  * Per-day summary of trips with trip_distance > 5, read from the remote
3 |  * Parquet file: approximate distinct PULocationID count, trip count, and
4 |  * the sum of the fare, tax, tolls and tip totals.
5 |  */
6 | SELECT
7 |     CAST(tpep_pickup_datetime AS DATE) AS day,
8 |     approx_count_distinct(PULocationID) AS locations,
9 |     count(*) AS trips,
10 |     sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) AS revenue
11 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet'
12 | WHERE trip_distance > 5
13 | GROUP BY CAST(tpep_pickup_datetime AS DATE)
14 | ORDER BY day
15 |
--------------------------------------------------------------------------------
/queries/groupby.glaredb.sql:
--------------------------------------------------------------------------------
1 | /*
2 |  * Per-day, per-PULocationID summary of trips with trip_distance > 5, read
3 |  * from the remote Parquet file: trip count and the sum of the fare, tax,
4 |  * tolls and tip totals.
5 |  *
6 |  * The day column is the pickup timestamp cast to a date. It previously went
7 |  * through arrow_typeof, which yields the name of the Arrow type and not the
8 |  * date value. Grouping uses the same derived date, so trips are aggregated
9 |  * per day and not per distinct pickup timestamp.
10 |  *
11 |  * NOTE: the chdb and duckdb variants group by day only and report an
12 |  * approximate distinct location count. This variant keeps its
13 |  * (day, location, trips, revenue) result columns.
14 |  */
15 | SELECT tpep_pickup_datetime::date AS day,
16 |        "PULocationID" AS location,
17 |        count(*) AS trips,
18 |        sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) AS revenue
19 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet'
20 | WHERE trip_distance > 5
21 | GROUP BY tpep_pickup_datetime::date, "PULocationID"
22 | ORDER BY day
23 |
--------------------------------------------------------------------------------
/queries/version.chdb.sql:
--------------------------------------------------------------------------------
1 | /* Version probe for chdb. Presumably chdb() returns the chdb build version, to be confirmed against the chdb docs. */ SELECT chdb()
2 |
--------------------------------------------------------------------------------
/queries/version.databend.sql:
--------------------------------------------------------------------------------
1 | /* Version probe: asks the engine for its version via version(). */ SELECT version()
2 |
--------------------------------------------------------------------------------
/queries/version.datafusion.sql:
--------------------------------------------------------------------------------
1 | /* Version probe: asks the engine for its version via version(). */ SELECT version()
2 |
--------------------------------------------------------------------------------
/queries/version.duckdb.sql:
--------------------------------------------------------------------------------
1 | /* Version probe: asks the engine for its version via version(). */ SELECT version()
2 |
--------------------------------------------------------------------------------
/queries/version.glaredb.sql:
--------------------------------------------------------------------------------
1 | /* Version probe: asks the engine for its version via version(). */ SELECT version()
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Keep the engine list in sync with [tool.poetry.dependencies] in
2 | # pyproject.toml and the DBNAME matrix in .github/workflows/benchmarks.yml.
3 | poetry
4 | psutil
5 | chdb
6 | duckdb
7 | glaredb
8 | databend
9 | datafusion
10 |
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Fetch the yellow-taxi Parquet sample if it is not present yet, then run
4 | # the benchmark through Poetry. Exits with the benchmark's exit status.
5 |
6 | # Stop at the first failing command so that a failed download never leads
7 | # to a benchmark run against missing or incomplete data.
8 | set -euo pipefail
9 |
10 | DATA_FILE="yellow_tripdata_2023-01.parquet"
11 | DATA_URL="https://d37ci6vzurychx.cloudfront.net/trip-data/${DATA_FILE}"
12 |
13 | # The data file and benchmark.py are addressed relative to this script, so
14 | # run from the script's own directory regardless of the caller's cwd.
15 | cd "$(dirname "${BASH_SOURCE[0]}")"
16 |
17 | if [[ ! -f "${DATA_FILE}" ]]; then
18 |     # Download under a temporary name and rename on success: an interrupted
19 |     # transfer must not leave a truncated file that passes the check above
20 |     # on the next run.
21 |     wget -O "${DATA_FILE}.part" "${DATA_URL}"
22 |     mv "${DATA_FILE}.part" "${DATA_FILE}"
23 | fi
24 |
25 | poetry run python3 benchmark.py
26 |
--------------------------------------------------------------------------------