├── .github └── workflows │ └── benchmarks.yml ├── .gitignore ├── README.md ├── benchmark.py ├── poetry.lock ├── pyproject.toml ├── queries ├── count.chdb.sql ├── count.databend.sql ├── count.datafusion.sql ├── count.duckdb.sql ├── count.glaredb.sql ├── groupby-local.chdb.sql ├── groupby-local.databend.sql ├── groupby-local.duckdb.sql ├── groupby-local.glaredb.sql ├── groupby.chdb.sql ├── groupby.databend.sql ├── groupby.datafusion.sql ├── groupby.duckdb.sql ├── groupby.glaredb.sql ├── version.chdb.sql ├── version.databend.sql ├── version.datafusion.sql ├── version.duckdb.sql └── version.glaredb.sql ├── requirements.txt └── run.sh /.github/workflows/benchmarks.yml: -------------------------------------------------------------------------------- 1 | name: Benchmark Matrix 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | ITERATIONS: 7 | description: 'Test iterations, default 3' 8 | required: false 9 | 10 | jobs: 11 | bench: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | DBNAME: [ "chdb", "duckdb", "glaredb", "databend", "datafusion"] 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Set up Python 3.10 21 | uses: actions/setup-python@v3 22 | with: 23 | python-version: "3.10" 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install poetry pytest 28 | poetry lock --no-update 29 | poetry install 30 | wget -q https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet 31 | # - name: Workflow Telemetry 32 | # uses: runforesight/workflow-telemetry-action@v1.8.7 33 | - name: Test with poetry 34 | env: 35 | ITERATIONS: ${{ github.event.inputs.ITERATIONS || 3 }} 36 | DBNAME: ${{ matrix.DBNAME }} 37 | run: | 38 | poetry run python3 benchmark.py > /tmp/report.txt 39 | - name: Summary Report 40 | run: | 41 | echo "### ${{ matrix.DBNAME }}" >> $GITHUB_STEP_SUMMARY 42 | cat /tmp/report.txt >> $GITHUB_STEP_SUMMARY 43 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.parquet 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Embedded OLAP benchmarks 4 | 5 | This project benchmarks embedded OLAP engines using Python 3.x
6 | Benchmark queries for supported databases are executed within GitHub Actions.
7 | 8 | :warning: _Focus on free, low-resource runners. NOT intended as a rigorous benchmark!_ 9 | 10 | ### OLAP Racers 🏁 11 | 12 | - [chdb](https://doc.chdb.io) 13 | - [duckdb](https://duckdb.org) 14 | - [glaredb](https://glaredb.com) 15 | - [databend](https://databend.com) 16 | - [datafusion](https://arrow.apache.org/datafusion-python/) 17 | 18 | 19 | ## Results 20 | 21 | For the latest results, check the latest Action reports. 22 | 23 | 24 | ## Instructions 25 | 26 | 1. Clone this repo and `cd` into it 27 | 28 | 2. Install Test Requirements with `poetry` 29 | ```shell 30 | poetry install 31 | ``` 32 | 33 | 3. Run the benchmark 34 | ```shell 35 | ./run.sh 36 | ``` 37 | -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import psutil 4 | from datetime import datetime 5 | from contextlib import contextmanager 6 | 7 | # Import Everything for equal memory conditions 8 | import duckdb 9 | import chdb 10 | from chdb import session as chs 11 | import glaredb 12 | from databend import SessionContext 13 | import datafusion 14 | 15 | DBNAME = os.getenv('DBNAME', '*') 16 | ITERATIONS = int(os.getenv('ITERATIONS', 3)) 17 | BENCHMARKS = ["version", "count", "groupby"] 18 | 19 | @contextmanager 20 | def suppress_stdout(): 21 | with open(os.devnull, "w") as devnull: 22 | old_stdout = sys.stdout 23 | sys.stdout = devnull 24 | try: 25 | yield 26 | finally: 27 | sys.stdout = old_stdout 28 | 29 | def get_memory_usage(): 30 | """ Returns the current memory usage of the Python process. """ 31 | process = psutil.Process(os.getpid()) 32 | return process.memory_info().rss / (1024 * 1024) # Convert bytes to megabytes 33 | 34 | def load_query(db: str, name: str) -> str: 35 | """ Load SQL query from file. 
""" 36 | try: 37 | with open(f"queries/{name}.{db}.sql") as f: 38 | return f.read() 39 | except FileNotFoundError: 40 | print(f"Query file for {name} not found.") 41 | sys.exit(1) 42 | 43 | def benchmark_db(db: str, execute_fn): 44 | """ Benchmarks all queries against one datastore """ 45 | for name in BENCHMARKS: 46 | query = load_query(db, name) 47 | deltas = [] 48 | mem_usage_before = get_memory_usage() 49 | for _ in range(ITERATIONS): 50 | start = datetime.now() 51 | with suppress_stdout(): 52 | try: 53 | results = execute_fn(query) 54 | except Exception as e: 55 | print(f"Error executing query on {db}: {e}") 56 | continue 57 | end = datetime.now() 58 | deltas.append((end - start).total_seconds()) 59 | mem_usage_after = get_memory_usage() 60 | 61 | if deltas: 62 | avg = sum(deltas) / len(deltas) 63 | mem_used = mem_usage_after - mem_usage_before 64 | print(f"{db}:{name}: avg={avg:.3f}s min={min(deltas):.3f}s max={max(deltas):.3f}s ({ITERATIONS} runs) | Memory used: {mem_used:.2f} MB") 65 | 66 | def main(): 67 | match DBNAME: 68 | case "chdb": 69 | print("Testing chdb " + str(chdb.engine_version)) 70 | chdbs = chs.Session() 71 | benchmark_db("chdb", lambda query: chdb.query(query)) 72 | case "duckdb": 73 | print("Testing duckdb " + str(duckdb.__version__)) 74 | ddb = duckdb.connect() 75 | benchmark_db("duckdb", lambda query: ddb.execute(query)) 76 | case "glaredb": 77 | print("Testing glaredb") 78 | gdb = glaredb.connect() 79 | benchmark_db("glaredb", lambda query: gdb.sql(query).show()) 80 | case "databend": 81 | print("Testing databend") 82 | databendx = SessionContext() 83 | benchmark_db("databend", lambda query: databendx.sql(query).collect()) 84 | case "datafusion": 85 | print("Testing datafusion") 86 | datafusionx = datafusion.SessionContext() 87 | benchmark_db("datafusion", lambda query: datafusionx.sql(query).collect()) 88 | 89 | if __name__ == "__main__": 90 | main() 91 | -------------------------------------------------------------------------------- 
/poetry.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 2 | 3 | [[package]] 4 | name = "chdb" 5 | version = "0.16.0rc2" 6 | description = "chDB is an in-process SQL OLAP Engine powered by ClickHouse" 7 | optional = false 8 | python-versions = ">=3.8" 9 | files = [ 10 | {file = "chdb-0.16.0rc2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f2ac75c7410931f82b959b25e717109e7516f32427da0ecb8822b93edfc21a45"}, 11 | {file = "chdb-0.16.0rc2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5da9fcce45ef10475bb23ca16e71819deec1a99a49092d9dffd67b6b960510c5"}, 12 | {file = "chdb-0.16.0rc2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015d87b479477af30ba21e16718f492bb12c53228df15ba58ae4edab497fd2bb"}, 13 | {file = "chdb-0.16.0rc2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:6ed6b877a3714de0f5df044a855d8deeac774a87e2cd38af4066058d0b10ff18"}, 14 | {file = "chdb-0.16.0rc2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a8613e76c5eb1d492fd6db1b948a2ec298705368f2c1b70dd5b6a5635a7a8bae"}, 15 | {file = "chdb-0.16.0rc2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814b851a0d35dec04830d564dd1c1d8d759d3c0d233f66da64c2f0a28095c804"}, 16 | {file = "chdb-0.16.0rc2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c4a17b105ebe5e51fdde380fd055e5a89692653de5092eaa3218821d01e8da0"}, 17 | {file = "chdb-0.16.0rc2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8bc0117c98ffd887040bc8b7e878d0c3f310de6f74dfc71fae6fa84da8337f24"}, 18 | {file = "chdb-0.16.0rc2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e67a1f01cc0a696f29e5366c11911917c91f557b4d89d7a1a3d123935779f463"}, 19 | {file = "chdb-0.16.0rc2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12e23a23747b7a3106e88bfa12ae206f41ba74e0406e091b9f6ea5256942bb53"}, 20 | {file = "chdb-0.16.0rc2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c93848db911bfcbbb628ca5a0dbb7565602f4c6c465efa910954d72874c1a94"}, 21 | {file = "chdb-0.16.0rc2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:371b8d468f9be35beed126224d8e2aa1c327917a4d0a64989b7dadeb6088fca5"}, 22 | ] 23 | 24 | [[package]] 25 | name = "databend" 26 | version = "1.2.207" 27 | description = "Databend Python Binding" 28 | optional = false 29 | python-versions = ">=3.7" 30 | files = [ 31 | {file = "databend-1.2.207-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a809608d338ad471b38253e434b9c27932f3ec0ff306f17d89a02ec9e5de6e87"}, 32 | {file = "databend-1.2.207-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79d784381c74a43c21c17bf50770a5c379bedb9e7d23dbc449c7fcd8ba3ad7a7"}, 33 | ] 34 | 35 | [package.extras] 36 | docs = ["pdoc"] 37 | test = ["pandas", "polars", "pytest"] 38 | 39 | [[package]] 40 | name = "datafusion" 41 | version = "32.0.0" 42 | description = "Build and run queries against data" 43 | optional = false 44 | python-versions = ">=3.6" 45 | files = [ 46 | {file = "datafusion-32.0.0-cp38-abi3-macosx_10_7_x86_64.whl", hash = "sha256:a74ef7f95798385f0fbdf14c233f9126159101f6ffd9d8bdff177d1070f8a824"}, 47 | {file = "datafusion-32.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:39ee5b5cc64c978ae725b4486d2500027eeb0f5071f9242d314d44f0fd32e8a9"}, 48 | {file = "datafusion-32.0.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e35f996bdce5966b07aa303b475895d1cff5f0d35b4a12f8c902a470d2b9388f"}, 49 | {file = "datafusion-32.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = 
"sha256:0ba147342d50e76242b1c5d4465e9c40decdfe0958ec03a4f360aeba1bd95e38"}, 50 | {file = "datafusion-32.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:f2d598d993bfd1055a69ad5bb3e6d7f149b8ddd1fc32a5603ca7bc809ee2188d"}, 51 | ] 52 | 53 | [package.dependencies] 54 | pyarrow = ">=11.0.0" 55 | 56 | [[package]] 57 | name = "duckdb" 58 | version = "0.9.1" 59 | description = "DuckDB embedded database" 60 | optional = false 61 | python-versions = ">=3.7.0" 62 | files = [ 63 | {file = "duckdb-0.9.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6c724e105ecd78c8d86b3c03639b24e1df982392fc836705eb007e4b1b488864"}, 64 | {file = "duckdb-0.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:75f12c5a3086079fb6440122565f1762ef1a610a954f2d8081014c1dd0646e1a"}, 65 | {file = "duckdb-0.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:151f5410c32f8f8fe03bf23462b9604349bc0b4bd3a51049bbf5e6a482a435e8"}, 66 | {file = "duckdb-0.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c1d066fdae22b9b711b1603541651a378017645f9fbc4adc9764b2f3c9e9e4a"}, 67 | {file = "duckdb-0.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1de56d8b7bd7a7653428c1bd4b8948316df488626d27e9c388194f2e0d1428d4"}, 68 | {file = "duckdb-0.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1fb6cd590b1bb4e31fde8efd25fedfbfa19a86fa72789fa5b31a71da0d95bce4"}, 69 | {file = "duckdb-0.9.1-cp310-cp310-win32.whl", hash = "sha256:1039e073714d668cef9069bb02c2a6756c7969cedda0bff1332520c4462951c8"}, 70 | {file = "duckdb-0.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:7e6ac4c28918e1d278a89ff26fd528882aa823868ed530df69d6c8a193ae4e41"}, 71 | {file = "duckdb-0.9.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5eb750f2ee44397a61343f32ee9d9e8c8b5d053fa27ba4185d0e31507157f130"}, 72 | {file = "duckdb-0.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aea2a46881d75dc069a242cb164642d7a4f792889010fb98210953ab7ff48849"}, 73 | {file = 
"duckdb-0.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed3dcedfc7a9449b6d73f9a2715c730180056e0ba837123e7967be1cd3935081"}, 74 | {file = "duckdb-0.9.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c55397bed0087ec4445b96f8d55f924680f6d40fbaa7f2e35468c54367214a5"}, 75 | {file = "duckdb-0.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3261696130f1cfb955735647c93297b4a6241753fb0de26c05d96d50986c6347"}, 76 | {file = "duckdb-0.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:64c04b1728e3e37cf93748829b5d1e028227deea75115bb5ead01c608ece44b1"}, 77 | {file = "duckdb-0.9.1-cp311-cp311-win32.whl", hash = "sha256:12cf9fb441a32702e31534330a7b4d569083d46a91bf185e0c9415000a978789"}, 78 | {file = "duckdb-0.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:fdfd85575ce9540e593d5d25c9d32050bd636c27786afd7b776aae0f6432b55e"}, 79 | {file = "duckdb-0.9.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:704700a4b469e3bb1a7e85ac12e58037daaf2b555ef64a3fe2913ffef7bd585b"}, 80 | {file = "duckdb-0.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf55b303b7b1a8c2165a96e609eb30484bc47481d94a5fb1e23123e728df0a74"}, 81 | {file = "duckdb-0.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b70e23c14746904ca5de316436e43a685eb769c67fe3dbfaacbd3cce996c5045"}, 82 | {file = "duckdb-0.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:77379f7f1f8b4dc98e01f8f6f8f15a0858cf456e2385e22507f3cb93348a88f9"}, 83 | {file = "duckdb-0.9.1-cp37-cp37m-win32.whl", hash = "sha256:92c8f738489838666cae9ef41703f8b16f660bb146970d1eba8b2c06cb3afa39"}, 84 | {file = "duckdb-0.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08c5484ac06ab714f745526d791141f547e2f5ac92f97a0a1b37dfbb3ea1bd13"}, 85 | {file = "duckdb-0.9.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f66d3c07c7f6938d3277294677eb7dad75165e7c57c8dd505503fc5ef10f67ad"}, 86 | {file = 
"duckdb-0.9.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c38044e5f78c0c7b58e9f937dcc6c34de17e9ca6be42f9f8f1a5a239f7a847a5"}, 87 | {file = "duckdb-0.9.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73bc0d715b79566b3ede00c367235cfcce67be0eddda06e17665c7a233d6854a"}, 88 | {file = "duckdb-0.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d26622c3b4ea6a8328d95882059e3cc646cdc62d267d48d09e55988a3bba0165"}, 89 | {file = "duckdb-0.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3367d10096ff2b7919cedddcf60d308d22d6e53e72ee2702f6e6ca03d361004a"}, 90 | {file = "duckdb-0.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d88a119f1cb41911a22f08a6f084d061a8c864e28b9433435beb50a56b0d06bb"}, 91 | {file = "duckdb-0.9.1-cp38-cp38-win32.whl", hash = "sha256:99567496e45b55c67427133dc916013e8eb20a811fc7079213f5f03b2a4f5fc0"}, 92 | {file = "duckdb-0.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:5b3da4da73422a3235c3500b3fb541ac546adb3e35642ef1119dbcd9cc7f68b8"}, 93 | {file = "duckdb-0.9.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eca00c0c2062c0265c6c0e78ca2f6a30611b28f3afef062036610e9fc9d4a67d"}, 94 | {file = "duckdb-0.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb5af8e89d40fc4baab1515787ea1520a6c6cf6aa40ab9f107df6c3a75686ce1"}, 95 | {file = "duckdb-0.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fae3d4f83ebcb47995f6acad7c6d57d003a9b6f0e1b31f79a3edd6feb377443"}, 96 | {file = "duckdb-0.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16b9a7efc745bc3c5d1018c3a2f58d9e6ce49c0446819a9600fdba5f78e54c47"}, 97 | {file = "duckdb-0.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b0b60167f5537772e9f5af940e69dcf50e66f5247732b8bb84a493a9af6055"}, 98 | {file = "duckdb-0.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4f27f5e94c47df6c4ccddf18e3277b7464eea3db07356d2c4bf033b5c88359b8"}, 99 | {file = 
"duckdb-0.9.1-cp39-cp39-win32.whl", hash = "sha256:d43cd7e6f783006b59dcc5e40fcf157d21ee3d0c8dfced35278091209e9974d7"}, 100 | {file = "duckdb-0.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:e666795887d9cf1d6b6f6cbb9d487270680e5ff6205ebc54b2308151f13b8cff"}, 101 | {file = "duckdb-0.9.1.tar.gz", hash = "sha256:603a878746015a3f2363a65eb48bcbec816261b6ee8d71eee53061117f6eef9d"}, 102 | ] 103 | 104 | [[package]] 105 | name = "glaredb" 106 | version = "0.5.1" 107 | description = "GlareDB is a fast SQL database for querying and analyzing distributed data." 108 | optional = false 109 | python-versions = ">=3.7" 110 | files = [ 111 | {file = "glaredb-0.5.1-cp37-abi3-macosx_10_7_x86_64.whl", hash = "sha256:d129be577931223b44faefa30f5a8b90093aeb623d87af37db46f1e583f23ee0"}, 112 | {file = "glaredb-0.5.1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:aaada762fa8845d8f2fb106c2663b4aa53780d99e8d847a8add5cc0274da0bbe"}, 113 | {file = "glaredb-0.5.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2f6b5d5507376c2fc9f609544e02d79a76f430e32ccdc1322f6095d548620d8"}, 114 | {file = "glaredb-0.5.1-cp37-abi3-win_amd64.whl", hash = "sha256:812e5a4ddbe94da7d0ba31bcb63b571866e9eda6dc0e0c60bcdc96d1b09a9ec5"}, 115 | ] 116 | 117 | [[package]] 118 | name = "numpy" 119 | version = "1.26.2" 120 | description = "Fundamental package for array computing in Python" 121 | optional = false 122 | python-versions = ">=3.9" 123 | files = [ 124 | {file = "numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f"}, 125 | {file = "numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440"}, 126 | {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75"}, 127 | {file = 
"numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00"}, 128 | {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe"}, 129 | {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523"}, 130 | {file = "numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9"}, 131 | {file = "numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919"}, 132 | {file = "numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841"}, 133 | {file = "numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1"}, 134 | {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a"}, 135 | {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b"}, 136 | {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7"}, 137 | {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8"}, 138 | {file = "numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186"}, 139 | {file = "numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d"}, 140 | {file = 
"numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0"}, 141 | {file = "numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75"}, 142 | {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7"}, 143 | {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6"}, 144 | {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6"}, 145 | {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec"}, 146 | {file = "numpy-1.26.2-cp312-cp312-win32.whl", hash = "sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167"}, 147 | {file = "numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e"}, 148 | {file = "numpy-1.26.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4eb8df4bf8d3d90d091e0146f6c28492b0be84da3e409ebef54349f71ed271ef"}, 149 | {file = "numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a13860fdcd95de7cf58bd6f8bc5a5ef81c0b0625eb2c9a783948847abbef2c2"}, 150 | {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64308ebc366a8ed63fd0bf426b6a9468060962f1a4339ab1074c228fa6ade8e3"}, 151 | {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf8aab04a2c0e859da118f0b38617e5ee65d75b83795055fb66c0d5e9e9b818"}, 152 | {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d73a3abcac238250091b11caef9ad12413dab01669511779bc9b29261dd50210"}, 153 | {file = 
"numpy-1.26.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b361d369fc7e5e1714cf827b731ca32bff8d411212fccd29ad98ad622449cc36"}, 154 | {file = "numpy-1.26.2-cp39-cp39-win32.whl", hash = "sha256:bd3f0091e845164a20bd5a326860c840fe2af79fa12e0469a12768a3ec578d80"}, 155 | {file = "numpy-1.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2beef57fb031dcc0dc8fa4fe297a742027b954949cabb52a2a376c144e5e6060"}, 156 | {file = "numpy-1.26.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1cc3d5029a30fb5f06704ad6b23b35e11309491c999838c31f124fee32107c79"}, 157 | {file = "numpy-1.26.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94cc3c222bb9fb5a12e334d0479b97bb2df446fbe622b470928f5284ffca3f8d"}, 158 | {file = "numpy-1.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe6b44fb8fcdf7eda4ef4461b97b3f63c466b27ab151bec2366db8b197387841"}, 159 | {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"}, 160 | ] 161 | 162 | [[package]] 163 | name = "psutil" 164 | version = "5.9.6" 165 | description = "Cross-platform lib for process and system monitoring in Python." 
166 | optional = false 167 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 168 | files = [ 169 | {file = "psutil-5.9.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:fb8a697f11b0f5994550555fcfe3e69799e5b060c8ecf9e2f75c69302cc35c0d"}, 170 | {file = "psutil-5.9.6-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:91ecd2d9c00db9817a4b4192107cf6954addb5d9d67a969a4f436dbc9200f88c"}, 171 | {file = "psutil-5.9.6-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:10e8c17b4f898d64b121149afb136c53ea8b68c7531155147867b7b1ac9e7e28"}, 172 | {file = "psutil-5.9.6-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:18cd22c5db486f33998f37e2bb054cc62fd06646995285e02a51b1e08da97017"}, 173 | {file = "psutil-5.9.6-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:ca2780f5e038379e520281e4c032dddd086906ddff9ef0d1b9dcf00710e5071c"}, 174 | {file = "psutil-5.9.6-cp27-none-win32.whl", hash = "sha256:70cb3beb98bc3fd5ac9ac617a327af7e7f826373ee64c80efd4eb2856e5051e9"}, 175 | {file = "psutil-5.9.6-cp27-none-win_amd64.whl", hash = "sha256:51dc3d54607c73148f63732c727856f5febec1c7c336f8f41fcbd6315cce76ac"}, 176 | {file = "psutil-5.9.6-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c69596f9fc2f8acd574a12d5f8b7b1ba3765a641ea5d60fb4736bf3c08a8214a"}, 177 | {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92e0cc43c524834af53e9d3369245e6cc3b130e78e26100d1f63cdb0abeb3d3c"}, 178 | {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:748c9dd2583ed86347ed65d0035f45fa8c851e8d90354c122ab72319b5f366f4"}, 179 | {file = "psutil-5.9.6-cp36-cp36m-win32.whl", hash = "sha256:3ebf2158c16cc69db777e3c7decb3c0f43a7af94a60d72e87b2823aebac3d602"}, 180 | {file = "psutil-5.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:ff18b8d1a784b810df0b0fff3bcb50ab941c3b8e2c8de5726f9c71c601c611aa"}, 181 | {file = 
"psutil-5.9.6-cp37-abi3-win32.whl", hash = "sha256:a6f01f03bf1843280f4ad16f4bde26b817847b4c1a0db59bf6419807bc5ce05c"}, 182 | {file = "psutil-5.9.6-cp37-abi3-win_amd64.whl", hash = "sha256:6e5fb8dc711a514da83098bc5234264e551ad980cec5f85dabf4d38ed6f15e9a"}, 183 | {file = "psutil-5.9.6-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:daecbcbd29b289aac14ece28eca6a3e60aa361754cf6da3dfb20d4d32b6c7f57"}, 184 | {file = "psutil-5.9.6.tar.gz", hash = "sha256:e4b92ddcd7dd4cdd3f900180ea1e104932c7bce234fb88976e2a3b296441225a"}, 185 | ] 186 | 187 | [package.extras] 188 | test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] 189 | 190 | [[package]] 191 | name = "pyarrow" 192 | version = "14.0.1" 193 | description = "Python library for Apache Arrow" 194 | optional = false 195 | python-versions = ">=3.8" 196 | files = [ 197 | {file = "pyarrow-14.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:96d64e5ba7dceb519a955e5eeb5c9adcfd63f73a56aea4722e2cc81364fc567a"}, 198 | {file = "pyarrow-14.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a8ae88c0038d1bc362a682320112ee6774f006134cd5afc291591ee4bc06505"}, 199 | {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f6f053cb66dc24091f5511e5920e45c83107f954a21032feadc7b9e3a8e7851"}, 200 | {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:906b0dc25f2be12e95975722f1e60e162437023f490dbd80d0deb7375baf3171"}, 201 | {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:78d4a77a46a7de9388b653af1c4ce539350726cd9af62e0831e4f2bd0c95a2f4"}, 202 | {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06ca79080ef89d6529bb8e5074d4b4f6086143b2520494fcb7cf8a99079cde93"}, 203 | {file = "pyarrow-14.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:32542164d905002c42dff896efdac79b3bdd7291b1b74aa292fac8450d0e4dcd"}, 204 | {file = "pyarrow-14.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = 
"sha256:c7331b4ed3401b7ee56f22c980608cf273f0380f77d0f73dd3c185f78f5a6220"}, 205 | {file = "pyarrow-14.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:922e8b49b88da8633d6cac0e1b5a690311b6758d6f5d7c2be71acb0f1e14cd61"}, 206 | {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58c889851ca33f992ea916b48b8540735055201b177cb0dcf0596a495a667b00"}, 207 | {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30d8494870d9916bb53b2a4384948491444741cb9a38253c590e21f836b01222"}, 208 | {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:be28e1a07f20391bb0b15ea03dcac3aade29fc773c5eb4bee2838e9b2cdde0cb"}, 209 | {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:981670b4ce0110d8dcb3246410a4aabf5714db5d8ea63b15686bce1c914b1f83"}, 210 | {file = "pyarrow-14.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:4756a2b373a28f6166c42711240643fb8bd6322467e9aacabd26b488fa41ec23"}, 211 | {file = "pyarrow-14.0.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:cf87e2cec65dd5cf1aa4aba918d523ef56ef95597b545bbaad01e6433851aa10"}, 212 | {file = "pyarrow-14.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:470ae0194fbfdfbf4a6b65b4f9e0f6e1fa0ea5b90c1ee6b65b38aecee53508c8"}, 213 | {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6263cffd0c3721c1e348062997babdf0151301f7353010c9c9a8ed47448f82ab"}, 214 | {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8089d7e77d1455d529dbd7cff08898bbb2666ee48bc4085203af1d826a33cc"}, 215 | {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fada8396bc739d958d0b81d291cfd201126ed5e7913cb73de6bc606befc30226"}, 216 | {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2a145dab9ed7849fc1101bf03bcdc69913547f10513fdf70fc3ab6c0a50c7eee"}, 217 | {file = 
"pyarrow-14.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:05fe7994745b634c5fb16ce5717e39a1ac1fac3e2b0795232841660aa76647cd"}, 218 | {file = "pyarrow-14.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:a8eeef015ae69d104c4c3117a6011e7e3ecd1abec79dc87fd2fac6e442f666ee"}, 219 | {file = "pyarrow-14.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c76807540989fe8fcd02285dd15e4f2a3da0b09d27781abec3adc265ddbeba1"}, 220 | {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:450e4605e3c20e558485f9161a79280a61c55efe585d51513c014de9ae8d393f"}, 221 | {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:323cbe60210173ffd7db78bfd50b80bdd792c4c9daca8843ef3cd70b186649db"}, 222 | {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0140c7e2b740e08c5a459439d87acd26b747fc408bde0a8806096ee0baaa0c15"}, 223 | {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:e592e482edd9f1ab32f18cd6a716c45b2c0f2403dc2af782f4e9674952e6dd27"}, 224 | {file = "pyarrow-14.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d264ad13605b61959f2ae7c1d25b1a5b8505b112715c961418c8396433f213ad"}, 225 | {file = "pyarrow-14.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:01e44de9749cddc486169cb632f3c99962318e9dacac7778315a110f4bf8a450"}, 226 | {file = "pyarrow-14.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0351fecf0e26e152542bc164c22ea2a8e8c682726fce160ce4d459ea802d69c"}, 227 | {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33c1f6110c386464fd2e5e4ea3624466055bbe681ff185fd6c9daa98f30a3f9a"}, 228 | {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11e045dfa09855b6d3e7705a37c42e2dc2c71d608fab34d3c23df2e02df9aec3"}, 229 | {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = 
"sha256:097828b55321897db0e1dbfc606e3ff8101ae5725673498cbfa7754ee0da80e4"}, 230 | {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1daab52050a1c48506c029e6fa0944a7b2436334d7e44221c16f6f1b2cc9c510"}, 231 | {file = "pyarrow-14.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:3f6d5faf4f1b0d5a7f97be987cf9e9f8cd39902611e818fe134588ee99bf0283"}, 232 | {file = "pyarrow-14.0.1.tar.gz", hash = "sha256:b8b3f4fe8d4ec15e1ef9b599b94683c5216adaed78d5cb4c606180546d1e2ee1"}, 233 | ] 234 | 235 | [package.dependencies] 236 | numpy = ">=1.16.6" 237 | 238 | [metadata] 239 | lock-version = "2.0" 240 | python-versions = "^3.9" 241 | content-hash = "010516149c58ff64ef1df866141f9531026f2eb294ac9bf7902d0e99a2d4e404" 242 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "embedded-olap-benchmarks" 3 | version = "0.1.4" 4 | description = "" 5 | authors = ["Lorenzo Mangani "] 6 | 7 | [tool.poetry.dependencies] 8 | psutil = "^5.9.6" 9 | python = "^3.9" 10 | chdb = "^2.0.0b1" 11 | duckdb = "^1.0.0" 12 | glaredb = "^0.9.4" 13 | databend = "^1.2.453" 14 | datafusion = "^39.0.0" 15 | 16 | [tool.poetry.dev-dependencies] 17 | 18 | [build-system] 19 | requires = ["poetry-core>=1.0.0"] 20 | build-backend = "poetry.core.masonry.api" 21 | -------------------------------------------------------------------------------- /queries/count.chdb.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) FROM url('https://shell.duckdb.org/data/tpch/0_01/parquet/lineitem.parquet'); 2 | -------------------------------------------------------------------------------- /queries/count.databend.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) FROM 'https://shell.duckdb.org/data/tpch/0_01/parquet/lineitem.parquet'; 2 | 
-------------------------------------------------------------------------------- /queries/count.datafusion.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) FROM 'https://shell.duckdb.org/data/tpch/0_01/parquet/lineitem.parquet'; 2 | -------------------------------------------------------------------------------- /queries/count.duckdb.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) FROM "https://shell.duckdb.org/data/tpch/0_01/parquet/lineitem.parquet"; 2 | -------------------------------------------------------------------------------- /queries/count.glaredb.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) FROM 'https://shell.duckdb.org/data/tpch/0_01/parquet/lineitem.parquet'; 2 | -------------------------------------------------------------------------------- /queries/groupby-local.chdb.sql: -------------------------------------------------------------------------------- 1 | select 2 | toYYYYMMDD(tpep_pickup_datetime) as day, 3 | uniqHLL12(PULocationID) as locations, 4 | count(*) as trips, 5 | sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) as revenue 6 | from file('yellow_tripdata_2023-01.parquet', Parquet) 7 | where trip_distance > 5 8 | group by toYYYYMMDD(tpep_pickup_datetime) 9 | order by day 10 | -------------------------------------------------------------------------------- /queries/groupby-local.databend.sql: -------------------------------------------------------------------------------- 1 | SELECT TO_DATE(tpep_pickup_datetime::date) as day, 2 | PULocationID as location, 3 | count(*) as trips, 4 | sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) as revenue 5 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet' /* NOTE(review): this "local" variant still reads the remote URL; switch to the downloaded yellow_tripdata_2023-01.parquet once the local-file syntax Databend accepts here is confirmed */ 6 | WHERE trip_distance > 5 7 | GROUP BY day, location /* aggregate per pickup day (the selected alias), not per raw timestamp */ 8 | ORDER BY day 9 |
-------------------------------------------------------------------------------- /queries/groupby-local.duckdb.sql: -------------------------------------------------------------------------------- 1 | select 2 | cast(tpep_pickup_datetime as date) as day, 3 | approx_count_distinct(PULocationID) as locations, 4 | count(*) as trips, 5 | sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) as revenue 6 | from read_parquet('yellow_tripdata_2023-01.parquet') 7 | where trip_distance > 5 8 | group by cast(tpep_pickup_datetime as date) 9 | order by day 10 | -------------------------------------------------------------------------------- /queries/groupby-local.glaredb.sql: -------------------------------------------------------------------------------- 1 | SELECT tpep_pickup_datetime::date as day, /* the pickup date itself; arrow_typeof() only returned the Arrow type name */ 2 | "PULocationID" as location, 3 | count(*) as trips, 4 | sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) as revenue 5 | FROM 'yellow_tripdata_2023-01.parquet' 6 | WHERE trip_distance > 5 7 | GROUP BY day, location /* aggregate per pickup day, not per raw timestamp */ 8 | ORDER BY day 9 | -------------------------------------------------------------------------------- /queries/groupby.chdb.sql: -------------------------------------------------------------------------------- 1 | select 2 | toYYYYMMDD(tpep_pickup_datetime) as day, 3 | uniqHLL12(PULocationID) as locations, 4 | count(*) as trips, 5 | sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) as revenue 6 | from url('https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet') 7 | where trip_distance > 5 8 | group by toYYYYMMDD(tpep_pickup_datetime) 9 | order by day 10 | -------------------------------------------------------------------------------- /queries/groupby.databend.sql: -------------------------------------------------------------------------------- 1 | SELECT TO_DATE(tpep_pickup_datetime::date) as day, 2 | PULocationID as location, 3 | count(*) as trips, 4 |
sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) as revenue 5 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet' 6 | WHERE trip_distance > 5 7 | GROUP BY day, location /* aggregate per pickup day (the selected alias), not per raw timestamp */ 8 | ORDER BY day 9 | -------------------------------------------------------------------------------- /queries/groupby.datafusion.sql: -------------------------------------------------------------------------------- 1 | SELECT TO_DATE(tpep_pickup_datetime::date) as day, 2 | "PULocationID" as location, /* quoted: unquoted identifiers are lower-cased by DataFusion's default SQL parser settings */ 3 | count(*) as trips, 4 | sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) as revenue 5 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet' 6 | WHERE trip_distance > 5 7 | GROUP BY day, location /* aggregate per pickup day, not per raw timestamp */ 8 | ORDER BY day 9 | -------------------------------------------------------------------------------- /queries/groupby.duckdb.sql: -------------------------------------------------------------------------------- 1 | select 2 | cast(tpep_pickup_datetime as date) as day, 3 | approx_count_distinct(PULocationID) as locations, 4 | count(*) as trips, 5 | sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) as revenue 6 | from 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet' 7 | where trip_distance > 5 8 | group by cast(tpep_pickup_datetime as date) 9 | order by day 10 | -------------------------------------------------------------------------------- /queries/groupby.glaredb.sql: -------------------------------------------------------------------------------- 1 | SELECT tpep_pickup_datetime::date as day, /* the pickup date itself; arrow_typeof() only returned the Arrow type name */ 2 | "PULocationID" as location, 3 | count(*) as trips, 4 | sum(fare_amount) + sum(mta_tax) + sum(tolls_amount) + sum(tip_amount) as revenue 5 | FROM 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet' 6 | WHERE trip_distance > 5 7 | GROUP BY day, location /* aggregate per pickup day, not per raw timestamp */ 8 |
ORDER BY day 9 | -------------------------------------------------------------------------------- /queries/version.chdb.sql: -------------------------------------------------------------------------------- 1 | SELECT chdb() 2 | -------------------------------------------------------------------------------- /queries/version.databend.sql: -------------------------------------------------------------------------------- 1 | SELECT version() 2 | -------------------------------------------------------------------------------- /queries/version.datafusion.sql: -------------------------------------------------------------------------------- 1 | SELECT version() 2 | -------------------------------------------------------------------------------- /queries/version.duckdb.sql: -------------------------------------------------------------------------------- 1 | SELECT version() 2 | -------------------------------------------------------------------------------- /queries/version.glaredb.sql: -------------------------------------------------------------------------------- 1 | SELECT version() 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Engines listed here must cover every DBNAME in the CI benchmark matrix. 2 | poetry 3 | psutil 4 | chdb 5 | duckdb 6 | glaredb 7 | databend 8 | datafusion 9 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if ! test -f ./yellow_tripdata_2023-01.parquet; then 4 | wget https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-01.parquet 5 | 6 | fi 7 | 8 | poetry run python3 benchmark.py 9 | --------------------------------------------------------------------------------