├── .gitmodules
├── benchmarks
├── .gitignore
├── spark
│ └── .gitignore
├── queries
│ ├── q6.sql
│ ├── q17.sql
│ ├── q14.sql
│ ├── q4.sql
│ ├── q13.sql
│ ├── q3.sql
│ ├── q5.sql
│ ├── q1.sql
│ ├── q15.sql
│ ├── q11.sql
│ ├── q16.sql
│ ├── q18.sql
│ ├── q10.sql
│ ├── q12.sql
│ ├── q9.sql
│ ├── q2.sql
│ ├── q21.sql
│ ├── q20.sql
│ ├── q22.sql
│ ├── q8.sql
│ ├── q7.sql
│ └── q19.sql
├── db-benchmark
│ ├── run-bench.sh
│ └── README.md
├── .dockerignore
├── tpch-gen.sh
├── run.sh
├── Cargo.toml
└── tpch.py
├── python
├── testdata
│ ├── test.csv
│ └── test.parquet
├── .gitignore
├── requirements.txt
├── ballista
│ ├── tests
│ │ └── __init__.py
│ └── __init__.py
├── build.rs
├── examples
│ ├── scheduler.py
│ ├── executor.py
│ ├── client_remote.py
│ ├── client_standalone.py
│ ├── readme_standalone.py
│ └── readme_remote.py
├── Cargo.toml
├── src
│ └── utils.rs
└── pyproject.toml
├── dev
├── msrvcheck
│ ├── .gitignore
│ ├── README.md
│ └── Cargo.toml
├── docker
│ ├── cli-entrypoint.sh
│ ├── executor-entrypoint.sh
│ ├── scheduler-entrypoint.sh
│ ├── builder-entrypoint.sh
│ ├── standalone-entrypoint.sh
│ ├── ballista-cli.Dockerfile
│ ├── ballista-executor.Dockerfile
│ ├── ballista-scheduler.Dockerfile
│ ├── ballista-benchmarks.Dockerfile
│ ├── ballista-builder.Dockerfile
│ └── ballista-standalone.Dockerfile
├── build-set-env.sh
├── release
│ ├── rat_exclude_files.txt
│ ├── crate-deps.dot
│ ├── run-rat.sh
│ ├── check-rat-report.py
│ └── release-tarball.sh
├── rust_lint.sh
├── build-ballista-executables.sh
├── integration-tests.sh
└── build-ballista-docker.sh
├── ballista
├── core
│ ├── tests
│ │ └── customer.csv
│ ├── src
│ │ ├── consistent_hash
│ │ │ └── node.rs
│ │ ├── serde
│ │ │ └── generated
│ │ │ │ └── mod.rs
│ │ ├── execution_plans
│ │ │ └── mod.rs
│ │ └── lib.rs
│ └── README.md
├── client
│ ├── testdata
│ │ ├── single_nan.parquet
│ │ ├── alltypes_plain.parquet
│ │ └── bug_1296
│ │ │ ├── store.csv
│ │ │ ├── item.csv
│ │ │ ├── store_sales.csv
│ │ │ └── date_dim.csv
│ ├── src
│ │ ├── lib.rs
│ │ └── prelude.rs
│ └── Cargo.toml
├── scheduler
│ ├── testdata
│ │ ├── region
│ │ │ └── region.tbl
│ │ ├── nation
│ │ │ └── nation.tbl
│ │ ├── orders
│ │ │ └── orders.tbl
│ │ ├── part
│ │ │ └── part.tbl
│ │ ├── lineitem
│ │ │ ├── partition0.tbl
│ │ │ └── partition1.tbl
│ │ ├── supplier
│ │ │ └── supplier.tbl
│ │ ├── customer
│ │ │ └── customer.tbl
│ │ └── partsupp
│ │ │ └── partsupp.tbl
│ ├── src
│ │ ├── physical_optimizer
│ │ │ └── mod.rs
│ │ ├── lib.rs
│ │ ├── api
│ │ │ └── mod.rs
│ │ └── state
│ │ │ └── session_manager.rs
│ ├── README.md
│ ├── build.rs
│ └── proto
│ │ └── keda.proto
└── executor
│ ├── README.md
│ ├── src
│ ├── terminate.rs
│ └── metrics
│ │ └── mod.rs
│ └── Cargo.toml
├── examples
├── testdata
│ └── alltypes_plain.parquet
├── src
│ └── lib.rs
├── examples
│ ├── standalone-sql.rs
│ ├── remote-dataframe.rs
│ ├── custom-executor.rs
│ ├── remote-sql.rs
│ └── custom-scheduler.rs
└── Cargo.toml
├── docs
├── developer
│ ├── images
│ │ └── query-execution.png
│ └── README.md
├── source
│ ├── _static
│ │ └── images
│ │ │ ├── ballista-logo.png
│ │ │ ├── ballista_black.png
│ │ │ ├── ballista_white.png
│ │ │ ├── tpch_allqueries.png
│ │ │ ├── tpch_queries_compare.png
│ │ │ ├── tpch_queries_speedup_abs.png
│ │ │ └── tpch_queries_speedup_rel.png
│ ├── contributors-guide
│ │ └── ballista.drawio.png
│ ├── user-guide
│ │ ├── images
│ │ │ ├── ballista-web-ui.png
│ │ │ └── example-query-plan.png
│ │ ├── deployment
│ │ │ ├── index.rst
│ │ │ └── cargo-install.md
│ │ ├── faq.md
│ │ ├── scheduler.md
│ │ └── metrics.md
│ ├── _templates
│ │ ├── layout.html
│ │ └── docs-sidebar.html
│ ├── community
│ │ └── communication.md
│ └── index.rst
├── .gitignore
├── build.sh
├── requirements.txt
├── Makefile
├── README.md
└── make.bat
├── .dockerignore
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── workflows
│ ├── dev_pr
│ │ └── labeler.yml
│ ├── dependencies.yml
│ ├── dev.yml
│ ├── dev_pr.yml
│ └── docs.yaml
├── pull_request_template.md
├── dependabot.yml
└── actions
│ ├── setup-builder
│ └── action.yaml
│ ├── setup-macos-builder
│ └── action.yaml
│ ├── setup-windows-builder
│ └── action.yaml
│ └── setup-rust-runtime
│ └── action.yaml
├── header
├── clippy.toml
├── rust-toolchain.toml
├── rustfmt.toml
├── ci
└── scripts
│ ├── rust_fmt.sh
│ ├── rust_clippy.sh
│ ├── rust_toml_fmt.sh
│ └── rust_docs.sh
├── CODE_OF_CONDUCT.md
├── ballista-cli
├── src
│ └── lib.rs
├── Dockerfile
└── Cargo.toml
├── .github_changelog_generator
├── .devcontainer
└── devcontainer.json
├── .asf.yaml
├── NOTICE.txt
├── .gitignore
├── docker-compose.yml
└── pre-commit.sh
/.gitmodules:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/benchmarks/.gitignore:
--------------------------------------------------------------------------------
1 | data
--------------------------------------------------------------------------------
/benchmarks/spark/.gitignore:
--------------------------------------------------------------------------------
1 | target
--------------------------------------------------------------------------------
/python/testdata/test.csv:
--------------------------------------------------------------------------------
1 | a,b
2 | 1,2
--------------------------------------------------------------------------------
/dev/msrvcheck/.gitignore:
--------------------------------------------------------------------------------
1 | Cargo.lock
2 |
--------------------------------------------------------------------------------
/python/.gitignore:
--------------------------------------------------------------------------------
1 | venv
2 | *.so
3 | .pyo3_build_config
--------------------------------------------------------------------------------
/ballista/core/tests/customer.csv:
--------------------------------------------------------------------------------
1 | andrew,100
2 | jorge,200
3 | andy,150
4 | paul,300
5 |
--------------------------------------------------------------------------------
/python/requirements.txt:
--------------------------------------------------------------------------------
1 | datafusion==49.0.0
2 | pyarrow
3 | pytest
4 | maturin>=1.8.0,<2.0.0
5 | cloudpickle
6 | pandas
--------------------------------------------------------------------------------
/python/testdata/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/python/testdata/test.parquet
--------------------------------------------------------------------------------
/examples/testdata/alltypes_plain.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/examples/testdata/alltypes_plain.parquet
--------------------------------------------------------------------------------
/ballista/client/testdata/single_nan.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/ballista/client/testdata/single_nan.parquet
--------------------------------------------------------------------------------
/docs/developer/images/query-execution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/developer/images/query-execution.png
--------------------------------------------------------------------------------
/docs/source/_static/images/ballista-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/source/_static/images/ballista-logo.png
--------------------------------------------------------------------------------
/docs/source/_static/images/ballista_black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/source/_static/images/ballista_black.png
--------------------------------------------------------------------------------
/docs/source/_static/images/ballista_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/source/_static/images/ballista_white.png
--------------------------------------------------------------------------------
/ballista/client/testdata/alltypes_plain.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/ballista/client/testdata/alltypes_plain.parquet
--------------------------------------------------------------------------------
/docs/source/_static/images/tpch_allqueries.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/source/_static/images/tpch_allqueries.png
--------------------------------------------------------------------------------
/docs/source/contributors-guide/ballista.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/source/contributors-guide/ballista.drawio.png
--------------------------------------------------------------------------------
/docs/source/user-guide/images/ballista-web-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/source/user-guide/images/ballista-web-ui.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tpch_queries_compare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/source/_static/images/tpch_queries_compare.png
--------------------------------------------------------------------------------
/docs/source/user-guide/images/example-query-plan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/source/user-guide/images/example-query-plan.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tpch_queries_speedup_abs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/source/_static/images/tpch_queries_speedup_abs.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tpch_queries_speedup_rel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista/HEAD/docs/source/_static/images/tpch_queries_speedup_rel.png
--------------------------------------------------------------------------------
/ballista/client/testdata/bug_1296/store.csv:
--------------------------------------------------------------------------------
1 | s_store_sk,s_store_id,s_store_name,s_company_name,s_city,s_state
2 | 10,ST001,Downtown Store,Retail Corp,New York,NY
3 | 11,ST002,Mall Store,Retail Corp,Los Angeles,CA
4 | 12,ST003,Uptown Market,Market Group,Chicago,IL
--------------------------------------------------------------------------------
/benchmarks/queries/q6.sql:
--------------------------------------------------------------------------------
1 | select
2 | sum(l_extendedprice * l_discount) as revenue
3 | from
4 | lineitem
5 | where
6 | l_shipdate >= date '1994-01-01'
7 | and l_shipdate < date '1995-01-01'
8 | and l_discount between 0.06 - 0.01 and 0.06 + 0.01
9 | and l_quantity < 24;
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | ci
3 | conbench
4 | dev/dist
5 | dev/release
6 | python
7 | **/docs
8 | target/
9 | CHANGELOG.md
10 | **/tests
11 | **/data
12 | !target/release/ballista-scheduler
13 | !target/release/ballista-executor
14 | !target/release/ballista-cli
15 | !target/release/tpch
16 |
--------------------------------------------------------------------------------
/ballista/client/testdata/bug_1296/item.csv:
--------------------------------------------------------------------------------
1 | i_item_sk,i_item_id,i_item_name,i_category,i_brand,i_price
2 | 20,ITM001,Laptop Pro,Electronics,TechBrand,999.99
3 | 21,ITM002,Wireless Headphones,Electronics,SoundTech,199.99
4 | 22,ITM003,Cotton T-Shirt,Clothing,FashionNow,19.99
5 | 23,ITM004,Running Shoes,Footwear,SpeedWay,89.99
6 | 24,ITM005,Coffee Maker,Appliances,HomeGoods,79.99
--------------------------------------------------------------------------------
/benchmarks/queries/q17.sql:
--------------------------------------------------------------------------------
1 | select
2 | sum(l_extendedprice) / 7.0 as avg_yearly
3 | from
4 | lineitem,
5 | part
6 | where
7 | p_partkey = l_partkey
8 | and p_brand = 'Brand#23'
9 | and p_container = 'MED BOX'
10 | and l_quantity < (
11 | select
12 | 0.2 * avg(l_quantity)
13 | from
14 | lineitem
15 | where
16 | l_partkey = p_partkey
17 | );
--------------------------------------------------------------------------------
/ballista/client/testdata/bug_1296/store_sales.csv:
--------------------------------------------------------------------------------
1 | ss_item_sk,ss_sold_date_sk,ss_store_sk,ss_quantity,ss_sales_price
2 | 20,1,10,2,999.99
3 | 20,2,10,3,999.99
4 | 22,2,11,10,19.99
5 | 20,3,10,4,999.99
6 | 20,3,10,1,999.99
7 | 22,3,11,8,19.99
8 | 20,4,10,9,999.99
9 | 20,4,10,1,999.99
10 | 22,4,11,15,19.99
11 | 20,5,10,5,999.99
12 | 22,5,11,9,19.99
13 | 20,6,10,4,999.99
14 | 22,6,11,8,19.99
15 | 20,9,10,3,999.99
16 | 22,9,11,10,19.99
--------------------------------------------------------------------------------
/ballista/scheduler/testdata/region/region.tbl:
--------------------------------------------------------------------------------
1 | 0|AFRICA|lar deposits. blithely final packages cajole. regular waters are final requests. regular accounts are according to |
2 | 1|AMERICA|hs use ironic, even requests. s|
3 | 2|ASIA|ges. thinly even pinto beans ca|
4 | 3|EUROPE|ly final courts cajole furiously final excuse|
5 | 4|MIDDLE EAST|uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl|
6 |
--------------------------------------------------------------------------------
/benchmarks/queries/q14.sql:
--------------------------------------------------------------------------------
1 | select
2 | 100.00 * sum(case
3 | when p_type like 'PROMO%'
4 | then l_extendedprice * (1 - l_discount)
5 | else 0
6 | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
7 | from
8 | lineitem,
9 | part
10 | where
11 | l_partkey = p_partkey
12 | and l_shipdate >= date '1995-09-01'
13 | and l_shipdate < date '1995-10-01';
--------------------------------------------------------------------------------
/ballista/client/testdata/bug_1296/date_dim.csv:
--------------------------------------------------------------------------------
1 | d_date_sk,d_date_id,d_date,d_year,d_moy,d_month_name
2 | 1,19981201,1998-12-01,1998,12,December
3 | 2,19981215,1998-12-15,1998,12,December
4 | 3,19990105,1999-01-05,1999,1,January
5 | 4,19990210,1999-02-10,1999,2,February
6 | 5,19990315,1999-03-15,1999,3,March
7 | 6,19990420,1999-04-20,1999,4,April
8 | 7,19990525,1999-05-25,1999,5,May
9 | 8,19990630,1999-06-30,1999,6,June
10 | 9,20000105,2000-01-05,2000,1,January
11 | 10,20000120,2000-01-20,2000,1,January
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 |
16 | **Expected behavior**
17 | A clear and concise description of what you expected to happen.
18 |
19 | **Additional context**
20 | Add any other context about the problem here.
21 |
--------------------------------------------------------------------------------
/benchmarks/queries/q4.sql:
--------------------------------------------------------------------------------
1 | select
2 | o_orderpriority,
3 | count(*) as order_count
4 | from
5 | orders
6 | where
7 | o_orderdate >= date '1993-07-01' -- typed date literal, matching the upper bound below
8 | and o_orderdate < date '1993-07-01' + interval '3' month
9 | and exists (
10 | select
11 | *
12 | from
13 | lineitem
14 | where
15 | l_orderkey = o_orderkey
16 | and l_commitdate < l_receiptdate
17 | )
18 | group by
19 | o_orderpriority
20 | order by
21 | o_orderpriority;
--------------------------------------------------------------------------------
/benchmarks/queries/q13.sql:
--------------------------------------------------------------------------------
1 | select
2 | c_count,
3 | count(*) as custdist
4 | from
5 | (
6 | select
7 | c_custkey,
8 | count(o_orderkey)
9 | from
10 | customer left outer join orders on
11 | c_custkey = o_custkey
12 | and o_comment not like '%special%requests%'
13 | group by
14 | c_custkey
15 | ) as c_orders (c_custkey, c_count)
16 | group by
17 | c_count
18 | order by
19 | custdist desc,
20 | c_count desc;
--------------------------------------------------------------------------------
/benchmarks/queries/q3.sql:
--------------------------------------------------------------------------------
1 | select
2 | l_orderkey,
3 | sum(l_extendedprice * (1 - l_discount)) as revenue,
4 | o_orderdate,
5 | o_shippriority
6 | from
7 | customer,
8 | orders,
9 | lineitem
10 | where
11 | c_mktsegment = 'BUILDING'
12 | and c_custkey = o_custkey
13 | and l_orderkey = o_orderkey
14 | and o_orderdate < date '1995-03-15'
15 | and l_shipdate > date '1995-03-15'
16 | group by
17 | l_orderkey,
18 | o_orderdate,
19 | o_shippriority
20 | order by
21 | revenue desc,
22 | o_orderdate
23 | limit 10;
--------------------------------------------------------------------------------
/benchmarks/queries/q5.sql:
--------------------------------------------------------------------------------
1 | select
2 | n_name,
3 | sum(l_extendedprice * (1 - l_discount)) as revenue
4 | from
5 | customer,
6 | orders,
7 | lineitem,
8 | supplier,
9 | nation,
10 | region
11 | where
12 | c_custkey = o_custkey
13 | and l_orderkey = o_orderkey
14 | and l_suppkey = s_suppkey
15 | and c_nationkey = s_nationkey
16 | and s_nationkey = n_nationkey
17 | and n_regionkey = r_regionkey
18 | and r_name = 'ASIA'
19 | and o_orderdate >= date '1994-01-01'
20 | and o_orderdate < date '1995-01-01'
21 | group by
22 | n_name
23 | order by
24 | revenue desc;
--------------------------------------------------------------------------------
/benchmarks/queries/q1.sql:
--------------------------------------------------------------------------------
1 | select
2 | l_returnflag,
3 | l_linestatus,
4 | sum(l_quantity) as sum_qty,
5 | sum(l_extendedprice) as sum_base_price,
6 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
7 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
8 | avg(l_quantity) as avg_qty,
9 | avg(l_extendedprice) as avg_price,
10 | avg(l_discount) as avg_disc,
11 | count(*) as count_order
12 | from
13 | lineitem
14 | where
15 | l_shipdate <= date '1998-09-02'
16 | group by
17 | l_returnflag,
18 | l_linestatus
19 | order by
20 | l_returnflag,
21 | l_linestatus;
--------------------------------------------------------------------------------
/benchmarks/queries/q15.sql:
--------------------------------------------------------------------------------
1 | create view revenue0 (supplier_no, total_revenue) as
2 | select
3 | l_suppkey,
4 | sum(l_extendedprice * (1 - l_discount))
5 | from
6 | lineitem
7 | where
8 | l_shipdate >= date '1996-01-01'
9 | and l_shipdate < date '1996-01-01' + interval '3' month
10 | group by
11 | l_suppkey;
12 |
13 |
14 | select
15 | s_suppkey,
16 | s_name,
17 | s_address,
18 | s_phone,
19 | total_revenue
20 | from
21 | supplier,
22 | revenue0
23 | where
24 | s_suppkey = supplier_no
25 | and total_revenue = (
26 | select
27 | max(total_revenue)
28 | from
29 | revenue0
30 | )
31 | order by
32 | s_suppkey;
33 |
34 | drop view revenue0;
--------------------------------------------------------------------------------
/benchmarks/queries/q11.sql:
--------------------------------------------------------------------------------
1 | select
2 | ps_partkey,
3 | sum(ps_supplycost * ps_availqty) as value
4 | from
5 | partsupp,
6 | supplier,
7 | nation
8 | where
9 | ps_suppkey = s_suppkey
10 | and s_nationkey = n_nationkey
11 | and n_name = 'GERMANY'
12 | group by
13 | ps_partkey having
14 | sum(ps_supplycost * ps_availqty) > (
15 | select
16 | sum(ps_supplycost * ps_availqty) * 0.0001
17 | from
18 | partsupp,
19 | supplier,
20 | nation
21 | where
22 | ps_suppkey = s_suppkey
23 | and s_nationkey = n_nationkey
24 | and n_name = 'GERMANY'
25 | )
26 | order by
27 | value desc;
--------------------------------------------------------------------------------
/benchmarks/queries/q16.sql:
--------------------------------------------------------------------------------
1 | select
2 | p_brand,
3 | p_type,
4 | p_size,
5 | count(distinct ps_suppkey) as supplier_cnt
6 | from
7 | partsupp,
8 | part
9 | where
10 | p_partkey = ps_partkey
11 | and p_brand <> 'Brand#45'
12 | and p_type not like 'MEDIUM POLISHED%'
13 | and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
14 | and ps_suppkey not in (
15 | select
16 | s_suppkey
17 | from
18 | supplier
19 | where
20 | s_comment like '%Customer%Complaints%'
21 | )
22 | group by
23 | p_brand,
24 | p_type,
25 | p_size
26 | order by
27 | supplier_cnt desc,
28 | p_brand,
29 | p_type,
30 | p_size;
--------------------------------------------------------------------------------
/benchmarks/queries/q18.sql:
--------------------------------------------------------------------------------
1 | select
2 | c_name,
3 | c_custkey,
4 | o_orderkey,
5 | o_orderdate,
6 | o_totalprice,
7 | sum(l_quantity)
8 | from
9 | customer,
10 | orders,
11 | lineitem
12 | where
13 | o_orderkey in (
14 | select
15 | l_orderkey
16 | from
17 | lineitem
18 | group by
19 | l_orderkey having
20 | sum(l_quantity) > 300
21 | )
22 | and c_custkey = o_custkey
23 | and o_orderkey = l_orderkey
24 | group by
25 | c_name,
26 | c_custkey,
27 | o_orderkey,
28 | o_orderdate,
29 | o_totalprice
30 | order by
31 | o_totalprice desc,
32 | o_orderdate
33 | limit 100;
--------------------------------------------------------------------------------
/benchmarks/queries/q10.sql:
--------------------------------------------------------------------------------
1 | select
2 | c_custkey,
3 | c_name,
4 | sum(l_extendedprice * (1 - l_discount)) as revenue,
5 | c_acctbal,
6 | n_name,
7 | c_address,
8 | c_phone,
9 | c_comment
10 | from
11 | customer,
12 | orders,
13 | lineitem,
14 | nation
15 | where
16 | c_custkey = o_custkey
17 | and l_orderkey = o_orderkey
18 | and o_orderdate >= date '1993-10-01'
19 | and o_orderdate < date '1994-01-01'
20 | and l_returnflag = 'R'
21 | and c_nationkey = n_nationkey
22 | group by
23 | c_custkey,
24 | c_name,
25 | c_acctbal,
26 | c_phone,
27 | n_name,
28 | c_address,
29 | c_comment
30 | order by
31 | revenue desc
32 | limit 20;
--------------------------------------------------------------------------------
/header:
--------------------------------------------------------------------------------
1 | Licensed to the Apache Software Foundation (ASF) under one
2 | or more contributor license agreements. See the NOTICE file
3 | distributed with this work for additional information
4 | regarding copyright ownership. The ASF licenses this file
5 | to you under the Apache License, Version 2.0 (the
6 | "License"); you may not use this file except in compliance
7 | with the License. You may obtain a copy of the License at
8 |
9 | http://www.apache.org/licenses/LICENSE-2.0
10 |
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS,
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | See the License for the specific language governing permissions and
15 | limitations under the License.
16 |
17 |
--------------------------------------------------------------------------------
/benchmarks/queries/q12.sql:
--------------------------------------------------------------------------------
1 | select
2 | l_shipmode,
3 | sum(case
4 | when o_orderpriority = '1-URGENT'
5 | or o_orderpriority = '2-HIGH'
6 | then 1
7 | else 0
8 | end) as high_line_count,
9 | sum(case
10 | when o_orderpriority <> '1-URGENT'
11 | and o_orderpriority <> '2-HIGH'
12 | then 1
13 | else 0
14 | end) as low_line_count
15 | from
16 | lineitem
17 | join
18 | orders
19 | on
20 | l_orderkey = o_orderkey
21 | where
22 | l_shipmode in ('MAIL', 'SHIP')
23 | and l_commitdate < l_receiptdate
24 | and l_shipdate < l_commitdate
25 | and l_receiptdate >= date '1994-01-01'
26 | and l_receiptdate < date '1995-01-01'
27 | group by
28 | l_shipmode
29 | order by
30 | l_shipmode;
--------------------------------------------------------------------------------
/clippy.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 |
--------------------------------------------------------------------------------
/python/ballista/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: enhancement
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem or challenge? Please describe what you are trying to do.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | (This section helps Ballista developers understand the context and *why* for this feature, in addition to the *what*)
13 |
14 | **Describe the solution you'd like**
15 | A clear and concise description of what you want to happen.
16 |
17 | **Describe alternatives you've considered**
18 | A clear and concise description of any alternative solutions or features you've considered.
19 |
20 | **Additional context**
21 | Add any other context or screenshots about the feature request here.
22 |
--------------------------------------------------------------------------------
/docs/source/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "pydata_sphinx_theme/layout.html" %}
2 |
3 | {# Silence the navbar #}
4 | {% block docs_navbar %}
5 | {% endblock %}
6 |
7 |
10 | {% block footer %}
11 |
12 |
25 |
26 | {% endblock %}
27 |
--------------------------------------------------------------------------------
/examples/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | pub mod test_util;
19 |
--------------------------------------------------------------------------------
/rust-toolchain.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [toolchain]
19 | channel = "stable"
20 |
--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | edition = "2021"
19 | max_width = 90
20 |
21 |
--------------------------------------------------------------------------------
/benchmarks/queries/q9.sql:
--------------------------------------------------------------------------------
1 | select
2 | nation,
3 | o_year,
4 | sum(amount) as sum_profit
5 | from
6 | (
7 | select
8 | n_name as nation,
9 | extract(year from o_orderdate) as o_year,
10 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
11 | from
12 | part,
13 | supplier,
14 | lineitem,
15 | partsupp,
16 | orders,
17 | nation
18 | where
19 | s_suppkey = l_suppkey
20 | and ps_suppkey = l_suppkey
21 | and ps_partkey = l_partkey
22 | and p_partkey = l_partkey
23 | and o_orderkey = l_orderkey
24 | and s_nationkey = n_nationkey
25 | and p_name like '%green%'
26 | ) as profit
27 | group by
28 | nation,
29 | o_year
30 | order by
31 | nation,
32 | o_year desc;
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | build
19 | source/python/generated
20 | venv/
21 |
--------------------------------------------------------------------------------
/docs/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | rm -rf build
21 | make html
22 |
--------------------------------------------------------------------------------
/ballista/scheduler/src/physical_optimizer/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | pub mod join_selection;
19 |
--------------------------------------------------------------------------------
/ci/scripts/rust_fmt.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 | cargo fmt --all -- --check
22 |
--------------------------------------------------------------------------------
/dev/docker/cli-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | /root/ballista-cli "$@"
23 |
--------------------------------------------------------------------------------
/ballista/scheduler/testdata/nation/nation.tbl:
--------------------------------------------------------------------------------
1 | 0|ALGERIA|0| haggle. carefully final deposits detect slyly agai|
2 | 1|ARGENTINA|1|al foxes promise slyly according to the regular accounts. bold requests alon|
3 | 2|BRAZIL|1|y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special |
4 | 3|CANADA|1|eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold|
5 | 4|EGYPT|4|y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d|
6 | 5|ETHIOPIA|0|ven packages wake quickly. regu|
7 | 6|FRANCE|3|refully final requests. regular, ironi|
8 | 7|GERMANY|3|l platelets. regular accounts x-ray: unusual, regular acco|
9 | 8|INDIA|2|ss excuses cajole slyly across the packages. deposits print aroun|
10 | 9|INDONESIA|2| slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull|
11 |
--------------------------------------------------------------------------------
/dev/docker/executor-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | /root/ballista-executor "$@"
23 |
--------------------------------------------------------------------------------
/dev/docker/scheduler-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | /root/ballista-scheduler "$@"
23 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | sphinx
19 | Jinja2
20 | pydata-sphinx-theme==0.8.0
21 | myst-parser
22 | maturin
23 |
--------------------------------------------------------------------------------
/python/build.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | fn main() {
19 | pyo3_build_config::add_extension_module_link_args();
20 | }
21 |
--------------------------------------------------------------------------------
/ci/scripts/rust_clippy.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 | cargo clippy --all-targets --workspace -- -D warnings
22 |
--------------------------------------------------------------------------------
/dev/build-set-env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | cd ballista/core/
21 | export BALLISTA_VERSION=$(cargo pkgid | cut '-d@' -f2)
22 | cd -
23 |
--------------------------------------------------------------------------------
/ballista/executor/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Executor Process
21 |
22 | This crate contains the Ballista executor process.
23 |
--------------------------------------------------------------------------------
/ballista/scheduler/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Scheduler Process
21 |
22 | This crate contains the Ballista scheduler process.
23 |
--------------------------------------------------------------------------------
/benchmarks/queries/q2.sql:
--------------------------------------------------------------------------------
1 | select
2 | s_acctbal,
3 | s_name,
4 | n_name,
5 | p_partkey,
6 | p_mfgr,
7 | s_address,
8 | s_phone,
9 | s_comment
10 | from
11 | part,
12 | supplier,
13 | partsupp,
14 | nation,
15 | region
16 | where
17 | p_partkey = ps_partkey
18 | and s_suppkey = ps_suppkey
19 | and p_size = 15
20 | and p_type like '%BRASS'
21 | and s_nationkey = n_nationkey
22 | and n_regionkey = r_regionkey
23 | and r_name = 'EUROPE'
24 | and ps_supplycost = (
25 | select
26 | min(ps_supplycost)
27 | from
28 | partsupp,
29 | supplier,
30 | nation,
31 | region
32 | where
33 | p_partkey = ps_partkey
34 | and s_suppkey = ps_suppkey
35 | and s_nationkey = n_nationkey
36 | and n_regionkey = r_regionkey
37 | and r_name = 'EUROPE'
38 | )
39 | order by
40 | s_acctbal desc,
41 | n_name,
42 | s_name,
43 | p_partkey
44 | limit 100;
--------------------------------------------------------------------------------
/ci/scripts/rust_toml_fmt.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 | find . -mindepth 2 -name 'Cargo.toml' -exec cargo tomlfmt -k -p {} \;
22 |
--------------------------------------------------------------------------------
/benchmarks/queries/q21.sql:
--------------------------------------------------------------------------------
1 | select
2 | s_name,
3 | count(*) as numwait
4 | from
5 | supplier,
6 | lineitem l1,
7 | orders,
8 | nation
9 | where
10 | s_suppkey = l1.l_suppkey
11 | and o_orderkey = l1.l_orderkey
12 | and o_orderstatus = 'F'
13 | and l1.l_receiptdate > l1.l_commitdate
14 | and exists (
15 | select
16 | *
17 | from
18 | lineitem l2
19 | where
20 | l2.l_orderkey = l1.l_orderkey
21 | and l2.l_suppkey <> l1.l_suppkey
22 | )
23 | and not exists (
24 | select
25 | *
26 | from
27 | lineitem l3
28 | where
29 | l3.l_orderkey = l1.l_orderkey
30 | and l3.l_suppkey <> l1.l_suppkey
31 | and l3.l_receiptdate > l3.l_commitdate
32 | )
33 | and s_nationkey = n_nationkey
34 | and n_name = 'SAUDI ARABIA'
35 | group by
36 | s_name
37 | order by
38 | numwait desc,
39 | s_name
40 | limit 100;
--------------------------------------------------------------------------------
/ballista/client/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | #![doc = include_str!("../README.md")]
19 |
20 | pub mod extension;
21 | pub mod prelude;
22 | pub use datafusion;
23 |
--------------------------------------------------------------------------------
/ballista/core/src/consistent_hash/node.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | pub trait Node {
19 | fn name(&self) -> &str;
20 |
21 | fn is_valid(&self) -> bool;
22 | }
23 |
--------------------------------------------------------------------------------
/ci/scripts/rust_docs.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 | export RUSTDOCFLAGS="-D warnings"
22 | cargo doc --document-private-items --no-deps --workspace
23 |
--------------------------------------------------------------------------------
/benchmarks/queries/q20.sql:
--------------------------------------------------------------------------------
1 | select
2 | s_name,
3 | s_address
4 | from
5 | supplier,
6 | nation
7 | where
8 | s_suppkey in (
9 | select
10 | ps_suppkey
11 | from
12 | partsupp
13 | where
14 | ps_partkey in (
15 | select
16 | p_partkey
17 | from
18 | part
19 | where
20 | p_name like 'forest%'
21 | )
22 | and ps_availqty > (
23 | select
24 | 0.5 * sum(l_quantity)
25 | from
26 | lineitem
27 | where
28 | l_partkey = ps_partkey
29 | and l_suppkey = ps_suppkey
30 | and l_shipdate >= date '1994-01-01'
31 | and l_shipdate < date '1994-01-01' + interval '1' year
32 | )
33 | )
34 | and s_nationkey = n_nationkey
35 | and n_name = 'CANADA'
36 | order by
37 | s_name;
38 |
--------------------------------------------------------------------------------
/docs/source/_templates/docs-sidebar.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
10 |
11 |
20 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Code of Conduct
21 |
22 | - [Code of Conduct for The Apache Software Foundation][1]
23 |
24 | [1]: https://www.apache.org/foundation/policies/conduct.html
25 |
--------------------------------------------------------------------------------
/benchmarks/db-benchmark/run-bench.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | set -e
19 |
20 | SRC_DATANAME=G1_1e7_1e2_0_0 python3 datafusion/groupby-datafusion.py
21 | SRC_DATANAME=J1_1e7_NA_0_0 python3 datafusion/join-datafusion.py
22 |
--------------------------------------------------------------------------------
/dev/docker/builder-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | set -x
22 |
23 | printenv
24 | RELEASE_FLAG=${RELEASE_FLAG:=release}
25 | cargo build --features rest-api --profile $RELEASE_FLAG "$@"
26 |
--------------------------------------------------------------------------------
/ballista/core/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Core Library
21 |
22 | This crate contains the Ballista core library which is used as a dependency by the `ballista-client`,
23 | `ballista-scheduler`, and `ballista-executor` crates.
24 |
--------------------------------------------------------------------------------
/benchmarks/queries/q22.sql:
--------------------------------------------------------------------------------
1 | select
2 | cntrycode,
3 | count(*) as numcust,
4 | sum(c_acctbal) as totacctbal
5 | from
6 | (
7 | select
8 | substring(c_phone from 1 for 2) as cntrycode,
9 | c_acctbal
10 | from
11 | customer
12 | where
13 | substring(c_phone from 1 for 2) in
14 | ('13', '31', '23', '29', '30', '18', '17')
15 | and c_acctbal > (
16 | select
17 | avg(c_acctbal)
18 | from
19 | customer
20 | where
21 | c_acctbal > 0.00
22 | and substring(c_phone from 1 for 2) in
23 | ('13', '31', '23', '29', '30', '18', '17')
24 | )
25 | and not exists (
26 | select
27 | *
28 | from
29 | orders
30 | where
31 | o_custkey = c_custkey
32 | )
33 | ) as custsale
34 | group by
35 | cntrycode
36 | order by
37 | cntrycode;
--------------------------------------------------------------------------------
/dev/msrvcheck/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | This directory contains a tool that ensures the MSRV is consistent with upstream
21 | datafusion dependencies.
22 |
23 | [issue 1271]: https://github.com/apache/datafusion-ballista/issues/1271#issuecomment-3094456313
24 |
--------------------------------------------------------------------------------
/dev/docker/standalone-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | echo "Starting for scheduler..."
23 | /root/ballista-scheduler &
24 | while ! nc -z 127.0.0.1 50050; do
25 | sleep 1
26 | done
27 |
28 | echo "Starting executor"
29 | /root/ballista-executor
30 |
--------------------------------------------------------------------------------
/dev/release/rat_exclude_files.txt:
--------------------------------------------------------------------------------
1 | *.npmrc
2 | *.gitignore
3 | *.dockerignore
4 | .gitmodules
5 | *_generated.js
6 | *_generated.ts
7 | *.csv
8 | *.json
9 | *.snap
10 | .github/ISSUE_TEMPLATE/*.md
11 | .github/pull_request_template.md
12 | ci/etc/rprofile
13 | ci/etc/*.patch
14 | ci/vcpkg/*.patch
15 | CHANGELOG.md
16 | ballista/CHANGELOG.md
17 | python/CHANGELOG.md
18 | dev/requirements*.txt
19 | dev/release/rat_exclude_files.txt
20 | helm/ballista/Chart.lock
21 | pax_global_header
22 | MANIFEST.in
23 | __init__.pxd
24 | __init__.py
25 | requirements.txt
26 | *.html
27 | *.sgml
28 | *.css
29 | *.png
30 | *.ico
31 | *.svg
32 | *.devhelp2
33 | *.scss
34 | .gitattributes
35 | benchmarks/queries/q*.sql
36 | ballista/scheduler/testdata/*
37 | **/yarn.lock
38 | python/requirements*.txt
39 | **/testdata/*
40 | benchmarks/queries/*
41 | benchmarks/data/*
42 | ci/*
43 | **/*.svg
44 | **/*.csv
45 | **/*.json
46 | **/*.sql
47 | venv/*
48 | testing/*
49 | target/*
50 | **/target/*
51 | Cargo.lock
52 | **/Cargo.lock
53 | .history
54 | parquet-testing/*
55 | *rat.txt
56 | ballista/core/src/serde/generated/ballista.rs
57 | python/uv.lock
--------------------------------------------------------------------------------
/python/examples/scheduler.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # %%
19 | from ballista import BallistaScheduler
20 |
21 | # %%
22 | scheduler = BallistaScheduler()
23 | # %%
24 | scheduler
25 | # %%
26 | scheduler.start()
27 | # %%
28 | scheduler.wait_for_termination()
29 | # %%
30 | scheduler.close()
31 |
--------------------------------------------------------------------------------
/ballista-cli/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | #![doc = include_str!("../README.md")]
19 | pub const BALLISTA_CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
20 |
21 | pub mod command;
22 | pub mod exec;
23 |
24 | pub use datafusion_cli::{functions, helper, print_format, print_options};
25 |
--------------------------------------------------------------------------------
/dev/msrvcheck/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # MSRV checker for upstream DataFusion
19 | [package]
20 | name = "msrvcheck"
21 | edition = "2021"
22 |
23 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
24 |
25 | [dependencies]
26 | cargo = "0.91.0"
27 |
--------------------------------------------------------------------------------
/python/examples/executor.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # %% Import the executor class exposed by the ballista package
19 | from ballista import BallistaExecutor
20 |
21 | # %% Create an executor (constructor is called with no arguments)
22 | executor = BallistaExecutor()
23 | # %% Start the executor
24 | executor.start()
25 | # %% Bare expression: shown as the cell output when run interactively
26 | executor
27 | # %% Wait for termination (presumably blocks until the executor stops - TODO confirm)
28 | executor.wait_for_termination()
29 | # %% (empty cell)
30 | # %% Close the executor
31 | executor.close()
32 | # %% (empty trailing cell)
33 |
--------------------------------------------------------------------------------
/dev/rust_lint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | set -e # stop at the first command that fails
20 | if ! command -v cargo-tomlfmt &> /dev/null; then # install the TOML formatter only if it is not already on PATH
21 | echo "Installing cargo-tomlfmt using cargo"
22 | cargo install cargo-tomlfmt
23 | fi
24 | # The scripts below use relative paths: run from the directory containing ci/ (presumably the repository root).
25 | ci/scripts/rust_fmt.sh
26 | ci/scripts/rust_clippy.sh
27 | ci/scripts/rust_toml_fmt.sh
--------------------------------------------------------------------------------
/python/examples/client_remote.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # %%
19 | from ballista import BallistaBuilder
20 | from datafusion.context import SessionContext
21 |
22 | ctx: SessionContext = BallistaBuilder().remote("df://127.0.0.1:50050")
23 |
24 | # Select 1 to verify it's working
25 | ctx.sql("SELECT 1").show()
26 |
27 | # %%
28 |
--------------------------------------------------------------------------------
/.github/workflows/dev_pr/labeler.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | python:
19 | - python/**/*
20 | # NOTE: `**` spans nested directories only when it is a whole path segment, hence `dir/**/*` below.
21 | development-process: # build/CI/dev tooling, at any depth
22 | - dev/**/*
23 | - .github/**/*
24 | - ci/**/*
25 | - .asf.yaml
26 |
27 | documentation: # docs trees plus README/DEVELOPERS files
28 | - docs/**/*
29 | - README.md
30 | - "**/README.md" # quoted: a plain scalar cannot start with `*` in YAML
31 | - DEVELOPERS.md
32 | - ballista/docs/**/*
33 |
--------------------------------------------------------------------------------
/ballista/client/src/prelude.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! Ballista Prelude (common imports)
19 |
20 | // pub use ballista_core::{
21 | // config::BallistaConfig,
22 | // error::{BallistaError, Result},
23 | // };
24 |
25 | pub use crate::extension::{SessionConfigExt, SessionContextExt};
26 | //pub use futures::StreamExt;
27 |
--------------------------------------------------------------------------------
/benchmarks/queries/q8.sql:
--------------------------------------------------------------------------------
1 | select -- per order year: fraction of total volume whose supplier nation is BRAZIL
2 | o_year,
3 | sum(case
4 | when nation = 'BRAZIL' then volume
5 | else 0
6 | end) / sum(volume) as mkt_share
7 | from
8 | (
9 | select -- one row per qualifying line item
10 | extract(year from o_orderdate) as o_year,
11 | l_extendedprice * (1 - l_discount) as volume, -- extended price after discount
12 | n2.n_name as nation -- supplier's nation (n2 is joined on s_nationkey)
13 | from
14 | part,
15 | supplier,
16 | lineitem,
17 | orders,
18 | customer,
19 | nation n1,
20 | nation n2,
21 | region
22 | where
23 | p_partkey = l_partkey
24 | and s_suppkey = l_suppkey
25 | and l_orderkey = o_orderkey
26 | and o_custkey = c_custkey
27 | and c_nationkey = n1.n_nationkey -- n1: customer's nation
28 | and n1.n_regionkey = r_regionkey
29 | and r_name = 'AMERICA' -- only customers whose nation is in the AMERICA region
30 | and s_nationkey = n2.n_nationkey -- n2: supplier's nation
31 | and o_orderdate between date '1995-01-01' and date '1996-12-31'
32 | and p_type = 'ECONOMY ANODIZED STEEL'
33 | ) as all_nations
34 | group by
35 | o_year
36 | order by
37 | o_year;
--------------------------------------------------------------------------------
/benchmarks/db-benchmark/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Run db-benchmark
21 |
22 | ## Directions
23 |
24 | Run the following from the root `arrow-datafusion` directory:
25 |
26 | ```bash
27 | $ docker buildx build -t db-benchmark -f benchmarks/db-benchmark/db-benchmark.Dockerfile .
28 | $ docker run --privileged db-benchmark
29 | ```
30 |
--------------------------------------------------------------------------------
/docs/source/user-guide/deployment/index.rst:
--------------------------------------------------------------------------------
1 | .. Licensed to the Apache Software Foundation (ASF) under one
2 | .. or more contributor license agreements. See the NOTICE file
3 | .. distributed with this work for additional information
4 | .. regarding copyright ownership. The ASF licenses this file
5 | .. to you under the Apache License, Version 2.0 (the
6 | .. "License"); you may not use this file except in compliance
7 | .. with the License. You may obtain a copy of the License at
8 |
9 | .. http://www.apache.org/licenses/LICENSE-2.0
10 |
11 | .. Unless required by applicable law or agreed to in writing,
12 | .. software distributed under the License is distributed on an
13 | .. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | .. KIND, either express or implied. See the License for the
15 | .. specific language governing permissions and limitations
16 | .. under the License.
17 |
18 | Start a Ballista Cluster
19 | ========================
20 |
21 | .. toctree::
22 | :maxdepth: 2
23 |
24 | Quick Start
25 | Cargo Install
26 | Docker
27 | Docker Compose
28 | Kubernetes
29 |
--------------------------------------------------------------------------------
/ballista/scheduler/testdata/orders/orders.tbl:
--------------------------------------------------------------------------------
1 | 1|36901|O|173665.47|1996-01-02|5-LOW|Clerk#000000951|0|nstructions sleep furiously among |
2 | 2|78002|O|46929.18|1996-12-01|1-URGENT|Clerk#000000880|0| foxes. pending accounts at the pending, silent asymptot|
3 | 3|123314|F|193846.25|1993-10-14|5-LOW|Clerk#000000955|0|sly final accounts boost. carefully regular ideas cajole carefully. depos|
4 | 4|136777|O|32151.78|1995-10-11|5-LOW|Clerk#000000124|0|sits. slyly regular warthogs cajole. regular, regular theodolites acro|
5 | 5|44485|F|144659.20|1994-07-30|5-LOW|Clerk#000000925|0|quickly. bold deposits sleep slyly. packages use slyly|
6 | 6|55624|F|58749.59|1992-02-21|4-NOT SPECIFIED|Clerk#000000058|0|ggle. special, final requests are against the furiously specia|
7 | 7|39136|O|252004.18|1996-01-10|2-HIGH|Clerk#000000470|0|ly special requests |
8 | 32|130057|O|208660.75|1995-07-16|2-HIGH|Clerk#000000616|0|ise blithely bold, regular requests. quickly unusual dep|
9 | 33|66958|F|163243.98|1993-10-27|3-MEDIUM|Clerk#000000409|0|uriously. furiously final request|
10 | 34|61001|O|58949.67|1998-07-21|3-MEDIUM|Clerk#000000223|0|ly final packages. fluffily final deposits wake blithely ideas. spe|
11 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # Which issue does this PR close?
2 |
3 |
6 |
7 | Closes #.
8 |
9 | # Rationale for this change
10 |
14 |
15 | # What changes are included in this PR?
16 |
19 |
20 | # Are there any user-facing changes?
21 |
24 |
25 |
28 |
--------------------------------------------------------------------------------
/ballista/core/src/serde/generated/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | // include the generated protobuf source as a submodule
19 | #[allow(clippy::all)]
20 | #[rustfmt::skip]
21 | #[cfg(not(docsrs))]
22 | pub mod ballista; // without the `docsrs` cfg: regular out-of-line module (source file lives beside this one)
23 | // With the `docsrs` cfg set, the module body is the `ballista.rs` found in the build's OUT_DIR instead.
24 | #[cfg(docsrs)]
25 | #[allow(clippy::all)]
26 | pub mod ballista {
27 | include!(concat!(env!("OUT_DIR"), "/ballista.rs"));
28 | }
29 |
--------------------------------------------------------------------------------
/benchmarks/queries/q7.sql:
--------------------------------------------------------------------------------
1 | select -- revenue per (supplier nation, customer nation, ship year)
2 | supp_nation,
3 | cust_nation,
4 | l_year,
5 | sum(volume) as revenue
6 | from
7 | (
8 | select -- one row per qualifying line item
9 | n1.n_name as supp_nation, -- n1 is joined on s_nationkey
10 | n2.n_name as cust_nation, -- n2 is joined on c_nationkey
11 | extract(year from l_shipdate) as l_year,
12 | l_extendedprice * (1 - l_discount) as volume -- extended price after discount
13 | from
14 | supplier,
15 | lineitem,
16 | orders,
17 | customer,
18 | nation n1,
19 | nation n2
20 | where
21 | s_suppkey = l_suppkey
22 | and o_orderkey = l_orderkey
23 | and c_custkey = o_custkey
24 | and s_nationkey = n1.n_nationkey
25 | and c_nationkey = n2.n_nationkey
26 | and ( -- keep only FRANCE/GERMANY pairs, in either direction
27 | (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
28 | or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
29 | )
30 | and l_shipdate between date '1995-01-01' and date '1996-12-31'
31 | ) as shipping
32 | group by
33 | supp_nation,
34 | cust_nation,
35 | l_year
36 | order by
37 | supp_nation,
38 | cust_nation,
39 | l_year;
40 |
--------------------------------------------------------------------------------
/ballista/scheduler/testdata/part/part.tbl:
--------------------------------------------------------------------------------
1 | 1|goldenrod lavender spring chocolate lace|Manufacturer#1|Brand#13|PROMO BURNISHED COPPER|7|JUMBO PKG|901.00|ly. slyly ironi|
2 | 2|blush thistle blue yellow saddle|Manufacturer#1|Brand#13|LARGE BRUSHED BRASS|1|LG CASE|902.00|lar accounts amo|
3 | 3|spring green yellow purple cornsilk|Manufacturer#4|Brand#42|STANDARD POLISHED BRASS|21|WRAP CASE|903.00|egular deposits hag|
4 | 4|cornflower chocolate smoke green pink|Manufacturer#3|Brand#34|SMALL PLATED BRASS|14|MED DRUM|904.00|p furiously r|
5 | 5|forest brown coral puff cream|Manufacturer#3|Brand#32|STANDARD POLISHED TIN|15|SM PKG|905.00| wake carefully |
6 | 6|bisque cornflower lawn forest magenta|Manufacturer#2|Brand#24|PROMO PLATED STEEL|4|MED BAG|906.00|sual a|
7 | 7|moccasin green thistle khaki floral|Manufacturer#1|Brand#11|SMALL PLATED COPPER|45|SM BAG|907.00|lyly. ex|
8 | 8|misty lace thistle snow royal|Manufacturer#4|Brand#44|PROMO BURNISHED TIN|41|LG DRUM|908.00|eposi|
9 | 9|thistle dim navajo dark gainsboro|Manufacturer#4|Brand#43|SMALL BURNISHED STEEL|12|WRAP CASE|909.00|ironic foxe|
10 | 10|linen pink saddle puff powder|Manufacturer#5|Brand#54|LARGE BURNISHED STEEL|44|LG CAN|910.01|ithely final deposit|
11 |
--------------------------------------------------------------------------------
/dev/docker/ballista-cli.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM ubuntu:24.04
19 |
20 | ARG RELEASE_FLAG=release
21 |
22 | ENV RELEASE_FLAG=${RELEASE_FLAG}
23 | ENV RUST_LOG=info
24 | ENV RUST_BACKTRACE=full
25 |
26 | COPY target/$RELEASE_FLAG/ballista-cli /root/ballista-cli
27 |
28 | COPY dev/docker/cli-entrypoint.sh /root/cli-entrypoint.sh
29 | ENTRYPOINT ["/root/cli-entrypoint.sh"]
30 |
--------------------------------------------------------------------------------
/python/ballista/__init__.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | try: # prefer the standard-library importlib.metadata
19 | import importlib.metadata as importlib_metadata
20 | except ImportError: # otherwise use the separately installed importlib_metadata module
21 | import importlib_metadata
22 |
23 | from .ballista_internal import BallistaBuilder, BallistaScheduler, BallistaExecutor
24 |
25 | __version__ = importlib_metadata.version(__name__) # version recorded in the installed metadata for this package's name
26 |
27 | __all__ = ["BallistaBuilder", "BallistaScheduler", "BallistaExecutor"] # public names re-exported from ballista_internal
28 |
--------------------------------------------------------------------------------
/benchmarks/.dockerignore:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # Turn .dockerignore to .dockerallow by excluding everything and explicitly
19 | # allowing specific files and directories. This enables us to quickly add
20 | # dependency files to the docker content without scanning the whole directory.
21 | # This setup requires all of our docker containers to have arrow's source
22 | # as a mounted directory.
23 |
24 | data
25 | target
--------------------------------------------------------------------------------
/ballista/scheduler/testdata/lineitem/partition0.tbl:
--------------------------------------------------------------------------------
1 | 1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular courts above the|
2 | 1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-20|TAKE BACK RETURN|MAIL|ly final dependencies: slyly bold |
3 | 1|63700|3701|3|8|13309.60|0.10|0.02|N|O|1996-01-29|1996-03-05|1996-01-31|TAKE BACK RETURN|REG AIR|riously. regular, express dep|
4 | 1|2132|4633|4|28|28955.64|0.09|0.06|N|O|1996-04-21|1996-03-30|1996-05-16|NONE|AIR|lites. fluffily even de|
5 | 1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-04-01|NONE|FOB| pending foxes. slyly re|
6 | 1|15635|638|6|32|49620.16|0.07|0.02|N|O|1996-01-30|1996-02-07|1996-02-03|DELIVER IN PERSON|MAIL|arefully slyly ex|
7 | 2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-02-02|TAKE BACK RETURN|RAIL|ven requests. deposits breach a|
8 | 3|4297|1798|1|45|54058.05|0.06|0.00|R|F|1994-02-02|1994-01-04|1994-02-23|NONE|AIR|ongside of the furiously brave acco|
9 | 3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-11-24|TAKE BACK RETURN|RAIL| unusual accounts. eve|
10 | 3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-01-23|DELIVER IN PERSON|SHIP|nal foxes wake. |
11 |
--------------------------------------------------------------------------------
/ballista/scheduler/testdata/lineitem/partition1.tbl:
--------------------------------------------------------------------------------
1 | 1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular courts above the|
2 | 1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-20|TAKE BACK RETURN|MAIL|ly final dependencies: slyly bold |
3 | 1|63700|3701|3|8|13309.60|0.10|0.02|N|O|1996-01-29|1996-03-05|1996-01-31|TAKE BACK RETURN|REG AIR|riously. regular, express dep|
4 | 1|2132|4633|4|28|28955.64|0.09|0.06|N|O|1996-04-21|1996-03-30|1996-05-16|NONE|AIR|lites. fluffily even de|
5 | 1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-04-01|NONE|FOB| pending foxes. slyly re|
6 | 1|15635|638|6|32|49620.16|0.07|0.02|N|O|1996-01-30|1996-02-07|1996-02-03|DELIVER IN PERSON|MAIL|arefully slyly ex|
7 | 2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-02-02|TAKE BACK RETURN|RAIL|ven requests. deposits breach a|
8 | 3|4297|1798|1|45|54058.05|0.06|0.00|R|F|1994-02-02|1994-01-04|1994-02-23|NONE|AIR|ongside of the furiously brave acco|
9 | 3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-11-24|TAKE BACK RETURN|RAIL| unusual accounts. eve|
10 | 3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-01-23|DELIVER IN PERSON|SHIP|nal foxes wake. |
11 |
--------------------------------------------------------------------------------
/ballista/scheduler/build.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | fn main() -> Result<(), String> {
19 |     // Everything is gated on the `keda-scaler` feature: register the rerun trigger, then compile proto/keda.proto.
20 |     #[cfg(feature = "keda-scaler")]
21 |     {
22 |         println!("cargo:rerun-if-changed=proto/keda.proto");
23 |         tonic_build::configure()
24 |             .compile_protos(&["proto/keda.proto"], &["proto"])
25 |             .map_err(|e| format!("protobuf compilation failed: {e}"))?;
26 |     }
27 |     Ok(())
28 | }
29 |
--------------------------------------------------------------------------------
/ballista/scheduler/testdata/supplier/supplier.tbl:
--------------------------------------------------------------------------------
1 | 1|Supplier#000000001| N kD4on9OM Ipw3,gf0JBoQDd7tgrzrddZ|17|27-918-335-1736|5755.94|each slyly above the careful|
2 | 2|Supplier#000000002|89eJ5ksX3ImxJQBvxObC,|5|15-679-861-2259|4032.68| slyly bold instructions. idle dependen|
3 | 3|Supplier#000000003|q1,G3Pj6OjIuUYfUoH18BFTKP5aU9bEV3|1|11-383-516-1199|4192.40|blithely silent requests after the express dependencies are sl|
4 | 4|Supplier#000000004|Bk7ah4CK8SYQTepEmvMkkgMwg|15|25-843-787-7479|4641.08|riously even requests above the exp|
5 | 5|Supplier#000000005|Gcdm2rJRzl5qlTVzc|11|21-151-690-3663|-283.84|. slyly regular pinto bea|
6 | 6|Supplier#000000006|tQxuVm7s7CnK|14|24-696-997-4969|1365.79|final accounts. regular dolphins use against the furiously ironic decoys. |
7 | 7|Supplier#000000007|s,4TicNGB4uO6PaSqNBUq|23|33-990-965-2201|6820.35|s unwind silently furiously regular courts. final requests are deposits. requests wake quietly blit|
8 | 8|Supplier#000000008|9Sq4bBH2FQEmaFOocY45sRTxo6yuoG|17|27-498-742-3860|7627.85|al pinto beans. asymptotes haggl|
9 | 9|Supplier#000000009|1KhUgZegwM3ua7dsYmekYBsK|10|20-403-398-8662|5302.37|s. unusual, even requests along the furiously regular pac|
10 | 10|Supplier#000000010|Saygah3gYWMp72i PY|24|34-852-489-8585|3891.91|ing waters. regular requests ar|
11 |
--------------------------------------------------------------------------------
/dev/release/crate-deps.dot:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | digraph G {
19 |
20 | ballista_core
21 | ballista_scheduler
22 | ballista_executor
23 | ballista
24 | ballista_cli
25 |
26 | ballista_scheduler -> ballista_core
27 |
28 | ballista_executor -> ballista_core
29 |
30 | ballista -> ballista_core
31 | ballista -> ballista_scheduler
32 | ballista -> ballista_executor
33 |
34 | ballista_cli -> ballista
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/docs/developer/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Developer Documentation
21 |
22 | - Read the [Architecture Overview](architecture.md) to get an understanding of the scheduler and executor
23 | processes and how distributed query execution works.
24 | - Watch the [Ballista: Distributed Compute with Rust and Apache Arrow](https://www.youtube.com/watch?v=ZZHQaOap9pQ)
25 | talk from the New York Open Statistical Programming Meetup (Feb 2021)
26 |
--------------------------------------------------------------------------------
/docs/source/user-guide/faq.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Frequently Asked Questions
21 |
22 | ## What is the relationship between DataFusion and Ballista?
23 |
24 | DataFusion is a library for executing queries in-process using the Apache Arrow memory
25 | model and computational kernels. It is designed to run within a single process, using threads
26 | for parallel query execution.
27 |
28 | Ballista is a distributed compute platform for DataFusion workloads.
29 |
--------------------------------------------------------------------------------
/dev/docker/ballista-executor.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM ubuntu:24.04
19 |
20 | ARG RELEASE_FLAG=release
21 |
22 | ENV RELEASE_FLAG=${RELEASE_FLAG}
23 | ENV RUST_LOG=info
24 | ENV RUST_BACKTRACE=full
25 |
26 | COPY target/$RELEASE_FLAG/ballista-executor /root/ballista-executor
27 |
28 | # Expose Ballista Executor gRPC port
29 | EXPOSE 50051
30 |
31 | COPY dev/docker/executor-entrypoint.sh /root/executor-entrypoint.sh
32 | ENTRYPOINT ["/root/executor-entrypoint.sh"]
33 |
--------------------------------------------------------------------------------
/ballista/scheduler/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | #![doc = include_str ! ("../README.md")]
19 | #[cfg(feature = "rest-api")]
20 | pub mod api;
21 | pub mod cluster;
22 | pub mod config;
23 | pub mod display;
24 | pub mod metrics;
25 | pub mod physical_optimizer;
26 | pub mod planner;
27 | pub mod scheduler_process;
28 | pub mod scheduler_server;
29 | pub mod standalone;
30 | pub mod state;
31 |
32 | #[cfg(test)]
33 | pub mod test_utils;
34 |
35 | pub use scheduler_server::SessionBuilder;
36 |
--------------------------------------------------------------------------------
/dev/docker/ballista-scheduler.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # Runtime image for the Ballista scheduler. Nothing is compiled here: the
19 | # binary is copied in from the local target/ directory.
20 | FROM ubuntu:24.04
21 |
22 | # Name of the target/ subdirectory the binary is copied from.
23 | ARG RELEASE_FLAG=release
24 |
25 | ENV RELEASE_FLAG=${RELEASE_FLAG} \
26 |     RUST_LOG=info \
27 |     RUST_BACKTRACE=full \
28 |     DEBIAN_FRONTEND=noninteractive
29 |
30 | COPY target/${RELEASE_FLAG}/ballista-scheduler /root/ballista-scheduler
31 | COPY dev/docker/scheduler-entrypoint.sh /root/scheduler-entrypoint.sh
32 |
33 | # Ballista Scheduler gRPC port
34 | EXPOSE 50050
35 |
36 | ENTRYPOINT ["/root/scheduler-entrypoint.sh"]
37 |
--------------------------------------------------------------------------------
/ballista/core/src/execution_plans/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | //! This module contains execution plans that are needed to distribute DataFusion's execution plans into
19 | //! several Ballista executors.
20 |
21 | mod distributed_query;
22 | mod shuffle_reader;
23 | mod shuffle_writer;
24 | mod unresolved_shuffle;
25 |
26 | pub use distributed_query::DistributedQueryExec;
27 | pub use shuffle_reader::ShuffleReaderExec;
28 | pub use shuffle_writer::ShuffleWriterExec;
29 | pub use unresolved_shuffle::UnresolvedShuffleExec;
30 |
--------------------------------------------------------------------------------
/benchmarks/queries/q19.sql:
--------------------------------------------------------------------------------
1 | select -- q19: discounted revenue summed over line items that match any one of three part/shipping profiles
2 | sum(l_extendedprice* (1 - l_discount)) as revenue -- extended price net of the discount
3 | from
4 | lineitem,
5 | part
6 | where
7 | ( -- profile 1: Brand#12, SM containers, quantity 1 to 11, size 1 to 5
8 | p_partkey = l_partkey -- join condition; repeated inside every OR branch
9 | and p_brand = 'Brand#12'
10 | and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
11 | and l_quantity >= 1 and l_quantity <= 1 + 10
12 | and p_size between 1 and 5
13 | and l_shipmode in ('AIR', 'AIR REG') -- shipping filters are the same in all three profiles
14 | and l_shipinstruct = 'DELIVER IN PERSON'
15 | )
16 | or
17 | ( -- profile 2: Brand#23, MED containers, quantity 10 to 20, size 1 to 10
18 | p_partkey = l_partkey
19 | and p_brand = 'Brand#23'
20 | and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
21 | and l_quantity >= 10 and l_quantity <= 10 + 10
22 | and p_size between 1 and 10
23 | and l_shipmode in ('AIR', 'AIR REG')
24 | and l_shipinstruct = 'DELIVER IN PERSON'
25 | )
26 | or
27 | ( -- profile 3: Brand#34, LG containers, quantity 20 to 30, size 1 to 15
28 | p_partkey = l_partkey
29 | and p_brand = 'Brand#34'
30 | and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
31 | and l_quantity >= 20 and l_quantity <= 20 + 10
32 | and p_size between 1 and 15
33 | and l_shipmode in ('AIR', 'AIR REG')
34 | and l_shipinstruct = 'DELIVER IN PERSON'
35 | );
--------------------------------------------------------------------------------
/.github_changelog_generator:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | #
20 |
21 | # some issues are just documentation
22 | add-sections={"documentation":{"prefix":"**Documentation updates:**","labels":["documentation"]},"performance":{"prefix":"**Performance improvements:**","labels":["performance"]}}
23 | # Uncomment to hide PRs. TBD whether we show them or not.
24 | #pull-requests=false
25 | # so that the component is shown associated with the issue
26 | issue-line-labels=sql
27 | exclude-labels=development-process,invalid
28 | breaking-labels=api change
29 |
--------------------------------------------------------------------------------
/dev/docker/ballista-benchmarks.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # Benchmark image: bundles the scheduler, executor and tpch binaries with the
19 | # benchmark runner script and its query files. Nothing is compiled here.
20 | FROM ubuntu:24.04
21 |
22 | # Name of the target/ subdirectory the binaries are copied from.
23 | ARG RELEASE_FLAG=release
24 |
25 | ENV RELEASE_FLAG=${RELEASE_FLAG} \
26 |     RUST_LOG=info \
27 |     RUST_BACKTRACE=full
28 |
29 | # All three binaries land in /root/ under their own file names.
30 | COPY target/${RELEASE_FLAG}/ballista-scheduler \
31 |      target/${RELEASE_FLAG}/ballista-executor \
32 |      target/${RELEASE_FLAG}/tpch \
33 |      /root/
34 |
35 | COPY benchmarks/queries/ /root/benchmarks/queries
36 | COPY benchmarks/run.sh /root/run.sh
37 |
38 | WORKDIR /root
39 |
40 | CMD ["/root/run.sh"]
--------------------------------------------------------------------------------
/ballista/executor/src/terminate.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | #[cfg(unix)]
19 | use tokio::signal::unix::SignalKind;
20 | #[cfg(unix)]
21 | use tokio::signal::unix::{self as os_impl};
22 | #[cfg(windows)]
23 | use tokio::signal::windows::{self as os_impl};
24 |
25 | use std::io;
26 |
27 | /// Waits until the process receives its termination signal: the `terminate`
28 | /// signal kind on Unix, Ctrl-Break on Windows.
29 | ///
30 | /// Returns an `io::Error` if the signal listener cannot be registered.
31 | pub async fn sig_term() -> io::Result<()> {
32 |     #[cfg(unix)]
33 |     {
34 |         let mut term_stream = os_impl::signal(SignalKind::terminate())?;
35 |         term_stream.recv().await;
36 |     }
37 |     #[cfg(windows)]
38 |     {
39 |         // TODO fix windows terminate after upgrading to latest tokio
40 |         let mut break_stream = os_impl::ctrl_break()?;
41 |         break_stream.recv().await;
42 |     }
43 |     Ok(())
44 | }
45 |
--------------------------------------------------------------------------------
/python/examples/client_standalone.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # %%
19 |
20 | from ballista import BallistaBuilder
21 | from datafusion.context import SessionContext
22 |
23 | ctx: SessionContext = (  # build the session context used by every cell below via BallistaBuilder
24 | BallistaBuilder()
25 | .config("datafusion.catalog.information_schema", "true")  # the last cell queries information_schema.df_settings
26 | .config("ballista.job.name", "example ballista")  # this setting is read back in the last cell
27 | .standalone()  # NOTE(review): presumably starts Ballista locally instead of connecting to a scheduler - confirm
28 | )
29 |
30 |
31 | ctx.sql("SELECT 1").show()  # trivial query to check that the context can execute SQL
32 |
33 | # %%
34 | ctx.sql("SHOW TABLES").show()
35 | # %%
36 | ctx.sql(  # look up the ballista.job.name setting configured on the builder above
37 | "select name, value from information_schema.df_settings where name like 'ballista.job.name'"
38 | ).show()
39 |
40 |
41 | # %%
42 |
--------------------------------------------------------------------------------
/dev/build-ballista-executables.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | RELEASE_FLAG=${RELEASE_FLAG:=release}
23 |
24 | # TODO: it would be very nice if we could make CI work the exact same way so the build logic isn't duplicated
25 |
26 | # build a docker container in which to run the build - this is to make life easier for Windows & Mac users
27 | docker build -t ballista-builder --build-arg EXT_UID="$(id -u)" -f dev/docker/ballista-builder.Dockerfile .
28 |
29 | # run the build inside the builder container; the expansions are quoted so a checkout path containing spaces still mounts correctly
30 | docker run -v "$(pwd)":/home/builder/workspace --env RELEASE_FLAG="$RELEASE_FLAG" ballista-builder
31 |
--------------------------------------------------------------------------------
/ballista-cli/Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # Build stage: compile ballista-cli from the workspace sources.
19 | FROM rust:1.76-bullseye AS builder
20 |
21 | # Install the build tooling before any sources are copied, so these layers are
22 | # reused from the build cache when only the source tree changes.
23 | RUN apt-get update && apt-get install -y protobuf-compiler
24 |
25 | RUN rustup component add rustfmt
26 |
27 | COPY ./ballista-cli /usr/src/ballista-cli
28 |
29 | COPY ./ballista /usr/src/ballista
30 |
31 | COPY ./benchmarks /usr/src/benchmarks
32 |
33 | COPY ./examples /usr/src/examples
34 |
35 | COPY ./Cargo.toml /usr/src/Cargo.toml
36 |
37 | WORKDIR /usr/src/ballista-cli
38 |
39 | RUN cargo build --release
40 |
41 | # Runtime stage: only the compiled binary is carried over from the builder.
42 | FROM debian:bullseye-slim
43 |
44 | COPY --from=builder /usr/src/target/release/ballista-cli /usr/local/bin
45 |
46 | ENTRYPOINT ["ballista-cli"]
47 |
48 | # Default arguments; replaced by any arguments given to `docker run`.
49 | CMD ["--data-path", "/data"]
50 |
--------------------------------------------------------------------------------
/dev/integration-tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | set -e
20 |
21 | echo "Generating benchmark data ..."
22 | pushd benchmarks
23 | ./tpch-gen.sh
24 | popd
25 |
26 | echo "Building Docker images ..."
27 | ./dev/build-ballista-docker.sh
28 |
29 | echo "Starting docker-compose in background ..."
30 | docker-compose up -d
31 |
32 | # give the scheduler a chance to start up
33 | echo "Sleeping (wait for scheduler to start)..."
34 | sleep 10
35 |
36 | echo "Running benchmarks ..."
37 | docker-compose run ballista-client /root/run.sh
38 |
39 | #TODO need to call docker-compose down even if benchmarks fail
40 |
41 | echo "Stopping docker-compose ..."
42 | docker-compose down
43 |
44 | popd
45 |
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2 | // README at: https://github.com/devcontainers/templates/tree/main/src/rust
3 | {
4 |   "name": "datafusion-ballista",
5 |   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
6 |   "image": "mcr.microsoft.com/devcontainers/rust:latest",
7 |   "features": {
8 |     "ghcr.io/devcontainers/features/rust:latest": {
9 |       "version": "latest",
10 |       "profile": "complete"
11 |     },
12 |     "ghcr.io/devcontainers-extra/features/protoc:1": {},
13 |     "ghcr.io/devcontainers/features/node:1": {},
14 |     "ghcr.io/devcontainers/features/docker-in-docker:2": {}
15 |   },
16 |   // Use 'mounts' to make the cargo cache persistent in a Docker Volume.
17 |   "mounts": [
18 |     {
19 |       "source": "devcontainer-cargo-cache-${devcontainerId}",
20 |       "target": "/usr/local/cargo",
21 |       "type": "volume"
22 |     }
23 |   ],
24 |   // Features to add to the dev container. More info: https://containers.dev/features.
25 |   // "features": {},
26 |   // Use 'forwardPorts' to make a list of ports inside the container available locally.
27 |   "forwardPorts": [
28 |     50050,
29 |     3000
30 |   ],
31 |   // Use 'postCreateCommand' to run commands after the container is created.
32 |   "postCreateCommand": "rustc --version"
33 |   // Configure tool-specific properties.
34 |   // "customizations": {},
35 |   // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
36 |   // "remoteUser": "root"
37 | }
--------------------------------------------------------------------------------
/python/examples/readme_standalone.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # %%
19 |
20 | from ballista import BallistaBuilder
21 | from datafusion.context import SessionContext
22 |
23 | ctx: SessionContext = (  # build the session context used by every cell below via BallistaBuilder
24 | BallistaBuilder()
25 | .config("ballista.job.name", "Readme Example")
26 | .config("datafusion.execution.target_partitions", "4")
27 | .standalone()  # NOTE(review): presumably starts Ballista locally instead of connecting to a scheduler - confirm
28 | )
29 |
30 | ctx.sql("create external table t stored as parquet location '../testdata/test.parquet'")  # registers table t; the path is relative, so it depends on the working directory
31 |
32 | # %%
33 | df = ctx.sql("select * from t limit 5")  # SQL route: query the table registered above
34 | pyarrow_batches = df.collect()
35 | pyarrow_batches[0].to_pandas()  # only the first collected batch is converted
36 | # %%
37 | df = ctx.read_parquet("../testdata/test.parquet").limit(5)  # DataFrame route: read the same file directly
38 | pyarrow_batches = df.collect()
39 | pyarrow_batches[0].to_pandas()  # only the first collected batch is converted
40 | # %%
41 |
--------------------------------------------------------------------------------
/dev/build-ballista-docker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | RELEASE_FLAG=${RELEASE_FLAG:=release}
23 |
24 | ./dev/build-ballista-executables.sh
25 |
26 | . ./dev/build-set-env.sh
27 |
28 | docker build -t "apache/datafusion-ballista-standalone:latest" -f dev/docker/ballista-standalone.Dockerfile .
29 | docker build -t "apache/datafusion-ballista-scheduler:latest" -f dev/docker/ballista-scheduler.Dockerfile .
30 | docker build -t "apache/datafusion-ballista-executor:latest" -f dev/docker/ballista-executor.Dockerfile .
31 | docker build -t "apache/datafusion-ballista-cli:latest" -f dev/docker/ballista-cli.Dockerfile .
32 | docker build -t "apache/datafusion-ballista-benchmarks:latest" -f dev/docker/ballista-benchmarks.Dockerfile .
33 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | #
19 | # Minimal makefile for Sphinx documentation
20 | #
21 |
22 | # All four variables can be overridden on the make command line; SPHINXOPTS and
23 | # SPHINXBUILD (assigned with ?=) are also taken from the environment when set.
24 | SPHINXOPTS ?=
25 | SPHINXBUILD ?= sphinx-build
26 | SOURCEDIR = source
27 | BUILDDIR = build
28 |
29 | # Keep "help" as the first target so that a bare "make" behaves like "make help".
30 | help:
31 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
32 |
33 | .PHONY: help Makefile
34 |
35 | # Catch-all target: pass every other target name to $(SPHINXBUILD) through its
36 | # -M ("make mode") option. $(O) is meant as a shortcut for $(SPHINXOPTS).
37 | %: Makefile
38 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
39 |
--------------------------------------------------------------------------------
/python/examples/readme_remote.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # %%
19 |
20 | from ballista import BallistaBuilder
21 | from datafusion.context import SessionContext
22 |
23 | ctx: SessionContext = (  # build the session context used by every cell below via BallistaBuilder
24 | BallistaBuilder()
25 | .config("ballista.job.name", "Readme Example Remote")
26 | .config("datafusion.execution.target_partitions", "4")
27 | .remote("df://127.0.0.1:50050")  # NOTE(review): presumably the address of an already running scheduler on this host - confirm
28 | )
29 |
30 | ctx.sql("create external table t stored as parquet location '../testdata/test.parquet'")  # registers table t; NOTE(review): confirm which process resolves this relative path
31 |
32 | # %%
33 | df = ctx.sql("select * from t limit 5")  # SQL route: query the table registered above
34 | pyarrow_batches = df.collect()
35 | pyarrow_batches[0].to_pandas()  # only the first collected batch is converted
36 | # %%
37 | df = ctx.read_parquet("../testdata/test.parquet").limit(5)  # DataFrame route: read the same file directly
38 | pyarrow_batches = df.collect()
39 | pyarrow_batches[0].to_pandas()  # only the first collected batch is converted
40 | # %%
41 |
--------------------------------------------------------------------------------
/benchmarks/tpch-gen.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 |
19 | mkdir -p data/answers 2>/dev/null
20 |
21 | set -e
22 |
23 | pushd ..
24 | . ./dev/build-set-env.sh
25 | popd
26 |
27 | # Image used both to generate the data and to supply the expected answers.
28 | TPCH_IMAGE=ghcr.io/scalytics/tpch-docker:main
29 |
30 | # Request an interactive TTY only when stdin is a terminal; `docker run -it`
31 | # fails when it is not (for example when this script is run from CI).
32 | if [ -t 0 ]; then
33 |   DOCKER_TTY_FLAGS="-it"
34 | else
35 |   DOCKER_TTY_FLAGS=""
36 | fi
37 |
38 | # Generate data into the ./data directory if it does not already exist
39 | FILE=./data/supplier.tbl
40 | if test -f "$FILE"; then
41 |   echo "$FILE exists."
42 | else
43 |   # DOCKER_TTY_FLAGS is intentionally unquoted so that an empty value expands to nothing.
44 |   docker run -v "$(pwd)/data:/data" $DOCKER_TTY_FLAGS --rm "$TPCH_IMAGE" -vf -s 1
45 | fi
46 |
47 | # Copy expected answers into the ./data/answers directory if it does not already exist
48 | FILE=./data/answers/q1.out
49 | if test -f "$FILE"; then
50 |   echo "$FILE exists."
51 | else
52 |   docker run -v "$(pwd)/data:/data" $DOCKER_TTY_FLAGS --entrypoint /bin/bash --rm "$TPCH_IMAGE" -c "cp /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/"
53 | fi
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | version: 2
19 | updates:
20 | - package-ecosystem: cargo
21 | directory: "/"
22 | schedule:
23 | interval: daily
24 | open-pull-requests-limit: 10
25 | target-branch: main
26 | labels: [auto-dependencies]
27 | ignore:
28 | # major versions of arrow, datafusion and sqlparser are bumped manually
29 | - dependency-name: "arrow*"
30 | update-types: ["version-update:semver-major"]
31 | - dependency-name: "datafusion*"
32 | update-types: ["version-update:semver-major"]
33 | - dependency-name: "sqlparser"
34 | update-types: ["version-update:semver-major"]
35 | - package-ecosystem: "github-actions"
36 | directory: "/"
37 | schedule:
38 | interval: "daily"
39 | open-pull-requests-limit: 10
40 | labels: [auto-dependencies]
41 |
--------------------------------------------------------------------------------
/ballista/scheduler/testdata/customer/customer.tbl:
--------------------------------------------------------------------------------
1 | 1|Customer#000000001|IVhzIApeRb ot,c,E|15|25-989-741-2988|711.56|BUILDING|to the even, regular platelets. regular, ironic epitaphs nag e|
2 | 2|Customer#000000002|XSTf4,NCwDVaWNe6tEgvwfmRchLXak|13|23-768-687-3665|121.65|AUTOMOBILE|l accounts. blithely ironic theodolites integrate boldly: caref|
3 | 3|Customer#000000003|MG9kdTD2WBHm|1|11-719-748-3364|7498.12|AUTOMOBILE| deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov|
4 | 4|Customer#000000004|XxVSJsLAGtn|4|14-128-190-5944|2866.83|MACHINERY| requests. final, regular ideas sleep final accou|
5 | 5|Customer#000000005|KvpyuHCplrB84WgAiGV6sYpZq7Tj|3|13-750-942-6364|794.47|HOUSEHOLD|n accounts will have to unwind. foxes cajole accor|
6 | 6|Customer#000000006|sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn|20|30-114-968-4951|7638.57|AUTOMOBILE|tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious|
7 | 7|Customer#000000007|TcGe5gaZNgVePxU5kRrvXBfkasDTea|18|28-190-982-9759|9561.95|AUTOMOBILE|ainst the ironic, express theodolites. express, even pinto beans among the exp|
8 | 8|Customer#000000008|I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5|17|27-147-574-9335|6819.74|BUILDING|among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide|
9 | 9|Customer#000000009|xKiAFTjUsCuxfeleNqefumTrjS|8|18-338-906-3675|8324.07|FURNITURE|r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl|
10 | 10|Customer#000000010|6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2|5|15-741-346-9870|2753.54|HOUSEHOLD|es regular deposits haggle. fur|
11 |
--------------------------------------------------------------------------------
/.asf.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | notifications:
19 | commits: commits@datafusion.apache.org
20 | issues: github@datafusion.apache.org
21 | pullrequests: github@datafusion.apache.org
22 | jira_options: link label worklog
23 | github:
24 | description: "Apache DataFusion Ballista Distributed Query Engine"
25 | homepage: https://datafusion.apache.org/ballista
26 | labels:
27 | - arrow
28 | - big-data
29 | - dataframe
30 | - distributed
31 | - olap
32 | - python
33 | - query-engine
34 | - rust
35 | - sql
36 | enabled_merge_buttons:
37 | squash: true
38 | merge: false
39 | rebase: false
40 | features:
41 | issues: true
42 | # publishes the content of the `asf-site` branch to
43 | # https://datafusion.apache.org/ballista/
44 | publish:
45 | whoami: asf-site
46 | subdir: ballista
--------------------------------------------------------------------------------
/.github/actions/setup-builder/action.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Prepare Rust Builder
19 | description: 'Prepare Rust Build Environment'
20 | inputs:
21 | rust-version:
22 | description: 'version of rust to install (e.g. stable)'
23 | required: true
24 | default: 'stable'
25 | runs:
26 | using: "composite"
27 | steps:
28 | - name: Install protobuf compiler
29 | shell: bash
30 | run: |
31 | apt-get update
32 | apt-get install -y protobuf-compiler
33 | - name: Setup Rust toolchain
34 | shell: bash
35 | run: |
36 | echo "Installing ${{ inputs.rust-version }}"
37 | rustup toolchain install ${{ inputs.rust-version }}
38 | rustup default ${{ inputs.rust-version }}
39 | rustup component add rustfmt
40 | - name: Configure rust runtime env
41 | uses: ./.github/actions/setup-rust-runtime
42 |
--------------------------------------------------------------------------------
/ballista/scheduler/testdata/partsupp/partsupp.tbl:
--------------------------------------------------------------------------------
1 | 1|2|3325|771.64|, even theodolites. regular, final theodolites eat after the carefully pending foxes. furiously regular deposits sleep slyly. carefully bold realms above the ironic dependencies haggle careful|
2 | 1|2502|8076|993.49|ven ideas. quickly even packages print. pending multipliers must have to are fluff|
3 | 1|5002|3956|337.09|after the fluffily ironic deposits? blithely special dependencies integrate furiously even excuses. blithely silent theodolites could have to haggle pending, express requests; fu|
4 | 1|7502|4069|357.84|al, regular dependencies serve carefully after the quickly final pinto beans. furiously even deposits sleep quickly final, silent pinto beans. fluffily reg|
5 | 2|3|8895|378.49|nic accounts. final accounts sleep furiously about the ironic, bold packages. regular, regular accounts|
6 | 2|2503|4969|915.27|ptotes. quickly pending dependencies integrate furiously. fluffily ironic ideas impress blithely above the express accounts. furiously even epitaphs need to wak|
7 | 2|5003|8539|438.37|blithely bold ideas. furiously stealthy packages sleep fluffily. slyly special deposits snooze furiously carefully regular accounts. regular deposits according to the accounts nag carefully slyl|
8 | 2|7503|3025|306.39|olites. deposits wake carefully. even, express requests cajole. carefully regular ex|
9 | 3|4|4651|920.92|ilent foxes affix furiously quickly unusual requests. even packages across the carefully even theodolites nag above the sp|
10 | 3|2504|4093|498.13|ending dependencies haggle fluffily. regular deposits boost quickly carefully regular requests. deposits affix furiously around the pinto beans. ironic, unusual platelets across the p|
11 |
--------------------------------------------------------------------------------
/dev/release/run-rat.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | #
20 |
21 | RAT_VERSION=0.13
22 | RAT_JAR="apache-rat-${RAT_VERSION}.jar"
23 | # download apache rat; -f makes curl fail on HTTP errors instead of saving the error page as the jar
24 | if [ ! -f "${RAT_JAR}" ]; then
25 |   curl -fsSL "https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/${RAT_JAR}" -o "${RAT_JAR}" || { rm -f "${RAT_JAR}"; echo "Failed to download ${RAT_JAR}" >&2; exit 1; }
26 | fi
27 |
28 | RAT="java -jar ${RAT_JAR} -x "
29 |
30 | RELEASE_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
31 |
32 | # generate the rat report ($RAT is intentionally unquoted so it splits into command + flags; $1 is the path to scan)
33 | $RAT "$1" > rat.txt
34 | python "$RELEASE_DIR/check-rat-report.py" "$RELEASE_DIR/rat_exclude_files.txt" rat.txt > filtered_rat.txt
35 | cat filtered_rat.txt
36 | UNAPPROVED=$(grep -c "NOT APPROVED" filtered_rat.txt)
37 |
38 | if [ "0" -eq "${UNAPPROVED}" ]; then
39 |   echo "No unapproved licenses"
40 | else
41 |   echo "${UNAPPROVED} unapproved licenses. Check rat report: rat.txt"
42 |   exit 1
43 | fi
44 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Documentation
21 |
22 | ## User Documentation
23 |
24 | Documentation for the current published release can be found at https://datafusion.apache.org/ballista and the source
25 | content is located [here](source/user-guide/introduction.md).
26 |
27 | ## Developer Documentation
28 |
29 | Developer documentation can be found [here](developer/README.md).
30 |
31 | ## Building the User Guide
32 |
33 | ### Dependencies
34 |
35 | It's recommended to install build dependencies and build the documentation
36 | inside a Python virtualenv.
37 |
38 | - Python
39 | - `pip install -r requirements.txt`
40 |
41 | ## Build
42 |
43 | ```bash
44 | ./build.sh
45 | ```
46 |
47 | ## Release
48 |
49 | The documentation is published from the `asf-site` branch of this repository.
50 |
51 | Documentation is published automatically when documentation changes are pushed to the main branch.
52 |
--------------------------------------------------------------------------------
/docs/source/user-guide/deployment/cargo-install.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Deploying a standalone Ballista cluster using cargo install
21 |
22 | A simple way to start a local cluster for testing purposes is to use cargo to install
23 | the scheduler and executor crates.
24 |
25 | ```bash
26 | cargo install --locked ballista-scheduler
27 | cargo install --locked ballista-executor
28 | ```
29 |
30 | With these crates installed, it is now possible to start a scheduler process.
31 |
32 | ```bash
33 | RUST_LOG=info ballista-scheduler
34 | ```
35 |
36 | The scheduler will bind to port 50050 by default.
37 |
38 | Next, start an executor process in a new terminal session.
39 |
40 | ```bash
41 | RUST_LOG=info ballista-executor
42 | ```
43 |
44 | The executor will bind to port 50051 by default. Additional executors can be started by
45 | manually specifying a bind port. For example:
46 |
47 | ```bash
48 | RUST_LOG=info ballista-executor --bind-port 50052
49 | ```
50 |
--------------------------------------------------------------------------------
/benchmarks/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 |
19 | set -e
20 | set -x
21 |
22 | # This bash script is meant to be run inside the docker-compose environment. Check the README for instructions
23 |
24 | # regression checks for queries that return the correct results
25 | # TODO add all queries once https://github.com/apache/arrow-datafusion/issues/3478 is implemented and once
26 | # queries return decimal results with the correct precision
27 | for query in 4 12 13
28 | do
29 | /root/tpch benchmark ballista --host ballista-scheduler --port 50050 --query $query --path /data --format tbl --iterations 1 --debug --expected /data
30 | done
31 |
32 | # at least make sure these queries run, even though we do not check that the results are correct yet
33 |
34 | #TODO: add query 16 once we support it
35 | for query in 1 2 3 5 6 7 8 9 10 11 14 15 17 18 19 20 21 22
36 | do
37 | /root/tpch benchmark ballista --host ballista-scheduler --port 50050 --query $query --path /data --format tbl --iterations 1 --debug
38 | done
39 |
40 |
--------------------------------------------------------------------------------
/ballista-cli/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | name = "ballista-cli"
20 | description = "Command Line Client for Ballista distributed query engine."
21 | version = "50.0.0"
22 | authors = ["Apache DataFusion <dev@datafusion.apache.org>"]
23 | edition = { workspace = true }
24 | rust-version = { workspace = true }
25 | keywords = ["ballista", "cli"]
26 | license = "Apache-2.0"
27 | homepage = "https://datafusion.apache.org/ballista/"
28 | repository = "https://github.com/apache/datafusion-ballista"
29 | readme = "README.md"
30 |
31 | [dependencies]
32 | ballista = { path = "../ballista/client", version = "50.0.0", features = ["standalone"] }
33 | clap = { workspace = true, features = ["derive", "cargo"] }
34 | datafusion = { workspace = true }
35 | datafusion-cli = { workspace = true }
36 | dirs = "6.0"
37 | env_logger = { workspace = true }
38 | mimalloc = { workspace = true }
39 | rustyline = "17.0.1"
40 | tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
41 |
42 | [features]
43 |
44 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @rem Licensed to the Apache Software Foundation (ASF) under one
2 | @rem or more contributor license agreements. See the NOTICE file
3 | @rem distributed with this work for additional information
4 | @rem regarding copyright ownership. The ASF licenses this file
5 | @rem to you under the Apache License, Version 2.0 (the
6 | @rem "License"); you may not use this file except in compliance
7 | @rem with the License. You may obtain a copy of the License at
8 | @rem
9 | @rem http://www.apache.org/licenses/LICENSE-2.0
10 | @rem
11 | @rem Unless required by applicable law or agreed to in writing,
12 | @rem software distributed under the License is distributed on an
13 | @rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | @rem KIND, either express or implied. See the License for the
15 | @rem specific language governing permissions and limitations
16 | @rem under the License.
17 |
18 | @ECHO OFF
19 |
20 | pushd %~dp0
21 |
22 | REM Command file for Sphinx documentation
23 |
24 | if "%SPHINXBUILD%" == "" (
25 | set SPHINXBUILD=sphinx-build
26 | )
27 | set SOURCEDIR=source
28 | set BUILDDIR=build
29 |
30 | if "%1" == "" goto help
31 |
32 | %SPHINXBUILD% >NUL 2>NUL
33 | if errorlevel 9009 (
34 | echo.
35 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
36 | echo.installed, then set the SPHINXBUILD environment variable to point
37 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
38 | echo.may add the Sphinx directory to PATH.
39 | echo.
40 | echo.If you don't have Sphinx installed, grab it from
41 | echo.http://sphinx-doc.org/
42 | exit /b 1
43 | )
44 |
45 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
46 | goto end
47 |
48 | :help
49 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
50 |
51 | :end
52 | popd
53 |
--------------------------------------------------------------------------------
/NOTICE.txt:
--------------------------------------------------------------------------------
1 | Apache DataFusion Ballista
2 | Copyright 2016-2025 The Apache Software Foundation
3 |
4 | This product includes software developed at
5 | The Apache Software Foundation (http://www.apache.org/).
6 |
7 | This product includes software from the LLVM project
8 | * distributed under the University of Illinois Open Source
9 |
10 | This product includes software from the google-lint project
11 | * Copyright (c) 2009 Google Inc. All rights reserved.
12 |
13 | This product includes software from the CMake project
14 | * Copyright 2001-2009 Kitware, Inc.
15 | * Copyright 2012-2014 Continuum Analytics, Inc.
16 | * All rights reserved.
17 |
18 | This product includes software from CMake (BSD 3-Clause)
19 | * CMake - Cross Platform Makefile Generator
20 | * Copyright 2000-2019 Kitware, Inc. and Contributors
21 |
22 | The web site includes files generated by Jekyll.
23 |
24 | --------------------------------------------------------------------------------
25 |
26 | This product includes code from Apache Kudu, which includes the following in
27 | its NOTICE file:
28 |
29 | Apache Kudu
30 | Copyright 2016 The Apache Software Foundation
31 |
32 | This product includes software developed at
33 | The Apache Software Foundation (http://www.apache.org/).
34 |
35 | Portions of this software were developed at
36 | Cloudera, Inc (http://www.cloudera.com/).
37 |
38 | --------------------------------------------------------------------------------
39 |
40 | This product includes code from Apache ORC, which includes the following in
41 | its NOTICE file:
42 |
43 | Apache ORC
44 | Copyright 2013-2019 The Apache Software Foundation
45 |
46 | This product includes software developed by The Apache Software
47 | Foundation (http://www.apache.org/).
48 |
49 | This product includes software developed by Hewlett-Packard:
50 | (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P
51 |
--------------------------------------------------------------------------------
/dev/docker/ballista-builder.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM rust:1.85-bullseye
19 |
20 | ARG EXT_UID
21 |
22 | ENV RUST_LOG=info
23 | ENV RUST_BACKTRACE=full
24 | ENV DEBIAN_FRONTEND=noninteractive
25 |
26 | RUN apt-get update && \
27 | apt-get -y install libssl-dev openssl zlib1g zlib1g-dev libpq-dev cmake protobuf-compiler curl unzip
28 |
29 | RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
30 | apt-get update && \
31 | apt-get install -y nodejs && \
32 | npm install -g yarn
33 |
34 | # create build user with the UID supplied via the EXT_UID build argument
35 | RUN adduser -q -u $EXT_UID builder --home /home/builder && \
36 | mkdir -p /home/builder/workspace
37 | USER builder
38 |
39 | ENV NODE_VER=18.9.0
40 | ENV HOME=/home/builder
41 | ENV PATH=$HOME/.cargo/bin:$PATH
42 |
43 | # prepare rust
44 | RUN rustup update && \
45 | rustup component add rustfmt && \
46 | cargo install cargo-chef --version 0.1.62
47 |
48 | WORKDIR /home/builder/workspace
49 |
50 | COPY dev/docker/builder-entrypoint.sh /home/builder
51 | ENTRYPOINT ["/home/builder/builder-entrypoint.sh"]
52 |
--------------------------------------------------------------------------------
/.github/actions/setup-macos-builder/action.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Prepare Rust Builder for MacOS
19 | description: 'Prepare Rust Build Environment for MacOS'
20 | inputs:
21 | rust-version:
22 | description: 'version of rust to install (e.g. stable)'
23 | required: true
24 | default: 'stable'
25 | runs:
26 | using: "composite"
27 | steps:
28 | - name: Install protobuf compiler
29 | shell: bash
30 | run: |
31 | mkdir -p $HOME/d/protoc
32 | cd $HOME/d/protoc
33 | export PROTO_ZIP="protoc-29.1-osx-x86_64.zip"
34 | curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v29.1/$PROTO_ZIP
35 | unzip $PROTO_ZIP
36 | echo "$HOME/d/protoc/bin" >> $GITHUB_PATH
37 | export PATH=$PATH:$HOME/d/protoc/bin
38 | protoc --version
39 | - name: Setup Rust toolchain # installs the toolchain named by the rust-version input (default: stable)
40 | shell: bash
41 | run: |
42 | rustup update ${{ inputs.rust-version }}
43 | rustup toolchain install ${{ inputs.rust-version }}
44 | rustup default ${{ inputs.rust-version }}
45 | rustup component add rustfmt
46 | - name: Configure rust runtime env
47 | uses: ./.github/actions/setup-rust-runtime
48 |
--------------------------------------------------------------------------------
/.github/workflows/dependencies.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Dependencies
19 |
20 | concurrency:
21 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
22 | cancel-in-progress: true
23 |
24 | on:
25 | push:
26 | branches-ignore:
27 | - 'gh-readonly-queue/**'
28 | paths:
29 | - "**/Cargo.toml"
30 | - "**/Cargo.lock"
31 | pull_request:
32 | paths:
33 | - "**/Cargo.toml"
34 | - "**/Cargo.lock"
35 | merge_group:
36 | # manual trigger
37 | # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
38 | workflow_dispatch:
39 |
40 | jobs:
41 | depcheck:
42 | name: msrv dependency check
43 | runs-on: ubuntu-latest
44 | container:
45 | image: amd64/rust
46 | steps:
47 | - uses: actions/checkout@v5
48 | with:
49 | submodules: true
50 | fetch-depth: 1
51 | - name: Setup Rust toolchain
52 | uses: ./.github/actions/setup-builder
53 | with:
54 | rust-version: stable
55 | - name: Check dependencies
56 | run: |
57 | cd dev/msrvcheck
58 | cargo run
59 |
--------------------------------------------------------------------------------
/.github/actions/setup-windows-builder/action.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Prepare Rust Builder for Windows
19 | description: 'Prepare Rust Build Environment for Windows'
20 | inputs:
21 | rust-version:
22 | description: 'version of rust to install (e.g. stable)'
23 | required: true
24 | default: 'stable'
25 | runs:
26 | using: "composite"
27 | steps:
28 | - name: Install protobuf compiler
29 | shell: bash
30 | run: |
31 | mkdir -p $HOME/d/protoc
32 | cd $HOME/d/protoc
33 | export PROTO_ZIP="protoc-29.1-win64.zip"
34 | curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v29.1/$PROTO_ZIP
35 | unzip $PROTO_ZIP
36 | export PATH=$PATH:$HOME/d/protoc/bin
37 | protoc.exe --version
38 | - name: Setup Rust toolchain # installs the toolchain named by the rust-version input (default: stable)
39 | shell: bash
40 | run: |
41 | # Avoid self update to avoid CI failures: https://github.com/apache/datafusion/issues/9653
42 | rustup toolchain install ${{ inputs.rust-version }} --no-self-update
43 | rustup default ${{ inputs.rust-version }}
44 | rustup component add rustfmt
45 | - name: Configure rust runtime env
46 | uses: ./.github/actions/setup-rust-runtime
47 |
--------------------------------------------------------------------------------
/examples/examples/standalone-sql.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use ballista::datafusion::{
19 | common::Result,
20 | execution::{options::ParquetReadOptions, SessionStateBuilder},
21 | prelude::{SessionConfig, SessionContext},
22 | };
23 | use ballista::prelude::{SessionConfigExt, SessionContextExt};
24 | use ballista_examples::test_util;
25 | /// Registers `alltypes_plain.parquet` as table `test` in a standalone Ballista context and runs a SQL count query against it.
26 | #[tokio::main]
27 | async fn main() -> Result<()> {
28 | let config = SessionConfig::new_with_ballista()
29 | .with_target_partitions(1)
30 | .with_ballista_standalone_parallelism(2);
31 | // build a session state from the Ballista-enabled config plus the default features
32 | let state = SessionStateBuilder::new()
33 | .with_config(config)
34 | .with_default_features()
35 | .build();
36 | // create the context through the standalone constructor, passing the state built above
37 | let ctx = SessionContext::standalone_with_state(state).await?;
38 | // directory holding the example test data files
39 | let test_data = test_util::examples_test_data();
40 |
41 | // register parquet file with the execution context
42 | ctx.register_parquet(
43 | "test",
44 | &format!("{test_data}/alltypes_plain.parquet"),
45 | ParquetReadOptions::default(),
46 | )
47 | .await?;
48 | // plan the SQL query against the registered `test` table
49 | let df = ctx.sql("select count(1) from test").await?;
50 | // run the query and display its result
51 | df.show().await?;
52 | Ok(())
53 | }
54 |
--------------------------------------------------------------------------------
/benchmarks/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | name = "ballista-benchmarks"
20 | description = "Ballista Benchmarks"
21 | version = "50.0.0"
22 | edition = "2021"
23 | authors = ["Apache DataFusion <dev@datafusion.apache.org>"]
24 | homepage = "https://datafusion.apache.org/ballista/"
25 | repository = "https://github.com/apache/datafusion-ballista"
26 | license = "Apache-2.0"
27 | publish = false
28 |
29 | [features]
30 | ci = []
31 | default = ["mimalloc"]
32 | snmalloc = ["snmalloc-rs"]
33 |
34 | [dependencies]
35 | ballista = { path = "../ballista/client", version = "50.0.0" }
36 | datafusion = { workspace = true }
37 | datafusion-proto = { workspace = true }
38 | env_logger = { workspace = true }
39 | futures = { workspace = true }
40 | mimalloc = { workspace = true, optional = true }
41 | rand = { workspace = true }
42 | serde = { workspace = true }
43 | serde_json = "1.0.78"
44 | snmalloc-rs = { version = "0.3", optional = true }
45 | structopt = { version = "0.3", default-features = false }
46 | tokio = { version = "^1.44", features = [
47 | "macros",
48 | "rt",
49 | "rt-multi-thread",
50 | "parking_lot",
51 | ] }
52 |
53 | [dev-dependencies]
54 | ballista-core = { path = "../ballista/core", version = "50.0.0" }
55 |
--------------------------------------------------------------------------------
/ballista/executor/src/metrics/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use crate::execution_engine::QueryStageExecutor;
19 | use log::info;
20 | use std::sync::Arc;
21 |
22 | /// `ExecutorMetricsCollector` records metrics for query stages
23 | /// after they are executed.
24 | ///
25 | /// After each stage completes, `ExecutorMetricsCollector::record_stage`
26 | /// will be called with the executed plan.
27 | pub trait ExecutorMetricsCollector: Send + Sync {
28 |     /// Record metrics for stage after it is executed
29 |     fn record_stage(
30 |         &self,
31 |         job_id: &str,
32 |         stage_id: usize,
33 |         partition: usize,
34 |         plan: Arc<dyn QueryStageExecutor>,
35 |     );
36 | }
37 |
38 | /// Implementation of `ExecutorMetricsCollector` which logs the completed
39 | /// plan at `info` level through the `log` crate.
40 | #[derive(Default)]
41 | pub struct LoggingMetricsCollector {}
42 |
43 | impl ExecutorMetricsCollector for LoggingMetricsCollector {
44 |     fn record_stage(
45 |         &self,
46 |         job_id: &str,
47 |         stage_id: usize,
48 |         partition: usize,
49 |         plan: Arc<dyn QueryStageExecutor>,
50 |     ) {
51 |         info!(
52 |             "=== [{job_id}/{stage_id}/{partition}] Physical plan with metrics ===\n{plan}\n"
53 |         );
54 |     }
55 | }
56 |
--------------------------------------------------------------------------------
/examples/examples/remote-dataframe.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use ballista::prelude::*;
19 | use ballista_examples::test_util;
20 | use datafusion::{
21 | common::Result,
22 | execution::SessionStateBuilder,
23 | prelude::{col, lit, ParquetReadOptions, SessionConfig, SessionContext},
24 | };
25 |
26 | /// Runs a simple DataFrame query through a remote Ballista context: reads a
27 | /// Parquet file, keeps three columns, filters on `id > 1` and prints the
28 | /// resulting rows.
29 | #[tokio::main]
30 | async fn main() -> Result<()> {
31 |     // Session configuration and state handed to the remote context.
32 |     let session_config = SessionConfig::new_with_ballista().with_target_partitions(4);
33 |     let session_state = SessionStateBuilder::new()
34 |         .with_config(session_config)
35 |         .with_default_features()
36 |         .build();
37 |     let ctx =
38 |         SessionContext::remote_with_state("df://localhost:50050", session_state).await?;
39 |
40 |     // Location of the sample Parquet file shipped with the examples.
41 |     let parquet_path =
42 |         format!("{}/alltypes_plain.parquet", test_util::examples_test_data());
43 |
44 |     // Build the query step by step: scan, project, filter.
45 |     let scanned = ctx
46 |         .read_parquet(parquet_path, ParquetReadOptions::default())
47 |         .await?;
48 |     let projected = scanned.select_columns(&["id", "bool_col", "timestamp_col"])?;
49 |     let filtered = projected.filter(col("id").gt(lit(1)))?;
50 |
51 |     filtered.show().await?;
52 |     Ok(())
53 | }
54 |
--------------------------------------------------------------------------------
/examples/examples/custom-executor.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use ballista_core::object_store::{
19 | runtime_env_with_s3_support, session_config_with_s3_support,
20 | };
21 |
22 | use ballista_executor::executor_process::{
23 | start_executor_process, ExecutorProcessConfig,
24 | };
25 | use std::sync::Arc;
26 | /// # Custom Ballista Executor
27 | ///
28 | /// This example demonstrates how to create a custom Ballista executor by
29 | /// replacing the default session config and runtime producers with the
30 | /// S3-aware ones from `ballista_core::object_store`.
31 | #[tokio::main]
32 | async fn main() -> ballista_core::error::Result<()> {
33 |     // Set up logging at `Info` level; the result of `try_init` is ignored.
34 |     let mut log_builder = env_logger::builder();
35 |     log_builder
36 |         .filter_level(log::LevelFilter::Info)
37 |         .is_test(true);
38 |     let _ = log_builder.try_init();
39 |
40 |     let executor_config = ExecutorProcessConfig {
41 |         // replace the default config producer with a custom producer
42 |         // which has the required S3 configuration options
43 |         override_config_producer: Some(Arc::new(session_config_with_s3_support)),
44 |         // replace the default runtime producer with a custom producer
45 |         // which knows how to create S3 connections
46 |         override_runtime_producer: Some(Arc::new(runtime_env_with_s3_support)),
47 |         ..Default::default()
48 |     };
49 |
50 |     start_executor_process(Arc::new(executor_config)).await
51 | }
52 |
--------------------------------------------------------------------------------
/ballista/scheduler/proto/keda.proto:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 The KEDA Authors.
3 |
4 | and others that have contributed code to the public domain.
5 |
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at.
9 |
10 | http://www.apache.org/licenses/LICENSE-2.0
11 |
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | */
18 | // This file comes from https://github.com/kedacore/keda/blob/main/pkg/scalers/externalscaler/externalscaler.proto
19 | syntax = "proto3";
20 |
21 | package externalscaler;
22 | option go_package = ".;externalscaler";
23 |
24 | // gRPC service with three unary RPCs (none of them uses `stream`).
25 | service ExternalScaler {
26 |     rpc IsActive(ScaledObjectRef) returns (IsActiveResponse) {}
27 |     // Commented out since we aren't supporting the streaming scaler interface at the moment
28 |     // rpc StreamIsActive(ScaledObjectRef) returns (stream IsActiveResponse) {}
29 |     rpc GetMetricSpec(ScaledObjectRef) returns (GetMetricSpecResponse) {}
30 |     rpc GetMetrics(GetMetricsRequest) returns (GetMetricsResponse) {}
31 | }
32 |
33 | // Request of `IsActive` and `GetMetricSpec`; also embedded in `GetMetricsRequest`.
34 | message ScaledObjectRef {
35 |     string name = 1;
36 |     string namespace = 2;
37 |     // proto3 map fields must declare both key and value types
38 |     map<string, string> scalerMetadata = 3;
39 | }
40 |
41 | // Response of `IsActive`.
42 | message IsActiveResponse {
43 |     bool result = 1;
44 | }
45 |
46 | // Response of `GetMetricSpec`.
47 | message GetMetricSpecResponse {
48 |     repeated MetricSpec metricSpecs = 1;
49 | }
50 |
51 | // Element of `GetMetricSpecResponse.metricSpecs`.
52 | message MetricSpec {
53 |     string metricName = 1;
54 |     int64 targetSize = 2;
55 | }
56 |
57 | // Request of `GetMetrics`.
58 | message GetMetricsRequest {
59 |     ScaledObjectRef scaledObjectRef = 1;
60 |     string metricName = 2;
61 | }
62 |
63 | // Response of `GetMetrics`.
64 | message GetMetricsResponse {
65 |     repeated MetricValue metricValues = 1;
66 | }
67 |
68 | // Element of `GetMetricsResponse.metricValues`.
69 | message MetricValue {
70 |     string metricName = 1;
71 |     int64 metricValue = 2;
72 | }
--------------------------------------------------------------------------------
/python/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | name = "pyballista"
20 | version = "49.0.0"
21 | homepage = "https://datafusion.apache.org/ballista/"
22 | repository = "https://github.com/apache/datafusion-ballista"
23 | authors = ["Apache DataFusion <dev@datafusion.apache.org>"]
24 | description = "Apache Arrow Ballista Python Client"
25 | readme = "README.md"
26 | license = "Apache-2.0"
27 | edition = "2021"
28 | rust-version = "1.82.0"
29 | include = ["/src", "/ballista", "/LICENSE.txt", "pyproject.toml", "Cargo.toml", "Cargo.lock"]
30 | publish = false
31 |
32 | [dependencies]
33 | async-trait = "0.1.89"
34 | ballista = { version = "49.0.0" }
35 | ballista-core = { version = "49.0.0" }
36 | ballista-executor = { version = "49.0.0", default-features = false }
37 | ballista-scheduler = { version = "49.0.0", default-features = false }
38 | datafusion = { version = "49", features = ["pyarrow", "avro"] }
39 | datafusion-proto = { version = "49" }
40 | datafusion-python = { version = "49" }
41 |
42 | pyo3 = { version = "0.24", features = ["extension-module", "abi3", "abi3-py39"] }
43 | pyo3-log = "0.12"
44 | tokio = { version = "1.48", features = ["macros", "rt", "rt-multi-thread", "sync"] }
45 |
46 | [lib]
47 | crate-type = ["cdylib"]
48 | name = "ballista"
49 |
50 | [build-dependencies]
51 | pyo3-build-config = "0.24"
52 |
--------------------------------------------------------------------------------
/benchmarks/tpch.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | import sys
19 | import time
20 | import argparse
21 |
22 | parser = argparse.ArgumentParser(description='Run SQL benchmarks.')
23 | parser.add_argument('--query', help='query to run, such as q1')
24 | parser.add_argument('--path', help='path to data files')
25 | parser.add_argument('--ext', default='', help='optional file extension, such as parquet')
26 |
27 | args = parser.parse_args()
28 |
29 | query = args.query
30 | path = args.path
31 | table_ext = args.ext
32 |
33 | from ballista import BallistaBuilder
34 | from datafusion.context import SessionContext
35 |
36 | ctx: SessionContext = BallistaBuilder().remote("df://127.0.0.1:50050")
37 |
38 | tables = ["part", "supplier", "partsupp", "customer", "orders", "lineitem", "nation", "region"]
39 |
40 | for table in tables:
41 | table_path = path + "/" + table
42 | if len(table_ext) > 0:
43 | table_path = table_path + "." + table_ext
44 | print("Registering table", table, "at path", table_path)
45 | ctx.register_parquet(table, table_path)
46 |
47 | with open("queries/" + query + ".sql", 'r') as file:
48 | sql = file.read()
49 |
50 | import time
51 |
52 | start = time.time()
53 |
54 | df = ctx.sql(sql)
55 | df.show()
56 |
57 | end = time.time()
58 | print("Query", query, "took", end - start, "second(s)")
59 |
60 |
61 |
--------------------------------------------------------------------------------
/dev/docker/ballista-standalone.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM ubuntu:24.04
19 |
20 | LABEL org.opencontainers.image.source="https://github.com/apache/datafusion-ballista"
21 | LABEL org.opencontainers.image.description="Apache Arrow Ballista Distributed SQL Query Engine"
22 | LABEL org.opencontainers.image.licenses="Apache-2.0"
23 |
24 | ARG RELEASE_FLAG=release
25 |
26 | ENV RELEASE_FLAG=${RELEASE_FLAG}
27 | ENV RUST_LOG=info
28 | ENV RUST_BACKTRACE=full
29 | ENV DEBIAN_FRONTEND=noninteractive
30 |
31 | RUN apt-get -qq update && apt-get install -qq -y wget
32 |
33 | COPY target/$RELEASE_FLAG/ballista-scheduler /root/ballista-scheduler
34 | COPY target/$RELEASE_FLAG/ballista-executor /root/ballista-executor
35 |
36 | RUN chmod a+x /root/ballista-scheduler && \
37 | chmod a+x /root/ballista-executor
38 |
39 | # populate some sample Parquet data for ListingSchemaProvider (the catalog type below must match the file format)
40 | RUN mkdir -p /data && \
41 |     wget -q https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2022-01.parquet -P /data/
42 | ENV DATAFUSION_CATALOG_LOCATION=/data
43 | ENV DATAFUSION_CATALOG_TYPE=parquet
44 |
45 | # Expose Ballista Scheduler gRPC port
46 | EXPOSE 50050
47 |
48 | # Expose Ballista Executor gRPC port
49 | EXPOSE 50051
50 |
51 | COPY dev/docker/standalone-entrypoint.sh /root/standalone-entrypoint.sh
52 | ENTRYPOINT ["/root/standalone-entrypoint.sh"]
53 |
--------------------------------------------------------------------------------
/.github/actions/setup-rust-runtime/action.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Setup Rust Runtime
19 | description: 'Setup Rust Runtime Environment'
20 | runs:
21 | using: "composite"
22 | steps:
23 | # https://github.com/apache/datafusion/issues/15535
24 | # disabled because neither version nor git hash works with apache github policy
25 | #- name: Run sccache-cache
26 | # uses: mozilla-actions/sccache-action@65101d47ea8028ed0c98a1cdea8dd9182e9b5133 # v0.0.8
27 | - name: Configure runtime env
28 | shell: bash
29 | # do not produce debug symbols to keep memory usage down
30 | # hardcoding other profile params to avoid profile override values
31 | # More on Cargo profiles https://doc.rust-lang.org/cargo/reference/profiles.html?profile-settings#profile-settings
32 | #
33 | # Set debuginfo=line-tables-only as debuginfo=0 causes immensely slow build
34 | # See for more details: https://github.com/rust-lang/rust/issues/119560
35 | #
36 | # readd the following to the run below once sccache-cache is re-enabled
37 | # echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV
38 | # echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV
39 | run: |
40 | echo "RUST_BACKTRACE=1" >> $GITHUB_ENV
41 | echo "RUSTFLAGS=-C debuginfo=line-tables-only -C incremental=false" >> $GITHUB_ENV
42 |
43 |
--------------------------------------------------------------------------------
/docs/source/community/communication.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Communication
21 |
22 | We welcome participation from everyone and encourage you to join us, ask
23 | questions, and get involved.
24 |
25 | All participation in the Apache DataFusion Ballista project is governed by the
26 | Apache Software Foundation's [code of
27 | conduct](https://www.apache.org/foundation/policies/conduct.html).
28 |
29 | We use the same communication channels as the main DataFusion project:
30 |
31 | [https://datafusion.apache.org/contributor-guide/communication.html](https://datafusion.apache.org/contributor-guide/communication.html)
32 |
33 | ## Contributing
34 |
35 | Our source code is hosted on
36 | [GitHub](https://github.com/apache/datafusion-ballista). More information on contributing is in
37 | the [Contribution Guide](https://github.com/apache/datafusion-ballista/blob/main/CONTRIBUTING.md)
38 | , and we have curated a [good-first-issue](https://github.com/apache/datafusion-ballista/contribute)
39 | list to help you get started. You can find datafusion's major designs in docs/source/specification.
40 |
41 | We use GitHub issues for maintaining a queue of development work and as the
42 | public record. We often use Google docs, GitHub issues and pull requests for
43 | quick and small design discussions. For major design change proposals, we encourage you to write an RFC.
44 |
--------------------------------------------------------------------------------
/ballista/scheduler/src/api/mod.rs:
--------------------------------------------------------------------------------
1 | // Licensed under the Apache License, Version 2.0 (the "License");
2 | // you may not use this file except in compliance with the License.
3 | // You may obtain a copy of the License at
4 | //
5 | // http://www.apache.org/licenses/LICENSE-2.0
6 | //
7 | // Unless required by applicable law or agreed to in writing, software
8 | // distributed under the License is distributed on an "AS IS" BASIS,
9 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | // See the License for the specific language governing permissions and
11 | // limitations under the License.
12 |
13 | mod handlers;
14 |
15 | use crate::scheduler_server::SchedulerServer;
16 | use axum::routing::patch;
17 | use axum::{routing::get, Router};
18 | use datafusion_proto::logical_plan::AsLogicalPlan;
19 | use datafusion_proto::physical_plan::AsExecutionPlan;
20 | use std::sync::Arc;
21 |
22 | /// Builds the axum [`Router`] that exposes the scheduler's REST endpoints.
23 | ///
24 | /// Every handler is instantiated for the same plan codec types `T` and `U`
25 | /// as the scheduler server, and `scheduler_server` is installed as the
26 | /// router state. The `/api/job/{job_id}/dot_svg` route is only registered
27 | /// when the `graphviz-support` feature is enabled.
28 | pub fn get_routes<
29 |     T: AsLogicalPlan + Clone + Send + Sync + 'static,
30 |     U: AsExecutionPlan + Send + Sync + 'static,
31 | >(
32 |     scheduler_server: Arc<SchedulerServer<T, U>>,
33 | ) -> Router {
34 |     let router = Router::new()
35 |         .route("/api/state", get(handlers::get_scheduler_state::<T, U>))
36 |         .route("/api/executors", get(handlers::get_executors::<T, U>))
37 |         .route("/api/jobs", get(handlers::get_jobs::<T, U>))
38 |         .route("/api/job/{job_id}", patch(handlers::cancel_job::<T, U>))
39 |         .route(
40 |             "/api/job/{job_id}/stages",
41 |             get(handlers::get_query_stages::<T, U>),
42 |         )
43 |         .route(
44 |             "/api/job/{job_id}/dot",
45 |             get(handlers::get_job_dot_graph::<T, U>),
46 |         )
47 |         .route(
48 |             "/api/job/{job_id}/stage/{stage_id}/dot",
49 |             get(handlers::get_query_stage_dot_graph::<T, U>),
50 |         )
51 |         .route("/api/metrics", get(handlers::get_scheduler_metrics::<T, U>));
52 |
53 |     // Shadow `router` with the extended version only when SVG rendering is compiled in.
54 |     #[cfg(feature = "graphviz-support")]
55 |     let router = router.route(
56 |         "/api/job/{job_id}/dot_svg",
57 |         get(handlers::get_job_svg_graph::<T, U>),
58 |     );
59 |
60 |     router.with_state(scheduler_server)
61 | }
62 |
--------------------------------------------------------------------------------
/docs/source/user-guide/scheduler.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Scheduler
21 |
22 | ## REST API
23 |
24 | The scheduler also provides a REST API that allows jobs to be monitored.
25 |
26 | > This is an optional scheduler feature which should be enabled with the `rest-api` feature
27 |
28 | | API | Method | Description |
29 | | ------------------------------------ | ------ | ----------------------------------------------------------------- |
30 | | /api/jobs | GET | Get a list of jobs that have been submitted to the cluster. |
31 | | /api/job/{job_id} | GET | Get a summary of a submitted job. |
32 | | /api/job/{job_id}/dot | GET | Produce a query plan in DOT (graphviz) format. |
33 | | /api/job/{job_id}/dot_svg | GET | Produce a query plan in SVG format. (`graphviz-support` required) |
34 | | /api/job/{job_id} | PATCH | Cancel a currently running job |
35 | | /api/job/{job_id}/stage/{stage_id}/dot | GET | Produces stage plan in DOT (graphviz) format |
36 | | /api/metrics | GET | Return current scheduler metric set |
37 |
--------------------------------------------------------------------------------
/examples/examples/remote-sql.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use ballista::datafusion::{
19 | common::Result,
20 | execution::SessionStateBuilder,
21 | prelude::{CsvReadOptions, SessionConfig, SessionContext},
22 | };
23 | use ballista::prelude::*;
24 | use ballista_examples::test_util;
25 |
26 | /// Registers a CSV file as table `test` on a remote Ballista context and
27 | /// runs an aggregate SQL query against it, printing the resulting rows.
28 | #[tokio::main]
29 | async fn main() -> Result<()> {
30 |     // Session configuration and state handed to the remote context.
31 |     let session_config = SessionConfig::new_with_ballista()
32 |         .with_target_partitions(4)
33 |         .with_ballista_job_name("Remote SQL Example");
34 |     let session_state = SessionStateBuilder::new()
35 |         .with_config(session_config)
36 |         .with_default_features()
37 |         .build();
38 |     let ctx =
39 |         SessionContext::remote_with_state("df://localhost:50050", session_state).await?;
40 |
41 |     // Expose the sample CSV file under the table name used by the query.
42 |     let csv_path =
43 |         format!("{}/aggregate_test_100.csv", test_util::examples_test_data());
44 |     ctx.register_csv("test", &csv_path, CsvReadOptions::new())
45 |         .await?;
46 |
47 |     let query = "SELECT c1, MIN(c12), MAX(c12) \
48 |         FROM test \
49 |         WHERE c11 > 0.1 AND c11 < 0.9 \
50 |         GROUP BY c1";
51 |     let result = ctx.sql(query).await?;
52 |
53 |     result.show().await?;
54 |     Ok(())
55 | }
56 |
--------------------------------------------------------------------------------
/dev/release/check-rat-report.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | ##############################################################################
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | ##############################################################################
20 | import fnmatch
21 | import re
22 | import sys
23 | import xml.etree.ElementTree as ET
24 |
25 | if len(sys.argv) != 3:
26 | sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" %
27 | sys.argv[0])
28 | sys.exit(1)
29 |
30 | exclude_globs_filename = sys.argv[1]
31 | xml_filename = sys.argv[2]
32 |
33 | globs = [line.strip() for line in open(exclude_globs_filename, "r")]
34 |
35 | tree = ET.parse(xml_filename)
36 | root = tree.getroot()
37 | resources = root.findall('resource')
38 |
39 | all_ok = True
40 | for r in resources:
41 | approvals = r.findall('license-approval')
42 | if not approvals or approvals[0].attrib['name'] == 'true':
43 | continue
44 | clean_name = re.sub('^[^/]+/', '', r.attrib['name'])
45 | excluded = False
46 | for g in globs:
47 | if fnmatch.fnmatch(clean_name, g):
48 | excluded = True
49 | break
50 | if not excluded:
51 | sys.stdout.write("NOT APPROVED: %s (%s): %s\n" % (
52 | clean_name, r.attrib['name'], approvals[0].attrib['name']))
53 | all_ok = False
54 |
55 | if not all_ok:
56 | sys.exit(1)
57 |
58 | print('OK')
59 | sys.exit(0)
60 |
--------------------------------------------------------------------------------
/docs/source/user-guide/metrics.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Scheduler Metrics
21 |
22 | ## Prometheus
23 |
24 | > This is an optional scheduler feature which should be enabled with the `prometheus-metrics` feature
25 |
26 | Built with default features, the ballista scheduler will automatically collect and expose a standard set of prometheus metrics.
27 | The metrics currently collected automatically include:
28 |
29 | - _job_exec_time_seconds_ - Histogram of successful job execution time in seconds
30 | - _planning_time_ms_ - Histogram of job planning time in milliseconds
31 | - _failed_ - Counter of failed jobs
32 | - _job_failed_total_ - Counter of failed jobs
33 | - _job_cancelled_total_ - Counter of cancelled jobs
34 | - _job_completed_total_ - Counter of completed jobs
35 | - _job_submitted_total_ - Counter of submitted jobs
36 | - _pending_task_queue_size_ - Number of pending tasks
37 |
38 | **NOTE** Currently the histogram buckets for the above metrics are set to reasonable defaults. If the defaults are not
39 | appropriate for a given use case, the only workaround is to implement a custom `SchedulerMetricsCollector`. In the future
40 | the buckets should be made configurable.
41 |
42 | The metrics are then exported through the scheduler REST API at `GET /api/metrics`. It should be sufficient to ingest metrics
43 | into an existing metrics system by pointing your chosen prometheus exporter at that endpoint.
44 |
--------------------------------------------------------------------------------
/.github/workflows/dev.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Dev
19 | on: [push, pull_request]
20 |
21 | concurrency:
22 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
23 | cancel-in-progress: true
24 |
25 | jobs:
26 | rat:
27 | name: Release Audit Tool (RAT)
28 | runs-on: ubuntu-latest
29 | steps:
30 | - name: Checkout
31 | uses: actions/checkout@v5
32 | - name: Setup Python
33 | uses: actions/setup-python@v4
34 | with:
35 | python-version: "3.10"
36 | - name: Audit licenses
37 | run: ./dev/release/run-rat.sh .
38 |
39 | prettier:
40 | name: Use prettier to check formatting of documents
41 | runs-on: ubuntu-latest
42 | steps:
43 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
44 | - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
45 | with:
46 | node-version: "20"
47 | - name: Prettier check
48 | run: |
49 | # if you encounter error, rerun the command below and commit the changes
50 | #
51 | # ignore subproject CHANGELOG.md because they are machine generated
52 | npx prettier@2.7.1 --write \
53 | '{ballista,docs}/**/*.md' \
54 | '!ballista/CHANGELOG.md' \
55 | README.md \
56 | CONTRIBUTING.md
57 | git diff --exit-code
58 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | apache-rat-*.jar
19 | rat.txt
20 | filtered_rat.txt
21 | arrow-src.tar
22 | arrow-src.tar.gz
23 | CHANGELOG.md.bak
24 | Cargo.toml.bak
25 |
26 | # Compiled source
27 | *.a
28 | *.dll
29 | *.o
30 | *.py[ocd]
31 | *.so
32 | *.so.*
33 | *.bundle
34 | *.dylib
35 | .build_cache_dir
36 | dependency-reduced-pom.xml
37 | MANIFEST
38 | compile_commands.json
39 | build.ninja
40 |
41 | # Generated Visual Studio files
42 | *.vcxproj
43 | *.vcxproj.*
44 | *.sln
45 | *.iml
46 |
47 | # Linux perf sample data
48 | perf.data
49 | perf.data.old
50 |
51 | cpp/.idea/
52 | .clangd/
53 | cpp/.clangd/
54 | cpp/apidoc/xml/
55 | docs/example.gz
56 | docs/example1.dat
57 | docs/example3.dat
58 | python/.eggs/
59 | python/doc/
60 | # Egg metadata
61 | *.egg-info
62 |
63 | .vscode
64 | .idea/
65 | .pytest_cache/
66 | pkgs
67 | docker_cache
68 | .gdb_history
69 | *.orig
70 | .*.swp
71 | .*.swo
72 |
73 | site/
74 |
75 | # R files
76 | **/.Rproj.user
77 | **/*.Rcheck/
78 | **/.Rhistory
79 | .Rproj.user
80 |
81 | # macOS
82 | cpp/Brewfile.lock.json
83 | .DS_Store
84 |
85 | # docker volumes used for caching
86 | .docker
87 |
88 | # Rust
89 | target
90 | # Cargo.lock
91 | !ballista-cli/Cargo.lock
92 |
93 | rusty-tags.vi
94 | .history
95 | .flatbuffers/
96 |
97 | .vscode
98 | venv/
99 |
100 | # apache release artifacts
101 | dev/dist
102 |
103 | # logs
104 | logs/
105 |
--------------------------------------------------------------------------------
/.github/workflows/dev_pr.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Labeler
19 |
20 | concurrency:
21 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
22 | cancel-in-progress: true
23 |
24 | on:
25 | pull_request_target:
26 | types:
27 | - opened
28 | - edited
29 | - synchronize
30 |
31 | jobs:
32 | process:
33 | name: Process
34 | runs-on: ubuntu-latest
35 | steps:
36 | - uses: actions/checkout@v5
37 |
38 | - name: Assign GitHub labels
39 | if: |
40 | github.event_name == 'pull_request_target' &&
41 | (github.event.action == 'opened' ||
42 | github.event.action == 'synchronize')
43 | uses: actions/labeler@v4.3.0
44 | with:
45 | repo-token: ${{ secrets.GITHUB_TOKEN }}
46 | configuration-path: .github/workflows/dev_pr/labeler.yml
47 | sync-labels: true
48 |
49 | # TODO: Enable this when eps1lon/actions-label-merge-conflict is available.
50 | # - name: Checks if PR needs rebase
51 | # if: |
52 | # github.event_name == 'push' ||
53 | # (github.event_name == 'pull_request_target' &&
54 | # (github.event.action == 'opened' ||
55 | # github.event.action == 'synchronize'))
56 | # uses: eps1lon/actions-label-merge-conflict@releases/2.x
57 | # with:
58 | # dirtyLabel: "needs-rebase"
59 | # repoToken: "${{ secrets.GITHUB_TOKEN }}"
60 |
--------------------------------------------------------------------------------
/ballista/client/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | name = "ballista"
20 | description = "Ballista Distributed Compute"
21 | license = "Apache-2.0"
22 | version = "50.0.0"
23 | homepage = "https://datafusion.apache.org/ballista/"
24 | repository = "https://github.com/apache/datafusion-ballista"
25 | readme = "README.md"
26 | authors = ["Apache DataFusion "]
27 | edition = { workspace = true }
28 | rust-version = { workspace = true }
29 |
30 | [dependencies]
31 | async-trait = { workspace = true }
32 | ballista-core = { path = "../core", version = "50.0.0" }
33 | ballista-executor = { path = "../executor", version = "50.0.0", optional = true }
34 | ballista-scheduler = { path = "../scheduler", version = "50.0.0", optional = true }
35 | datafusion = { workspace = true }
36 | log = { workspace = true }
37 |
38 | tokio = { workspace = true }
39 | url = { workspace = true }
40 |
41 | [dev-dependencies]
42 | ballista-executor = { path = "../executor", version = "50.0.0" }
43 | ballista-scheduler = { path = "../scheduler", version = "50.0.0" }
44 | ctor = { workspace = true }
45 | datafusion-proto = { workspace = true }
46 | env_logger = { workspace = true }
47 | rstest = { workspace = true }
48 | tempfile = { workspace = true }
49 | tonic = { workspace = true }
50 |
51 | [features]
52 | default = ["standalone"]
53 | standalone = ["ballista-executor", "ballista-scheduler"]
54 | # tests that need RUST_MIN_STACK to be changed in order
55 | # to run.
56 | test_extended_stack = []
57 |
--------------------------------------------------------------------------------
/ballista/core/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | #![doc = include_str!("../README.md")]
19 |
20 | use std::sync::Arc;
21 |
22 | use datafusion::{execution::runtime_env::RuntimeEnv, prelude::SessionConfig};
23 | pub const BALLISTA_VERSION: &str = env!("CARGO_PKG_VERSION");
24 |
25 | pub fn print_version() {
26 |     println!("Ballista version: {}", BALLISTA_VERSION);
27 | }
28 |
29 | pub mod client;
30 | pub mod config;
31 | pub mod consistent_hash;
32 | pub mod diagram;
33 | pub mod error;
34 | pub mod event_loop;
35 | pub mod execution_plans;
36 | pub mod extension;
37 | #[cfg(feature = "build-binary")]
38 | pub mod object_store;
39 | pub mod planner;
40 | pub mod registry;
41 | pub mod serde;
42 | pub mod utils;
43 |
44 | ///
45 | /// [RuntimeProducer] is a factory which creates a [RuntimeEnv]
46 | /// from a [SessionConfig]. As the [SessionConfig] is propagated
47 | /// from the client to the executors, this makes it possible to
48 | /// create [RuntimeEnv] components and configure them according to
49 | /// the [SessionConfig] or some of its config extensions.
50 | ///
51 | /// It is intended to be used with executor configuration.
52 | ///
53 | pub type RuntimeProducer = Arc<
54 |     dyn Fn(&SessionConfig) -> datafusion::error::Result<Arc<RuntimeEnv>> + Send + Sync,
55 | >;
56 | ///
57 | /// [ConfigProducer] is a factory which creates a [SessionConfig], with
58 | /// additional extensions or configuration codecs.
59 | ///
60 | /// It is intended to be used with executor configuration.
61 | ///
62 | pub type ConfigProducer = Arc<dyn Fn() -> SessionConfig + Send + Sync>;
63 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. Licensed to the Apache Software Foundation (ASF) under one
2 | .. or more contributor license agreements. See the NOTICE file
3 | .. distributed with this work for additional information
4 | .. regarding copyright ownership. The ASF licenses this file
5 | .. to you under the Apache License, Version 2.0 (the
6 | .. "License"); you may not use this file except in compliance
7 | .. with the License. You may obtain a copy of the License at
8 |
9 | .. http://www.apache.org/licenses/LICENSE-2.0
10 |
11 | .. Unless required by applicable law or agreed to in writing,
12 | .. software distributed under the License is distributed on an
13 | .. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | .. KIND, either express or implied. See the License for the
15 | .. specific language governing permissions and limitations
16 | .. under the License.
17 |
18 | ==========================
19 | Apache DataFusion Ballista
20 | ==========================
21 |
22 | Table of contents
23 | =================
24 |
25 |
26 | .. _toc.guide:
27 |
28 | .. toctree::
29 | :maxdepth: 1
30 | :caption: User Guide
31 |
32 | Introduction
33 |
34 | .. toctree::
35 | :maxdepth: 1
36 | :caption: Cluster Deployment
37 |
38 | Deployment
39 | Scheduler
40 |
41 | .. toctree::
42 | :maxdepth: 1
43 | :caption: Clients
44 |
45 | Python
46 | Rust
47 | Flight SQL JDBC
48 | SQL CLI
49 |
50 | .. toctree::
51 | :maxdepth: 1
52 | :caption: Reference
53 |
54 | user-guide/configs
55 | user-guide/tuning-guide
56 | user-guide/metrics
57 | user-guide/faq
58 | user-guide/extending-components
59 |
60 | .. _toc.contributors:
61 |
62 | .. toctree::
63 | :maxdepth: 1
64 | :caption: Contributors Guide
65 |
66 | contributors-guide/architecture
67 | contributors-guide/code-organization
68 | contributors-guide/development
69 | Source code
70 |
71 | .. _toc.community:
72 |
73 | .. toctree::
74 | :maxdepth: 1
75 | :caption: Community
76 |
77 | community/communication
78 |
79 | Issue tracker
80 | Code of conduct
81 |
--------------------------------------------------------------------------------
/examples/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | name = "ballista-examples"
20 | description = "Ballista usage examples"
21 | version = "50.0.0"
22 | homepage = "https://datafusion.apache.org/ballista/"
23 | repository = "https://github.com/apache/datafusion-ballista"
24 | authors = ["Apache DataFusion "]
25 | license = "Apache-2.0"
26 | keywords = ["arrow", "distributed", "query", "sql"]
27 | edition = { workspace = true }
28 | rust-version = { workspace = true }
29 | publish = false
30 |
31 | [[example]]
32 | name = "standalone_sql"
33 | path = "examples/standalone-sql.rs"
34 | required-features = ["ballista/standalone"]
35 |
36 | [dependencies]
37 |
38 | [dev-dependencies]
39 | ballista = { path = "../ballista/client", version = "50.0.0" }
40 | ballista-core = { path = "../ballista/core", version = "50.0.0", default-features = false }
41 | ballista-executor = { path = "../ballista/executor", version = "50.0.0", default-features = false }
42 | ballista-scheduler = { path = "../ballista/scheduler", version = "50.0.0", default-features = false }
43 | ctor = { workspace = true }
44 | datafusion = { workspace = true }
45 | env_logger = { workspace = true }
46 | log = { workspace = true }
47 | object_store = { workspace = true, features = ["aws"] }
48 | testcontainers-modules = { version = "0.13", features = ["minio"] }
49 | tokio = { workspace = true, features = [
50 | "macros",
51 | "rt",
52 | "rt-multi-thread",
53 | "sync",
54 | "parking_lot"
55 | ] }
56 | tonic = { workspace = true }
57 | url = { workspace = true }
58 |
59 | [features]
60 | default = []
61 | testcontainers = []
62 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | version: '3.3'
18 | services:
19 | ballista-scheduler:
20 | image: ballista-scheduler
21 | build:
22 | dockerfile: dev/docker/ballista-scheduler.Dockerfile
23 | context: .
24 | command: "--bind-host 0.0.0.0"
25 | ports:
26 | - "50050:50050"
27 | environment:
28 | - RUST_LOG=ballista=info
29 | volumes:
30 | - ./benchmarks/data:/data
31 | healthcheck:
32 | test: ["CMD", "nc", "-z", "ballista-scheduler", "50050"]
33 | interval: 5s
34 | timeout: 5s
35 | retries: 5
36 | ballista-executor:
37 | image: ballista-executor
38 | build:
39 | dockerfile: dev/docker/ballista-executor.Dockerfile
40 | context: .
41 | command: "--bind-host 0.0.0.0 --scheduler-host ballista-scheduler --scheduler-connect-timeout-seconds 15"
42 | deploy:
43 | replicas: 2
44 | restart: always
45 | environment:
46 | - RUST_LOG=ballista=info
47 | volumes:
48 | - ./benchmarks/data:/data
49 | depends_on:
50 | - ballista-scheduler
51 | healthcheck:
52 | test: ["CMD", "nc", "-z", "ballista-executor", "50051"]
53 | interval: 5s
54 | timeout: 5s
55 | retries: 5
56 | ballista-client:
57 | image: ballista-benchmarks
58 | build:
59 | dockerfile: dev/docker/ballista-benchmarks.Dockerfile
60 | context: .
61 | command: ["/bin/bash", "-c", "sleep infinity"]
62 | ports:
63 | - "50051:50051"
64 | environment:
65 | - RUST_LOG=info
66 | volumes:
67 | - ./benchmarks/data:/data
68 | depends_on:
69 | - ballista-scheduler
70 | - ballista-executor
71 |
--------------------------------------------------------------------------------
/python/src/utils.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use std::future::Future;
19 | use std::sync::OnceLock;
20 | use tokio::task::JoinHandle;
21 |
22 | use ballista_core::error::BallistaError;
23 | use pyo3::exceptions::PyException;
24 | use pyo3::{PyErr, Python};
25 | use tokio::runtime::Runtime;
26 |
27 | use crate::TokioRuntime;
28 |
29 | pub(crate) fn to_pyerr(err: BallistaError) -> PyErr {
30 |     PyException::new_err(format!("{err}"))
31 | }
32 |
33 | #[inline]
34 | pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime {
35 |     // NOTE: Other pyo3 python libraries have had issues with using tokio
36 |     // behind a forking app-server like `gunicorn`.
37 |     // If we run into that problem in the future, we can look to `delta-rs`,
38 |     // which adds a check that disallows calls from a forked process:
39 |     // https://github.com/delta-io/delta-rs/blob/87010461cfe01563d91a4b9cd6fa468e2ad5f283/python/src/utils.rs#L10-L31
40 |     static RUNTIME: OnceLock<TokioRuntime> = OnceLock::new();
41 |     RUNTIME.get_or_init(|| TokioRuntime(tokio::runtime::Runtime::new().unwrap()))
42 | }
43 |
44 | /// Blocks on a Rust future on the shared Tokio runtime, with the GIL released meanwhile
45 | pub(crate) fn wait_for_future<F>(py: Python, f: F) -> F::Output
46 | where
47 |     F: Future + Send,
48 |     F::Output: Send,
49 | {
50 |     let runtime: &Runtime = &get_tokio_runtime().0;
51 |     py.allow_threads(|| runtime.block_on(f))
52 | }
53 |
54 | pub(crate) fn spawn_feature<F>(py: Python, f: F) -> JoinHandle<F::Output>
55 | where
56 |     F: Future + Send + 'static,
57 |     F::Output: Send,
58 | {
59 |     let runtime: &Runtime = &get_tokio_runtime().0;
60 |     // spawns `f` on the shared runtime; do we need py.allow_threads ?
61 |     py.allow_threads(|| runtime.spawn(f))
62 | }
63 |
--------------------------------------------------------------------------------
/examples/examples/custom-scheduler.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use ballista_core::error::BallistaError;
19 | use ballista_core::object_store::{
20 | session_config_with_s3_support, session_state_with_s3_support,
21 | };
22 |
23 | use ballista_scheduler::cluster::BallistaCluster;
24 | use ballista_scheduler::config::SchedulerConfig;
25 | use ballista_scheduler::scheduler_process::start_server;
26 | use std::net::AddrParseError;
27 | use std::sync::Arc;
28 |
29 | ///
30 | /// # Custom Ballista Scheduler
31 | ///
32 | /// This example demonstrates how to create custom ballista schedulers.
33 | ///
34 | #[tokio::main]
35 | async fn main() -> ballista_core::error::Result<()> {
36 | let _ = env_logger::builder()
37 | .filter_level(log::LevelFilter::Info)
38 | .is_test(true)
39 | .try_init();
40 |
41 | let config: SchedulerConfig = SchedulerConfig {
42 | // overriding default config producer with custom producer
43 | // which knows how to create S3 connections
44 | override_config_producer: Some(Arc::new(session_config_with_s3_support)),
45 | // overriding default session builder, which has custom session configuration
46 | // runtime environment and session state.
47 | override_session_builder: Some(Arc::new(session_state_with_s3_support)),
48 | ..Default::default()
49 | };
50 |
51 | let addr = format!("{}:{}", config.bind_host, config.bind_port);
52 | let addr = addr
53 | .parse()
54 | .map_err(|e: AddrParseError| BallistaError::Configuration(e.to_string()))?;
55 |
56 | let cluster = BallistaCluster::new_from_config(&config).await?;
57 | start_server(cluster, addr, Arc::new(config)).await?;
58 |
59 | Ok(())
60 | }
61 |
--------------------------------------------------------------------------------
/dev/release/release-tarball.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | #
20 |
21 | # Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/release-tarball.sh
22 |
23 | # This script copies a tarball from the "dev" area of the
24 | # dist.apache.datafusion repository to the "release" area
25 | #
26 | # This script should only be run after the release has been approved
27 | # by the DataFusion PMC committee.
28 | #
29 | # See release/README.md for full release instructions
30 | #
31 | # Based in part on post-01-upload.sh from apache/arrow
32 |
33 |
34 | set -e
35 | set -u
36 |
37 | if [ "$#" -ne 2 ]; then
38 |     echo "Usage: $0 <version> <rc>"
39 |     echo "ex. $0 4.1.0 2"
40 |     exit 1 # a usage error must not be reported as success
41 | fi
42 |
43 | version=$1
44 | rc=$2
45 |
46 | tmp_dir=tmp-apache-datafusion-ballista-dist
47 |
48 | echo "Recreate temporary directory: ${tmp_dir}"
49 | rm -rf "${tmp_dir}"
50 | mkdir -p "${tmp_dir}"
51 |
52 | echo "Clone dev dist repository"
53 | svn \
54 |     co \
55 |     "https://dist.apache.org/repos/dist/dev/datafusion/apache-datafusion-ballista-${version}-rc${rc}" \
56 |     "${tmp_dir}/dev"
57 |
58 | echo "Clone release dist repository"
59 | svn co https://dist.apache.org/repos/dist/release/datafusion "${tmp_dir}/release"
60 |
61 | echo "Copy ${version}-rc${rc} to release working copy"
62 | release_version="datafusion-ballista-${version}"
63 | mkdir -p "${tmp_dir}/release/${release_version}"
64 | cp -r "${tmp_dir}"/dev/* "${tmp_dir}/release/${release_version}/"
65 | svn add "${tmp_dir}/release/${release_version}"
66 |
67 | echo "Commit release"
68 | svn ci -m "Apache DataFusion Ballista ${version}" "${tmp_dir}/release"
69 |
70 | echo "Clean up"
71 | rm -rf "${tmp_dir}"
72 |
73 | echo "Success! The release is available here:"
74 | echo " https://dist.apache.org/repos/dist/release/datafusion/${release_version}"
75 |
--------------------------------------------------------------------------------
/pre-commit.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | # This file is a git pre-commit hook.
21 | #
22 | # Soft link it as a git hook under the top dir of this git repository:
23 | # $ ln -s ../../pre-commit.sh .git/hooks/pre-commit
24 | #
25 | # This file can also be run directly:
26 | # $ ./pre-commit.sh
27 |
28 | function RED() {
29 |     echo "\033[0;31m$*\033[0m" # "$*" joins all arguments into one word (callers render it with `echo -e`)
30 | }
31 |
32 | function GREEN() {
33 |     echo "\033[0;32m$*\033[0m"
34 | }
35 |
36 | function BYELLOW() {
37 |     echo "\033[1;33m$*\033[0m"
38 | }
39 |
40 | # env GIT_DIR is set by git when running a pre-commit hook.
41 | if [ -z "${GIT_DIR}" ]; then
42 |     GIT_DIR=$(git rev-parse --show-toplevel)
43 | fi
44 |
45 | cd "${GIT_DIR}"
46 |
47 | NUM_CHANGES=$(git diff --cached --name-only . |
48 |     grep -e '\.rs$' | # literal ".rs" suffix only; an unescaped dot would match any character
49 |     awk '{print $1}' |
50 |     wc -l)
51 |
52 | if [ ${NUM_CHANGES} -eq 0 ]; then
53 |     echo -e "$(GREEN INFO): no staged changes in *.rs, $(GREEN skip cargo fmt/clippy)"
54 |     exit 0
55 | fi
56 |
57 | # 1. cargo clippy
58 |
59 | echo -e "$(GREEN INFO): cargo clippy ..."
60 |
61 | # Cargo clippy always returns exit code 0, and `tee` doesn't work.
62 | # So let's just run cargo clippy.
63 | cargo clippy
64 | echo -e "$(GREEN INFO): cargo clippy done"
65 |
66 | # 2. cargo fmt: format with nightly and stable.
67 |
68 | CHANGED_BY_CARGO_FMT=false
69 | echo -e "$(GREEN INFO): cargo fmt with nightly and stable ..."
70 |
71 | for version in nightly stable; do
72 | CMD="cargo +${version} fmt"
73 | ${CMD} --all -q -- --check 2>/dev/null
74 | if [ $? -ne 0 ]; then
75 | ${CMD} --all
76 | echo -e "$(BYELLOW WARN): ${CMD} changed some files"
77 | CHANGED_BY_CARGO_FMT=true
78 | fi
79 | done
80 |
81 | if ${CHANGED_BY_CARGO_FMT}; then
82 | echo -e "$(RED FAIL): git commit $(RED ABORTED), please have a look and run git add/commit again"
83 | exit 1
84 | fi
85 |
86 | exit 0
87 |
--------------------------------------------------------------------------------
/python/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [build-system]
19 | requires = ["maturin>=1.8.0,<2.0"]
20 | build-backend = "maturin"
21 |
22 | [project]
23 | name = "ballista"
24 | description = "Python client for Apache Arrow Ballista Distributed SQL Query Engine"
25 | readme = "README.md"
26 | license = {file = "LICENSE.txt"}
27 | requires-python = ">=3.9"
28 | keywords = ["ballista", "sql", "rust", "distributed"]
29 | classifiers = [
30 | "Development Status :: 2 - Pre-Alpha",
31 | "Intended Audience :: Developers",
32 | "License :: OSI Approved :: Apache Software License",
33 | "License :: OSI Approved",
34 | "Operating System :: MacOS",
35 | "Operating System :: Microsoft :: Windows",
36 | "Operating System :: POSIX :: Linux",
37 | "Programming Language :: Python :: 3",
38 | "Programming Language :: Python :: 3.9",
39 | "Programming Language :: Python :: 3.10",
40 | "Programming Language :: Python",
41 | "Programming Language :: Rust",
42 | ]
43 | dependencies = [
44 | "pyarrow>=21.0.0",
45 | "cloudpickle",
46 | "datafusion==49",
47 | ]
48 | dynamic = ["version"]
49 |
50 | [project.urls]
51 | homepage = "https://datafusion.apache.org/ballista"
52 | documentation = "https://datafusion.apache.org/ballista"
53 | repository = "https://github.com/apache/datafusion-ballista"
54 |
55 | [tool.isort]
56 | profile = "black"
57 |
58 | [tool.maturin]
59 | module-name = "ballista.ballista_internal"
60 | include = [
61 | { path = "Cargo.lock", format = "sdist" }
62 | ]
63 | exclude = [".github/**", "ci/**", ".asf.yaml"]
64 | # Require Cargo.lock is up to date
65 | locked = true
66 |
67 | [dependency-groups]
68 | dev = [
69 | "maturin>=1.8.1",
70 | "pytest>=8.4.1",
71 | "pytest-asyncio>=1.1.0",
72 | "ruff>=0.9.1",
73 | "toml>=0.10.2",
74 | "pygithub==2.5.0",
75 | ]
76 | docs = [
77 | ]
78 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | on:
19 | push:
20 | branches:
21 | - main
22 | paths:
23 | - .asf.yaml
24 | - .github/workflows/docs.yaml
25 | - docs/**
26 |
27 | name: Deploy DataFusion Ballista site
28 |
29 | jobs:
30 | build-docs:
31 | name: Build docs
32 | runs-on: ubuntu-latest
33 | steps:
34 | - name: Checkout docs sources
35 | uses: actions/checkout@v5
36 |
37 | - name: Checkout asf-site branch
38 | uses: actions/checkout@v5
39 | with:
40 | ref: asf-site
41 | path: asf-site
42 |
43 | - name: Setup Python
44 | uses: actions/setup-python@v5
45 | with:
46 | python-version: "3.10"
47 |
48 | - name: Install dependencies
49 | run: |
50 | set -x
51 | python3 -m venv venv
52 | source venv/bin/activate
53 | pip install -r docs/requirements.txt
54 |
55 | - name: Build docs
56 | run: |
57 | set -x
58 | source venv/bin/activate
59 | cd docs
60 | ./build.sh
61 |
62 | - name: Copy & push the generated HTML
63 | run: |
64 | set -x
65 | cd asf-site/
66 | rsync \
67 | -a \
68 | --delete \
69 | --exclude '/.git/' \
70 | ../docs/build/html/ \
71 | ./
72 | cp ../.asf.yaml .
73 | touch .nojekyll
74 | git status --porcelain
75 | if [ "$(git status --porcelain)" != "" ]; then
76 | git config user.name "github-actions[bot]"
77 | git config user.email "github-actions[bot]@users.noreply.github.com"
78 | git add --all
79 | git commit -m 'Publish built docs triggered by ${{ github.sha }}'
80 | git push || git push --force
81 | fi
--------------------------------------------------------------------------------
/ballista/scheduler/src/state/session_manager.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use crate::scheduler_server::SessionBuilder;
19 | use ballista_core::error::Result;
20 | use datafusion::prelude::{SessionConfig, SessionContext};
21 |
22 | use crate::cluster::JobState;
23 | use std::sync::Arc;
24 |
25 | #[derive(Clone)]
26 | pub struct SessionManager {
27 |     state: Arc<dyn JobState>, // every method below delegates to this state
28 | }
29 |
30 | impl SessionManager {
31 |     pub fn new(state: Arc<dyn JobState>) -> Self {
32 |         Self { state }
33 |     }
34 |     pub async fn remove_session(&self, session_id: &str) -> Result<()> {
35 |         self.state.remove_session(session_id).await
36 |     }
37 |
38 |     pub async fn create_or_update_session(
39 |         &self,
40 |         session_id: &str,
41 |         config: &SessionConfig,
42 |     ) -> Result<Arc<SessionContext>> {
43 |         self.state
44 |             .create_or_update_session(session_id, config)
45 |             .await
46 |     }
47 |
48 |     pub(crate) fn produce_config(&self) -> SessionConfig {
49 |         self.state.produce_config()
50 |     }
51 | }
52 |
53 | /// Create a DataFusion session context that is compatible with Ballista Configuration
54 | pub fn create_datafusion_context(
55 |     session_config: &SessionConfig,
56 |     session_builder: SessionBuilder,
57 | ) -> datafusion::common::Result<Arc<SessionContext>> {
58 |     let session_state = if session_config.round_robin_repartition() {
59 |         let session_config = session_config
60 |             .clone()
61 |             // should we disable catalog on the scheduler side
62 |             .with_round_robin_repartition(false);
63 |
64 |         log::warn!("session manager will override `datafusion.optimizer.enable_round_robin_repartition` to `false` ");
65 |         session_builder(session_config)?
66 |     } else {
67 |         session_builder(session_config.clone())?
68 |     };
69 |
70 |     Ok(Arc::new(SessionContext::new_with_state(session_state)))
71 | }
72 |
--------------------------------------------------------------------------------
/ballista/executor/Cargo.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [package]
19 | name = "ballista-executor"
20 | description = "Ballista Distributed Compute - Executor"
21 | license = "Apache-2.0"
22 | version = "50.0.0"
23 | homepage = "https://datafusion.apache.org/ballista/"
24 | repository = "https://github.com/apache/datafusion-ballista"
25 | readme = "README.md"
26 | authors = ["Apache DataFusion <dev@datafusion.apache.org>"]
27 | edition = { workspace = true }
28 | rust-version = { workspace = true }
29 |
30 | [[bin]]
31 | name = "ballista-executor"
32 | path = "src/bin/main.rs"
33 | required-features = ["build-binary"]
34 |
35 | [features]
36 | build-binary = ["clap", "tracing-subscriber", "tracing-appender", "tracing", "ballista-core/build-binary"]
37 | default = ["build-binary", "mimalloc"]
38 |
39 | [dependencies]
40 | arrow = { workspace = true }
41 | arrow-flight = { workspace = true }
42 | async-trait = { workspace = true }
43 | ballista-core = { path = "../core", version = "50.0.0" }
44 | clap = { workspace = true, optional = true }
45 | dashmap = { workspace = true }
46 | datafusion = { workspace = true }
47 | datafusion-proto = { workspace = true }
48 | futures = { workspace = true }
49 | log = { workspace = true }
50 | mimalloc = { workspace = true, optional = true }
51 | parking_lot = { workspace = true }
52 | tempfile = { workspace = true }
53 | tokio = { workspace = true, features = ["full"] }
54 | tokio-stream = { workspace = true, features = ["net"] }
55 | tokio-util = { version = "0.7", features = ["io-util"] }
56 | tonic = { workspace = true }
57 | tracing = { workspace = true, optional = true }
58 | tracing-appender = { workspace = true, optional = true }
59 | tracing-subscriber = { workspace = true, optional = true }
60 | uuid = { workspace = true }
61 |
62 | [dev-dependencies]
63 |
64 | [build-dependencies]
65 |
66 | # Use libc on Unix-like platforms to set worker priority in DedicatedExecutor
67 | [target."cfg(unix)".dependencies.libc]
68 | version = "0.2"
69 |
--------------------------------------------------------------------------------