├── .asf.yaml
├── .cargo
└── config
├── .dockerignore
├── .flake8
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── actions
│ └── setup-builder
│ │ └── action.yaml
├── dependabot.yml
├── pull_request_template.md
└── workflows
│ ├── cancel.yml
│ ├── comment_bot.yml
│ ├── dev.yml
│ ├── dev_pr.yml
│ ├── dev_pr
│ └── labeler.yml
│ ├── python_build.yml
│ ├── python_test.yaml
│ └── rust.yml
├── .gitignore
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Cargo.toml
├── LICENSE.txt
├── OLDREADME.md
├── README.md
├── ballista
├── __init__.py
├── functions.py
└── tests
│ ├── __init__.py
│ └── test_imports.py
├── ci
├── appveyor-cpp-build.bat
├── appveyor-cpp-setup.bat
├── conda_env_archery.yml
├── conda_env_cpp.yml
├── conda_env_crossbow.txt
├── conda_env_gandiva.yml
├── conda_env_gandiva_win.yml
├── conda_env_python.yml
├── conda_env_r.yml
├── conda_env_sphinx.yml
├── conda_env_unix.yml
├── detect-changes.py
├── docker
│ ├── conda-cpp.Dockerfile
│ ├── conda-integration.Dockerfile
│ ├── conda-python-dask.Dockerfile
│ ├── conda-python-hdfs.Dockerfile
│ ├── conda-python-jpype.Dockerfile
│ ├── conda-python-kartothek.Dockerfile
│ ├── conda-python-pandas.Dockerfile
│ ├── conda-python-spark.Dockerfile
│ ├── conda-python-turbodbc.Dockerfile
│ ├── conda-python.Dockerfile
│ ├── conda.Dockerfile
│ ├── debian-10-cpp.Dockerfile
│ ├── debian-10-go.Dockerfile
│ ├── debian-10-js.Dockerfile
│ ├── debian-9-java.Dockerfile
│ ├── fedora-33-cpp.Dockerfile
│ ├── linux-apt-c-glib.Dockerfile
│ ├── linux-apt-docs.Dockerfile
│ ├── linux-apt-jni.Dockerfile
│ ├── linux-apt-lint.Dockerfile
│ ├── linux-apt-python-3.Dockerfile
│ ├── linux-apt-r.Dockerfile
│ ├── linux-apt-ruby.Dockerfile
│ ├── linux-dnf-python-3.Dockerfile
│ ├── linux-r.Dockerfile
│ ├── python-sdist.Dockerfile
│ ├── python-wheel-manylinux-201x.Dockerfile
│ ├── python-wheel-manylinux-test.Dockerfile
│ ├── python-wheel-windows-vs2017.Dockerfile
│ ├── ubuntu-18.04-cpp.Dockerfile
│ ├── ubuntu-18.04-csharp.Dockerfile
│ ├── ubuntu-20.04-cpp.Dockerfile
│ └── ubuntu-20.10-cpp.Dockerfile
├── etc
│ ├── hdfs-site.xml
│ └── rprofile
├── scripts
│ ├── PKGBUILD
│ ├── c_glib_build.sh
│ ├── c_glib_test.sh
│ ├── ccache_setup.sh
│ ├── cpp_build.sh
│ ├── cpp_test.sh
│ ├── csharp_build.sh
│ ├── csharp_pack.sh
│ ├── csharp_test.sh
│ ├── docs_build.sh
│ ├── go_build.sh
│ ├── go_test.sh
│ ├── install_conda.sh
│ ├── install_dask.sh
│ ├── install_iwyu.sh
│ ├── install_kartothek.sh
│ ├── install_minio.sh
│ ├── install_osx_sdk.sh
│ ├── install_pandas.sh
│ ├── install_spark.sh
│ ├── install_turbodbc.sh
│ ├── integration_arrow.sh
│ ├── integration_dask.sh
│ ├── integration_hdfs.sh
│ ├── integration_hiveserver2.sh
│ ├── integration_kartothek.sh
│ ├── integration_spark.sh
│ ├── integration_turbodbc.sh
│ ├── java_build.sh
│ ├── java_test.sh
│ ├── js_build.sh
│ ├── js_test.sh
│ ├── msys2_setup.sh
│ ├── msys2_system_clean.sh
│ ├── msys2_system_upgrade.sh
│ ├── python_benchmark.sh
│ ├── python_build.sh
│ ├── python_sdist_build.sh
│ ├── python_sdist_test.sh
│ ├── python_test.sh
│ ├── python_wheel_macos_build.sh
│ ├── python_wheel_macos_test.sh
│ ├── python_wheel_manylinux_build.sh
│ ├── python_wheel_manylinux_test.sh
│ ├── python_wheel_windows_build.bat
│ ├── python_wheel_windows_test.bat
│ ├── r_build.sh
│ ├── r_deps.sh
│ ├── r_docker_configure.sh
│ ├── r_pkgdown_check.sh
│ ├── r_sanitize.sh
│ ├── r_test.sh
│ ├── r_windows_build.sh
│ ├── release_test.sh
│ ├── ruby_test.sh
│ ├── rust_build.sh
│ ├── rust_clippy.sh
│ ├── rust_fmt.sh
│ ├── rust_toml_fmt.sh
│ ├── util_checkout.sh
│ ├── util_cleanup.sh
│ ├── util_download_apache.sh
│ └── util_wait_for_it.sh
└── vcpkg
│ ├── arm64-linux-static-debug.cmake
│ ├── arm64-linux-static-release.cmake
│ ├── ports.patch
│ ├── x64-linux-static-debug.cmake
│ ├── x64-linux-static-release.cmake
│ ├── x64-osx-static-debug.cmake
│ ├── x64-osx-static-release.cmake
│ ├── x64-windows-static-md-debug.cmake
│ └── x64-windows-static-md-release.cmake
├── dev
├── build-ballista-docker.sh
├── build-ballista-executables.sh
├── build-set-env.sh
├── create_license.py
├── docker
│ ├── ballista-benchmarks.Dockerfile
│ ├── ballista-builder.Dockerfile
│ ├── ballista-cli.Dockerfile
│ ├── ballista-executor.Dockerfile
│ ├── ballista-scheduler.Dockerfile
│ ├── ballista-standalone.Dockerfile
│ ├── builder-entrypoint.sh
│ ├── cli-entrypoint.sh
│ ├── executor-entrypoint.sh
│ ├── nginx.conf
│ ├── scheduler-entrypoint.sh
│ └── standalone-entrypoint.sh
├── integration-tests.sh
├── python_lint.sh
├── release
│ ├── README.md
│ ├── check-rat-report.py
│ ├── crate-deps.dot
│ ├── crate-deps.svg
│ ├── create-tarball.sh
│ ├── download-python-wheels.py
│ ├── rat_exclude_files.txt
│ ├── release-tarball.sh
│ ├── run-rat.sh
│ ├── update_change_log-ballista.sh
│ ├── update_change_log.sh
│ └── verify-release-candidate.sh
├── rust_lint.sh
├── update_arrow_deps.py
├── update_ballista_versions.py
└── update_datafusion_versions.py
├── docs
├── .gitignore
├── Makefile
├── README.md
├── build.sh
├── developer
│ ├── README.md
│ ├── architecture.md
│ └── images
│ │ └── query-execution.png
├── make.bat
├── requirements.txt
├── source
│ ├── _static
│ │ ├── images
│ │ │ └── ballista-logo.png
│ │ └── theme_overrides.css
│ ├── _templates
│ │ ├── docs-sidebar.html
│ │ └── layout.html
│ ├── community
│ │ └── communication.md
│ ├── conf.py
│ ├── index.rst
│ └── user-guide
│ │ ├── cli.md
│ │ ├── configs.md
│ │ ├── deployment
│ │ ├── cargo-install.md
│ │ ├── docker-compose.md
│ │ ├── docker.md
│ │ ├── index.rst
│ │ └── kubernetes.md
│ │ ├── faq.md
│ │ ├── flightsql.md
│ │ ├── images
│ │ ├── ballista-web-ui.png
│ │ └── example-query-plan.png
│ │ ├── introduction.md
│ │ ├── metrics.md
│ │ ├── python.md
│ │ ├── rust.md
│ │ ├── scheduler.md
│ │ └── tuning-guide.md
└── sqlbench-h-workstation-10-distributed-perquery.png
├── examples
├── dataframe-parquet.py
├── run-executor.py
├── run-scheduler.py
└── sql-parquet.py
├── pyproject.toml
├── requirements-310.txt
├── requirements-37.txt
├── requirements.in
├── requirements.txt
└── src
├── context.rs
├── dataframe.rs
├── datatype.rs
├── errors.rs
├── executor.rs
├── expression.rs
├── functions.rs
├── lib.rs
├── scheduler.rs
├── udaf.rs
├── udf.rs
└── utils.rs
/.asf.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | github:
19 | description: Apache Arrow Ballista Python bindings
20 | homepage: https://arrow.apache.org/ballista-python/
21 | labels:
22 | - arrow
23 | - big-data
24 | - dataframe
25 | - distributed
26 | - olap
27 | - python
28 | - query-engine
29 | - rust
30 | - sql
31 | enabled_merge_buttons:
32 | merge: false
33 | rebase: false
34 | squash: true
35 | features:
36 | issues: true
37 |
38 | notifications:
39 | commits: commits@arrow.apache.org
40 | issues_status: issues@arrow.apache.org
41 | issues: github@arrow.apache.org
42 | pullrequests: github@arrow.apache.org
43 |
44 | publish:
45 | whoami: asf-site
46 | subdir: ballista-python
47 |
--------------------------------------------------------------------------------
/.cargo/config:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [target.x86_64-apple-darwin]
19 | rustflags = [
20 | "-C", "link-arg=-undefined",
21 | "-C", "link-arg=dynamic_lookup",
22 | ]
23 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | target
19 | venv
20 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [flake8]
19 | exclude =
20 | venv
21 | dev
22 | docs
23 | ci
24 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 |
16 | **Expected behavior**
17 | A clear and concise description of what you expected to happen.
18 |
19 | **Additional context**
20 | Add any other context about the problem here.
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: enhancement
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem or challenge? Please describe what you are trying to do.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | (This section helps Arrow developers understand the context and *why* for this feature, in addition to the *what*)
13 |
14 | **Describe the solution you'd like**
15 | A clear and concise description of what you want to happen.
16 |
17 | **Describe alternatives you've considered**
18 | A clear and concise description of any alternative solutions or features you've considered.
19 |
20 | **Additional context**
21 | Add any other context or screenshots about the feature request here.
22 |
--------------------------------------------------------------------------------
/.github/actions/setup-builder/action.yaml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Prepare Rust Builder
19 | description: 'Prepare Rust Build Environment'
20 | inputs:
21 | rust-version:
22 | description: 'version of rust to install (e.g. stable)'
23 | required: true
24 | default: 'stable'
25 | runs:
26 | using: "composite"
27 | steps:
28 | - name: Install Build Dependencies
29 | shell: bash
30 | run: |
31 | apt-get update
32 | apt-get install -y protobuf-compiler
33 | - name: Setup Rust toolchain
34 | shell: bash
35 | run: |
36 | echo "Installing ${{ inputs.rust-version }}"
37 | rustup toolchain install ${{ inputs.rust-version }}
38 | rustup default ${{ inputs.rust-version }}
39 | rustup component add rustfmt
40 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | version: 2
19 | updates:
20 | - package-ecosystem: cargo
21 | directory: "/"
22 | schedule:
23 | interval: daily
24 | open-pull-requests-limit: 10
25 | target-branch: main
26 | labels: [auto-dependencies]
27 | ignore:
28 | # major-version updates of arrow, datafusion, and sqlparser are bumped manually
29 | - dependency-name: "arrow*"
30 | update-types: ["version-update:semver-major"]
31 | - dependency-name: "datafusion*"
32 | update-types: ["version-update:semver-major"]
33 | - dependency-name: "sqlparser"
34 | update-types: ["version-update:semver-major"]
35 | - package-ecosystem: "github-actions"
36 | directory: "/"
37 | schedule:
38 | interval: "daily"
39 | open-pull-requests-limit: 10
40 | labels: [auto-dependencies]
41 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # Which issue does this PR close?
2 |
3 |
6 |
7 | Closes #.
8 |
9 | # Rationale for this change
10 |
14 |
15 | # What changes are included in this PR?
16 |
19 |
20 | # Are there any user-facing changes?
21 |
24 |
25 |
28 |
--------------------------------------------------------------------------------
/.github/workflows/cancel.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Cancel stale runs
19 |
20 | on:
21 | workflow_run:
22 | # The name of another workflow (whichever one) that always runs on PRs
23 | workflows: ['Dev']
24 | types: ['requested']
25 |
26 | jobs:
27 | cancel-stale-workflow-runs:
28 | name: "Cancel stale workflow runs"
29 | runs-on: ubuntu-latest
30 | steps:
31 | # Unfortunately, we need to define a separate cancellation step for
32 | # each workflow where we want to cancel stale runs.
33 | - uses: potiuk/cancel-workflow-runs@master
34 | name: "Cancel stale Dev runs"
35 | with:
36 | cancelMode: allDuplicates
37 | token: ${{ secrets.GITHUB_TOKEN }}
38 | workflowFileName: dev.yml
39 | skipEventTypes: '["push", "schedule"]'
40 | - uses: potiuk/cancel-workflow-runs@master
41 | name: "Cancel stale Rust runs"
42 | with:
43 | cancelMode: allDuplicates
44 | token: ${{ secrets.GITHUB_TOKEN }}
45 | workflowFileName: rust.yml
46 | skipEventTypes: '["push", "schedule"]'
47 |
--------------------------------------------------------------------------------
/.github/workflows/dev_pr.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | name: Labeler
19 |
20 | on:
21 | pull_request_target:
22 | types:
23 | - opened
24 | - edited
25 | - synchronize
26 |
27 | jobs:
28 | process:
29 | name: Process
30 | runs-on: ubuntu-latest
31 | steps:
32 | - uses: actions/checkout@v3
33 |
34 | - name: Assign GitHub labels
35 | if: |
36 | github.event_name == 'pull_request_target' &&
37 | (github.event.action == 'opened' ||
38 | github.event.action == 'synchronize')
39 | uses: actions/labeler@4.1.0
40 | with:
41 | repo-token: ${{ secrets.GITHUB_TOKEN }}
42 | configuration-path: .github/workflows/dev_pr/labeler.yml
43 | sync-labels: true
44 |
45 | # TODO: Enable this when eps1lon/actions-label-merge-conflict is available.
46 | # - name: Checks if PR needs rebase
47 | # if: |
48 | # github.event_name == 'push' ||
49 | # (github.event_name == 'pull_request_target' &&
50 | # (github.event.action == 'opened' ||
51 | # github.event.action == 'synchronize'))
52 | # uses: eps1lon/actions-label-merge-conflict@releases/2.x
53 | # with:
54 | # dirtyLabel: "needs-rebase"
55 | # repoToken: "${{ secrets.GITHUB_TOKEN }}"
56 |
--------------------------------------------------------------------------------
/.github/workflows/dev_pr/labeler.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | python:
19 | - python/**/*
20 |
21 | development-process:
22 | - dev/**.*
23 | - .github/**.*
24 | - ci/**.*
25 | - .asf.yaml
26 |
27 | documentation:
28 | - docs/**.*
29 | - README.md
30 | - ./**/README.md
31 | - DEVELOPERS.md
32 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | apache-rat-*.jar
19 | arrow-src.tar
20 | arrow-src.tar.gz
21 | CHANGELOG.md.bak
22 |
23 | # Compiled source
24 | *.a
25 | *.dll
26 | *.o
27 | *.py[ocd]
28 | *.so
29 | *.so.*
30 | *.bundle
31 | *.dylib
32 | .build_cache_dir
33 | dependency-reduced-pom.xml
34 | MANIFEST
35 | compile_commands.json
36 | build.ninja
37 |
38 | # Generated Visual Studio files
39 | *.vcxproj
40 | *.vcxproj.*
41 | *.sln
42 | *.iml
43 |
44 | # Linux perf sample data
45 | perf.data
46 | perf.data.old
47 |
48 | cpp/.idea/
49 | .clangd/
50 | cpp/.clangd/
51 | cpp/apidoc/xml/
52 | docs/example.gz
53 | docs/example1.dat
54 | docs/example3.dat
55 | python/.eggs/
56 | python/doc/
57 | # Egg metadata
58 | *.egg-info
59 |
60 | .vscode
61 | .idea/
62 | .pytest_cache/
63 | pkgs
64 | docker_cache
65 | .gdb_history
66 | *.orig
67 | .*.swp
68 | .*.swo
69 |
70 | site/
71 |
72 | # R files
73 | **/.Rproj.user
74 | **/*.Rcheck/
75 | **/.Rhistory
76 | .Rproj.user
77 |
78 | # macOS
79 | cpp/Brewfile.lock.json
80 | .DS_Store
81 |
82 | # docker volumes used for caching
83 | .docker
84 |
85 | # Rust
86 | target
87 | Cargo.lock
88 | !ballista-cli/Cargo.lock
89 |
90 | rusty-tags.vi
91 | .history
92 | .flatbuffers/
93 |
94 | .vscode
95 | venv/*
96 | .venv
97 |
98 | # apache release artifacts
99 | dev/dist
100 |
101 | # logs
102 | logs/
103 |
104 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Code of Conduct
21 |
22 | - [Code of Conduct for The Apache Software Foundation][1]
23 |
24 | [1]: https://www.apache.org/foundation/policies/conduct.html
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Python Bindings (PyBallista)
21 |
22 | PyBallista is now located within the main Ballista repo [here](https://github.com/apache/arrow-ballista/tree/main/python).
23 |
24 | The original README is [here](OLDREADME.md).
25 |
--------------------------------------------------------------------------------
/ballista/functions.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 |
19 | from ._internal import functions
20 |
21 |
22 | def __getattr__(name):
23 | return getattr(functions, name)
24 |
--------------------------------------------------------------------------------
/ballista/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
--------------------------------------------------------------------------------
/ballista/tests/test_imports.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | import pytest
19 |
20 | import ballista
21 | from ballista import (
22 | AggregateUDF,
23 | BallistaContext,
24 | DataFrame,
25 | Expression,
26 | ScalarUDF,
27 | functions,
28 | )
29 |
30 |
31 | def test_import_ballista():
32 | assert ballista.__name__ == "ballista"
33 |
34 |
35 | def test_class_module_is_ballista():
36 | for klass in [
37 | BallistaContext,
38 | Expression,
39 | DataFrame,
40 | ScalarUDF,
41 | AggregateUDF,
42 | ]:
43 | assert klass.__module__ == "ballista"
44 |
45 |
46 | def test_import_from_functions_submodule():
47 | from ballista.functions import abs, sin # noqa
48 |
49 | assert functions.abs is abs
50 | assert functions.sin is sin
51 |
52 | msg = "cannot import name 'foobar' from 'ballista.functions'"
53 | with pytest.raises(ImportError, match=msg):
54 | from ballista.functions import foobar # noqa
55 |
56 |
57 | def test_classes_are_inheritable():
58 | class MyExecContext(BallistaContext):
59 | pass
60 |
61 | class MyExpression(Expression):
62 | pass
63 |
64 | class MyDataFrame(DataFrame):
65 | pass
66 |
--------------------------------------------------------------------------------
/ci/conda_env_archery.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # cli
19 | click
20 |
21 | # bot, crossbow
22 | github3.py
23 | jinja2
24 | jira
25 | pygit2
26 | pygithub
27 | ruamel.yaml
28 | setuptools_scm
29 | toolz
30 |
31 | # benchmark
32 | pandas
33 |
34 | # docker
35 | python-dotenv
36 | #ruamel.yaml
37 |
38 | # release
39 | gitpython
40 | #jinja2
41 | #jira
42 | semver
43 |
--------------------------------------------------------------------------------
/ci/conda_env_cpp.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | aws-sdk-cpp
19 | benchmark=1.5.2
20 | boost-cpp>=1.68.0
21 | brotli
22 | bzip2
23 | c-ares
24 | cmake
25 | gflags
26 | glog
27 | gmock>=1.10.0
28 | grpc-cpp>=1.27.3
29 | gtest=1.10.0
30 | libprotobuf
31 | libutf8proc
32 | lz4-c
33 | make
34 | ninja
35 | pkg-config
36 | python
37 | rapidjson
38 | re2
39 | snappy
40 | thrift-cpp>=0.11.0
41 | zlib
42 | zstd
43 |
--------------------------------------------------------------------------------
/ci/conda_env_crossbow.txt:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | click
19 | github3.py
20 | jinja2
21 | jira
22 | pygit2
23 | ruamel.yaml
24 | setuptools_scm
25 | toolz
26 |
--------------------------------------------------------------------------------
/ci/conda_env_gandiva.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | clang=11
19 | llvmdev=11
20 |
--------------------------------------------------------------------------------
/ci/conda_env_gandiva_win.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # llvmdev=9 or later requires Visual Studio 2017
19 | clangdev=8
20 | llvmdev=8
21 |
--------------------------------------------------------------------------------
/ci/conda_env_python.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # don't add pandas here, because it is not a mandatory test dependency
19 | boto3 # not a direct dependency of s3fs, but needed for our s3fs fixture
20 | cffi
21 | cython
22 | cloudpickle
23 | fsspec
24 | hypothesis
25 | numpy>=1.16.6
26 | pytest
27 | pytest-faulthandler
28 | pytest-lazy-fixture
29 | pytz
30 | s3fs>=0.4
31 | setuptools
32 | setuptools_scm
33 |
--------------------------------------------------------------------------------
/ci/conda_env_r.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | r-assertthat
19 | r-base
20 | r-bit64
21 | r-dplyr
22 | r-purrr
23 | r-r6
24 | r-cpp11
25 | r-rlang
26 | r-tidyselect
27 | r-vctrs
28 | # Test/"Suggests" dependencies
29 | pandoc
30 | r-covr
31 | r-hms
32 | r-lubridate
33 | r-rcmdcheck
34 | r-reticulate
35 | r-rmarkdown
36 | r-testthat
37 | r-tibble
38 |
--------------------------------------------------------------------------------
/ci/conda_env_sphinx.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # Requirements for building the documentation
19 | breathe
20 | doxygen
21 | ipython
22 | # Pinned per ARROW-9693
23 | sphinx=3.1.2
24 | pydata-sphinx-theme
25 |
--------------------------------------------------------------------------------
/ci/conda_env_unix.yml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # conda package dependencies specific to Unix-like environments (Linux and macOS)
19 |
20 | autoconf
21 | ccache
22 | orc
23 | pkg-config
24 |
--------------------------------------------------------------------------------
/ci/docker/conda-cpp.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG repo
19 | ARG arch
20 | FROM ${repo}:${arch}-conda
21 |
22 | # install the required conda packages into the test environment
23 | COPY ci/conda_env_cpp.yml \
24 | ci/conda_env_gandiva.yml \
25 | /arrow/ci/
26 | RUN conda install \
27 | --file arrow/ci/conda_env_cpp.yml \
28 | --file arrow/ci/conda_env_gandiva.yml \
29 | compilers \
30 | doxygen \
31 | gdb \
32 | valgrind && \
33 | conda clean --all
34 |
35 | ENV ARROW_BUILD_TESTS=ON \
36 | ARROW_DATASET=ON \
37 | ARROW_DEPENDENCY_SOURCE=CONDA \
38 | ARROW_FLIGHT=ON \
39 | ARROW_GANDIVA=ON \
40 | ARROW_HOME=$CONDA_PREFIX \
41 | ARROW_ORC=ON \
42 | ARROW_PARQUET=ON \
43 | ARROW_PLASMA=ON \
44 | ARROW_S3=ON \
45 | ARROW_USE_CCACHE=ON \
46 | ARROW_WITH_BROTLI=ON \
47 | ARROW_WITH_BZ2=ON \
48 | ARROW_WITH_LZ4=ON \
49 | ARROW_WITH_SNAPPY=ON \
50 | ARROW_WITH_ZLIB=ON \
51 | ARROW_WITH_ZSTD=ON \
52 | PARQUET_BUILD_EXAMPLES=ON \
53 | PARQUET_BUILD_EXECUTABLES=ON \
54 | PARQUET_HOME=$CONDA_PREFIX
55 |
--------------------------------------------------------------------------------
/ci/docker/conda-python-dask.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG repo
19 | ARG arch=amd64
20 | ARG python=3.6
21 | FROM ${repo}:${arch}-conda-python-${python}
22 |
23 | ARG dask=latest
24 | COPY ci/scripts/install_dask.sh /arrow/ci/scripts/
25 | RUN /arrow/ci/scripts/install_dask.sh ${dask}
--------------------------------------------------------------------------------
/ci/docker/conda-python-hdfs.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG repo
19 | ARG arch=amd64
20 | ARG python=3.6
21 | FROM ${repo}:${arch}-conda-python-${python}
22 |
23 | ARG jdk=8
24 | ARG maven=3.5
25 | RUN conda install -q \
26 | maven=${maven} \
27 | openjdk=${jdk} \
28 | pandas && \
29 | conda clean --all
30 |
31 | # installing libhdfs (JNI)
32 | ARG hdfs=3.2.1
33 | ENV HADOOP_HOME=/opt/hadoop-${hdfs} \
34 | HADOOP_OPTS=-Djava.library.path=/opt/hadoop-${hdfs}/lib/native \
35 | PATH=$PATH:/opt/hadoop-${hdfs}/bin:/opt/hadoop-${hdfs}/sbin
36 | COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/
37 | RUN /arrow/ci/scripts/util_download_apache.sh \
38 | "hadoop/common/hadoop-${hdfs}/hadoop-${hdfs}.tar.gz" /opt
39 |
40 | COPY ci/etc/hdfs-site.xml $HADOOP_HOME/etc/hadoop/
41 |
42 | # build cpp with tests
43 | ENV CC=gcc \
44 | CXX=g++ \
45 | ARROW_FLIGHT=OFF \
46 | ARROW_GANDIVA=OFF \
47 | ARROW_PLASMA=OFF \
48 | ARROW_PARQUET=ON \
49 | ARROW_ORC=OFF \
50 | ARROW_HDFS=ON \
51 | ARROW_PYTHON=ON \
52 | ARROW_BUILD_TESTS=ON
53 |
--------------------------------------------------------------------------------
/ci/docker/conda-python-jpype.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG repo
19 | ARG arch=amd64
20 | ARG python=3.6
21 | FROM ${repo}:${arch}-conda-python-${python}
22 |
23 | ARG jdk=11
24 | ARG maven=3.6
25 | RUN conda install -q \
26 | maven=${maven} \
27 | openjdk=${jdk} \
28 | jpype1 && \
29 | conda clean --all
30 |
--------------------------------------------------------------------------------
/ci/docker/conda-python-kartothek.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG repo
19 | ARG arch=amd64
20 | ARG python=3.6
21 | FROM ${repo}:${arch}-conda-python-${python}
22 |
23 | # install kartothek dependencies from conda-forge
24 | RUN conda install -c conda-forge -q \
25 | attrs \
26 | click \
27 | cloudpickle \
28 | dask \
29 | decorator \
30 | freezegun \
31 | msgpack-python \
32 | prompt-toolkit \
33 | pytest-mock \
34 | pytest-xdist \
35 | pyyaml \
36 | simplejson \
37 | simplekv \
38 | storefact \
39 | toolz \
40 | urlquote \
41 | zstandard && \
42 | conda clean --all
43 |
44 | ARG kartothek=latest
45 | COPY ci/scripts/install_kartothek.sh /arrow/ci/scripts/
46 | RUN /arrow/ci/scripts/install_kartothek.sh ${kartothek} /kartothek
47 |
--------------------------------------------------------------------------------
/ci/docker/conda-python-pandas.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG repo
19 | ARG arch=amd64
20 | ARG python=3.6
21 | FROM ${repo}:${arch}-conda-python-${python}
22 |
23 | ARG pandas=latest
24 | ARG numpy=latest
25 | COPY ci/scripts/install_pandas.sh /arrow/ci/scripts/
26 | RUN conda uninstall -q -y numpy && \
27 | /arrow/ci/scripts/install_pandas.sh ${pandas} ${numpy}
28 |
--------------------------------------------------------------------------------
/ci/docker/conda-python-spark.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG repo
19 | ARG arch=amd64
20 | ARG python=3.6
21 | FROM ${repo}:${arch}-conda-python-${python}
22 |
23 | ARG jdk=8
24 | ARG maven=3.5
25 |
26 | RUN conda install -q \
27 | openjdk=${jdk} \
28 | maven=${maven} \
29 | pandas && \
30 | conda clean --all
31 |
32 | # installing specific version of spark
33 | ARG spark=master
34 | COPY ci/scripts/install_spark.sh /arrow/ci/scripts/
35 | RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark
36 |
37 | # build cpp with tests
38 | ENV CC=gcc \
39 | CXX=g++ \
40 | ARROW_PYTHON=ON \
41 | ARROW_HDFS=ON \
42 | ARROW_BUILD_TESTS=OFF \
43 | SPARK_VERSION=${spark}
44 |
--------------------------------------------------------------------------------
/ci/docker/conda-python-turbodbc.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG repo
19 | ARG arch=amd64
20 | ARG python=3.6
21 | FROM ${repo}:${arch}-conda-python-${python}
22 |
23 | RUN export DEBIAN_FRONTEND=noninteractive && \
24 | apt-get update -y -q && \
25 | apt-get install -y -q --no-install-recommends \
26 | odbc-postgresql \
27 | postgresql \
28 | sudo && \
29 | apt-get clean && \
30 | rm -rf /var/lib/apt/lists/*
31 |
32 | # install turbodbc dependencies from conda-forge
33 | RUN conda install -c conda-forge -q \
34 | pybind11 \
35 | pytest-cov \
36 | mock \
37 | unixodbc && \
38 | conda clean --all
39 |
40 | RUN service postgresql start && \
41 | sudo -u postgres psql -U postgres -c \
42 | "CREATE DATABASE test_db;" && \
43 | sudo -u postgres psql -U postgres -c \
44 | "ALTER USER postgres WITH PASSWORD 'password';"
45 |
46 | ARG turbodbc=latest
47 | COPY ci/scripts/install_turbodbc.sh /arrow/ci/scripts/
48 | RUN /arrow/ci/scripts/install_turbodbc.sh ${turbodbc} /turbodbc
49 |
50 | ENV TURBODBC_TEST_CONFIGURATION_FILES="query_fixtures_postgresql.json"
51 |
--------------------------------------------------------------------------------
/ci/docker/conda-python.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG repo
19 | ARG arch
20 | FROM ${repo}:${arch}-conda-cpp
21 |
22 | # install python specific packages
23 | ARG python=3.6
24 | COPY ci/conda_env_python.yml /arrow/ci/
25 | RUN conda install -q \
26 | --file arrow/ci/conda_env_python.yml \
27 | $([ "$python" == "3.6" -o "$python" == "3.7" ] && echo "pickle5") \
28 | python=${python} \
29 | nomkl && \
30 | conda clean --all
31 |
32 | ENV ARROW_PYTHON=ON \
33 | ARROW_BUILD_STATIC=OFF \
34 | ARROW_BUILD_TESTS=OFF \
35 | ARROW_BUILD_UTILITIES=OFF \
36 | ARROW_TENSORFLOW=ON \
37 | ARROW_USE_GLOG=OFF
38 |
--------------------------------------------------------------------------------
/ci/docker/conda.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG arch=amd64
19 | FROM ${arch}/ubuntu:18.04
20 |
21 | # arch is unset after the FROM statement, so need to define it again
22 | ARG arch=amd64
23 | ARG prefix=/opt/conda
24 |
25 | # install build essentials
26 | RUN export DEBIAN_FRONTEND=noninteractive && \
27 | apt-get update -y -q && \
28 | apt-get install -y -q wget tzdata libc6-dbg \
29 | && apt-get clean \
30 | && rm -rf /var/lib/apt/lists/*
31 |
32 | ENV PATH=${prefix}/bin:$PATH
33 | # install conda and minio
34 | COPY ci/scripts/install_conda.sh \
35 | ci/scripts/install_minio.sh \
36 | /arrow/ci/scripts/
37 | RUN /arrow/ci/scripts/install_conda.sh ${arch} linux latest ${prefix}
38 | RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest ${prefix}
39 |
40 | # create a conda environment (COPY, not ADD: plain local file, no archive extraction or URL fetch needed)
41 | COPY ci/conda_env_unix.yml /arrow/ci/
42 | RUN conda create -n arrow --file arrow/ci/conda_env_unix.yml git && \
43 | conda clean --all
44 |
45 | # activate the created environment by default
46 | RUN echo "conda activate arrow" >> ~/.profile
47 | ENV CONDA_PREFIX=${prefix}/envs/arrow
48 |
49 | # use login shell to activate arrow environment in the RUN commands
50 | SHELL [ "/bin/bash", "-c", "-l" ]
51 |
52 | # use login shell when running the container
53 | ENTRYPOINT [ "/bin/bash", "-c", "-l" ]
54 |
--------------------------------------------------------------------------------
/ci/docker/debian-10-go.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG arch=amd64
19 | ARG go=1.15
20 | FROM ${arch}/golang:${go}
21 |
22 | # TODO(kszucs):
23 | # 1. add the files required to install the dependencies to .dockerignore
24 | # 2. copy these files to their appropriate path
25 | # 3. download and compile the dependencies
26 |
--------------------------------------------------------------------------------
/ci/docker/debian-10-js.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG arch=amd64
19 | ARG node=14
20 | FROM ${arch}/node:${node}
21 |
22 | ENV NODE_NO_WARNINGS=1
23 |
24 | # TODO(kszucs):
25 | # 1. add the files required to install the dependencies to .dockerignore
26 | # 2. copy these files to their appropriate path
27 | # 3. download and compile the dependencies
28 |
--------------------------------------------------------------------------------
/ci/docker/debian-9-java.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG arch=amd64
19 | ARG jdk=8
20 | ARG maven=3.5.4
21 | FROM ${arch}/maven:${maven}-jdk-${jdk}
22 |
23 | ENV ARROW_JAVA_SHADE_FLATBUFS=ON
24 |
25 | # TODO(kszucs):
26 | # 1. add the files required to install the dependencies to .dockerignore
27 | # 2. copy these files to their appropriate path
28 | # 3. download and compile the dependencies
29 |
--------------------------------------------------------------------------------
/ci/docker/linux-apt-python-3.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # Adds Python 3 and the Python build/test requirements on top of the
19 | # apt-based image passed in through the `base` build argument.
20 | ARG base
21 | FROM ${base}
22 |
23 | RUN apt-get update -y -q && \
24 |     apt-get install -y -q \
25 |         python3 \
26 |         python3-pip \
27 |         python3-dev && \
28 |     apt-get clean && \
29 |     rm -rf /var/lib/apt/lists/*
30 |
31 | # Make the interpreter and pip reachable under their unversioned names.
32 | RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
33 |     ln -s /usr/bin/pip3 /usr/local/bin/pip
34 |
35 | RUN pip install -U pip setuptools
36 |
37 | COPY python/requirements-build.txt \
38 |      python/requirements-test.txt \
39 |      /arrow/python/
40 |
41 | # Absolute paths match the COPY destination whatever WORKDIR the base image sets.
42 | RUN pip install \
43 |     -r /arrow/python/requirements-build.txt \
44 |     -r /arrow/python/requirements-test.txt
45 |
46 | # Last variable carries no trailing backslash: nothing follows it to continue onto.
47 | ENV ARROW_PYTHON=ON \
48 |     ARROW_BUILD_STATIC=OFF \
49 |     ARROW_BUILD_TESTS=OFF \
50 |     ARROW_BUILD_UTILITIES=OFF \
51 |     ARROW_USE_GLOG=OFF
52 |
--------------------------------------------------------------------------------
/ci/docker/linux-apt-ruby.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # depends on a C GLib image
19 | ARG base
20 | FROM ${base}
21 |
22 | COPY ruby/ /arrow/ruby/
23 | RUN bundle install --gemfile /arrow/ruby/Gemfile
24 | RUN \
25 | for package in /arrow/ruby/*; do \
26 | bundle install --gemfile ${package}/Gemfile; \
27 | done
28 |
--------------------------------------------------------------------------------
/ci/docker/linux-dnf-python-3.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # Adds Python 3 and the Python build/test requirements on top of the
19 | # dnf-based image passed in through the `base` build argument.
20 | ARG base
21 | FROM ${base}
22 |
23 | RUN dnf install -y \
24 |         python3 \
25 |         python3-pip \
26 |         python3-devel
27 |
28 | # Make the interpreter and pip reachable under their unversioned names.
29 | RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
30 |     ln -s /usr/bin/pip3 /usr/local/bin/pip
31 |
32 | COPY python/requirements-build.txt \
33 |      python/requirements-test.txt \
34 |      /arrow/python/
35 |
36 | # Absolute paths match the COPY destination whatever WORKDIR the base image sets.
37 | RUN pip install \
38 |     -r /arrow/python/requirements-build.txt \
39 |     -r /arrow/python/requirements-test.txt
40 |
41 | # Last variable carries no trailing backslash: nothing follows it to continue onto.
42 | ENV ARROW_PYTHON=ON \
43 |     ARROW_BUILD_STATIC=OFF \
44 |     ARROW_BUILD_TESTS=OFF \
45 |     ARROW_BUILD_UTILITIES=OFF \
46 |     ARROW_USE_GLOG=OFF
47 |
--------------------------------------------------------------------------------
/ci/docker/linux-r.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | # General purpose Dockerfile to take a Docker image containing R
19 | # and install Arrow R package dependencies
20 |
21 | ARG base
22 | FROM ${base}
23 |
24 | ARG r_bin=R
25 | ENV R_BIN=${r_bin}
26 |
27 | ARG r_dev=FALSE
28 | ENV ARROW_R_DEV=${r_dev}
29 |
30 | ARG devtoolset_version=-1
31 | ENV DEVTOOLSET_VERSION=${devtoolset_version}
32 |
33 | # Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its Dockerfile)
34 | ENV PATH "${RPREFIX}/bin:${PATH}"
35 |
36 | # Patch up some of the docker images
37 | COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/
38 | COPY ci/etc/rprofile /arrow/ci/etc/
39 | COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
40 | RUN /arrow/ci/scripts/r_docker_configure.sh
41 |
42 | COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
43 | COPY r/DESCRIPTION /arrow/r/
44 | RUN /arrow/ci/scripts/r_deps.sh /arrow
45 |
--------------------------------------------------------------------------------
/ci/docker/python-sdist.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM amd64/ubuntu:20.04
19 |
20 | SHELL ["/bin/bash", "-o", "pipefail", "-c"]
21 |
22 | RUN echo "debconf debconf/frontend select Noninteractive" | \
23 | debconf-set-selections
24 |
25 | RUN apt-get update -y -q && \
26 | apt-get install -y -q --no-install-recommends \
27 | git \
28 | python3-pip && \
29 | apt-get clean && \
30 | rm -rf /var/lib/apt/lists*
31 |
32 | COPY python/requirements-build.txt \
33 | /arrow/python/requirements-build.txt
34 | RUN pip3 install --requirement /arrow/python/requirements-build.txt
35 |
36 | ENV PYTHON=/usr/bin/python3
37 |
--------------------------------------------------------------------------------
/ci/docker/python-wheel-manylinux-test.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG arch
19 | ARG python
20 | FROM ${arch}/python:${python}
21 |
22 | # RUN pip install --upgrade pip
23 |
24 | # pandas doesn't provide wheel for aarch64 yet, so cache the compiled
25 | # test dependencies in a docker image
26 | COPY python/requirements-wheel-test.txt /arrow/python/
27 | RUN pip install -r /arrow/python/requirements-wheel-test.txt
28 |
--------------------------------------------------------------------------------
/ci/docker/ubuntu-18.04-csharp.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | ARG platform=bionic
19 | ARG dotnet=3.1
20 | FROM mcr.microsoft.com/dotnet/core/sdk:${dotnet}-${platform}
21 |
22 | RUN dotnet tool install --tool-path /usr/local/bin sourcelink
23 |
--------------------------------------------------------------------------------
/ci/etc/hdfs-site.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
16 |
17 |
18 |
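19 | <configuration>
20 |   <property>
21 |     <name>dfs.replication</name>
22 |     <value>2</value>
23 |   </property>
24 |   <property>
25 |     <name>dfs.datanode.data.dir</name>
26 |     <value>file:///data/dfs/data</value>
27 |   </property>
28 |   <property>
29 |     <name>dfs.namenode.name.dir</name>
30 |     <value>file:///data/dfs/name</value>
31 |   </property>
32 |   <property>
33 |     <name>dfs.namenode.checkpoint.dir</name>
34 |     <value>file:///data/dfs/namesecondary</value>
35 |   </property>
36 |   <property>
37 |     <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
38 |     <value>false</value>
39 |   </property>
40 |   <property>
41 |     <name>dfs.default.replica</name>
42 |     <value>1</value>
43 |   </property>
44 |   <property>
45 |     <name>dfs.support.append</name>
46 |     <value>true</value>
47 |   </property>
48 |   <property>
49 |     <name>dfs.client.block.write.replace-datanode-on-failure.enable</name>
50 |     <value>false</value>
51 |   </property>
52 | </configuration>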
53 |
--------------------------------------------------------------------------------
/ci/etc/rprofile:
--------------------------------------------------------------------------------
1 | local({
2 | .pick_cran <- function() {
3 | # Return a CRAN repo URL, preferring RSPM binaries if available for this OS
4 | rspm_template <- "https://packagemanager.rstudio.com/cran/__linux__/%s/latest"
5 | supported_os <- c("focal", "xenial", "bionic", "centos7", "centos8", "opensuse42", "opensuse15", "opensuse152")
6 |
7 | if (nzchar(Sys.which("lsb_release"))) {
8 | os <- tolower(system("lsb_release -cs", intern = TRUE))
9 | if (os %in% supported_os) {
10 | return(sprintf(rspm_template, os))
11 | }
12 | }
13 | if (file.exists("/etc/os-release")) {
14 | os_release <- readLines("/etc/os-release")
15 | vals <- sub("^.*=(.*)$", "\\1", os_release)
16 | os <- intersect(vals, supported_os)
17 | if (length(os)) {
18 | # e.g. "bionic"
19 | return(sprintf(rspm_template, os))
20 | } else {
21 | names(vals) <- sub("^(.*)=.*$", "\\1", os_release)
22 | if (vals["ID"] == "opensuse") {
23 | version <- sub('^"?([0-9]+).*"?.*$', "\\1", vals["VERSION_ID"])
24 | os <- paste0("opensuse", version)
25 | if (os %in% supported_os) {
26 | return(sprintf(rspm_template, os))
27 | }
28 | }
29 | }
30 | }
31 | if (file.exists("/etc/system-release")) {
32 | # Something like "CentOS Linux release 7.7.1908 (Core)"
33 | system_release <- tolower(utils::head(readLines("/etc/system-release"), 1))
34 | # Extract from that the distro and the major version number
35 | os <- sub("^([a-z]+) .* ([0-9]+).*$", "\\1\\2", system_release)
36 | if (os %in% supported_os) {
37 | return(sprintf(rspm_template, os))
38 | }
39 | }
40 |
41 | return("https://cloud.r-project.org")
42 | }
43 |
44 | options(
45 | Ncpus = parallel::detectCores(),
46 | repos = tryCatch(.pick_cran(), error = function(e) "https://cloud.r-project.org"),
47 | HTTPUserAgent = sprintf(
48 | 'R/%s R (%s)',
49 | getRversion(),
50 | paste(getRversion(), R.version$platform, R.version$arch, R.version$os)
51 | )
52 | )
53 | })
54 |
--------------------------------------------------------------------------------
/ci/scripts/c_glib_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 |
22 | source_dir=${1}/c_glib
23 | build_dir=${2}/c_glib
24 | : ${ARROW_GLIB_GTK_DOC:=false}
25 | : ${ARROW_GLIB_DEVELOPMENT_MODE:=false}
26 |
27 | export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
28 |
29 | export CFLAGS="-DARROW_NO_DEPRECATED_API"
30 | export CXXFLAGS="-DARROW_NO_DEPRECATED_API"
31 |
32 | mkdir -p ${build_dir}
33 |
34 | # Build with Meson
35 | meson --prefix=$ARROW_HOME \
36 | --libdir=lib \
37 | -Ddevelopment_mode=${ARROW_GLIB_DEVELOPMENT_MODE} \
38 | -Dgtk_doc=${ARROW_GLIB_GTK_DOC} \
39 | ${build_dir} \
40 | ${source_dir}
41 |
42 | pushd ${build_dir}
43 | ninja
44 | ninja install
45 | popd
46 |
--------------------------------------------------------------------------------
/ci/scripts/c_glib_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 |
22 | source_dir=${1}/c_glib
23 | build_dir=${2}/c_glib
24 |
25 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
26 | export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
27 | export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0
28 |
29 | pushd ${source_dir}
30 |
31 | ruby test/run-test.rb
32 |
33 | if [[ "$(uname -s)" == "Linux" ]]; then
34 | # TODO(kszucs): on osx it fails to load 'lgi.corelgilua51' despite that lgi
35 | # was installed by luarocks
36 | pushd example/lua
37 | lua write-batch.lua
38 | lua read-batch.lua
39 | lua write-stream.lua
40 | lua read-stream.lua
41 | popd
42 | fi
43 |
44 | popd
45 |
46 | pushd ${build_dir}
47 | example/extension-type
48 | popd
49 |
--------------------------------------------------------------------------------
/ci/scripts/ccache_setup.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -eux
21 |
22 | echo "ARROW_USE_CCACHE=ON" >> $GITHUB_ENV
23 | echo "CCACHE_COMPILERCHECK=content" >> $GITHUB_ENV
24 | echo "CCACHE_COMPRESS=1" >> $GITHUB_ENV
25 | echo "CCACHE_COMPRESSLEVEL=6" >> $GITHUB_ENV
26 | echo "CCACHE_MAXSIZE=500M" >> $GITHUB_ENV
27 |
--------------------------------------------------------------------------------
/ci/scripts/csharp_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 |
22 | source_dir=${1}/csharp
23 |
24 | pushd ${source_dir}
25 | dotnet build
26 | popd
27 |
--------------------------------------------------------------------------------
/ci/scripts/csharp_pack.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -eux
21 |
22 | source_dir=${1}/csharp
23 |
24 | pushd ${source_dir}
25 | dotnet pack -c Release
26 | popd
27 |
--------------------------------------------------------------------------------
/ci/scripts/csharp_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 |
22 | source_dir=${1}/csharp
23 |
24 | pushd ${source_dir}
25 | dotnet test
26 | for pdb in artifacts/Apache.Arrow/*/*/Apache.Arrow.pdb; do
27 | sourcelink test ${pdb}
28 | done
29 | popd
30 |
--------------------------------------------------------------------------------
/ci/scripts/docs_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one or more
4 | # contributor license agreements. See the NOTICE file distributed with
5 | # this work for additional information regarding copyright ownership.
6 | # The ASF licenses this file to You under the Apache License, Version 2.0
7 | # (the "License"); you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | set -ex
20 |
21 | arrow_dir=${1}
22 | build_dir=${2}/docs
23 |
24 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
25 | export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig:${PKG_CONFIG_PATH}
26 | export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0
27 | export CFLAGS="-DARROW_NO_DEPRECATED_API"
28 | export CXXFLAGS="-DARROW_NO_DEPRECATED_API"
29 |
30 | ncpus=$(python3 -c "import os; print(os.cpu_count())")
31 |
32 | # Sphinx docs
33 | sphinx-build -b html -j ${ncpus} ${arrow_dir}/docs/source ${build_dir}
34 |
35 | # C++ - original doxygen
36 | # rsync -a ${arrow_dir}/cpp/apidoc/ ${build_dir}/cpp
37 |
38 | # R
39 | rsync -a ${arrow_dir}/r/docs/ ${build_dir}/r
40 |
41 | # C GLib
42 | rsync -a ${ARROW_HOME}/share/gtk-doc/html/ ${build_dir}/c_glib
43 |
44 | # Java
45 | rsync -a ${arrow_dir}/java/target/site/apidocs/ ${build_dir}/java/reference
46 |
47 | # Javascript
48 | rsync -a ${arrow_dir}/js/doc/ ${build_dir}/js
49 |
--------------------------------------------------------------------------------
/ci/scripts/go_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 |
22 | source_dir=${1}/go
23 |
24 | pushd ${source_dir}/arrow
25 |
26 | go get -d -t -v ./...
27 | go install -v ./...
28 |
29 | popd
30 |
31 | pushd ${source_dir}/parquet
32 |
33 | go get -d -t -v ./...
34 | go install -v ./...
35 |
36 | popd
37 |
--------------------------------------------------------------------------------
/ci/scripts/go_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 |
22 | source_dir=${1}/go
23 |
24 | pushd ${source_dir}/arrow
25 |
26 | for d in $(go list ./... | grep -v vendor); do
27 | go test $d
28 | done
29 |
30 | popd
31 |
32 | pushd ${source_dir}/parquet
33 |
34 | for d in $(go list ./... | grep -v vendor); do
35 | go test $d
36 | done
37 |
38 | popd
39 |
--------------------------------------------------------------------------------
/ci/scripts/install_conda.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | # Architecture names accepted on the command line, mapped to the architecture
23 | # component of the Miniconda installer file name.
24 | declare -A archs
25 | archs=([amd64]=x86_64
26 |        [arm32v7]=armv7l
27 |        [ppc64le]=ppc64le
28 |        [i386]=x86)
29 |
30 | # Platform names accepted on the command line, mapped to the platform
31 | # component of the Miniconda installer file name.
32 | declare -A platforms
33 | platforms=([windows]=Windows
34 |            [macos]=MacOSX
35 |            [linux]=Linux)
36 |
37 | # All four positional arguments are required; the first two must be known keys.
38 | if [ "$#" -ne 4 ]; then
39 |   echo "Usage: $0 <architecture> <platform> <version> <prefix>"
40 |   exit 1
41 | elif [[ -z ${archs[$1]} ]]; then
42 |   echo "Unexpected architecture: ${1}"
43 |   exit 1
44 | elif [[ -z ${platforms[$2]} ]]; then
45 |   echo "Unexpected platform: ${2}"
46 |   exit 1
47 | fi
48 |
49 | arch=${archs[$1]}
50 | platform=${platforms[$2]}
51 | version=$3
52 | prefix=$4
53 |
54 | echo "Downloading Miniconda installer..."
55 | wget -nv "https://repo.continuum.io/miniconda/Miniconda3-${version}-${platform}-${arch}.sh" -O /tmp/miniconda.sh
56 | # -b: batch (non-interactive) install, -p: installation prefix
57 | bash /tmp/miniconda.sh -b -p "${prefix}"
58 | rm /tmp/miniconda.sh
59 |
60 | # Like "conda init", but for POSIX sh rather than bash
61 | ln -s "${prefix}/etc/profile.d/conda.sh" /etc/profile.d/conda.sh
62 |
63 | # Configure
64 | source /etc/profile.d/conda.sh
65 | conda config --add channels conda-forge
66 | conda config --set channel_priority strict
67 | conda config --set show_channel_urls True
68 | conda config --set remote_connect_timeout_secs 12
69 |
70 | # Update and clean
71 | conda update --all -y
72 | conda clean --all -y
73 |
--------------------------------------------------------------------------------
/ci/scripts/install_dask.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | # Exactly one argument is expected: the dask version selector.
23 | if [ "$#" -ne 1 ]; then
24 |   echo "Usage: $0 <dask version>"
25 |   exit 1
26 | fi
27 |
28 | dask=$1
29 |
30 | # "master" installs the main-branch tarball from GitHub with pip, "latest"
31 | # installs whatever version conda resolves, and any other value is passed
32 | # to conda as an exact version.
33 | if [ "${dask}" = "master" ]; then
34 |   # Quoted so the shell never treats "[dataframe]" as a glob pattern.
35 |   pip install "https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe]"
36 | elif [ "${dask}" = "latest" ]; then
37 |   conda install -q dask
38 | else
39 |   conda install -q "dask=${dask}"
40 | fi
41 | conda clean --all
42 |
--------------------------------------------------------------------------------
/ci/scripts/install_iwyu.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one or more
4 | # contributor license agreements. See the NOTICE file distributed with
5 | # this work for additional information regarding copyright ownership.
6 | # The ASF licenses this file to You under the Apache License, Version 2.0
7 | # (the "License"); you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | set -eu
20 |
21 | source_dir=${1:-/tmp/iwyu}
22 | install_prefix=${2:-/usr/local}
23 | clang_tools_version=${3:-8}
24 |
25 | iwyu_branch_name="clang_${clang_tools_version}"
26 | if [ ${clang_tools_version} -lt 10 ]; then
27 | iwyu_branch_name="${iwyu_branch_name}.0"
28 | fi
29 |
30 | git clone --single-branch --branch ${iwyu_branch_name} \
31 | https://github.com/include-what-you-use/include-what-you-use.git ${source_dir}
32 |
33 | mkdir -p ${source_dir}/build
34 | pushd ${source_dir}/build
35 |
36 | # Build IWYU for current Clang
37 | export CC=clang-${clang_tools_version}
38 | export CXX=clang++-${clang_tools_version}
39 |
40 | cmake -DCMAKE_PREFIX_PATH=/usr/lib/llvm-${clang_tools_version} \
41 | -DCMAKE_INSTALL_PREFIX=${install_prefix} \
42 | ${source_dir}
43 | make -j4
44 | make install
45 |
46 | popd
47 |
48 | rm -rf ${source_dir}
49 |
--------------------------------------------------------------------------------
/ci/scripts/install_kartothek.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | # Clone kartothek into <target directory>, check out the requested revision and pip-install it.
22 | if [ "$#" -ne 2 ]; then
23 |   echo "Usage: $0 <kartothek version> <target directory>"
24 |   exit 1
25 | fi
26 | # <kartothek version>: "master", "latest" (nearest tag of the clone) or any git revision.
27 | kartothek=$1
28 | target=$2
29 |
30 | git clone --recurse-submodules https://github.com/JDASoftwareGroup/kartothek "${target}"
31 | if [ "${kartothek}" = "master" ]; then
32 |   git -C "${target}" checkout master
33 | elif [ "${kartothek}" = "latest" ]; then
34 |   git -C "${target}" checkout "$(git -C "${target}" describe --tags --abbrev=0)"
35 | else
36 |   git -C "${target}" checkout "${kartothek}"
37 | fi
38 | # Install from the checked-out tree; --no-deps leaves dependency installation to the caller.
39 | pushd "${target}"
40 | pip install --no-deps .
41 | popd
42 |
--------------------------------------------------------------------------------
/ci/scripts/install_minio.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | # Download the minio server binary for <architecture>/<platform> into <prefix>/bin.
22 | declare -A archs
23 | archs=([amd64]=amd64
24 |        [arm64v8]=arm64
25 |        [arm32v7]=arm
26 |        [s390x]=s390x)
27 |
28 | declare -A platforms
29 | platforms=([linux]=linux
30 |            [macos]=darwin)
31 | # Check the argument count first: $1 and $2 are used as array keys right below.
32 | if [ "$#" -ne 4 ]; then
33 |   echo "Usage: $0 <architecture> <platform> <version> <prefix>"
34 |   exit 1
35 | fi
36 | # Map the caller's names onto the ones used in the download URL; unknown names map to "".
37 | arch=${archs[$1]}
38 | platform=${platforms[$2]}
39 | version=$3
40 | prefix=$4
41 | if [[ -z ${arch} ]]; then
42 |   echo "Unexpected architecture: ${1}"
43 |   exit 1
44 | elif [[ -z ${platform} ]]; then
45 |   echo "Unexpected platform: ${2}"
46 |   exit 1
47 | elif [[ ${version} != "latest" ]]; then
48 |   echo "Cannot fetch specific versions of minio, only latest is supported."
49 |   exit 1
50 | fi
51 | wget -nv -P "${prefix}/bin" "https://dl.min.io/server/minio/release/${platform}-${arch}/minio"
52 | chmod +x "${prefix}/bin/minio"
53 |
--------------------------------------------------------------------------------
/ci/scripts/install_osx_sdk.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 | # Unless homebrew is in use, make sure the MacOSX 10.9 SDK directory exists, downloading it if needed.
22 | if [ ${using_homebrew} != "yes" ]; then  # NOTE(review): unquoted; the test is malformed if the variable is unset - confirm callers always set it
23 |   export MACOSX_DEPLOYMENT_TARGET="10.9"
24 |   export CONDA_BUILD_SYSROOT="$(xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk"
25 |   # Fetch the SDK when its directory is missing or OSX_FORCE_SDK_DOWNLOAD is "1".
26 |   if [[ ! -d ${CONDA_BUILD_SYSROOT} || "$OSX_FORCE_SDK_DOWNLOAD" == "1" ]]; then
27 |     echo "downloading ${MACOSX_DEPLOYMENT_TARGET} sdk"
28 |     curl -L -O https://github.com/phracker/MacOSX-SDKs/releases/download/10.13/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz
29 |     tar -xf MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz -C "$(dirname "$CONDA_BUILD_SYSROOT")"
30 |     # set minimum sdk version to our target
31 |     plutil -replace MinimumSDKVersion -string ${MACOSX_DEPLOYMENT_TARGET} $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist
32 |     plutil -replace DTSDKName -string macosx${MACOSX_DEPLOYMENT_TARGET}internal $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist
33 |   fi
34 |   # Abort if the SDK directory is still absent after the optional download.
35 |   if [ -d "${CONDA_BUILD_SYSROOT}" ]; then
36 |     echo "Found CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}"
37 |   else
38 |     echo "Missing CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}"
39 |     exit 1
40 |   fi
41 | fi
42 |
--------------------------------------------------------------------------------
/ci/scripts/install_pandas.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | # Install numpy and then pandas with pip at the requested versions.
22 | if [ "$#" -lt 1 ]; then
23 |   echo "Usage: $0 <pandas version> <optional numpy version = latest>"
24 |   exit 1
25 | fi
26 | # A version is a keyword ("latest", "nightly", for pandas also "master") or an exact pip version.
27 | pandas=$1
28 | numpy=${2:-"latest"}
29 |
30 | if [ "${numpy}" = "nightly" ]; then
31 |   pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy
32 | elif [ "${numpy}" = "latest" ]; then
33 |   pip install numpy
34 | else
35 |   pip install "numpy==${numpy}"
36 | fi
37 | # numpy is installed before pandas; the "master" build below runs with --no-build-isolation.
38 | if [ "${pandas}" = "master" ]; then
39 |   pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation
40 | elif [ "${pandas}" = "nightly" ]; then
41 |   pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas
42 | elif [ "${pandas}" = "latest" ]; then
43 |   pip install pandas
44 | else
45 |   pip install "pandas==${pandas}"
46 | fi
47 |
--------------------------------------------------------------------------------
/ci/scripts/install_spark.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | # Clone apache/spark into <target directory> and check out <spark version>.
22 | if [ "$#" -ne 2 ]; then
23 |   echo "Usage: $0 <spark version> <target directory>"
24 |   exit 1
25 | fi
26 | # <spark version> is handed to `git checkout` unchanged, so any revision git accepts works.
27 | spark=$1
28 | target=$2
29 |
30 | git clone https://github.com/apache/spark "${target}"
31 | git -C "${target}" checkout "${spark}"
32 |
--------------------------------------------------------------------------------
/ci/scripts/install_turbodbc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | # Clone turbodbc (with submodules) into <target directory> and check out the requested revision.
22 | if [ "$#" -ne 2 ]; then
23 |   echo "Usage: $0 <turbodbc version> <target directory>"
24 |   exit 1
25 | fi
26 | # <turbodbc version>: "master", "latest" (nearest tag of the clone) or any git revision.
27 | turbodbc=$1
28 | target=$2
29 |
30 | git clone --recurse-submodules https://github.com/blue-yonder/turbodbc "${target}"
31 | if [ "${turbodbc}" = "master" ]; then
32 |   git -C "${target}" checkout master
33 | elif [ "${turbodbc}" = "latest" ]; then
34 |   git -C "${target}" checkout "$(git -C "${target}" describe --tags --abbrev=0)"
35 | else
36 |   git -C "${target}" checkout "${turbodbc}"
37 | fi
38 |
--------------------------------------------------------------------------------
/ci/scripts/integration_arrow.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 |
22 | arrow_dir="$1"
23 | source_dir="$1/cpp"
24 | build_dir="$2/cpp"
25 | # Base directory of the gold files handed to --gold-dirs below.
26 | gold_dir="${arrow_dir}/testing/data/arrow-ipc-stream/integration"
27 | # archery provides the integration command; install it from the source tree.
28 | pip install -e ${arrow_dir}/dev/archery
29 |
30 | archery integration --with-all --run-flight \
31 |   --gold-dirs=${gold_dir}/0.14.1 \
32 |   --gold-dirs=${gold_dir}/0.17.1 \
33 |   --gold-dirs=${gold_dir}/1.0.0-bigendian \
34 |   --gold-dirs=${gold_dir}/1.0.0-littleendian \
35 |   --gold-dirs=${gold_dir}/2.0.0-compression \
36 |   --gold-dirs=${gold_dir}/4.0.0-shareddict
37 |
--------------------------------------------------------------------------------
/ci/scripts/integration_dask.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | # check that optional pyarrow modules are available
23 | # because pytest would just skip the dask tests
24 | python -c "import pyarrow.orc"
25 | python -c "import pyarrow.parquet"
26 |
27 | # check that dask.dataframe is correctly installed
28 | python -c "import dask.dataframe"
29 |
30 | # TODO(kszucs): the following tests are also uses pyarrow
31 | # pytest -sv --pyargs dask.bytes.tests.test_s3
32 | # pytest -sv --pyargs dask.bytes.tests.test_hdfs
33 | # pytest -sv --pyargs dask.bytes.tests.test_local
34 |
35 | # skip failing pickle test, see https://github.com/dask/dask/issues/6374
36 | pytest -v --pyargs dask.dataframe.tests.test_dataframe -k "not test_dataframe_picklable"
37 | pytest -v --pyargs dask.dataframe.io.tests.test_orc
38 | # skip failing parquet tests, see https://github.com/dask/dask/issues/6243
39 | # test_illegal_column_name can be removed once next dask release is out
40 | # (https://github.com/dask/dask/pull/6378)
41 | pytest -v --pyargs dask.dataframe.io.tests.test_parquet \
42 | -k "not test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_fails_by_default and not test_timeseries_nulls_in_schema and not test_illegal_column_name"
43 |
--------------------------------------------------------------------------------
/ci/scripts/integration_hdfs.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | source_dir=${1}/cpp
23 | build_dir=${2}/cpp
24 |
25 | # Hadoop-related environment derived from HADOOP_HOME and CONDA_PREFIX.
26 | export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath --glob)
27 | export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
28 | export LIBHDFS3_CONF=$HADOOP_CONF_DIR/hdfs-site.xml
29 | export ARROW_LIBHDFS3_DIR=$CONDA_PREFIX/lib
30 |
31 | # Saved so the helpers below can switch between the two settings.
32 | libhdfs_dir=$HADOOP_HOME/lib/native
33 | hadoop_home=$HADOOP_HOME
34 |
35 | # Export HADOOP_HOME and clear ARROW_LIBHDFS_DIR.
36 | use_hadoop_home() {
37 |   unset ARROW_LIBHDFS_DIR
38 |   export HADOOP_HOME=$hadoop_home
39 | }
40 |
41 | # Export ARROW_LIBHDFS_DIR and clear HADOOP_HOME.
42 | use_libhdfs_dir() {
43 |   unset HADOOP_HOME
44 |   export ARROW_LIBHDFS_DIR=$libhdfs_dir
45 | }
46 |
47 | # Run the two C++ HDFS test binaries from the current directory.
48 | run_cpp_tests() {
49 |   debug/arrow-io-hdfs-test
50 |   debug/arrow-hdfs-test
51 | }
52 |
53 | # Run the two pyarrow filesystem test modules.
54 | run_python_tests() {
55 |   pytest -vs --pyargs pyarrow.tests.test_fs
56 |   pytest -vs --pyargs pyarrow.tests.test_hdfs
57 | }
58 |
59 | # execute cpp tests: once with HADOOP_HOME set, once with ARROW_LIBHDFS_DIR set
60 | export ARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON
61 | pushd ${build_dir}
62 | run_cpp_tests
63 | use_libhdfs_dir
64 | run_cpp_tests
65 | use_hadoop_home
66 | popd
67 |
68 | # cannot use --pyargs with custom arguments like --hdfs or --only-hdfs, because
69 | # pytest ignores them, see https://github.com/pytest-dev/pytest/issues/3517
70 | export PYARROW_TEST_HDFS=ON
71 | export PYARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON
72 |
73 | # execute python tests under the same two settings
74 | run_python_tests
75 | use_libhdfs_dir
76 | run_python_tests
77 | use_hadoop_home
78 |
--------------------------------------------------------------------------------
/ci/scripts/integration_hiveserver2.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one or more
4 | # contributor license agreements. See the NOTICE file distributed with
5 | # this work for additional information regarding copyright ownership.
6 | # The ASF licenses this file to You under the Apache License, Version 2.0
7 | # (the "License"); you may not use this file except in compliance with
8 | # the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | set -e
20 |
21 | arrow_dir="$1"
22 | source_dir="$1/cpp"
23 | build_dir="$2/cpp"
24 | wait_for_it=${arrow_dir}/ci/scripts/util_wait_for_it.sh
25 | # Hold off until impala:21050 responds (-t 300 is presumably a timeout in seconds).
26 | ${wait_for_it} impala:21050 -t 300 -s -- echo "impala is up"
27 |
28 | pushd ${build_dir}
29 | # ninja hiveserver2-test
30 | debug/hiveserver2-test
31 |
32 | popd
33 |
--------------------------------------------------------------------------------
/ci/scripts/integration_kartothek.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | # check that optional pyarrow modules are available
23 | # because pytest would just skip the pyarrow tests
24 | python -c "import pyarrow.parquet"
25 |
26 | # check that kartothek is correctly installed
27 | python -c "import kartothek"
28 |
29 | # Run the kartothek test suite from its checkout at /kartothek.
30 | pushd /kartothek
31 | # See ARROW-12314, test_load_dataframes_columns_raises_missing skipped because of changed error message
32 | pytest -n0 --ignore tests/cli/test_query.py -k "not test_load_dataframes_columns_raises_missing"
33 | # Balance the pushd above, as the sibling integration scripts do.
34 | popd
35 |
--------------------------------------------------------------------------------
/ci/scripts/integration_turbodbc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | # Build turbodbc from <source dir> ($1) into <build root>/turbodbc ($2) and run its ctest suite.
22 | source_dir=${1}
23 | build_dir=${2}/turbodbc
24 |
25 | # check that optional pyarrow modules are available
26 | # because pytest would just skip the pyarrow tests
27 | python -c "import pyarrow.orc"
28 | python -c "import pyarrow.parquet"
29 |
30 | mkdir -p "${build_dir}"
31 | pushd "${build_dir}"
32 | # Quoted so that a CXXFLAGS value holding several flags stays a single -D argument.
33 | cmake -DCMAKE_INSTALL_PREFIX="${ARROW_HOME}" \
34 |       -DCMAKE_CXX_FLAGS="${CXXFLAGS}" \
35 |       -DPYTHON_EXECUTABLE="$(which python)" \
36 |       -GNinja \
37 |       "${source_dir}"
38 | ninja install
39 |
40 | # TODO(ARROW-5074)
41 | export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH}"
42 | export ODBCSYSINI="${source_dir}/travis/odbc/"
43 | # postgresql is started before ctest runs.
44 | service postgresql start
45 | ctest --output-on-failure
46 |
47 | popd
48 |
--------------------------------------------------------------------------------
/ci/scripts/java_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 |
19 | set -ex
20 |
21 | arrow_dir="$1"
22 | source_dir="$1/java"
23 | cpp_build_dir="$2/cpp/${ARROW_BUILD_TYPE:-debug}"
24 |
25 | # For JNI and Plasma tests
26 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
27 | export PLASMA_STORE=${ARROW_HOME}/bin/plasma-store-server
28 |
29 | # Maven command line: batch mode (-B), transfer listener logging at warn level.
30 | mvn="mvn -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
31 | mvn+=" -T 2C"  # use `2 * ncores` threads
32 |
33 | pushd ${source_dir}
34 | # ${mvn} is left unquoted on purpose: it must split into the command and its options.
35 | ${mvn} test
36 |
37 | if [[ ${ARROW_JNI} == "ON" ]]; then
38 |   ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${cpp_build_dir}
39 | fi
40 |
41 | if [[ ${ARROW_PLASMA} == "ON" ]]; then
42 |   pushd ${source_dir}/plasma
43 |   java -cp target/test-classes:target/classes \
44 |        -Djava.library.path=${cpp_build_dir} \
45 |        org.apache.arrow.plasma.PlasmaClientTest
46 |   popd
47 | fi
48 |
49 | popd
50 |
--------------------------------------------------------------------------------
/ci/scripts/js_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 |
22 | js_dir="$1/js"
23 | build_docs="${2:-false}"
24 |
25 | pushd ${js_dir}
26 | # Install dependencies with the lockfile frozen, then lint and build.
27 | yarn --frozen-lockfile
28 | # TODO(kszucs): linting should be moved to archery
29 | yarn lint:ci
30 | yarn build
31 | # Docs are generated only when the second argument is "true".
32 | if [[ ${build_docs} == "true" ]]; then
33 |   yarn doc
34 | fi
35 |
36 | popd
37 |
--------------------------------------------------------------------------------
/ci/scripts/js_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 |
22 | js_dir="$1/js"
23 | # Lint and test from inside the js directory of the tree given as $1.
24 | pushd ${js_dir}
25 |
26 | yarn lint
27 | yarn test
28 |
29 | popd
30 |
--------------------------------------------------------------------------------
/ci/scripts/msys2_system_clean.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -eux
21 |
22 | pacman \
23 | --cascade \
24 | --noconfirm \
25 | --nosave \
26 | --recursive \
27 | --remove \
28 | ${MINGW_PACKAGE_PREFIX}-clang-tools-extra \
29 | ${MINGW_PACKAGE_PREFIX}-gcc-ada \
30 | ${MINGW_PACKAGE_PREFIX}-gcc-fortran \
31 | ${MINGW_PACKAGE_PREFIX}-gcc-libgfortran \
32 | ${MINGW_PACKAGE_PREFIX}-gcc-objc \
33 | ${MINGW_PACKAGE_PREFIX}-libgccjit
34 |
--------------------------------------------------------------------------------
/ci/scripts/msys2_system_upgrade.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -eux
21 | # Install the pinned MSYS2 keyring package, then upgrade the whole system.
22 | # https://www.msys2.org/news/#2020-06-29-new-packagers
23 | msys2_repo_base_url=https://repo.msys2.org/msys
24 | # Mirror (this assignment overrides the URL above)
25 | msys2_repo_base_url=https://sourceforge.net/projects/msys2/files/REPOS/MSYS2
26 | msys2_keyring_pkg=msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz
27 | for suffix in "" ".sig"; do
28 |   curl \
29 |     --fail \
30 |     --location \
31 |     --remote-name \
32 |     --show-error \
33 |     --silent \
34 |     ${msys2_repo_base_url}/x86_64/${msys2_keyring_pkg}${suffix}
35 | done
36 | pacman-key --verify ${msys2_keyring_pkg}.sig
37 | pacman \
38 |   --noconfirm \
39 |   --upgrade \
40 |   ${msys2_keyring_pkg}
41 | # --refresh and --sysupgrade are each passed twice on purpose (pacman treats the doubled form specially; see pacman(8)).
42 | pacman \
43 |   --noconfirm \
44 |   --refresh \
45 |   --refresh \
46 |   --sync \
47 |   --sysupgrade \
48 |   --sysupgrade
49 |
--------------------------------------------------------------------------------
/ci/scripts/python_benchmark.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | # Smoke-test the ASV benchmarking setup.
21 | # This cannot guarantee that every benchmark succeeds
22 | # (see https://github.com/airspeed-velocity/asv/issues/449)
23 | source deactivate
24 | conda create -y -q -n pyarrow_asv python=${PYTHON_VERSION}
25 | conda activate pyarrow_asv
26 | pip install -q git+https://github.com/pitrou/asv.git@customize_commands
27 |
28 | # Feature flags exported for the run: Parquet and Plasma set to 1, ORC and Gandiva set to 0.
29 | export PYARROW_WITH_PARQUET=1 PYARROW_WITH_PLASMA=1
30 | export PYARROW_WITH_ORC=0 PYARROW_WITH_GANDIVA=0
31 |
32 | pushd ${ARROW_PYTHON_DIR}
33 | # Workaround for https://github.com/airspeed-velocity/asv/issues/631
34 | git fetch --depth=100 origin master:master
35 | # Generate machine information (mandatory)
36 | asv machine --yes
37 | # Run benchmarks on the changeset being tested
38 | asv run --no-pull --show-stderr --quick HEAD^!
39 | popd # ${ARROW_PYTHON_DIR}
40 |
--------------------------------------------------------------------------------
/ci/scripts/python_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex
21 |
22 | source_dir="$1/python"
23 | build_dir="$2/python"
24 | # List the conda packages when CONDA_PREFIX is set.
25 | if [ -n "${CONDA_PREFIX}" ]; then
26 |   echo -e "===\n=== Conda environment for build\n==="
27 |   conda list
28 | fi
29 | # Derive the PYARROW_* settings from the CMAKE_*/ARROW_* variables, with defaults.
30 | export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
31 | export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
32 | export PYARROW_WITH_S3=${ARROW_S3:-OFF}
33 | export PYARROW_WITH_ORC=${ARROW_ORC:-OFF}
34 | export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
35 | export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF}
36 | export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
37 | export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF}
38 | export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF}
39 | export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF}
40 | export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
41 |
42 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
43 |
44 | pushd ${source_dir}
45 | # --record below takes the build directory relative to the source directory.
46 | relative_build_dir=$(realpath --relative-to=. $build_dir)
47 |
48 | # not nice, but prevents mutating the mounted source directory for docker
49 | ${PYTHON:-python} setup.py \
50 |   build --build-base $build_dir \
51 |   install --single-version-externally-managed \
52 |           --record $relative_build_dir/record.txt
53 |
54 | popd
55 |
--------------------------------------------------------------------------------
/ci/scripts/python_sdist_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -eux  # abort on errors and on unset variables; echo every command
21 |
22 | source_dir=${1}/python  # first argument plus /python: directory containing setup.py
23 |
24 | pushd ${source_dir}
25 | export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-}  # empty when PYARROW_VERSION is unset (the :- form keeps "set -u" from aborting)
26 | ${PYTHON:-python} setup.py sdist  # uses "python" unless PYTHON names another interpreter
27 | popd
28 |
--------------------------------------------------------------------------------
/ci/scripts/python_sdist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -eux  # abort on errors and on unset variables; echo every command
21 |
22 | arrow_dir=${1}  # first argument: directory that contains python/, testing/ and cpp/
23 |
24 | export ARROW_SOURCE_DIR=${arrow_dir}
25 | export ARROW_TEST_DATA=${arrow_dir}/testing/data
26 | export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
27 |
28 | export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}  # each PYARROW_* value below is taken from the matching CMAKE_*/ARROW_* variable, with a default when that variable is unset or empty
29 | export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
30 | export PYARROW_WITH_S3=${ARROW_S3:-OFF}
31 | export PYARROW_WITH_ORC=${ARROW_ORC:-OFF}
32 | export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
33 | export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF}
34 | export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
35 | export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF}
36 | export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF}
37 | export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF}
38 | export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
39 |
40 | # TODO: Users should not require ARROW_HOME and pkg-config to find Arrow C++.
41 | # Related: ARROW-9171
42 | # unset ARROW_HOME
43 | # apt purge -y pkg-config
44 |
45 | if [ -n "${PYARROW_VERSION:-}" ]; then  # an explicit version selects exactly one tarball
46 | sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz"
47 | else  # otherwise take the first tarball in reverse-sorted order; the glob must stay outside the quotes or it is never expanded
48 | sdist=$(ls "${arrow_dir}"/python/dist/pyarrow-*.tar.gz | sort -r | head -n1)
49 | fi
50 | ${PYTHON:-python} -m pip install "${sdist}"
51 |
52 | pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow  # PYTEST_ARGS is intentionally unquoted so it can carry several arguments
53 |
--------------------------------------------------------------------------------
/ci/scripts/python_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex  # abort on the first failing command and echo every command as it runs
21 |
22 | arrow_dir=${1}  # first argument: directory that contains testing/ and cpp/
23 |
24 | export ARROW_SOURCE_DIR=${arrow_dir}
25 | export ARROW_TEST_DATA=${arrow_dir}/testing/data
26 | export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
27 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}  # prepend ${ARROW_HOME}/lib to the library search path
28 |
29 | # Enable some checks inside Python itself
30 | export PYTHONDEVMODE=1
31 |
32 | pytest -r s ${PYTEST_ARGS} --pyargs pyarrow  # PYTEST_ARGS is unquoted so it can expand to several (or zero) arguments
33 |
--------------------------------------------------------------------------------
/ci/scripts/python_wheel_macos_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex  # abort on the first failing command and echo every command as it runs
21 |
22 | source_dir=${1}  # first argument: directory that contains python/, testing/ and cpp/
23 |
24 | : ${ARROW_S3:=ON}  # default ARROW_S3 to ON when the caller did not set it
25 |
26 | export PYARROW_TEST_CYTHON=OFF
27 | export PYARROW_TEST_DATASET=ON
28 | export PYARROW_TEST_GANDIVA=OFF
29 | export PYARROW_TEST_HDFS=ON
30 | export PYARROW_TEST_ORC=ON
31 | export PYARROW_TEST_PANDAS=ON
32 | export PYARROW_TEST_PARQUET=ON
33 | export PYARROW_TEST_PLASMA=ON
34 | export PYARROW_TEST_S3=${ARROW_S3}
35 | export PYARROW_TEST_TENSORFLOW=ON
36 | export PYARROW_TEST_FLIGHT=ON
37 |
38 | export ARROW_TEST_DATA=${source_dir}/testing/data
39 | export PARQUET_TEST_DATA=${source_dir}/cpp/submodules/parquet-testing/data  # same cpp/submodules location the other Python test scripts use
40 |
41 | # Install the built wheels
42 | pip install ${source_dir}/python/dist/*.whl
43 |
44 | # Test that the modules are importable
45 | python -c "
46 | import pyarrow
47 | import pyarrow._hdfs
48 | import pyarrow.csv
49 | import pyarrow.dataset
50 | import pyarrow.flight
51 | import pyarrow.fs
52 | import pyarrow.json
53 | import pyarrow.orc
54 | import pyarrow.parquet
55 | import pyarrow.plasma
56 | "
57 |
58 | if [ "${PYARROW_TEST_S3}" == "ON" ]; then  # the S3 filesystem module is only checked when S3 testing is enabled
59 | python -c "import pyarrow._s3fs"
60 | fi
61 |
62 | # Install testing dependencies
63 | pip install -r ${source_dir}/python/requirements-wheel-test.txt
64 |
65 | # Execute unittest
66 | pytest -r s --pyargs pyarrow
67 |
--------------------------------------------------------------------------------
/ci/scripts/python_wheel_windows_test.bat:
--------------------------------------------------------------------------------
1 | @rem Licensed to the Apache Software Foundation (ASF) under one
2 | @rem or more contributor license agreements. See the NOTICE file
3 | @rem distributed with this work for additional information
4 | @rem regarding copyright ownership. The ASF licenses this file
5 | @rem to you under the Apache License, Version 2.0 (the
6 | @rem "License"); you may not use this file except in compliance
7 | @rem with the License. You may obtain a copy of the License at
8 | @rem
9 | @rem http://www.apache.org/licenses/LICENSE-2.0
10 | @rem
11 | @rem Unless required by applicable law or agreed to in writing,
12 | @rem software distributed under the License is distributed on an
13 | @rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | @rem KIND, either express or implied. See the License for the
15 | @rem specific language governing permissions and limitations
16 | @rem under the License.
17 |
18 | @echo on
19 |
20 | set PYARROW_TEST_CYTHON=OFF
21 | set PYARROW_TEST_DATASET=ON
22 | set PYARROW_TEST_GANDIVA=OFF
23 | set PYARROW_TEST_HDFS=ON
24 | set PYARROW_TEST_ORC=OFF
25 | set PYARROW_TEST_PANDAS=ON
26 | set PYARROW_TEST_PARQUET=ON
27 | set PYARROW_TEST_PLASMA=OFF
28 | set PYARROW_TEST_S3=OFF
29 | set PYARROW_TEST_TENSORFLOW=ON
30 | set PYARROW_TEST_FLIGHT=ON
31 |
32 | set ARROW_TEST_DATA=C:\arrow\testing\data
33 | set PARQUET_TEST_DATA=C:\arrow\cpp\submodules\parquet-testing\data
34 |
35 | @REM Install the built wheels (abort if either install fails)
36 | python -m pip install numpy || exit /B 1
37 | python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B
38 |
39 | @REM Test that the modules are importable; batch files keep going after a failed command, so each check exits explicitly
40 | python -c "import pyarrow" || exit /B 1
41 | python -c "import pyarrow._hdfs" || exit /B 1
42 | python -c "import pyarrow._s3fs" || exit /B 1
43 | python -c "import pyarrow.csv" || exit /B 1
44 | python -c "import pyarrow.dataset" || exit /B 1
45 | python -c "import pyarrow.flight" || exit /B 1
46 | python -c "import pyarrow.fs" || exit /B 1
47 | python -c "import pyarrow.json" || exit /B 1
48 | python -c "import pyarrow.parquet" || exit /B 1
49 |
50 | @REM Install testing dependencies
51 | pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1
52 |
53 | @REM Execute unittest
54 | pytest -r s --pyargs pyarrow
55 |
--------------------------------------------------------------------------------
/ci/scripts/r_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 |
19 | set -ex  # abort on the first failing command and echo every command as it runs
20 |
21 | : ${R_BIN:=R}  # use "R" unless the caller set R_BIN to another executable
22 | source_dir=${1}/r  # first argument plus /r: the R package directory
23 | with_docs=${2:-false}  # optional second argument; the site is built only when it is exactly "true"
24 |
25 | pushd ${source_dir}
26 |
27 | ${R_BIN} CMD INSTALL .
28 |
29 | if [ "${with_docs}" == "true" ]; then
30 | ${R_BIN} -e "pkgdown::build_site(install = FALSE)"
31 | fi
32 |
33 | popd
--------------------------------------------------------------------------------
/ci/scripts/r_deps.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 |
19 | set -ex  # abort on the first failing command and echo every command as it runs
20 |
21 | : ${R_BIN:=R}  # use "R" unless the caller set R_BIN to another executable
22 |
23 | source_dir=${1}/r  # first argument plus /r: the R package directory
24 |
25 | pushd ${source_dir}
26 |
27 | # Install R package dependencies
28 | ${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys'))"
29 | ${R_BIN} -e "remotes::install_deps(dependencies = TRUE)"  # run from ${source_dir}, the current directory after the pushd
30 |
31 | popd
32 |
--------------------------------------------------------------------------------
/ci/scripts/r_pkgdown_check.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | # Use this script to prevent errors in the pkgdown site being rendered due to missing YAML entries
21 |
22 | # all .Rd files in the repo (directory prefix and ".Rd" suffix removed; patterns are anchored so only those are stripped)
23 | all_rd_files=`find ./r/man -maxdepth 1 -name "*.Rd" | sed -e 's/^\.\/r\/man\///' | sed -e 's/\.Rd$//' | sort`
24 |
25 | # .Rd files to exclude from search (i.e. are internal); -F matches the literal text \keyword{internal}
26 | exclusions=`grep -F '\keyword{internal}' -rl ./r/man --include='*.Rd' | sed -e 's/^\.\/r\/man\///' | sed -e 's/\.Rd$//' | sort`
27 |
28 | # .Rd files to check against pkgdown.yml (names that occur in only one of the two lists above)
29 | rd_files=`echo ${exclusions[@]} ${all_rd_files[@]} | tr ' ' '\n' | sort | uniq -u`
30 |
31 | # pkgdown sections
32 | pkgdown_sections=`awk '/^[^ ]/{ f=/reference:/; next } f{ if (sub(/:$/,"")) pkg=$2; else print pkg, $2 }' ./r/_pkgdown.yml | grep -v "title:" | sort`
33 |
34 | # get things that appear in man files that don't appear in pkgdown sections (sections are listed twice so "uniq -u" never reports them)
35 | pkgdown_missing=`echo ${pkgdown_sections[@]} ${pkgdown_sections[@]} ${rd_files[@]} | tr ' ' '\n' | sort | uniq -u`
36 |
37 | # if any sections are missing raise an error
38 | if [ -n "${pkgdown_missing}" ]; then
39 | echo "Error! $pkgdown_missing missing from ./r/_pkgdown.yml"
40 | exit 1
41 | fi
42 |
--------------------------------------------------------------------------------
/ci/scripts/r_sanitize.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 |
19 | set -ex  # abort on the first failing command and echo every command as it runs
20 |
21 | : ${R_BIN:=RDsan}  # use "RDsan" unless the caller set R_BIN to another executable
22 |
23 | source_dir=${1}/r  # first argument plus /r: the R package directory
24 |
25 | ${R_BIN} CMD INSTALL ${source_dir}
26 | pushd ${source_dir}/tests
27 |
28 | export TEST_R_WITH_ARROW=TRUE
29 | export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp"
30 | ${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; }  # on failure, show the captured output before exiting
31 |
32 | cat testthat.out
33 | if grep -q "runtime error" testthat.out; then  # also fail when the run succeeded but the output contains "runtime error"
34 | exit 1
35 | fi
36 | popd
37 |
--------------------------------------------------------------------------------
/ci/scripts/release_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -eux  # abort on errors and on unset variables; echo every command
21 |
22 | arrow_dir=${1}  # first argument: directory that contains dev/release/run-test.rb
23 |
24 | pushd ${arrow_dir}
25 |
26 | dev/release/run-test.rb  # path is relative to ${arrow_dir}, the current directory after the pushd
27 |
28 | popd
29 |
--------------------------------------------------------------------------------
/ci/scripts/ruby_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex  # abort on the first failing command and echo every command as it runs
21 |
22 | source_dir=${1}/ruby  # first argument plus /ruby: directory containing the Rakefile
23 | build_dir=${2}/ruby  # second argument plus /ruby: passed to rake as BUILD_DIR
24 |
25 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}  # prepend ${ARROW_HOME}/lib to the library search path
26 | export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig  # replaces (does not extend) any existing PKG_CONFIG_PATH
27 | export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0  # replaces (does not extend) any existing GI_TYPELIB_PATH
28 |
29 | rake -f ${source_dir}/Rakefile BUILD_DIR=${build_dir} USE_BUNDLER=yes
30 |
--------------------------------------------------------------------------------
/ci/scripts/rust_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex  # abort on the first failing command and echo every command as it runs
21 |
22 | arrow_dir=${1}  # first argument: directory that contains rust/, testing/ and cpp/ (previously undefined, which left the test-data paths rooted at /)
23 | source_dir=${arrow_dir}/rust
24 | # This file is used to build the rust binaries needed for the
25 | # archery integration tests. Testing of the rust implementation
26 | # in normal CI is handled by github workflows
27 |
28 | # Disable full debug symbol generation to speed up CI build / reduce memory required
29 | export RUSTFLAGS="-C debuginfo=1"
30 |
31 | export ARROW_TEST_DATA=${arrow_dir}/testing/data
32 | export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
33 |
34 | # show activated toolchain
35 | rustup show
36 |
37 | pushd ${source_dir}
38 |
39 | # build only the integration testing binaries
40 | cargo build -p arrow-integration-testing
41 |
42 | # Remove incremental build artifacts to save space
43 | rm -rf target/debug/deps/ target/debug/build/
44 |
45 | popd
46 |
--------------------------------------------------------------------------------
/ci/scripts/rust_clippy.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex  # abort on the first failing command and echo every command as it runs
21 | cargo clippy --all-targets --workspace -- -D warnings  # -D warnings turns every lint warning into an error, so any warning fails the script
22 |
--------------------------------------------------------------------------------
/ci/scripts/rust_fmt.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex  # abort on the first failing command and echo every command as it runs
21 | cargo fmt --all -- --check  # --check reports formatting differences instead of rewriting files
22 |
--------------------------------------------------------------------------------
/ci/scripts/rust_toml_fmt.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -ex  # abort on the first failing command and echo every command as it runs
21 | find . -mindepth 2 -name 'Cargo.toml' -exec cargo tomlfmt -k -p {} \;  # -mindepth 2 skips a Cargo.toml directly in the current directory; NOTE(review): find does not propagate a failing -exec ... \; status, so "set -e" presumably will not catch tomlfmt failures - confirm
--------------------------------------------------------------------------------
/ci/scripts/util_checkout.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | # this script is github actions specific to check out the submodules and tags
21 |
22 | # TODO(kszucs): remove it once the "submodules: recursive" feature is released
23 | auth_header="$(git config --local --get http.https://github.com/.extraheader)"  # reuse the extra HTTP header stored in the local git config for github.com
24 | git submodule sync --recursive
25 | git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1  # shallow (depth 1) update of all nested submodules, sending the same header
26 |
27 | # fetch all the tags
28 | git fetch --depth=1 origin +refs/tags/*:refs/tags/*
29 |
--------------------------------------------------------------------------------
/ci/scripts/util_cleanup.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | # This script is Github Actions-specific to free up disk space,
21 | # to avoid disk full errors on some builds
22 |
23 | if [ "${RUNNER_OS}" = "Linux" ]; then  # quoted so an unset or empty RUNNER_OS simply skips the cleanup instead of producing a test-syntax error
24 | df -h
25 |
26 | # remove swap
27 | sudo swapoff -a
28 | sudo rm -f /swapfile
29 |
30 | # clean apt cache
31 | sudo apt clean
32 |
33 | # remove haskell, consumes 8.6 GB
34 | sudo rm -rf /opt/ghc
35 |
36 | # 1 GB
37 | sudo rm -rf /home/linuxbrew/.linuxbrew
38 |
39 | # 1+ GB
40 | sudo rm -rf /opt/hostedtoolcache/CodeQL
41 |
42 | # 1+ GB
43 | sudo rm -rf /usr/share/swift
44 |
45 | # 12 GB, but takes a lot of time to delete
46 | #sudo rm -rf /usr/local/lib/android
47 |
48 | # remove cached docker images, around 13 GB
49 | docker rmi $(docker image ls -aq)
50 |
51 | # NOTE: /usr/share/dotnet is 25 GB
52 | fi
53 |
54 | df -h
55 |
--------------------------------------------------------------------------------
/ci/scripts/util_download_apache.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -x
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 |
19 | if [ "$#" -ne 2 ]; then  # exactly two arguments are required
20 | echo "Usage: $0 <apache tarball path> <target directory>"
21 | exit 1
22 | fi
23 |
24 | tarball_path=$1  # path of the tarball, appended to each mirror URL
25 | target_dir=$2  # directory the tarball is extracted into
26 |
27 | APACHE_MIRRORS=(
28 | "http://www.apache.org/dyn/closer.cgi?action=download&filename="
29 | "https://downloads.apache.org"
30 | "https://apache.claz.org"
31 | "https://apache.cs.utah.edu"
32 | "https://apache.mirrors.lucidnetworks.net"
33 | "https://apache.osuosl.org"
34 | "https://ftp.wayne.edu/apache"
35 | "https://mirror.olnevhost.net/pub/apache"
36 | "https://mirrors.gigenet.com/apache"
37 | "https://mirrors.koehn.com/apache"
38 | "https://mirrors.ocf.berkeley.edu/apache"
39 | "https://mirrors.sonic.net/apache"
40 | "https://us.mirrors.quenda.co/apache"
41 | )
42 |
43 | mkdir -p "${target_dir}"
44 |
45 | for mirror in "${APACHE_MIRRORS[@]}"  # try the mirrors in order; quoted [@] keeps each URL as one word
46 | do
47 | curl -SL "${mirror}/${tarball_path}" | tar -xzf - -C "${target_dir}"
48 | if [ $? -eq 0 ]; then  # $? is the status of tar, the last command of the pipeline
49 | exit 0
50 | fi
51 | done
52 |
53 | exit 1
54 |
--------------------------------------------------------------------------------
/ci/vcpkg/arm64-linux-static-debug.cmake:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | set(VCPKG_TARGET_ARCHITECTURE arm64)  # triplet settings: arm64 target, dynamic CRT linkage, static library linkage, Linux, debug build type
19 | set(VCPKG_CRT_LINKAGE dynamic)
20 | set(VCPKG_LIBRARY_LINKAGE static)
21 | set(VCPKG_CMAKE_SYSTEM_NAME Linux)
22 | set(VCPKG_BUILD_TYPE debug)
23 |
24 | if(NOT CMAKE_HOST_SYSTEM_PROCESSOR)  # fill in the host processor from "uname -m" only when it is not already set
25 | execute_process(COMMAND "uname" "-m"
26 | OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR
27 | OUTPUT_STRIP_TRAILING_WHITESPACE)
28 | endif()
29 |
--------------------------------------------------------------------------------
/ci/vcpkg/arm64-linux-static-release.cmake:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | set(VCPKG_TARGET_ARCHITECTURE arm64) # target CPU architecture
19 | set(VCPKG_CRT_LINKAGE dynamic) # requested C runtime (CRT) linkage
20 | set(VCPKG_LIBRARY_LINKAGE static) # preferred linkage for the built libraries
21 | set(VCPKG_CMAKE_SYSTEM_NAME Linux) # target platform
22 | set(VCPKG_BUILD_TYPE release) # restrict builds to the release configuration
23 |
24 | if(NOT CMAKE_HOST_SYSTEM_PROCESSOR) # fall back to `uname -m` when the variable is unset or empty
25 | execute_process(COMMAND "uname" "-m"
26 | OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR
27 | OUTPUT_STRIP_TRAILING_WHITESPACE)
28 | endif()
29 |
--------------------------------------------------------------------------------
/ci/vcpkg/x64-linux-static-debug.cmake:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | set(VCPKG_TARGET_ARCHITECTURE x64) # target CPU architecture
19 | set(VCPKG_CRT_LINKAGE dynamic) # requested C runtime (CRT) linkage
20 | set(VCPKG_LIBRARY_LINKAGE static) # preferred linkage for the built libraries
21 |
22 | set(VCPKG_CMAKE_SYSTEM_NAME Linux) # target platform
23 |
24 | set(VCPKG_BUILD_TYPE debug) # restrict builds to the debug configuration
25 |
--------------------------------------------------------------------------------
/ci/vcpkg/x64-linux-static-release.cmake:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | set(VCPKG_TARGET_ARCHITECTURE x64) # target CPU architecture
19 | set(VCPKG_CRT_LINKAGE dynamic) # requested C runtime (CRT) linkage
20 | set(VCPKG_LIBRARY_LINKAGE static) # preferred linkage for the built libraries
21 |
22 | set(VCPKG_CMAKE_SYSTEM_NAME Linux) # target platform
23 |
24 | set(VCPKG_BUILD_TYPE release) # restrict builds to the release configuration
25 |
--------------------------------------------------------------------------------
/ci/vcpkg/x64-osx-static-debug.cmake:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | set(VCPKG_TARGET_ARCHITECTURE x64) # target CPU architecture
19 | set(VCPKG_CRT_LINKAGE dynamic) # requested C runtime (CRT) linkage
20 | set(VCPKG_LIBRARY_LINKAGE static) # preferred linkage for the built libraries
21 |
22 | set(VCPKG_CMAKE_SYSTEM_NAME Darwin) # target platform (macOS)
23 | set(VCPKG_OSX_ARCHITECTURES x86_64) # macOS architecture to build for
24 |
25 | set(VCPKG_BUILD_TYPE debug) # restrict builds to the debug configuration
26 |
--------------------------------------------------------------------------------
/ci/vcpkg/x64-osx-static-release.cmake:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | set(VCPKG_TARGET_ARCHITECTURE x64) # target CPU architecture
19 | set(VCPKG_CRT_LINKAGE dynamic) # requested C runtime (CRT) linkage
20 | set(VCPKG_LIBRARY_LINKAGE static) # preferred linkage for the built libraries
21 |
22 | set(VCPKG_CMAKE_SYSTEM_NAME Darwin) # target platform (macOS)
23 | set(VCPKG_OSX_ARCHITECTURES x86_64) # macOS architecture to build for
24 |
25 | set(VCPKG_BUILD_TYPE release) # restrict builds to the release configuration
26 |
--------------------------------------------------------------------------------
/ci/vcpkg/x64-windows-static-md-debug.cmake:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | set(VCPKG_TARGET_ARCHITECTURE x64) # target CPU architecture
19 | set(VCPKG_CRT_LINKAGE dynamic) # dynamic CRT combined with static libraries (the "static-md" in the file name)
20 | set(VCPKG_LIBRARY_LINKAGE static) # preferred linkage for the built libraries
21 |
22 | set(VCPKG_BUILD_TYPE debug) # restrict builds to the debug configuration
23 |
--------------------------------------------------------------------------------
/ci/vcpkg/x64-windows-static-md-release.cmake:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | set(VCPKG_TARGET_ARCHITECTURE x64) # target CPU architecture
19 | set(VCPKG_CRT_LINKAGE dynamic) # dynamic CRT combined with static libraries (the "static-md" in the file name)
20 | set(VCPKG_LIBRARY_LINKAGE static) # preferred linkage for the built libraries
21 |
22 | set(VCPKG_BUILD_TYPE release) # restrict builds to the release configuration
23 |
--------------------------------------------------------------------------------
/dev/build-ballista-docker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e # stop at the first failing step
21 |
22 | RELEASE_FLAG=${RELEASE_FLAG:=release} # NOTE(review): assigned here but neither exported nor used later in this script
23 |
24 | ./dev/build-ballista-executables.sh # builds the binaries and scheduler UI inside the builder container
25 |
26 | docker-compose build
27 | # Source (not execute) the helper so the BALLISTA_VERSION it exports is visible in this shell.
28 | . ./dev/build-set-env.sh
29 | docker build -t "apache/arrow-ballista-standalone:$BALLISTA_VERSION" -f dev/docker/ballista-standalone.Dockerfile .
30 | # presumably these local image names are produced by `docker-compose build` above - TODO confirm
31 | docker tag ballista-executor "apache/arrow-ballista-executor:$BALLISTA_VERSION"
32 | docker tag ballista-scheduler "apache/arrow-ballista-scheduler:$BALLISTA_VERSION"
33 | docker tag ballista-benchmarks "apache/arrow-ballista-benchmarks:$BALLISTA_VERSION"
34 |
35 | docker build -t "apache/arrow-ballista-cli:$BALLISTA_VERSION" -f dev/docker/ballista-cli.Dockerfile .
36 |
--------------------------------------------------------------------------------
/dev/build-ballista-executables.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | RELEASE_FLAG=${RELEASE_FLAG:=release}
23 |
24 | # TODO: it would be very nice if we could make CI work the exact same way so the build logic isn't duplicated
25 |
26 | # build a docker container in which to run the build - this is to make life easier for Windows & Mac users
27 | docker build -t ballista-builder --build-arg EXT_UID="$(id -u)" -f dev/docker/ballista-builder.Dockerfile .
28 | # expansions below are quoted so a checkout path (or flag value) containing whitespace is passed as one argument
29 | # run cargo & yarn builds inside the builder container
30 | docker run -v "$(pwd)":/home/builder/workspace --env RELEASE_FLAG="$RELEASE_FLAG" ballista-builder
31 |
--------------------------------------------------------------------------------
/dev/build-set-env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | # Meant to be sourced: resolve the version in a subshell so the caller's working directory is never changed.
21 | BALLISTA_VERSION=$(cd ballista/core/ && cargo pkgid | cut '-d@' -f2)
22 | export BALLISTA_VERSION
23 |
--------------------------------------------------------------------------------
/dev/docker/ballista-benchmarks.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM ubuntu:22.04
19 | # RELEASE_FLAG selects the target/ subdirectory the prebuilt binaries are copied from.
20 | ARG RELEASE_FLAG=release
21 |
22 | ENV RELEASE_FLAG=${RELEASE_FLAG}
23 | ENV RUST_LOG=info
24 | ENV RUST_BACKTRACE=full
25 | # The binaries must already exist in the build context; nothing is compiled in this image.
26 | COPY target/$RELEASE_FLAG/ballista-scheduler /root/ballista-scheduler
27 | COPY target/$RELEASE_FLAG/ballista-executor /root/ballista-executor
28 | COPY target/$RELEASE_FLAG/tpch /root/tpch
29 | # Script used as the container command, plus the benchmark query files.
30 | COPY benchmarks/run.sh /root/run.sh
31 | COPY benchmarks/queries/ /root/benchmarks/queries
32 |
33 | WORKDIR /root
34 |
35 | CMD ["/root/run.sh"]
--------------------------------------------------------------------------------
/dev/docker/ballista-builder.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM rust:1-buster
19 | # UID for the in-container build user; has no default, so pass --build-arg EXT_UID=...
20 | ARG EXT_UID
21 |
22 | ENV RUST_LOG=info
23 | ENV RUST_BACKTRACE=full
24 | ENV DEBIAN_FRONTEND=noninteractive
25 | # System packages for the build, plus nodejs/npm which are used to install yarn.
26 | RUN apt-get update && \
27 | apt-get -y install libssl-dev openssl zlib1g zlib1g-dev libpq-dev cmake protobuf-compiler netcat curl unzip \
28 | nodejs npm && \
29 | npm install -g yarn
30 |
31 | # create build user with the UID given by the EXT_UID build argument
32 | RUN adduser -q -u $EXT_UID builder --home /home/builder && \
33 | mkdir -p /home/builder/workspace
34 | USER builder
35 | # NOTE(review): NODE_VER is not referenced anywhere else in this Dockerfile - confirm it is still needed.
36 | ENV NODE_VER=18.9.0
37 | ENV HOME=/home/builder
38 | ENV PATH=$HOME/.cargo/bin:$PATH
39 |
40 | # prepare rust
41 | RUN rustup update && \
42 | rustup component add rustfmt && \
43 | cargo install cargo-chef --version 0.1.34
44 |
45 | WORKDIR /home/builder/workspace
46 | # The entrypoint script runs the cargo and yarn builds when the container is started.
47 | COPY dev/docker/builder-entrypoint.sh /home/builder
48 | ENTRYPOINT ["/home/builder/builder-entrypoint.sh"]
49 |
--------------------------------------------------------------------------------
/dev/docker/ballista-cli.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM ubuntu:22.04
19 | # RELEASE_FLAG selects the target/ subdirectory the prebuilt CLI binary is copied from.
20 | ARG RELEASE_FLAG=release
21 |
22 | ENV RELEASE_FLAG=${RELEASE_FLAG}
23 | ENV RUST_LOG=info
24 | ENV RUST_BACKTRACE=full
25 | # The binary must already exist in the build context; nothing is compiled in this image.
26 | COPY target/$RELEASE_FLAG/ballista-cli /root/ballista-cli
27 |
28 | COPY dev/docker/cli-entrypoint.sh /root/cli-entrypoint.sh
29 | ENTRYPOINT ["/root/cli-entrypoint.sh"]
30 |
--------------------------------------------------------------------------------
/dev/docker/ballista-executor.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM ubuntu:22.04
19 | # RELEASE_FLAG selects the target/ subdirectory the prebuilt executor binary is copied from.
20 | ARG RELEASE_FLAG=release
21 |
22 | ENV RELEASE_FLAG=${RELEASE_FLAG}
23 | ENV RUST_LOG=info
24 | ENV RUST_BACKTRACE=full
25 | # Remove the apt package lists in the same layer so they are not baked into the image.
26 | RUN apt-get update && apt-get install -y netcat && rm -rf /var/lib/apt/lists/*
27 |
28 | COPY target/$RELEASE_FLAG/ballista-executor /root/ballista-executor
29 |
30 | # Expose Ballista Executor gRPC port
31 | EXPOSE 50051
32 |
33 | COPY dev/docker/executor-entrypoint.sh /root/executor-entrypoint.sh
34 | ENTRYPOINT ["/root/executor-entrypoint.sh"]
35 |
--------------------------------------------------------------------------------
/dev/docker/ballista-scheduler.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM ubuntu:22.04
19 | # RELEASE_FLAG selects the target/ subdirectory the prebuilt scheduler binary is copied from.
20 | ARG RELEASE_FLAG=release
21 |
22 | ENV RELEASE_FLAG=${RELEASE_FLAG}
23 | ENV RUST_LOG=info
24 | ENV RUST_BACKTRACE=full
25 | ENV DEBIAN_FRONTEND=noninteractive
26 | # Remove the apt package lists in the same layer so they are not baked into the image.
27 | RUN apt-get update && apt-get install -y nginx netcat && rm -rf /var/lib/apt/lists/*
28 |
29 | COPY target/$RELEASE_FLAG/ballista-scheduler /root/ballista-scheduler
30 | # Prebuilt web UI assets are served by nginx using the site config copied below.
31 | COPY ballista/scheduler/ui/build /var/www/html
32 | COPY dev/docker/nginx.conf /etc/nginx/sites-enabled/default
33 |
34 | # Expose Ballista Scheduler web UI port
35 | EXPOSE 80
36 |
37 | # Expose Ballista Scheduler gRPC port
38 | EXPOSE 50050
39 |
40 | COPY dev/docker/scheduler-entrypoint.sh /root/scheduler-entrypoint.sh
41 | ENTRYPOINT ["/root/scheduler-entrypoint.sh"]
42 |
--------------------------------------------------------------------------------
/dev/docker/ballista-standalone.Dockerfile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | FROM ubuntu:22.04
19 |
20 | LABEL org.opencontainers.image.source="https://github.com/apache/arrow-ballista"
21 | LABEL org.opencontainers.image.description="Apache Arrow Ballista Distributed SQL Query Engine"
22 | LABEL org.opencontainers.image.licenses="Apache-2.0"
23 | # RELEASE_FLAG selects the target/ subdirectory the prebuilt binaries are copied from.
24 | ARG RELEASE_FLAG=release
25 |
26 | ENV RELEASE_FLAG=${RELEASE_FLAG}
27 | ENV RUST_LOG=info
28 | ENV RUST_BACKTRACE=full
29 | ENV DEBIAN_FRONTEND=noninteractive
30 | # Remove the apt package lists in the same layer so they are not baked into the image.
31 | RUN apt-get -qq update && apt-get install -qq -y nginx netcat wget && rm -rf /var/lib/apt/lists/*
32 |
33 | COPY target/$RELEASE_FLAG/ballista-scheduler /root/ballista-scheduler
34 | COPY target/$RELEASE_FLAG/ballista-executor /root/ballista-executor
35 |
36 | RUN chmod a+x /root/ballista-scheduler && \
37 | chmod a+x /root/ballista-executor
38 |
39 | # populate some sample data for ListingSchemaProvider (the file is Parquet, so the catalog type is parquet)
40 | RUN mkdir -p /data && \
41 | wget -q https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2022-01.parquet -P /data/
42 | ENV DATAFUSION_CATALOG_LOCATION=/data
43 | ENV DATAFUSION_CATALOG_TYPE=parquet
44 |
45 | COPY ballista/scheduler/ui/build /var/www/html
46 | COPY dev/docker/nginx.conf /etc/nginx/sites-enabled/default
47 |
48 | # Expose Ballista Scheduler web UI port
49 | EXPOSE 80
50 |
51 | # Expose Ballista Scheduler gRPC port
52 | EXPOSE 50050
53 |
54 | # Expose Ballista Executor gRPC port
55 | EXPOSE 50051
56 |
57 | COPY dev/docker/standalone-entrypoint.sh /root/standalone-entrypoint.sh
58 | ENTRYPOINT ["/root/standalone-entrypoint.sh"]
59 |
--------------------------------------------------------------------------------
/dev/docker/builder-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | set -x
22 | # Dump the environment into the build log, then build with the requested cargo profile.
23 | printenv
24 | RELEASE_FLAG=${RELEASE_FLAG:=release}
25 | cargo build --features flight-sql --profile "$RELEASE_FLAG" "$@"
26 | # Build the scheduler web UI.
27 | cd ballista/scheduler/ui
28 | yarn install
29 | yarn build
30 |
--------------------------------------------------------------------------------
/dev/docker/cli-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | # exec replaces this shell with the CLI so the CLI itself receives signals sent to the container.
22 | exec /root/ballista-cli "$@"
23 |
--------------------------------------------------------------------------------
/dev/docker/executor-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | # exec replaces this shell with the executor so the executor itself receives signals sent to the container.
22 | exec /root/ballista-executor "$@"
23 |
--------------------------------------------------------------------------------
/dev/docker/nginx.conf:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | server { # serves the static web UI from /var/www/html and proxies /api/ to localhost:50050
19 | listen 80 default_server;
20 | listen [::]:80 default_server;
21 |
22 | root /var/www/html;
23 |
24 | index index.html index.htm index.nginx-debian.html;
25 |
26 | server_name _;
27 |
28 | location / {
29 | # First attempt to serve request as file, then
30 | # as directory, then fall back to displaying a 404.
31 | try_files $uri $uri/ =404;
32 | }
33 |
34 | # pass REST api calls through to Ballista scheduler process
35 | location /api/ {
36 | proxy_redirect http://localhost:50050/ /api/;
37 | proxy_pass_header Server;
38 | proxy_set_header X-Real-IP $remote_addr;
39 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
40 | proxy_set_header X-Scheme $scheme;
41 | proxy_set_header Host $http_host;
42 | proxy_set_header X-NginX-Proxy true;
43 | proxy_connect_timeout 5;
44 | proxy_read_timeout 240;
45 | proxy_intercept_errors on;
46 |
47 | # no longer sure if this part is needed
48 | # (one directive instead of two that each re-listed "error" and "non_idempotent")
49 | proxy_next_upstream error http_403 http_502 non_idempotent;
50 |
51 | proxy_pass http://localhost:50050;
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/dev/docker/scheduler-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 | # nginx runs in the background; the scheduler then replaces this shell (exec) so it receives container signals.
22 | echo "Starting nginx to serve Ballista Scheduler web UI on port 80"
23 | nohup nginx -g "daemon off;" &
24 | exec /root/ballista-scheduler "$@"
25 |
--------------------------------------------------------------------------------
/dev/docker/standalone-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | set -e
21 |
22 | echo "Starting nginx web UI..."
23 | nohup nginx -g "daemon off;" &
24 |
25 | echo "Starting for scheduler..."
26 | /root/ballista-scheduler &
27 | while ! nc -z 127.0.0.1 50050; do
28 | sleep 1
29 | done
30 |
31 | echo "Starting executor"
32 | /root/ballista-executor
33 |
--------------------------------------------------------------------------------
/dev/integration-tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | set -e
20 |
21 | echo "Generating benchmark data ..."
22 | pushd benchmarks
23 | ./tpch-gen.sh
24 | popd
25 |
26 | echo "Building Docker images ..."
27 | ./dev/build-ballista-docker.sh
28 |
29 | echo "Starting docker-compose in background ..."
30 | docker-compose up -d
31 |
32 | # give the scheduler a chance to start up
33 | echo "Sleeping (wait for scheduler to start)..."
34 | sleep 10
35 |
36 | echo "Running benchmarks ..."
37 | docker-compose run ballista-client /root/run.sh
38 |
39 | #TODO need to call docker-compose down even if benchmarks fail
40 |
41 | echo "Stopping docker-compose ..."
42 | docker-compose down
43 |
44 | popd
45 |
--------------------------------------------------------------------------------
/dev/python_lint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | # This script runs all the Python lints locally the same way the
21 | # DataFusion CI does
22 |
23 | set -e
24 | source venv/bin/activate
25 | flake8 --exclude venv --ignore=E501,W503
26 | black --line-length 79 --diff --check .
--------------------------------------------------------------------------------
/dev/release/check-rat-report.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | ##############################################################################
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | ##############################################################################
20 | import fnmatch
21 | import re
22 | import sys
23 | import xml.etree.ElementTree as ET
24 |
25 | if len(sys.argv) != 3:
26 | sys.stderr.write(
27 | "Usage: %s exclude_globs.lst rat_report.xml\n" % sys.argv[0]
28 | )
29 | sys.exit(1)
30 |
31 | exclude_globs_filename = sys.argv[1]
32 | xml_filename = sys.argv[2]
33 |
34 | globs = [line.strip() for line in open(exclude_globs_filename, "r")]
35 |
36 | tree = ET.parse(xml_filename)
37 | root = tree.getroot()
38 | resources = root.findall("resource")
39 |
40 | all_ok = True
41 | for r in resources:
42 | approvals = r.findall("license-approval")
43 | if not approvals or approvals[0].attrib["name"] == "true":
44 | continue
45 | clean_name = re.sub("^[^/]+/", "", r.attrib["name"])
46 | excluded = False
47 | for g in globs:
48 | if fnmatch.fnmatch(clean_name, g):
49 | excluded = True
50 | break
51 | if not excluded:
52 | sys.stdout.write(
53 | "NOT APPROVED: %s (%s): %s\n"
54 | % (clean_name, r.attrib["name"], approvals[0].attrib["name"])
55 | )
56 | all_ok = False
57 |
58 | if not all_ok:
59 | sys.exit(1)
60 |
61 | print("OK")
62 | sys.exit(0)
63 |
--------------------------------------------------------------------------------
/dev/release/crate-deps.dot:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | digraph G {
19 |
20 | ballista_core
21 | ballista_scheduler
22 | ballista_executor
23 | ballista
24 | ballista_cli
25 |
26 | ballista_scheduler -> ballista_core
27 |
28 | ballista_executor -> ballista_core
29 |
30 | ballista -> ballista_core
31 | ballista -> ballista_scheduler
32 | ballista -> ballista_executor
33 |
34 | ballista_cli -> ballista
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/dev/release/rat_exclude_files.txt:
--------------------------------------------------------------------------------
1 | *.npmrc
2 | *.gitignore
3 | *.dockerignore
4 | .gitmodules
5 | *_generated.js
6 | *_generated.ts
7 | *.csv
8 | *.json
9 | *.snap
10 | .github/ISSUE_TEMPLATE/*.md
11 | .github/pull_request_template.md
12 | ci/etc/rprofile
13 | ci/etc/*.patch
14 | ci/vcpkg/*.patch
15 | CHANGELOG.md
16 | ballista/CHANGELOG.md
17 | python/CHANGELOG.md
18 | dev/requirements*.txt
19 | dev/release/rat_exclude_files.txt
20 | helm/ballista/Chart.lock
21 | pax_global_header
22 | MANIFEST.in
23 | __init__.pxd
24 | __init__.py
25 | requirements.txt
26 | *.html
27 | *.sgml
28 | *.css
29 | *.png
30 | *.ico
31 | *.svg
32 | *.devhelp2
33 | *.scss
34 | .gitattributes
35 | benchmarks/queries/q*.sql
36 | ballista/scheduler/testdata/*
37 | **/yarn.lock
38 | requirements*.txt
39 | **/testdata/*
40 | benchmarks/queries/*
41 | benchmarks/data/*
42 | ci/*
43 | **/*.svg
44 | **/*.csv
45 | **/*.json
46 | **/*.sql
47 | venv/*
48 | testing/*
49 | target/*
50 | **/target/*
51 | Cargo.lock
52 | **/Cargo.lock
53 | .history
54 | parquet-testing/*
55 | *rat.txt
56 | ballista/core/src/serde/generated/ballista.rs
--------------------------------------------------------------------------------
/dev/release/run-rat.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | #
20 |
21 | RAT_VERSION=0.13
22 | RAT_JAR="apache-rat-${RAT_VERSION}.jar"
23 |
24 | # download apache rat; --fail keeps an HTTP error page from being cached as the jar
25 | if [ ! -f "${RAT_JAR}" ]; then
26 |   if ! curl -s --fail -o "${RAT_JAR}" "https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/${RAT_JAR}"; then
27 |     echo "Failed to download ${RAT_JAR}" >&2
28 |     rm -f "${RAT_JAR}"
29 |     exit 1
30 |   fi
31 | fi
32 |
33 | RAT="java -jar ${RAT_JAR} -x "
34 |
35 | RELEASE_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd)
36 |
37 | # generate the rat report
38 | $RAT "$1" > rat.txt
39 | python "$RELEASE_DIR/check-rat-report.py" "$RELEASE_DIR/rat_exclude_files.txt" rat.txt > filtered_rat.txt
40 | CHECK_STATUS=$?
41 | cat filtered_rat.txt
42 | UNAPPROVED=$(grep -c "NOT APPROVED" filtered_rat.txt)
43 |
44 | if [ "${UNAPPROVED}" -ne 0 ]; then
45 |   echo "${UNAPPROVED} unapproved licences. Check rat report: rat.txt"
46 |   exit 1
47 | elif [ "${CHECK_STATUS}" -ne 0 ]; then
48 |   # The checker itself failed (e.g. unreadable report); do not report success.
49 |   echo "License check did not complete (exit status ${CHECK_STATUS}). Check rat report: rat.txt" >&2
50 |   exit 1
51 | else
52 |   echo "No unapproved licenses"
53 | fi
54 |
--------------------------------------------------------------------------------
/dev/release/update_change_log-ballista.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | #
20 |
21 | # Usage:
22 | # CHANGELOG_GITHUB_TOKEN= ./update_change_log-ballista.sh main 0.7.0 0.6.0
23 |
24 | RELEASE_BRANCH=$1
25 | RELEASE_TAG=$2
26 | BASE_TAG=$3
27 |
28 | SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
29 | ${SOURCE_DIR}/update_change_log.sh \
30 | ballista \
31 | "${BASE_TAG}" \
32 | --exclude-tags-regex "python-.+" \
33 | --future-release "${RELEASE_TAG}" \
34 | --release-branch "${RELEASE_BRANCH}"
35 |
--------------------------------------------------------------------------------
/dev/rust_lint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 | set -e
20 | if ! command -v cargo-tomlfmt &> /dev/null; then
21 | echo "Installing cargo-tomlfmt using cargo"
22 | cargo install cargo-tomlfmt
23 | fi
24 |
25 | ci/scripts/rust_fmt.sh
26 | ci/scripts/rust_clippy.sh
27 | ci/scripts/rust_toml_fmt.sh
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | build
19 | source/python/generated
20 | venv/
21 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | #
19 | # Minimal makefile for Sphinx documentation
20 | #
21 |
22 | # You can set these variables from the command line, and also
23 | # from the environment for the first two.
24 | SPHINXOPTS ?=
25 | SPHINXBUILD ?= sphinx-build
26 | SOURCEDIR = source
27 | BUILDDIR = build
28 |
29 | # Put it first so that "make" without argument is like "make help".
30 | help:
31 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
32 |
33 | .PHONY: help Makefile
34 |
35 | # Catch-all target: route all unknown targets to Sphinx using the new
36 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
37 | %: Makefile
38 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
39 |
--------------------------------------------------------------------------------
/docs/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Licensed to the Apache Software Foundation (ASF) under one
4 | # or more contributor license agreements. See the NOTICE file
5 | # distributed with this work for additional information
6 | # regarding copyright ownership. The ASF licenses this file
7 | # to you under the Apache License, Version 2.0 (the
8 | # "License"); you may not use this file except in compliance
9 | # with the License. You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing,
14 | # software distributed under the License is distributed on an
15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | # KIND, either express or implied. See the License for the
17 | # specific language governing permissions and limitations
18 | # under the License.
19 |
20 | rm -rf build
21 | make html
22 |
--------------------------------------------------------------------------------
/docs/developer/README.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Developer Documentation
21 |
22 | - Read the [Architecture Overview](architecture.md) to get an understanding of the scheduler and executor
23 | processes and how distributed query execution works.
24 | - Watch the [Ballista: Distributed Compute with Rust and Apache Arrow](https://www.youtube.com/watch?v=ZZHQaOap9pQ)
25 | talk from the New York Open Statistical Programming Meetup (Feb 2021)
26 |
--------------------------------------------------------------------------------
/docs/developer/images/query-execution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista-python/30a47ae960ff9766fb300fe9908b1aaf8d479aac/docs/developer/images/query-execution.png
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @rem Licensed to the Apache Software Foundation (ASF) under one
2 | @rem or more contributor license agreements. See the NOTICE file
3 | @rem distributed with this work for additional information
4 | @rem regarding copyright ownership. The ASF licenses this file
5 | @rem to you under the Apache License, Version 2.0 (the
6 | @rem "License"); you may not use this file except in compliance
7 | @rem with the License. You may obtain a copy of the License at
8 | @rem
9 | @rem http://www.apache.org/licenses/LICENSE-2.0
10 | @rem
11 | @rem Unless required by applicable law or agreed to in writing,
12 | @rem software distributed under the License is distributed on an
13 | @rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | @rem KIND, either express or implied. See the License for the
15 | @rem specific language governing permissions and limitations
16 | @rem under the License.
17 |
18 | @ECHO OFF
19 |
20 | pushd %~dp0
21 |
22 | REM Command file for Sphinx documentation
23 |
24 | if "%SPHINXBUILD%" == "" (
25 | set SPHINXBUILD=sphinx-build
26 | )
27 | set SOURCEDIR=source
28 | set BUILDDIR=build
29 |
30 | if "%1" == "" goto help
31 |
32 | %SPHINXBUILD% >NUL 2>NUL
33 | if errorlevel 9009 (
34 | echo.
35 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
36 | echo.installed, then set the SPHINXBUILD environment variable to point
37 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
38 | echo.may add the Sphinx directory to PATH.
39 | echo.
40 | echo.If you don't have Sphinx installed, grab it from
41 | echo.http://sphinx-doc.org/
42 | exit /b 1
43 | )
44 |
45 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
46 | goto end
47 |
48 | :help
49 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
50 |
51 | :end
52 | popd
53 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | sphinx==2.4.4
19 | pydata-sphinx-theme
20 | myst-parser<1
21 | maturin<0.12
22 |
--------------------------------------------------------------------------------
/docs/source/_static/images/ballista-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista-python/30a47ae960ff9766fb300fe9908b1aaf8d479aac/docs/source/_static/images/ballista-logo.png
--------------------------------------------------------------------------------
/docs/source/_templates/docs-sidebar.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
10 |
11 |
20 |
--------------------------------------------------------------------------------
/docs/source/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "pydata_sphinx_theme/layout.html" %}
2 |
3 | {# Silence the navbar #}
4 | {% block docs_navbar %}
5 | {% endblock %}
6 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. Licensed to the Apache Software Foundation (ASF) under one
2 | .. or more contributor license agreements. See the NOTICE file
3 | .. distributed with this work for additional information
4 | .. regarding copyright ownership. The ASF licenses this file
5 | .. to you under the Apache License, Version 2.0 (the
6 | .. "License"); you may not use this file except in compliance
7 | .. with the License. You may obtain a copy of the License at
8 |
9 | .. http://www.apache.org/licenses/LICENSE-2.0
10 |
11 | .. Unless required by applicable law or agreed to in writing,
12 | .. software distributed under the License is distributed on an
13 | .. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | .. KIND, either express or implied. See the License for the
15 | .. specific language governing permissions and limitations
16 | .. under the License.
17 |
18 | =====================
19 | Apache Arrow Ballista
20 | =====================
21 |
22 | Table of contents
23 | =================
24 |
25 |
26 | .. _toc.guide:
27 |
28 | .. toctree::
29 | :maxdepth: 1
30 | :caption: User Guide
31 |
32 | Introduction
33 |
34 | .. toctree::
35 | :maxdepth: 1
36 | :caption: Cluster Deployment
37 |
38 | Deployment
39 | Scheduler
40 |
41 | .. toctree::
42 | :maxdepth: 1
43 | :caption: Clients
44 |
45 | Python
46 | Rust
47 | Flight SQL JDBC
48 | SQL CLI
49 |
50 | .. toctree::
51 | :maxdepth: 1
52 | :caption: Reference
53 |
54 | user-guide/configs
55 | user-guide/tuning-guide
56 | user-guide/faq
57 |
58 | .. _toc.source:
59 |
60 | .. toctree::
61 | :maxdepth: 1
62 | :caption: Source Code
63 |
64 | Ballista
65 |
66 | .. _toc.community:
67 |
68 | .. toctree::
69 | :maxdepth: 1
70 | :caption: Community
71 |
72 | community/communication
73 | Issue tracker
74 | Code of conduct
75 |
--------------------------------------------------------------------------------
/docs/source/user-guide/deployment/cargo-install.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Deploying a standalone Ballista cluster using cargo install
21 |
22 | A simple way to start a local cluster for testing purposes is to use cargo to install
23 | the scheduler and executor crates.
24 |
25 | ```bash
26 | cargo install --locked ballista-scheduler
27 | cargo install --locked ballista-executor
28 | ```
29 |
30 | With these crates installed, it is now possible to start a scheduler process.
31 |
32 | ```bash
33 | RUST_LOG=info ballista-scheduler
34 | ```
35 |
36 | The scheduler will bind to port 50050 by default.
37 |
38 | Next, start an executor process in a new terminal session.
39 |
40 | ```bash
41 | RUST_LOG=info ballista-executor
42 | ```
43 |
44 | The executor will bind to port 50051 by default. Additional executors can be started by
45 | manually specifying a bind port. For example:
46 |
47 | ```bash
48 | RUST_LOG=info ballista-executor --bind-port 50052
49 | ```
50 |
--------------------------------------------------------------------------------
/docs/source/user-guide/deployment/index.rst:
--------------------------------------------------------------------------------
1 | .. Licensed to the Apache Software Foundation (ASF) under one
2 | .. or more contributor license agreements. See the NOTICE file
3 | .. distributed with this work for additional information
4 | .. regarding copyright ownership. The ASF licenses this file
5 | .. to you under the Apache License, Version 2.0 (the
6 | .. "License"); you may not use this file except in compliance
7 | .. with the License. You may obtain a copy of the License at
8 |
9 | .. http://www.apache.org/licenses/LICENSE-2.0
10 |
11 | .. Unless required by applicable law or agreed to in writing,
12 | .. software distributed under the License is distributed on an
13 | .. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | .. KIND, either express or implied. See the License for the
15 | .. specific language governing permissions and limitations
16 | .. under the License.
17 |
18 | Start a Ballista Cluster
19 | ========================
20 |
21 | .. toctree::
22 | :maxdepth: 2
23 |
24 | Cargo Install
25 | Docker
26 | Docker Compose
27 | Kubernetes
28 |
--------------------------------------------------------------------------------
/docs/source/user-guide/faq.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Frequently Asked Questions
21 |
22 | ## What is the relationship between Apache Arrow, DataFusion, and Ballista?
23 |
24 | Apache Arrow is a library which provides a standardized memory representation for columnar data. It also provides
25 | "kernels" for performing common operations on this data.
26 |
27 | DataFusion is a library for executing queries in-process using the Apache Arrow memory
28 | model and computational kernels. It is designed to run within a single process, using threads
29 | for parallel query execution.
30 |
31 | Ballista is a distributed compute platform built on DataFusion.
32 |
--------------------------------------------------------------------------------
/docs/source/user-guide/images/ballista-web-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista-python/30a47ae960ff9766fb300fe9908b1aaf8d479aac/docs/source/user-guide/images/ballista-web-ui.png
--------------------------------------------------------------------------------
/docs/source/user-guide/images/example-query-plan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista-python/30a47ae960ff9766fb300fe9908b1aaf8d479aac/docs/source/user-guide/images/example-query-plan.png
--------------------------------------------------------------------------------
/docs/source/user-guide/metrics.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Scheduler Metrics
21 |
22 | ## Prometheus
23 |
24 | Built with default features, the ballista scheduler will automatically collect and expose a standard set of prometheus metrics.
25 | The metrics currently collected automatically include:
26 |
27 | - _job_exec_time_seconds_ - Histogram of successful job execution time in seconds
28 | - _planning_time_ms_ - Histogram of job planning time in milliseconds
29 | - _failed_ - Counter of failed jobs
30 | - _job_failed_total_ - Counter of failed jobs
31 | - _job_cancelled_total_ - Counter of cancelled jobs
32 | - _job_completed_total_ - Counter of completed jobs
33 | - _job_submitted_total_ - Counter of submitted jobs
34 | - _pending_task_queue_size_ - Number of pending tasks
35 |
36 | **NOTE** Currently the histogram buckets for the above metrics are set to reasonable defaults. If the defaults are not
37 | appropriate for a given use case, the only workaround is to implement a custom `SchedulerMetricsCollector`. In the future
38 | the buckets should be made configurable.
39 |
40 | The metrics are then exported through the scheduler REST API at `GET /api/metrics`. It should be sufficient to ingest metrics
41 | into an existing metrics system by pointing your chosen prometheus exporter at that endpoint.
42 |
--------------------------------------------------------------------------------
/docs/source/user-guide/scheduler.md:
--------------------------------------------------------------------------------
1 |
19 |
20 | # Ballista Scheduler
21 |
22 | ## Web User Interface
23 |
24 | The scheduler provides a web user interface that allows queries to be monitored.
25 |
26 | 
27 |
28 | ## REST API
29 |
30 | The scheduler also provides a REST API that allows jobs to be monitored.
31 |
32 | | API | Method | Description |
33 | | --------------------- | ------ | ----------------------------------------------------------- |
34 | | /api/jobs | GET | Get a list of jobs that have been submitted to the cluster. |
35 | | /api/job/{job_id} | GET | Get a summary of a submitted job. |
36 | | /api/job/{job_id}/dot | GET | Produce a query plan in DOT (graphviz) format. |
37 | | /api/job/{job_id} | PATCH | Cancel a currently running job |
38 | | /api/metrics | GET | Return current scheduler metric set |
39 |
--------------------------------------------------------------------------------
/docs/sqlbench-h-workstation-10-distributed-perquery.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datafusion-ballista-python/30a47ae960ff9766fb300fe9908b1aaf8d479aac/docs/sqlbench-h-workstation-10-distributed-perquery.png
--------------------------------------------------------------------------------
/examples/dataframe-parquet.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | from ballista import BallistaContext
19 | from ballista import functions as f
20 |
21 |
# Create a context pointed at localhost:50050 (the port the sibling
# run-scheduler.py example binds its scheduler to).
ctx = BallistaContext(
    "localhost",
    50050,
    shuffle_partitions=16,
    batch_size=8192,
)

# Group the Parquet rows by passenger_count and count the rows in each group.
trips = ctx.read_parquet("yellow_tripdata_2021-01.parquet")
group_by = [f.col("passenger_count")]
aggregates = [f.count_star()]
counts = trips.aggregate(group_by, aggregates)
counts.show()
29 |
--------------------------------------------------------------------------------
/examples/run-executor.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | from ballista import Executor
19 |
20 |
# Start an executor from this Python process.
# The variable is named `executor` rather than `exec` so that it does not
# shadow Python's built-in `exec` function.
executor = Executor(
    scheduler_host="localhost",
    scheduler_port=50050,
    bind_host="127.0.0.1",
    bind_port=50051,
    grpc_port=50052,
    concurrent_tasks=1,
)
30 |
--------------------------------------------------------------------------------
/examples/run-scheduler.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | from ballista import Scheduler
19 |
20 |
# Launch a scheduler inside the current Python process.
scheduler_options = {
    "bind_host": "127.0.0.1",
    "bind_port": 50050,
    "external_host": "127.0.0.1",
}
scheduler = Scheduler(**scheduler_options)
27 |
--------------------------------------------------------------------------------
/examples/sql-parquet.py:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | from ballista import BallistaContext
19 |
20 |
# Create a context pointed at localhost:50050 (the port the sibling
# run-scheduler.py example binds its scheduler to).
ctx = BallistaContext(
    "localhost",
    50050,
    shuffle_partitions=16,
    batch_size=8192,
)

# Register the Parquet file under the table name "taxi" so SQL can refer to it.
ctx.register_parquet("taxi", "yellow_tripdata_2021-01.parquet")

# Row count per non-null passenger_count value, ordered by passenger_count.
query = "select passenger_count, count(*) from taxi where passenger_count is not null group by passenger_count order by passenger_count"
result = ctx.sql(query)
result.show()
29 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | [build-system]
19 | requires = ["maturin>=0.11,<0.12"]
20 | build-backend = "maturin"
21 |
[project]
name = "ballista"
description = "Build and run queries against data"
readme = "README.md"
license = {file = "LICENSE.txt"}
requires-python = ">=3.6"
keywords = ["ballista", "dataframe", "rust", "query-engine"]
# The standard `[project]` key is the plural `classifiers`; the singular
# spelling is not a recognised field, so the list would not be picked up.
classifiers = [
    "Development Status :: 2 - Pre-Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "License :: OSI Approved",
    "Operating System :: MacOS",
    "Operating System :: Microsoft :: Windows",
    "Operating System :: POSIX :: Linux",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.6",
    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python",
    "Programming Language :: Rust",
]
dependencies = [
    "pyarrow>=1",
]
49 |
50 | [project.urls]
51 | documentation = "https://arrow.apache.org/apache/arrow-ballista/python"
52 | repository = "https://github.com/apache/arrow-ballista"
53 |
54 | [tool.isort]
55 | profile = "black"
56 |
--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 |
18 | black
19 | flake8
20 | isort
21 | maturin
22 | mypy
23 | numpy
24 | pandas
25 | pyarrow
26 | pytest
27 | toml
28 |
--------------------------------------------------------------------------------
/src/datatype.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | /// Copied from https://github.com/apache/arrow-datafusion-python/pull/103
19 | use datafusion::arrow::datatypes::DataType;
20 | use pyo3::pyclass;
21 |
22 | #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
23 | #[pyclass(name = "PyDataType", module = "datafusion", subclass)]
24 | pub struct PyDataType {
25 | pub(crate) data_type: DataType,
26 | }
27 |
28 | impl From for DataType {
29 | fn from(data_type: PyDataType) -> DataType {
30 | data_type.data_type
31 | }
32 | }
33 |
34 | impl From for PyDataType {
35 | fn from(data_type: DataType) -> PyDataType {
36 | PyDataType { data_type }
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/src/utils.rs:
--------------------------------------------------------------------------------
1 | // Licensed to the Apache Software Foundation (ASF) under one
2 | // or more contributor license agreements. See the NOTICE file
3 | // distributed with this work for additional information
4 | // regarding copyright ownership. The ASF licenses this file
5 | // to you under the Apache License, Version 2.0 (the
6 | // "License"); you may not use this file except in compliance
7 | // with the License. You may obtain a copy of the License at
8 | //
9 | // http://www.apache.org/licenses/LICENSE-2.0
10 | //
11 | // Unless required by applicable law or agreed to in writing,
12 | // software distributed under the License is distributed on an
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, either express or implied. See the License for the
15 | // specific language governing permissions and limitations
16 | // under the License.
17 |
18 | use crate::errors::DataFusionError;
19 | use datafusion::logical_expr::Volatility;
20 | use pyo3::prelude::*;
21 | use std::future::Future;
22 | use tokio::runtime::Runtime;
23 |
24 | /// Utility to collect rust futures with GIL released
25 | pub fn wait_for_future(py: Python, f: F) -> F::Output
26 | where
27 | F: Send,
28 | F::Output: Send,
29 | {
30 | let rt = Runtime::new().unwrap();
31 | py.allow_threads(|| rt.block_on(f))
32 | }
33 |
34 | pub(crate) fn parse_volatility(value: &str) -> Result {
35 | Ok(match value {
36 | "immutable" => Volatility::Immutable,
37 | "stable" => Volatility::Stable,
38 | "volatile" => Volatility::Volatile,
39 | value => {
40 | return Err(DataFusionError::Common(format!(
41 | "Unsupportad volatility type: `{}`, supported \
42 | values are: immutable, stable and volatile.",
43 | value
44 | )))
45 | }
46 | })
47 | }
48 |
--------------------------------------------------------------------------------