├── .asf.yaml ├── .cargo └── config ├── .dockerignore ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── actions │ └── setup-builder │ │ └── action.yaml ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── cancel.yml │ ├── comment_bot.yml │ ├── dev.yml │ ├── dev_pr.yml │ ├── dev_pr │ └── labeler.yml │ ├── python_build.yml │ ├── python_test.yaml │ └── rust.yml ├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE.txt ├── OLDREADME.md ├── README.md ├── ballista ├── __init__.py ├── functions.py └── tests │ ├── __init__.py │ └── test_imports.py ├── ci ├── appveyor-cpp-build.bat ├── appveyor-cpp-setup.bat ├── conda_env_archery.yml ├── conda_env_cpp.yml ├── conda_env_crossbow.txt ├── conda_env_gandiva.yml ├── conda_env_gandiva_win.yml ├── conda_env_python.yml ├── conda_env_r.yml ├── conda_env_sphinx.yml ├── conda_env_unix.yml ├── detect-changes.py ├── docker │ ├── conda-cpp.Dockerfile │ ├── conda-integration.Dockerfile │ ├── conda-python-dask.Dockerfile │ ├── conda-python-hdfs.Dockerfile │ ├── conda-python-jpype.Dockerfile │ ├── conda-python-kartothek.Dockerfile │ ├── conda-python-pandas.Dockerfile │ ├── conda-python-spark.Dockerfile │ ├── conda-python-turbodbc.Dockerfile │ ├── conda-python.Dockerfile │ ├── conda.Dockerfile │ ├── debian-10-cpp.Dockerfile │ ├── debian-10-go.Dockerfile │ ├── debian-10-js.Dockerfile │ ├── debian-9-java.Dockerfile │ ├── fedora-33-cpp.Dockerfile │ ├── linux-apt-c-glib.Dockerfile │ ├── linux-apt-docs.Dockerfile │ ├── linux-apt-jni.Dockerfile │ ├── linux-apt-lint.Dockerfile │ ├── linux-apt-python-3.Dockerfile │ ├── linux-apt-r.Dockerfile │ ├── linux-apt-ruby.Dockerfile │ ├── linux-dnf-python-3.Dockerfile │ ├── linux-r.Dockerfile │ ├── python-sdist.Dockerfile │ ├── python-wheel-manylinux-201x.Dockerfile │ ├── python-wheel-manylinux-test.Dockerfile │ ├── python-wheel-windows-vs2017.Dockerfile │ ├── ubuntu-18.04-cpp.Dockerfile │ ├── 
ubuntu-18.04-csharp.Dockerfile │ ├── ubuntu-20.04-cpp.Dockerfile │ └── ubuntu-20.10-cpp.Dockerfile ├── etc │ ├── hdfs-site.xml │ └── rprofile ├── scripts │ ├── PKGBUILD │ ├── c_glib_build.sh │ ├── c_glib_test.sh │ ├── ccache_setup.sh │ ├── cpp_build.sh │ ├── cpp_test.sh │ ├── csharp_build.sh │ ├── csharp_pack.sh │ ├── csharp_test.sh │ ├── docs_build.sh │ ├── go_build.sh │ ├── go_test.sh │ ├── install_conda.sh │ ├── install_dask.sh │ ├── install_iwyu.sh │ ├── install_kartothek.sh │ ├── install_minio.sh │ ├── install_osx_sdk.sh │ ├── install_pandas.sh │ ├── install_spark.sh │ ├── install_turbodbc.sh │ ├── integration_arrow.sh │ ├── integration_dask.sh │ ├── integration_hdfs.sh │ ├── integration_hiveserver2.sh │ ├── integration_kartothek.sh │ ├── integration_spark.sh │ ├── integration_turbodbc.sh │ ├── java_build.sh │ ├── java_test.sh │ ├── js_build.sh │ ├── js_test.sh │ ├── msys2_setup.sh │ ├── msys2_system_clean.sh │ ├── msys2_system_upgrade.sh │ ├── python_benchmark.sh │ ├── python_build.sh │ ├── python_sdist_build.sh │ ├── python_sdist_test.sh │ ├── python_test.sh │ ├── python_wheel_macos_build.sh │ ├── python_wheel_macos_test.sh │ ├── python_wheel_manylinux_build.sh │ ├── python_wheel_manylinux_test.sh │ ├── python_wheel_windows_build.bat │ ├── python_wheel_windows_test.bat │ ├── r_build.sh │ ├── r_deps.sh │ ├── r_docker_configure.sh │ ├── r_pkgdown_check.sh │ ├── r_sanitize.sh │ ├── r_test.sh │ ├── r_windows_build.sh │ ├── release_test.sh │ ├── ruby_test.sh │ ├── rust_build.sh │ ├── rust_clippy.sh │ ├── rust_fmt.sh │ ├── rust_toml_fmt.sh │ ├── util_checkout.sh │ ├── util_cleanup.sh │ ├── util_download_apache.sh │ └── util_wait_for_it.sh └── vcpkg │ ├── arm64-linux-static-debug.cmake │ ├── arm64-linux-static-release.cmake │ ├── ports.patch │ ├── x64-linux-static-debug.cmake │ ├── x64-linux-static-release.cmake │ ├── x64-osx-static-debug.cmake │ ├── x64-osx-static-release.cmake │ ├── x64-windows-static-md-debug.cmake │ └── x64-windows-static-md-release.cmake ├── 
dev ├── build-ballista-docker.sh ├── build-ballista-executables.sh ├── build-set-env.sh ├── create_license.py ├── docker │ ├── ballista-benchmarks.Dockerfile │ ├── ballista-builder.Dockerfile │ ├── ballista-cli.Dockerfile │ ├── ballista-executor.Dockerfile │ ├── ballista-scheduler.Dockerfile │ ├── ballista-standalone.Dockerfile │ ├── builder-entrypoint.sh │ ├── cli-entrypoint.sh │ ├── executor-entrypoint.sh │ ├── nginx.conf │ ├── scheduler-entrypoint.sh │ └── standalone-entrypoint.sh ├── integration-tests.sh ├── python_lint.sh ├── release │ ├── README.md │ ├── check-rat-report.py │ ├── crate-deps.dot │ ├── crate-deps.svg │ ├── create-tarball.sh │ ├── download-python-wheels.py │ ├── rat_exclude_files.txt │ ├── release-tarball.sh │ ├── run-rat.sh │ ├── update_change_log-ballista.sh │ ├── update_change_log.sh │ └── verify-release-candidate.sh ├── rust_lint.sh ├── update_arrow_deps.py ├── update_ballista_versions.py └── update_datafusion_versions.py ├── docs ├── .gitignore ├── Makefile ├── README.md ├── build.sh ├── developer │ ├── README.md │ ├── architecture.md │ └── images │ │ └── query-execution.png ├── make.bat ├── requirements.txt ├── source │ ├── _static │ │ ├── images │ │ │ └── ballista-logo.png │ │ └── theme_overrides.css │ ├── _templates │ │ ├── docs-sidebar.html │ │ └── layout.html │ ├── community │ │ └── communication.md │ ├── conf.py │ ├── index.rst │ └── user-guide │ │ ├── cli.md │ │ ├── configs.md │ │ ├── deployment │ │ ├── cargo-install.md │ │ ├── docker-compose.md │ │ ├── docker.md │ │ ├── index.rst │ │ └── kubernetes.md │ │ ├── faq.md │ │ ├── flightsql.md │ │ ├── images │ │ ├── ballista-web-ui.png │ │ └── example-query-plan.png │ │ ├── introduction.md │ │ ├── metrics.md │ │ ├── python.md │ │ ├── rust.md │ │ ├── scheduler.md │ │ └── tuning-guide.md └── sqlbench-h-workstation-10-distributed-perquery.png ├── examples ├── dataframe-parquet.py ├── run-executor.py ├── run-scheduler.py └── sql-parquet.py ├── pyproject.toml ├── requirements-310.txt ├── 
requirements-37.txt ├── requirements.in ├── requirements.txt └── src ├── context.rs ├── dataframe.rs ├── datatype.rs ├── errors.rs ├── executor.rs ├── expression.rs ├── functions.rs ├── lib.rs ├── scheduler.rs ├── udaf.rs ├── udf.rs └── utils.rs /.asf.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | github: 19 | description: Apache Arrow Ballista Python bindings 20 | homepage: https://arrow.apache.org/ballista-python/ 21 | labels: 22 | - arrow 23 | - big-data 24 | - dataframe 25 | - distributed 26 | - olap 27 | - python 28 | - query-engine 29 | - rust 30 | - sql 31 | enabled_merge_buttons: 32 | merge: false 33 | rebase: false 34 | squash: true 35 | features: 36 | issues: true 37 | 38 | notifications: 39 | commits: commits@arrow.apache.org 40 | issues_status: issues@arrow.apache.org 41 | issues: github@arrow.apache.org 42 | pullrequests: github@arrow.apache.org 43 | 44 | publish: 45 | whoami: asf-site 46 | subdir: ballista-python 47 | -------------------------------------------------------------------------------- /.cargo/config: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [target.x86_64-apple-darwin] 19 | rustflags = [ 20 | "-C", "link-arg=-undefined", 21 | "-C", "link-arg=dynamic_lookup", 22 | ] 23 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | target 19 | venv 20 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [flake8] 19 | exclude = 20 | venv 21 | dev 22 | docs 23 | ci 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Additional context** 20 | Add any other context about the problem here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem or challenge? Please describe what you are trying to do.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
12 | (This section helps Arrow developers understand the context and *why* for this feature, in addition to the *what*) 13 | 14 | **Describe the solution you'd like** 15 | A clear and concise description of what you want to happen. 16 | 17 | **Describe alternatives you've considered** 18 | A clear and concise description of any alternative solutions or features you've considered. 19 | 20 | **Additional context** 21 | Add any other context or screenshots about the feature request here. 22 | -------------------------------------------------------------------------------- /.github/actions/setup-builder/action.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: Prepare Rust Builder 19 | description: 'Prepare Rust Build Environment' 20 | inputs: 21 | rust-version: 22 | description: 'version of rust to install (e.g. 
stable)' 23 | required: true 24 | default: 'stable' 25 | runs: 26 | using: "composite" 27 | steps: 28 | - name: Install Build Dependencies 29 | shell: bash 30 | run: | 31 | apt-get update 32 | apt-get install -y protobuf-compiler 33 | - name: Setup Rust toolchain 34 | shell: bash 35 | run: | 36 | echo "Installing ${{ inputs.rust-version }}" 37 | rustup toolchain install ${{ inputs.rust-version }} 38 | rustup default ${{ inputs.rust-version }} 39 | rustup component add rustfmt 40 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | version: 2 19 | updates: 20 | - package-ecosystem: cargo 21 | directory: "/" 22 | schedule: 23 | interval: daily 24 | open-pull-requests-limit: 10 25 | target-branch: main 26 | labels: [auto-dependencies] 27 | ignore: 28 | # arrow and datafusion are bumped manually 29 | - dependency-name: "arrow*" 30 | update-types: ["version-update:semver-major"] 31 | - dependency-name: "datafusion*" 32 | update-types: ["version-update:semver-major"] 33 | - dependency-name: "sqlparser" 34 | update-types: ["version-update:semver-major"] 35 | - package-ecosystem: "github-actions" 36 | directory: "/" 37 | schedule: 38 | interval: "daily" 39 | open-pull-requests-limit: 10 40 | labels: [auto-dependencies] 41 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Which issue does this PR close? 2 | 3 | 6 | 7 | Closes #. 8 | 9 | # Rationale for this change 10 | 14 | 15 | # What changes are included in this PR? 16 | 19 | 20 | # Are there any user-facing changes? 21 | 24 | 25 | 28 | -------------------------------------------------------------------------------- /.github/workflows/cancel.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: Cancel stale runs 19 | 20 | on: 21 | workflow_run: 22 | # The name of another workflow (whichever one) that always runs on PRs 23 | workflows: ['Dev'] 24 | types: ['requested'] 25 | 26 | jobs: 27 | cancel-stale-workflow-runs: 28 | name: "Cancel stale workflow runs" 29 | runs-on: ubuntu-latest 30 | steps: 31 | # Unfortunately, we need to define a separate cancellation step for 32 | # each workflow where we want to cancel stale runs. 33 | - uses: potiuk/cancel-workflow-runs@master 34 | name: "Cancel stale Dev runs" 35 | with: 36 | cancelMode: allDuplicates 37 | token: ${{ secrets.GITHUB_TOKEN }} 38 | workflowFileName: dev.yml 39 | skipEventTypes: '["push", "schedule"]' 40 | - uses: potiuk/cancel-workflow-runs@master 41 | name: "Cancel stale Rust runs" 42 | with: 43 | cancelMode: allDuplicates 44 | token: ${{ secrets.GITHUB_TOKEN }} 45 | workflowFileName: rust.yml 46 | skipEventTypes: '["push", "schedule"]' 47 | -------------------------------------------------------------------------------- /.github/workflows/dev_pr.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: Labeler 19 | 20 | on: 21 | pull_request_target: 22 | types: 23 | - opened 24 | - edited 25 | - synchronize 26 | 27 | jobs: 28 | process: 29 | name: Process 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - name: Assign GitHub labels 35 | if: | 36 | github.event_name == 'pull_request_target' && 37 | (github.event.action == 'opened' || 38 | github.event.action == 'synchronize') 39 | uses: actions/labeler@4.1.0 40 | with: 41 | repo-token: ${{ secrets.GITHUB_TOKEN }} 42 | configuration-path: .github/workflows/dev_pr/labeler.yml 43 | sync-labels: true 44 | 45 | # TODO: Enable this when eps1lon/actions-label-merge-conflict is available. 46 | # - name: Checks if PR needs rebase 47 | # if: | 48 | # github.event_name == 'push' || 49 | # (github.event_name == 'pull_request_target' && 50 | # (github.event.action == 'opened' || 51 | # github.event.action == 'synchronize')) 52 | # uses: eps1lon/actions-label-merge-conflict@releases/2.x 53 | # with: 54 | # dirtyLabel: "needs-rebase" 55 | # repoToken: "${{ secrets.GITHUB_TOKEN }}" 56 | -------------------------------------------------------------------------------- /.github/workflows/dev_pr/labeler.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | python: 19 | - python/**/* 20 | 21 | development-process: 22 | - dev/**.* 23 | - .github/**.* 24 | - ci/**.* 25 | - .asf.yaml 26 | 27 | documentation: 28 | - docs/**.* 29 | - README.md 30 | - ./**/README.md 31 | - DEVELOPERS.md 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | apache-rat-*.jar 19 | arrow-src.tar 20 | arrow-src.tar.gz 21 | CHANGELOG.md.bak 22 | 23 | # Compiled source 24 | *.a 25 | *.dll 26 | *.o 27 | *.py[ocd] 28 | *.so 29 | *.so.* 30 | *.bundle 31 | *.dylib 32 | .build_cache_dir 33 | dependency-reduced-pom.xml 34 | MANIFEST 35 | compile_commands.json 36 | build.ninja 37 | 38 | # Generated Visual Studio files 39 | *.vcxproj 40 | *.vcxproj.* 41 | *.sln 42 | *.iml 43 | 44 | # Linux perf sample data 45 | perf.data 46 | perf.data.old 47 | 48 | cpp/.idea/ 49 | .clangd/ 50 | cpp/.clangd/ 51 | cpp/apidoc/xml/ 52 | docs/example.gz 53 | docs/example1.dat 54 | docs/example3.dat 55 | python/.eggs/ 56 | python/doc/ 57 | # Egg metadata 58 | *.egg-info 59 | 60 | .vscode 61 | .idea/ 62 | .pytest_cache/ 63 | pkgs 64 | docker_cache 65 | .gdb_history 66 | *.orig 67 | .*.swp 68 | .*.swo 69 | 70 | site/ 71 | 72 | # R files 73 | **/.Rproj.user 74 | **/*.Rcheck/ 75 | **/.Rhistory 76 | .Rproj.user 77 | 78 | # macOS 79 | cpp/Brewfile.lock.json 80 | .DS_Store 81 | 82 | # docker volumes used for caching 83 | .docker 84 | 85 | # Rust 86 | target 87 | Cargo.lock 88 | !ballista-cli/Cargo.lock 89 | 90 | rusty-tags.vi 91 | .history 92 | .flatbuffers/ 93 | 94 | .vscode 95 | venv/* 96 | .venv 97 | 98 | # apache release artifacts 99 | dev/dist 100 | 101 | # logs 102 | logs/ 103 | 104 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Code of Conduct 21 | 22 | - [Code of Conduct for The Apache Software Foundation][1] 23 | 24 | [1]: https://www.apache.org/foundation/policies/conduct.html 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Ballista Python Bindings (PyBallista) 21 | 22 | PyBallista is now located within the main Ballista repo
[here](https://github.com/apache/arrow-ballista/tree/main/python). 23 | 24 | The original README is [here](OLDREADME.md). 25 | -------------------------------------------------------------------------------- /ballista/functions.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | 19 | from ._internal import functions 20 | 21 | 22 | def __getattr__(name): 23 | return getattr(functions, name) 24 | -------------------------------------------------------------------------------- /ballista/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | -------------------------------------------------------------------------------- /ballista/tests/test_imports.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | import pytest 19 | 20 | import ballista 21 | from ballista import ( 22 | AggregateUDF, 23 | BallistaContext, 24 | DataFrame, 25 | Expression, 26 | ScalarUDF, 27 | functions, 28 | ) 29 | 30 | 31 | def test_import_ballista(): 32 | assert ballista.__name__ == "ballista" 33 | 34 | 35 | def test_class_module_is_ballista(): 36 | for klass in [ 37 | BallistaContext, 38 | Expression, 39 | DataFrame, 40 | ScalarUDF, 41 | AggregateUDF, 42 | ]: 43 | assert klass.__module__ == "ballista" 44 | 45 | 46 | def test_import_from_functions_submodule(): 47 | from ballista.functions import abs, sin # noqa 48 | 49 | assert functions.abs is abs 50 | assert functions.sin is sin 51 | 52 | msg = "cannot import name 'foobar' from 'ballista.functions'" 53 | with pytest.raises(ImportError, match=msg): 54 | from ballista.functions import foobar # noqa 55 | 56 | 57 | def test_classes_are_inheritable(): 58 | class MyExecContext(BallistaContext): 59 | pass 60 | 61 | class MyExpression(Expression): 62 | pass 63 | 64 | class MyDataFrame(DataFrame): 65 | pass 66 | -------------------------------------------------------------------------------- /ci/conda_env_archery.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # cli 19 | click 20 | 21 | # bot, crossbow 22 | github3.py 23 | jinja2 24 | jira 25 | pygit2 26 | pygithub 27 | ruamel.yaml 28 | setuptools_scm 29 | toolz 30 | 31 | # benchmark 32 | pandas 33 | 34 | # docker 35 | python-dotenv 36 | #ruamel.yaml 37 | 38 | # release 39 | gitpython 40 | #jinja2 41 | #jira 42 | semver 43 | -------------------------------------------------------------------------------- /ci/conda_env_cpp.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | aws-sdk-cpp 19 | benchmark=1.5.2 20 | boost-cpp>=1.68.0 21 | brotli 22 | bzip2 23 | c-ares 24 | cmake 25 | gflags 26 | glog 27 | gmock>=1.10.0 28 | grpc-cpp>=1.27.3 29 | gtest=1.10.0 30 | libprotobuf 31 | libutf8proc 32 | lz4-c 33 | make 34 | ninja 35 | pkg-config 36 | python 37 | rapidjson 38 | re2 39 | snappy 40 | thrift-cpp>=0.11.0 41 | zlib 42 | zstd 43 | -------------------------------------------------------------------------------- /ci/conda_env_crossbow.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | click 19 | github3.py 20 | jinja2 21 | jira 22 | pygit2 23 | ruamel.yaml 24 | setuptools_scm 25 | toolz 26 | -------------------------------------------------------------------------------- /ci/conda_env_gandiva.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | clang=11 19 | llvmdev=11 20 | -------------------------------------------------------------------------------- /ci/conda_env_gandiva_win.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | # llvmdev=9 or later require Visual Studio 2017 19 | clangdev=8 20 | llvmdev=8 21 | -------------------------------------------------------------------------------- /ci/conda_env_python.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # don't add pandas here, because it is not a mandatory test dependency 19 | boto3 # not a direct dependency of s3fs, but needed for our s3fs fixture 20 | cffi 21 | cython 22 | cloudpickle 23 | fsspec 24 | hypothesis 25 | numpy>=1.16.6 26 | pytest 27 | pytest-faulthandler 28 | pytest-lazy-fixture 29 | pytz 30 | s3fs>=0.4 31 | setuptools 32 | setuptools_scm 33 | -------------------------------------------------------------------------------- /ci/conda_env_r.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | r-assertthat 19 | r-base 20 | r-bit64 21 | r-dplyr 22 | r-purrr 23 | r-r6 24 | r-cpp11 25 | r-rlang 26 | r-tidyselect 27 | r-vctrs 28 | # Test/"Suggests" dependencies 29 | pandoc 30 | r-covr 31 | r-hms 32 | r-lubridate 33 | r-rcmdcheck 34 | r-reticulate 35 | r-rmarkdown 36 | r-testthat 37 | r-tibble 38 | -------------------------------------------------------------------------------- /ci/conda_env_sphinx.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | # Requirements for building the documentation 19 | breathe 20 | doxygen 21 | ipython 22 | # Pinned per ARROW-9693 23 | sphinx=3.1.2 24 | pydata-sphinx-theme 25 | -------------------------------------------------------------------------------- /ci/conda_env_unix.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # conda package dependencies specific to Unix-like environments (Linux and macOS) 19 | 20 | autoconf 21 | ccache 22 | orc 23 | pkg-config 24 | -------------------------------------------------------------------------------- /ci/docker/conda-cpp.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | ARG repo 19 | ARG arch 20 | FROM ${repo}:${arch}-conda 21 | 22 | # install the required conda packages into the test environment 23 | COPY ci/conda_env_cpp.yml \ 24 | ci/conda_env_gandiva.yml \ 25 | /arrow/ci/ 26 | RUN conda install \ 27 | --file arrow/ci/conda_env_cpp.yml \ 28 | --file arrow/ci/conda_env_gandiva.yml \ 29 | compilers \ 30 | doxygen \ 31 | gdb \ 32 | valgrind && \ 33 | conda clean --all 34 | 35 | ENV ARROW_BUILD_TESTS=ON \ 36 | ARROW_DATASET=ON \ 37 | ARROW_DEPENDENCY_SOURCE=CONDA \ 38 | ARROW_FLIGHT=ON \ 39 | ARROW_GANDIVA=ON \ 40 | ARROW_HOME=$CONDA_PREFIX \ 41 | ARROW_ORC=ON \ 42 | ARROW_PARQUET=ON \ 43 | ARROW_PLASMA=ON \ 44 | ARROW_S3=ON \ 45 | ARROW_USE_CCACHE=ON \ 46 | ARROW_WITH_BROTLI=ON \ 47 | ARROW_WITH_BZ2=ON \ 48 | ARROW_WITH_LZ4=ON \ 49 | ARROW_WITH_SNAPPY=ON \ 50 | ARROW_WITH_ZLIB=ON \ 51 | ARROW_WITH_ZSTD=ON \ 52 | PARQUET_BUILD_EXAMPLES=ON \ 53 | PARQUET_BUILD_EXECUTABLES=ON \ 54 | PARQUET_HOME=$CONDA_PREFIX 55 | -------------------------------------------------------------------------------- /ci/docker/conda-python-dask.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | ARG repo 19 | ARG arch=amd64 20 | ARG python=3.6 21 | FROM ${repo}:${arch}-conda-python-${python} 22 | 23 | ARG dask=latest 24 | COPY ci/scripts/install_dask.sh /arrow/ci/scripts/ 25 | RUN /arrow/ci/scripts/install_dask.sh ${dask} -------------------------------------------------------------------------------- /ci/docker/conda-python-hdfs.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | ARG repo 19 | ARG arch=amd64 20 | ARG python=3.6 21 | FROM ${repo}:${arch}-conda-python-${python} 22 | 23 | ARG jdk=8 24 | ARG maven=3.5 25 | RUN conda install -q \ 26 | maven=${maven} \ 27 | openjdk=${jdk} \ 28 | pandas && \ 29 | conda clean --all 30 | 31 | # installing libhdfs (JNI) 32 | ARG hdfs=3.2.1 33 | ENV HADOOP_HOME=/opt/hadoop-${hdfs} \ 34 | HADOOP_OPTS=-Djava.library.path=/opt/hadoop-${hdfs}/lib/native \ 35 | PATH=$PATH:/opt/hadoop-${hdfs}/bin:/opt/hadoop-${hdfs}/sbin 36 | COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/ 37 | RUN /arrow/ci/scripts/util_download_apache.sh \ 38 | "hadoop/common/hadoop-${hdfs}/hadoop-${hdfs}.tar.gz" /opt 39 | 40 | COPY ci/etc/hdfs-site.xml $HADOOP_HOME/etc/hadoop/ 41 | 42 | # build cpp with tests 43 | ENV CC=gcc \ 44 | CXX=g++ \ 45 | ARROW_FLIGHT=OFF \ 46 | ARROW_GANDIVA=OFF \ 47 | ARROW_PLASMA=OFF \ 48 | ARROW_PARQUET=ON \ 49 | ARROW_ORC=OFF \ 50 | ARROW_HDFS=ON \ 51 | ARROW_PYTHON=ON \ 52 | ARROW_BUILD_TESTS=ON 53 | -------------------------------------------------------------------------------- /ci/docker/conda-python-jpype.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | ARG repo 19 | ARG arch=amd64 20 | ARG python=3.6 21 | FROM ${repo}:${arch}-conda-python-${python} 22 | 23 | ARG jdk=11 24 | ARG maven=3.6 25 | RUN conda install -q \ 26 | maven=${maven} \ 27 | openjdk=${jdk} \ 28 | jpype1 && \ 29 | conda clean --all 30 | -------------------------------------------------------------------------------- /ci/docker/conda-python-kartothek.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | ARG repo 19 | ARG arch=amd64 20 | ARG python=3.6 21 | FROM ${repo}:${arch}-conda-python-${python} 22 | 23 | # install kartothek dependencies from conda-forge 24 | RUN conda install -c conda-forge -q \ 25 | attrs \ 26 | click \ 27 | cloudpickle \ 28 | dask \ 29 | decorator \ 30 | freezegun \ 31 | msgpack-python \ 32 | prompt-toolkit \ 33 | pytest-mock \ 34 | pytest-xdist \ 35 | pyyaml \ 36 | simplejson \ 37 | simplekv \ 38 | storefact \ 39 | toolz \ 40 | urlquote \ 41 | zstandard && \ 42 | conda clean --all 43 | 44 | ARG kartothek=latest 45 | COPY ci/scripts/install_kartothek.sh /arrow/ci/scripts/ 46 | RUN /arrow/ci/scripts/install_kartothek.sh ${kartothek} /kartothek 47 | -------------------------------------------------------------------------------- /ci/docker/conda-python-pandas.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | ARG repo 19 | ARG arch=amd64 20 | ARG python=3.6 21 | FROM ${repo}:${arch}-conda-python-${python} 22 | 23 | ARG pandas=latest 24 | ARG numpy=latest 25 | COPY ci/scripts/install_pandas.sh /arrow/ci/scripts/ 26 | RUN conda uninstall -q -y numpy && \ 27 | /arrow/ci/scripts/install_pandas.sh ${pandas} ${numpy} 28 | -------------------------------------------------------------------------------- /ci/docker/conda-python-spark.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | ARG repo 19 | ARG arch=amd64 20 | ARG python=3.6 21 | FROM ${repo}:${arch}-conda-python-${python} 22 | 23 | ARG jdk=8 24 | ARG maven=3.5 25 | 26 | RUN conda install -q \ 27 | openjdk=${jdk} \ 28 | maven=${maven} \ 29 | pandas && \ 30 | conda clean --all 31 | 32 | # installing specific version of spark 33 | ARG spark=master 34 | COPY ci/scripts/install_spark.sh /arrow/ci/scripts/ 35 | RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark 36 | 37 | # build cpp with tests 38 | ENV CC=gcc \ 39 | CXX=g++ \ 40 | ARROW_PYTHON=ON \ 41 | ARROW_HDFS=ON \ 42 | ARROW_BUILD_TESTS=OFF \ 43 | SPARK_VERSION=${spark} 44 | -------------------------------------------------------------------------------- /ci/docker/conda-python-turbodbc.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | ARG repo 19 | ARG arch=amd64 20 | ARG python=3.6 21 | FROM ${repo}:${arch}-conda-python-${python} 22 | 23 | RUN export DEBIAN_FRONTEND=noninteractive && \ 24 | apt-get update -y -q && \ 25 | apt-get install -y -q --no-install-recommends \ 26 | odbc-postgresql \ 27 | postgresql \ 28 | sudo && \ 29 | apt-get clean && \ 30 | rm -rf /var/lib/apt/lists/* 31 | 32 | # install turbodbc dependencies from conda-forge 33 | RUN conda install -c conda-forge -q\ 34 | pybind11 \ 35 | pytest-cov \ 36 | mock \ 37 | unixodbc && \ 38 | conda clean --all 39 | 40 | RUN service postgresql start && \ 41 | sudo -u postgres psql -U postgres -c \ 42 | "CREATE DATABASE test_db;" && \ 43 | sudo -u postgres psql -U postgres -c \ 44 | "ALTER USER postgres WITH PASSWORD 'password';" 45 | 46 | ARG turbodbc=latest 47 | COPY ci/scripts/install_turbodbc.sh /arrow/ci/scripts/ 48 | RUN /arrow/ci/scripts/install_turbodbc.sh ${turbodbc} /turbodbc 49 | 50 | ENV TURBODBC_TEST_CONFIGURATION_FILES "query_fixtures_postgresql.json" 51 | -------------------------------------------------------------------------------- /ci/docker/conda-python.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | ARG repo 19 | ARG arch 20 | FROM ${repo}:${arch}-conda-cpp 21 | 22 | # install python specific packages 23 | ARG python=3.6 24 | COPY ci/conda_env_python.yml /arrow/ci/ 25 | RUN conda install -q \ 26 | --file arrow/ci/conda_env_python.yml \ 27 | $([ "$python" == "3.6" -o "$python" == "3.7" ] && echo "pickle5") \ 28 | python=${python} \ 29 | nomkl && \ 30 | conda clean --all 31 | 32 | ENV ARROW_PYTHON=ON \ 33 | ARROW_BUILD_STATIC=OFF \ 34 | ARROW_BUILD_TESTS=OFF \ 35 | ARROW_BUILD_UTILITIES=OFF \ 36 | ARROW_TENSORFLOW=ON \ 37 | ARROW_USE_GLOG=OFF 38 | -------------------------------------------------------------------------------- /ci/docker/conda.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | ARG arch=amd64 19 | FROM ${arch}/ubuntu:18.04 20 | 21 | # arch is unset after the FROM statement, so need to define it again 22 | ARG arch=amd64 23 | ARG prefix=/opt/conda 24 | 25 | # install build essentials 26 | RUN export DEBIAN_FRONTEND=noninteractive && \ 27 | apt-get update -y -q && \ 28 | apt-get install -y -q wget tzdata libc6-dbg \ 29 | && apt-get clean \ 30 | && rm -rf /var/lib/apt/lists/* 31 | 32 | ENV PATH=${prefix}/bin:$PATH 33 | # install conda and minio 34 | COPY ci/scripts/install_conda.sh \ 35 | ci/scripts/install_minio.sh \ 36 | /arrow/ci/scripts/ 37 | RUN /arrow/ci/scripts/install_conda.sh ${arch} linux latest ${prefix} 38 | RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest ${prefix} 39 | 40 | # create a conda environment 41 | ADD ci/conda_env_unix.yml /arrow/ci/ 42 | RUN conda create -n arrow --file arrow/ci/conda_env_unix.yml git && \ 43 | conda clean --all 44 | 45 | # activate the created environment by default 46 | RUN echo "conda activate arrow" >> ~/.profile 47 | ENV CONDA_PREFIX=${prefix}/envs/arrow 48 | 49 | # use login shell to activate arrow environment in the RUN commands 50 | SHELL [ "/bin/bash", "-c", "-l" ] 51 | 52 | # use login shell when running the container 53 | ENTRYPOINT [ "/bin/bash", "-c", "-l" ] 54 | -------------------------------------------------------------------------------- /ci/docker/debian-10-go.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License.
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | ARG arch=amd64 19 | ARG go=1.15 20 | FROM ${arch}/golang:${go} 21 | 22 | # TODO(kszucs): 23 | # 1. add the files required to install the dependencies to .dockerignore 24 | # 2. copy these files to their appropriate path 25 | # 3. download and compile the dependencies 26 | -------------------------------------------------------------------------------- /ci/docker/debian-10-js.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | ARG arch=amd64 19 | ARG node=14 20 | FROM ${arch}/node:${node} 21 | 22 | ENV NODE_NO_WARNINGS=1 23 | 24 | # TODO(kszucs): 25 | # 1. 
add the files required to install the dependencies to .dockerignore 26 | # 2. copy these files to their appropriate path 27 | # 3. download and compile the dependencies 28 | -------------------------------------------------------------------------------- /ci/docker/debian-9-java.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | ARG arch=amd64 19 | ARG jdk=8 20 | ARG maven=3.5.4 21 | FROM ${arch}/maven:${maven}-jdk-${jdk} 22 | 23 | ENV ARROW_JAVA_SHADE_FLATBUFS=ON 24 | 25 | # TODO(kszucs): 26 | # 1. add the files required to install the dependencies to .dockerignore 27 | # 2. copy these files to their appropriate path 28 | # 3. download and compile the dependencies 29 | -------------------------------------------------------------------------------- /ci/docker/linux-apt-python-3.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | ARG base 19 | FROM ${base} 20 | 21 | RUN apt-get update -y -q && \ 22 | apt-get install -y -q \ 23 | python3 \ 24 | python3-pip \ 25 | python3-dev && \ 26 | apt-get clean && \ 27 | rm -rf /var/lib/apt/lists/* 28 | 29 | RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ 30 | ln -s /usr/bin/pip3 /usr/local/bin/pip 31 | 32 | RUN pip install -U pip setuptools 33 | 34 | COPY python/requirements-build.txt \ 35 | python/requirements-test.txt \ 36 | /arrow/python/ 37 | 38 | RUN pip install \ 39 | -r arrow/python/requirements-build.txt \ 40 | -r arrow/python/requirements-test.txt 41 | 42 | ENV ARROW_PYTHON=ON \ 43 | ARROW_BUILD_STATIC=OFF \ 44 | ARROW_BUILD_TESTS=OFF \ 45 | ARROW_BUILD_UTILITIES=OFF \ 46 | ARROW_USE_GLOG=OFF \ 47 | -------------------------------------------------------------------------------- /ci/docker/linux-apt-ruby.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # depends on a C GLib image 19 | ARG base 20 | FROM ${base} 21 | 22 | COPY ruby/ /arrow/ruby/ 23 | RUN bundle install --gemfile /arrow/ruby/Gemfile 24 | RUN \ 25 | for package in /arrow/ruby/*; do \ 26 | bundle install --gemfile ${package}/Gemfile; \ 27 | done 28 | -------------------------------------------------------------------------------- /ci/docker/linux-dnf-python-3.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | ARG base 19 | FROM ${base} 20 | 21 | RUN dnf install -y \ 22 | python3 \ 23 | python3-pip \ 24 | python3-devel 25 | 26 | RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ 27 | ln -s /usr/bin/pip3 /usr/local/bin/pip 28 | 29 | COPY python/requirements-build.txt \ 30 | python/requirements-test.txt \ 31 | /arrow/python/ 32 | 33 | RUN pip install \ 34 | -r arrow/python/requirements-build.txt \ 35 | -r arrow/python/requirements-test.txt 36 | 37 | ENV ARROW_PYTHON=ON \ 38 | ARROW_BUILD_STATIC=OFF \ 39 | ARROW_BUILD_TESTS=OFF \ 40 | ARROW_BUILD_UTILITIES=OFF \ 41 | ARROW_USE_GLOG=OFF \ 42 | -------------------------------------------------------------------------------- /ci/docker/linux-r.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | # General purpose Dockerfile to take a Docker image containing R 19 | # and install Arrow R package dependencies 20 | 21 | ARG base 22 | FROM ${base} 23 | 24 | ARG r_bin=R 25 | ENV R_BIN=${r_bin} 26 | 27 | ARG r_dev=FALSE 28 | ENV ARROW_R_DEV=${r_dev} 29 | 30 | ARG devtoolset_version=-1 31 | ENV DEVTOOLSET_VERSION=${devtoolset_version} 32 | 33 | # Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its Dockerfile) 34 | ENV PATH "${RPREFIX}/bin:${PATH}" 35 | 36 | # Patch up some of the docker images 37 | COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/ 38 | COPY ci/etc/rprofile /arrow/ci/etc/ 39 | COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ 40 | RUN /arrow/ci/scripts/r_docker_configure.sh 41 | 42 | COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ 43 | COPY r/DESCRIPTION /arrow/r/ 44 | RUN /arrow/ci/scripts/r_deps.sh /arrow 45 | -------------------------------------------------------------------------------- /ci/docker/python-sdist.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | FROM amd64/ubuntu:20.04 19 | 20 | SHELL ["/bin/bash", "-o", "pipefail", "-c"] 21 | 22 | RUN echo "debconf debconf/frontend select Noninteractive" | \ 23 | debconf-set-selections 24 | 25 | RUN apt-get update -y -q && \ 26 | apt-get install -y -q --no-install-recommends \ 27 | git \ 28 | python3-pip && \ 29 | apt-get clean && \ 30 | rm -rf /var/lib/apt/lists* 31 | 32 | COPY python/requirements-build.txt \ 33 | /arrow/python/requirements-build.txt 34 | RUN pip3 install --requirement /arrow/python/requirements-build.txt 35 | 36 | ENV PYTHON=/usr/bin/python3 37 | -------------------------------------------------------------------------------- /ci/docker/python-wheel-manylinux-test.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | ARG arch 19 | ARG python 20 | FROM ${arch}/python:${python} 21 | 22 | # RUN pip install --upgrade pip 23 | 24 | # pandas doesn't provide wheel for aarch64 yet, so cache the compiled 25 | # test dependencies in a docker image 26 | COPY python/requirements-wheel-test.txt /arrow/python/ 27 | RUN pip install -r /arrow/python/requirements-wheel-test.txt 28 | -------------------------------------------------------------------------------- /ci/docker/ubuntu-18.04-csharp.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | ARG platform=bionic 19 | ARG dotnet=3.1 20 | FROM mcr.microsoft.com/dotnet/core/sdk:${dotnet}-${platform} 21 | 22 | RUN dotnet tool install --tool-path /usr/local/bin sourcelink 23 | -------------------------------------------------------------------------------- /ci/etc/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | dfs.replication 22 | 2 23 | 24 | 25 | dfs.datanode.data.dir 26 | file:///data/dfs/data 27 | 28 | 29 | dfs.namenode.name.dir 30 | file:///data/dfs/name 31 | 32 | 33 | dfs.namenode.checkpoint.dir 34 | file:///data/dfs/namesecondary 35 | 36 | 37 | dfs.namenode.datanode.registration.ip-hostname-check 38 | false 39 | 40 | 41 | dfs.default.replica 42 | 1 43 | 44 | 45 | dfs.support.append 46 | true 47 | 48 | 49 | dfs.client.block.write.replace-datanode-on-failure.enable 50 | false 51 | 52 | 53 | -------------------------------------------------------------------------------- /ci/etc/rprofile: -------------------------------------------------------------------------------- 1 | local({ 2 | .pick_cran <- function() { 3 | # Return a CRAN repo URL, preferring RSPM binaries if available for this OS 4 | rspm_template <- "https://packagemanager.rstudio.com/cran/__linux__/%s/latest" 5 | supported_os <- c("focal", "xenial", "bionic", "centos7", "centos8", "opensuse42", "opensuse15", "opensuse152") 6 | 7 | if (nzchar(Sys.which("lsb_release"))) { 8 | os <- tolower(system("lsb_release -cs", intern = TRUE)) 9 | if (os %in% supported_os) { 10 | return(sprintf(rspm_template, os)) 11 | } 12 | } 13 | if (file.exists("/etc/os-release")) { 14 | os_release <- readLines("/etc/os-release") 15 | vals <- sub("^.*=(.*)$", "\\1", os_release) 16 | os <- intersect(vals, supported_os) 17 | if (length(os)) { 18 | # e.g. 
"bionic" 19 | return(sprintf(rspm_template, os)) 20 | } else { 21 | names(vals) <- sub("^(.*)=.*$", "\\1", os_release) 22 | if (vals["ID"] == "opensuse") { 23 | version <- sub('^"?([0-9]+).*"?.*$', "\\1", vals["VERSION_ID"]) 24 | os <- paste0("opensuse", version) 25 | if (os %in% supported_os) { 26 | return(sprintf(rspm_template, os)) 27 | } 28 | } 29 | } 30 | } 31 | if (file.exists("/etc/system-release")) { 32 | # Something like "CentOS Linux release 7.7.1908 (Core)" 33 | system_release <- tolower(utils::head(readLines("/etc/system-release"), 1)) 34 | # Extract from that the distro and the major version number 35 | os <- sub("^([a-z]+) .* ([0-9]+).*$", "\\1\\2", system_release) 36 | if (os %in% supported_os) { 37 | return(sprintf(rspm_template, os)) 38 | } 39 | } 40 | 41 | return("https://cloud.r-project.org") 42 | } 43 | 44 | options( 45 | Ncpus = parallel::detectCores(), 46 | repos = tryCatch(.pick_cran(), error = function(e) "https://cloud.r-project.org"), 47 | HTTPUserAgent = sprintf( 48 | 'R/%s R (%s)', 49 | getRversion(), 50 | paste(getRversion(), R.version$platform, R.version$arch, R.version$os) 51 | ) 52 | ) 53 | }) 54 | -------------------------------------------------------------------------------- /ci/scripts/c_glib_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | 22 | source_dir=${1}/c_glib 23 | build_dir=${2}/c_glib 24 | : ${ARROW_GLIB_GTK_DOC:=false} 25 | : ${ARROW_GLIB_DEVELOPMENT_MODE:=false} 26 | 27 | export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig 28 | 29 | export CFLAGS="-DARROW_NO_DEPRECATED_API" 30 | export CXXFLAGS="-DARROW_NO_DEPRECATED_API" 31 | 32 | mkdir -p ${build_dir} 33 | 34 | # Build with Meson 35 | meson --prefix=$ARROW_HOME \ 36 | --libdir=lib \ 37 | -Ddevelopment_mode=${ARROW_GLIB_DEVELOPMENT_MODE} \ 38 | -Dgtk_doc=${ARROW_GLIB_GTK_DOC} \ 39 | ${build_dir} \ 40 | ${source_dir} 41 | 42 | pushd ${build_dir} 43 | ninja 44 | ninja install 45 | popd 46 | -------------------------------------------------------------------------------- /ci/scripts/c_glib_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | 22 | source_dir=${1}/c_glib 23 | build_dir=${2}/c_glib 24 | 25 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} 26 | export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig 27 | export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 28 | 29 | pushd ${source_dir} 30 | 31 | ruby test/run-test.rb 32 | 33 | if [[ "$(uname -s)" == "Linux" ]]; then 34 | # TODO(kszucs): on osx it fails to load 'lgi.corelgilua51' despite that lgi 35 | # was installed by luarocks 36 | pushd example/lua 37 | lua write-batch.lua 38 | lua read-batch.lua 39 | lua write-stream.lua 40 | lua read-stream.lua 41 | popd 42 | fi 43 | 44 | popd 45 | 46 | pushd ${build_dir} 47 | example/extension-type 48 | popd 49 | -------------------------------------------------------------------------------- /ci/scripts/ccache_setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -eux 21 | 22 | echo "ARROW_USE_CCACHE=ON" >> $GITHUB_ENV 23 | echo "CCACHE_COMPILERCHECK=content" >> $GITHUB_ENV 24 | echo "CCACHE_COMPRESS=1" >> $GITHUB_ENV 25 | echo "CCACHE_COMPRESSLEVEL=6" >> $GITHUB_ENV 26 | echo "CCACHE_MAXSIZE=500M" >> $GITHUB_ENV 27 | -------------------------------------------------------------------------------- /ci/scripts/csharp_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | set -ex 21 | 22 | source_dir=${1}/csharp 23 | 24 | pushd ${source_dir} 25 | dotnet build 26 | popd 27 | -------------------------------------------------------------------------------- /ci/scripts/csharp_pack.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -eux 21 | 22 | source_dir=${1}/csharp 23 | 24 | pushd ${source_dir} 25 | dotnet pack -c Release 26 | popd 27 | -------------------------------------------------------------------------------- /ci/scripts/csharp_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | 22 | source_dir=${1}/csharp 23 | 24 | pushd ${source_dir} 25 | dotnet test 26 | for pdb in artifacts/Apache.Arrow/*/*/Apache.Arrow.pdb; do 27 | sourcelink test ${pdb} 28 | done 29 | popd 30 | -------------------------------------------------------------------------------- /ci/scripts/docs_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -ex 20 | 21 | arrow_dir=${1} 22 | build_dir=${2}/docs 23 | 24 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} 25 | export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig:${PKG_CONFIG_PATH} 26 | export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 27 | export CFLAGS="-DARROW_NO_DEPRECATED_API" 28 | export CXXFLAGS="-DARROW_NO_DEPRECATED_API" 29 | 30 | ncpus=$(python3 -c "import os; print(os.cpu_count())") 31 | 32 | # Sphinx docs 33 | sphinx-build -b html -j ${ncpus} ${arrow_dir}/docs/source ${build_dir} 34 | 35 | # C++ - original doxygen 36 | # rsync -a ${arrow_dir}/cpp/apidoc/ ${build_dir}/cpp 37 | 38 | # R 39 | rsync -a ${arrow_dir}/r/docs/ ${build_dir}/r 40 | 41 | # C GLib 42 | rsync -a ${ARROW_HOME}/share/gtk-doc/html/ ${build_dir}/c_glib 43 | 44 | # Java 45 | rsync -a ${arrow_dir}/java/target/site/apidocs/ ${build_dir}/java/reference 46 | 47 | # Javascript 48 | rsync -a ${arrow_dir}/js/doc/ ${build_dir}/js 49 | -------------------------------------------------------------------------------- /ci/scripts/go_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. 
See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | 22 | source_dir=${1}/go 23 | 24 | pushd ${source_dir}/arrow 25 | 26 | go get -d -t -v ./... 27 | go install -v ./... 28 | 29 | popd 30 | 31 | pushd ${source_dir}/parquet 32 | 33 | go get -d -t -v ./... 34 | go install -v ./... 35 | 36 | popd 37 | -------------------------------------------------------------------------------- /ci/scripts/go_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | 22 | source_dir=${1}/go 23 | 24 | pushd ${source_dir}/arrow 25 | 26 | for d in $(go list ./... | grep -v vendor); do 27 | go test $d 28 | done 29 | 30 | popd 31 | 32 | pushd ${source_dir}/parquet 33 | 34 | for d in $(go list ./... 
| grep -v vendor); do 35 | go test $d 36 | done 37 | 38 | popd 39 | -------------------------------------------------------------------------------- /ci/scripts/install_conda.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -e 21 | 22 | declare -A archs 23 | archs=([amd64]=x86_64 24 | [arm32v7]=armv7l 25 | [ppc64le]=ppc64le 26 | [i386]=x86) 27 | 28 | declare -A platforms 29 | platforms=([windows]=Windows 30 | [macos]=MacOSX 31 | [linux]=Linux) 32 | 33 | if [ "$#" -ne 4 ]; then 34 | echo "Usage: $0 " 35 | exit 1 36 | elif [[ -z ${archs[$1]} ]]; then 37 | echo "Unexpected architecture: ${1}" 38 | exit 1 39 | elif [[ -z ${platforms[$2]} ]]; then 40 | echo "Unexpected platform: ${2}" 41 | exit 1 42 | fi 43 | 44 | arch=${archs[$1]} 45 | platform=${platforms[$2]} 46 | version=$3 47 | prefix=$4 48 | 49 | echo "Downloading Miniconda installer..." 
50 | wget -nv https://repo.continuum.io/miniconda/Miniconda3-${version}-${platform}-${arch}.sh -O /tmp/miniconda.sh 51 | bash /tmp/miniconda.sh -b -p ${prefix} 52 | rm /tmp/miniconda.sh 53 | 54 | # Like "conda init", but for POSIX sh rather than bash 55 | ln -s ${prefix}/etc/profile.d/conda.sh /etc/profile.d/conda.sh 56 | 57 | # Configure 58 | source /etc/profile.d/conda.sh 59 | conda config --add channels conda-forge 60 | conda config --set channel_priority strict 61 | conda config --set show_channel_urls True 62 | conda config --set remote_connect_timeout_secs 12 63 | 64 | # Update and clean 65 | conda update --all -y 66 | conda clean --all -y 67 | -------------------------------------------------------------------------------- /ci/scripts/install_dask.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | set -e 21 | 22 | if [ "$#" -ne 1 ]; then 23 | echo "Usage: $0 " 24 | exit 1 25 | fi 26 | 27 | dask=$1 28 | 29 | if [ "${dask}" = "master" ]; then 30 | pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe] 31 | elif [ "${dask}" = "latest" ]; then 32 | conda install -q dask 33 | else 34 | conda install -q dask=${dask} 35 | fi 36 | conda clean --all 37 | -------------------------------------------------------------------------------- /ci/scripts/install_iwyu.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | set -eu 20 | 21 | source_dir=${1:-/tmp/iwyu} 22 | install_prefix=${2:-/usr/local} 23 | clang_tools_version=${3:-8} 24 | 25 | iwyu_branch_name="clang_${clang_tools_version}" 26 | if [ ${clang_tools_version} -lt 10 ]; then 27 | iwyu_branch_name="${iwyu_branch_name}.0" 28 | fi 29 | 30 | git clone --single-branch --branch ${iwyu_branch_name} \ 31 | https://github.com/include-what-you-use/include-what-you-use.git ${source_dir} 32 | 33 | mkdir -p ${source_dir}/build 34 | pushd ${source_dir}/build 35 | 36 | # Build IWYU for current Clang 37 | export CC=clang-${clang_tools_version} 38 | export CXX=clang++-${clang_tools_version} 39 | 40 | cmake -DCMAKE_PREFIX_PATH=/usr/lib/llvm-${clang_tools_version} \ 41 | -DCMAKE_INSTALL_PREFIX=${install_prefix} \ 42 | ${source_dir} 43 | make -j4 44 | make install 45 | 46 | popd 47 | 48 | rm -rf ${source_dir} 49 | -------------------------------------------------------------------------------- /ci/scripts/install_kartothek.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | set -e 21 | 22 | if [ "$#" -ne 2 ]; then 23 | echo "Usage: $0 " 24 | exit 1 25 | fi 26 | 27 | kartothek=$1 28 | target=$2 29 | 30 | git clone --recurse-submodules https://github.com/JDASoftwareGroup/kartothek "${target}" 31 | if [ "${kartothek}" = "master" ]; then 32 | git -C "${target}" checkout master; 33 | elif [ "${kartothek}" = "latest" ]; then 34 | git -C "${target}" checkout $(git describe --tags); 35 | else 36 | git -C "${target}" checkout ${kartothek}; 37 | fi 38 | 39 | pushd "${target}" 40 | pip install --no-deps . 41 | popd 42 | -------------------------------------------------------------------------------- /ci/scripts/install_minio.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License.
19 | 20 | set -e 21 | 22 | declare -A archs 23 | archs=([amd64]=amd64 24 | [arm64v8]=arm64 25 | [arm32v7]=arm 26 | [s390x]=s390x) 27 | 28 | declare -A platforms 29 | platforms=([linux]=linux 30 | [macos]=darwin) 31 | 32 | arch=${archs[$1]} 33 | platform=${platforms[$2]} 34 | version=$3 35 | prefix=$4 36 | 37 | if [ "$#" -ne 4 ]; then 38 | echo "Usage: $0 " 39 | exit 1 40 | elif [[ -z ${arch} ]]; then 41 | echo "Unexpected architecture: ${1}" 42 | exit 1 43 | elif [[ -z ${platform} ]]; then 44 | echo "Unexpected platform: ${2}" 45 | exit 1 46 | elif [[ ${version} != "latest" ]]; then 47 | echo "Cannot fetch specific versions of minio, only latest is supported." 48 | exit 1 49 | fi 50 | 51 | wget -nv -P ${prefix}/bin https://dl.min.io/server/minio/release/${platform}-${arch}/minio 52 | chmod +x ${prefix}/bin/minio 53 | -------------------------------------------------------------------------------- /ci/scripts/install_osx_sdk.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | # When not building with Homebrew, make sure the macOS SDK matching 21 | # MACOSX_DEPLOYMENT_TARGET exists inside the active Xcode, downloading it and 22 | # updating the platform Info.plist if it is missing or OSX_FORCE_SDK_DOWNLOAD=1. 23 | 24 | set -ex 25 | 26 | if [ ${using_homebrew} != "yes" ]; then 27 | export MACOSX_DEPLOYMENT_TARGET="10.9" 28 | export CONDA_BUILD_SYSROOT="$(xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk" 29 | 30 | if [[ ! -d ${CONDA_BUILD_SYSROOT} || "$OSX_FORCE_SDK_DOWNLOAD" == "1" ]]; then 31 | echo "downloading ${MACOSX_DEPLOYMENT_TARGET} sdk" 32 | curl -L -O https://github.com/phracker/MacOSX-SDKs/releases/download/10.13/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz 33 | tar -xf MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz -C "$(dirname "$CONDA_BUILD_SYSROOT")" 34 | # set minimum sdk version to our target 35 | plutil -replace MinimumSDKVersion -string ${MACOSX_DEPLOYMENT_TARGET} $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist 36 | plutil -replace DTSDKName -string macosx${MACOSX_DEPLOYMENT_TARGET}internal $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist 37 | fi 38 | 39 | if [ -d "${CONDA_BUILD_SYSROOT}" ]; then 40 | echo "Found CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}" 41 | else 42 | echo "Missing CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}" 43 | exit 1 44 | fi 45 | fi 46 | -------------------------------------------------------------------------------- /ci/scripts/install_pandas.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License.
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # Install numpy and then pandas with pip. Each version argument may be 21 | # "nightly", "latest" or an exact version; pandas additionally accepts 22 | # "master" (installed from git). numpy defaults to "latest". 23 | 24 | set -e 25 | 26 | if [ "$#" -lt 1 ]; then 27 | echo "Usage: $0 PANDAS_VERSION [NUMPY_VERSION]" 28 | exit 1 29 | fi 30 | 31 | pandas=$1 32 | numpy=${2:-"latest"} 33 | 34 | if [ "${numpy}" = "nightly" ]; then 35 | pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy 36 | elif [ "${numpy}" = "latest" ]; then 37 | pip install numpy 38 | else 39 | pip install "numpy==${numpy}" 40 | fi 41 | 42 | if [ "${pandas}" = "master" ]; then 43 | pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation 44 | elif [ "${pandas}" = "nightly" ]; then 45 | pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas 46 | elif [ "${pandas}" = "latest" ]; then 47 | pip install pandas 48 | else 49 | pip install "pandas==${pandas}" 50 | fi 51 | -------------------------------------------------------------------------------- /ci/scripts/install_spark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License.
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # Clone Apache Spark into the target directory and check out the requested revision. 21 | 22 | set -e 23 | 24 | if [ "$#" -ne 2 ]; then 25 | echo "Usage: $0 SPARK_REVISION TARGET_DIRECTORY" 26 | exit 1 27 | fi 28 | 29 | spark=$1 30 | target=$2 31 | 32 | git clone https://github.com/apache/spark "${target}" 33 | git -C "${target}" checkout "${spark}" 34 | -------------------------------------------------------------------------------- /ci/scripts/install_turbodbc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License.
19 | 20 | set -e 21 | 22 | if [ "$#" -ne 2 ]; then 23 | echo "Usage: $0 " 24 | exit 1 25 | fi 26 | 27 | turbodbc=$1 28 | target=$2 29 | 30 | git clone --recurse-submodules https://github.com/blue-yonder/turbodbc "${target}" 31 | if [ "${turbodbc}" = "master" ]; then 32 | git -C "${target}" checkout master; 33 | elif [ "${turbodbc}" = "latest" ]; then 34 | git -C "${target}" checkout $(git describe --tags); 35 | else 36 | git -C "${target}" checkout ${turbodbc}; 37 | fi 38 | -------------------------------------------------------------------------------- /ci/scripts/integration_arrow.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | # Install archery from the Arrow checkout (first argument) and run 21 | # "archery integration" with all implementations and Flight enabled against 22 | # the gold-file directories listed below. 23 | 24 | set -ex 25 | 26 | arrow_dir=${1} 27 | source_dir=${1}/cpp 28 | build_dir=${2}/cpp 29 | 30 | gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration 31 | 32 | pip install -e "$arrow_dir/dev/archery" 33 | 34 | # the last argument must not end with a line continuation, otherwise whatever 35 | # line follows it would silently become part of this command 36 | archery integration --with-all --run-flight \ 37 | "--gold-dirs=$gold_dir/0.14.1" \ 38 | "--gold-dirs=$gold_dir/0.17.1" \ 39 | "--gold-dirs=$gold_dir/1.0.0-bigendian" \ 40 | "--gold-dirs=$gold_dir/1.0.0-littleendian" \ 41 | "--gold-dirs=$gold_dir/2.0.0-compression" \ 42 | "--gold-dirs=$gold_dir/4.0.0-shareddict" 43 | -------------------------------------------------------------------------------- /ci/scripts/integration_dask.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License.
19 | 20 | set -e # stop at the first failing import check or test run 21 | 22 | # check that optional pyarrow modules are available 23 | # because pytest would just skip the dask tests 24 | python -c "import pyarrow.orc" 25 | python -c "import pyarrow.parquet" 26 | 27 | # check that dask.dataframe is correctly installed 28 | python -c "import dask.dataframe" 29 | 30 | # TODO(kszucs): the following tests also use pyarrow 31 | # pytest -sv --pyargs dask.bytes.tests.test_s3 32 | # pytest -sv --pyargs dask.bytes.tests.test_hdfs 33 | # pytest -sv --pyargs dask.bytes.tests.test_local 34 | 35 | # skip failing pickle test, see https://github.com/dask/dask/issues/6374 36 | pytest -v --pyargs dask.dataframe.tests.test_dataframe -k "not test_dataframe_picklable" 37 | pytest -v --pyargs dask.dataframe.io.tests.test_orc 38 | # skip failing parquet tests, see https://github.com/dask/dask/issues/6243 39 | # test_illegal_column_name can be removed once next dask release is out 40 | # (https://github.com/dask/dask/pull/6378) 41 | pytest -v --pyargs dask.dataframe.io.tests.test_parquet \ 42 | -k "not test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_fails_by_default and not test_timeseries_nulls_in_schema and not test_illegal_column_name" 43 | -------------------------------------------------------------------------------- /ci/scripts/integration_hdfs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License.
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -e # stop at the first failing test binary or pytest run 21 | 22 | source_dir=${1}/cpp 23 | build_dir=${2}/cpp 24 | 25 | export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath --glob) 26 | export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop 27 | export LIBHDFS3_CONF=$HADOOP_CONF_DIR/hdfs-site.xml 28 | export ARROW_LIBHDFS3_DIR=$CONDA_PREFIX/lib 29 | 30 | libhdfs_dir=$HADOOP_HOME/lib/native 31 | hadoop_home=$HADOOP_HOME # saved so use_hadoop_home can restore it after use_libhdfs_dir unsets it 32 | 33 | function use_hadoop_home() { # environment with HADOOP_HOME set and ARROW_LIBHDFS_DIR unset 34 | unset ARROW_LIBHDFS_DIR 35 | export HADOOP_HOME=$hadoop_home 36 | } 37 | 38 | function use_libhdfs_dir() { # environment with ARROW_LIBHDFS_DIR set and HADOOP_HOME unset 39 | unset HADOOP_HOME 40 | export ARROW_LIBHDFS_DIR=$libhdfs_dir 41 | } 42 | 43 | # execute cpp tests 44 | export ARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON 45 | pushd ${build_dir} 46 | 47 | debug/arrow-io-hdfs-test 48 | debug/arrow-hdfs-test 49 | 50 | use_libhdfs_dir # rerun the same binaries with only ARROW_LIBHDFS_DIR set 51 | debug/arrow-io-hdfs-test 52 | debug/arrow-hdfs-test 53 | use_hadoop_home 54 | 55 | popd 56 | 57 | # cannot use --pyargs with custom arguments like --hdfs or --only-hdfs, because 58 | # pytest ignores them, see https://github.com/pytest-dev/pytest/issues/3517 59 | export PYARROW_TEST_HDFS=ON 60 | 61 | export PYARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON 62 | 63 | pytest -vs --pyargs pyarrow.tests.test_fs 64 | pytest -vs --pyargs pyarrow.tests.test_hdfs 65 | 66 | use_libhdfs_dir # rerun the Python tests with only ARROW_LIBHDFS_DIR set 67 | pytest -vs --pyargs pyarrow.tests.test_fs 68 | pytest -vs --pyargs pyarrow.tests.test_hdfs 69 | use_hadoop_home 70 | -------------------------------------------------------------------------------- /ci/scripts/integration_hiveserver2.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | set -e # abort on the first failing command 20 | 21 | arrow_dir=${1} 22 | source_dir=${1}/cpp 23 | build_dir=${2}/cpp 24 | 25 | ${arrow_dir}/ci/scripts/util_wait_for_it.sh impala:21050 -t 300 -s -- echo "impala is up" # NOTE(review): presumably waits up to 300 s for impala:21050 to accept connections - confirm against util_wait_for_it.sh 26 | 27 | pushd ${build_dir} 28 | 29 | # ninja hiveserver2-test 30 | debug/hiveserver2-test # run the prebuilt test binary from the debug build directory 31 | 32 | popd 33 | -------------------------------------------------------------------------------- /ci/scripts/integration_kartothek.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License.
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -e # abort as soon as an import check or the test run fails 21 | 22 | # check that optional pyarrow modules are available 23 | # because pytest would just skip the pyarrow tests 24 | python -c "import pyarrow.parquet" 25 | 26 | # check that kartothek is correctly installed 27 | python -c "import kartothek" 28 | 29 | pushd /kartothek # NOTE(review): assumes the kartothek checkout lives at /kartothek - confirm against the image that runs this script 30 | # See ARROW-12314, test_load_dataframes_columns_raises_missing skipped because of changed error message 31 | pytest -n0 --ignore tests/cli/test_query.py -k "not test_load_dataframes_columns_raises_missing" 32 | -------------------------------------------------------------------------------- /ci/scripts/integration_turbodbc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied.
See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # Configure, build and install turbodbc from the source directory (first 21 | # argument) with CMake/Ninja, then start PostgreSQL and run ctest. 22 | 23 | set -e 24 | 25 | source_dir=${1} 26 | build_dir=${2}/turbodbc 27 | 28 | # check that optional pyarrow modules are available 29 | # because pytest would just skip the pyarrow tests 30 | python -c "import pyarrow.orc" 31 | python -c "import pyarrow.parquet" 32 | 33 | mkdir -p "${build_dir}" 34 | pushd "${build_dir}" 35 | 36 | # every expansion is quoted so that a multi-flag CXXFLAGS (or a path containing 37 | # spaces) reaches cmake as one argument instead of being word-split 38 | cmake "-DCMAKE_INSTALL_PREFIX=${ARROW_HOME}" \ 39 | "-DCMAKE_CXX_FLAGS=${CXXFLAGS}" \ 40 | "-DPYTHON_EXECUTABLE=$(which python)" \ 41 | -GNinja \ 42 | "${source_dir}" 43 | ninja install 44 | 45 | # TODO(ARROW-5074) 46 | export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH}" 47 | export ODBCSYSINI="${source_dir}/travis/odbc/" 48 | 49 | service postgresql start 50 | ctest --output-on-failure 51 | 52 | popd 53 | -------------------------------------------------------------------------------- /ci/scripts/java_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License.
18 | 19 | set -ex # fail fast and echo every command 20 | 21 | arrow_dir=${1} 22 | source_dir=${1}/java 23 | cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug} # build type sub-directory defaults to "debug" 24 | 25 | # For JNI and Plasma tests 26 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} 27 | export PLASMA_STORE=${ARROW_HOME}/bin/plasma-store-server 28 | 29 | mvn="mvn -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" 30 | # Use `2 * ncores` threads 31 | mvn="${mvn} -T 2C" 32 | 33 | pushd ${source_dir} 34 | 35 | ${mvn} test # default Maven test run 36 | 37 | if [ "${ARROW_JNI}" = "ON" ]; then # extra run for the JNI-backed modules, only when ARROW_JNI=ON 38 | ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${cpp_build_dir} 39 | fi 40 | 41 | if [ "${ARROW_PLASMA}" = "ON" ]; then # PlasmaClientTest is launched directly with java, only when ARROW_PLASMA=ON 42 | pushd ${source_dir}/plasma 43 | java -cp target/test-classes:target/classes \ 44 | -Djava.library.path=${cpp_build_dir} \ 45 | org.apache.arrow.plasma.PlasmaClientTest 46 | popd 47 | fi 48 | 49 | popd 50 | -------------------------------------------------------------------------------- /ci/scripts/js_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License.
19 | 20 | set -ex # fail fast and echo every command 21 | 22 | source_dir=${1}/js 23 | with_docs=${2:-false} # optional second argument; docs are built only when it is "true" 24 | 25 | pushd ${source_dir} 26 | 27 | yarn --frozen-lockfile # install dependencies; fail instead of updating the lockfile 28 | # TODO(kszucs): linting should be moved to archery 29 | yarn lint:ci 30 | yarn build 31 | 32 | if [ "${with_docs}" == "true" ]; then 33 | yarn doc 34 | fi 35 | 36 | popd 37 | -------------------------------------------------------------------------------- /ci/scripts/js_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex # fail fast and echo every command 21 | 22 | source_dir=${1}/js # js/ directory under the first argument 23 | 24 | pushd ${source_dir} 25 | 26 | yarn lint 27 | yarn test 28 | 29 | popd 30 | -------------------------------------------------------------------------------- /ci/scripts/msys2_system_clean.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership.
The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -eux # fail fast, treat unset variables (e.g. MINGW_PACKAGE_PREFIX) as errors, echo commands 21 | # Remove the listed ${MINGW_PACKAGE_PREFIX}-* packages without prompting, together with their dependants (--cascade), no-longer-needed dependencies (--recursive) and saved configuration (--nosave). 22 | pacman \ 23 | --cascade \ 24 | --noconfirm \ 25 | --nosave \ 26 | --recursive \ 27 | --remove \ 28 | ${MINGW_PACKAGE_PREFIX}-clang-tools-extra \ 29 | ${MINGW_PACKAGE_PREFIX}-gcc-ada \ 30 | ${MINGW_PACKAGE_PREFIX}-gcc-fortran \ 31 | ${MINGW_PACKAGE_PREFIX}-gcc-libgfortran \ 32 | ${MINGW_PACKAGE_PREFIX}-gcc-objc \ 33 | ${MINGW_PACKAGE_PREFIX}-libgccjit 34 | -------------------------------------------------------------------------------- /ci/scripts/msys2_system_upgrade.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied.
See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -eux # fail fast, treat unset variables as errors, echo commands 21 | 22 | # https://www.msys2.org/news/#2020-06-29-new-packagers 23 | msys2_repo_base_url=https://repo.msys2.org/msys 24 | # Mirror: this second assignment overrides the URL above, so the keyring is downloaded from SourceForge 25 | msys2_repo_base_url=https://sourceforge.net/projects/msys2/files/REPOS/MSYS2 26 | msys2_keyring_pkg=msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz 27 | for suffix in "" ".sig"; do 28 | curl \ 29 | --location \ 30 | --remote-name \ 31 | --show-error \ 32 | --silent \ 33 | ${msys2_repo_base_url}/x86_64/${msys2_keyring_pkg}${suffix} 34 | done 35 | pacman-key --verify ${msys2_keyring_pkg}.sig 36 | pacman \ 37 | --noconfirm \ 38 | --upgrade \ 39 | ${msys2_keyring_pkg} 40 | 41 | # Full system upgrade; --refresh and --sysupgrade are each given twice on purpose (the long form of pacman -Syyuu) 42 | pacman \ 43 | --noconfirm \ 44 | --refresh \ 45 | --refresh \ 46 | --sync \ 47 | --sysupgrade \ 48 | --sysupgrade 49 | -------------------------------------------------------------------------------- /ci/scripts/python_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # Check the ASV benchmarking setup.
21 | # Unfortunately this won't ensure that all benchmarks succeed 22 | # (see https://github.com/airspeed-velocity/asv/issues/449) 23 | source deactivate # NOTE(review): this script has no "set -e", so a failing step does not stop the run - confirm that is intended 24 | conda create -y -q -n pyarrow_asv python=$PYTHON_VERSION 25 | conda activate pyarrow_asv 26 | pip install -q git+https://github.com/pitrou/asv.git@customize_commands 27 | 28 | export PYARROW_WITH_PARQUET=1 29 | export PYARROW_WITH_PLASMA=1 30 | export PYARROW_WITH_ORC=0 31 | export PYARROW_WITH_GANDIVA=0 32 | 33 | pushd $ARROW_PYTHON_DIR 34 | # Workaround for https://github.com/airspeed-velocity/asv/issues/631 35 | git fetch --depth=100 origin master:master 36 | # Generate machine information (mandatory) 37 | asv machine --yes 38 | # Run benchmarks on the changeset being tested 39 | asv run --no-pull --show-stderr --quick HEAD^! 40 | popd # $ARROW_PYTHON_DIR 41 | -------------------------------------------------------------------------------- /ci/scripts/python_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | 22 | source_dir=${1}/python 23 | build_dir=${2}/python 24 | 25 | if [ !
-z "${CONDA_PREFIX}" ]; then 26 | echo -e "===\n=== Conda environment for build\n===" 27 | conda list 28 | fi 29 | 30 | export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} 31 | export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} 32 | export PYARROW_WITH_S3=${ARROW_S3:-OFF} 33 | export PYARROW_WITH_ORC=${ARROW_ORC:-OFF} 34 | export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} 35 | export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF} 36 | export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF} 37 | export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF} 38 | export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF} 39 | export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF} 40 | export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF} 41 | 42 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} 43 | 44 | pushd ${source_dir} 45 | 46 | relative_build_dir=$(realpath --relative-to=. $build_dir) 47 | 48 | # not nice, but prevents mutating the mounted source directory for docker 49 | ${PYTHON:-python} \ 50 | setup.py build --build-base $build_dir \ 51 | install --single-version-externally-managed \ 52 | --record $relative_build_dir/record.txt 53 | 54 | popd 55 | -------------------------------------------------------------------------------- /ci/scripts/python_sdist_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License.
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -eux # fail fast, treat unset variables as errors, echo commands 21 | 22 | source_dir=${1}/python 23 | 24 | pushd ${source_dir} 25 | export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-} # exported as empty when PYARROW_VERSION is unset; the ":-" form avoids a "set -u" error 26 | ${PYTHON:-python} setup.py sdist # interpreter defaults to "python" unless PYTHON is set 27 | popd 28 | -------------------------------------------------------------------------------- /ci/scripts/python_sdist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License.
19 | 20 | set -eux 21 | 22 | arrow_dir=${1} 23 | 24 | export ARROW_SOURCE_DIR=${arrow_dir} 25 | export ARROW_TEST_DATA=${arrow_dir}/testing/data 26 | export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data 27 | 28 | export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} 29 | export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} 30 | export PYARROW_WITH_S3=${ARROW_S3:-OFF} 31 | export PYARROW_WITH_ORC=${ARROW_ORC:-OFF} 32 | export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} 33 | export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF} 34 | export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF} 35 | export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF} 36 | export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF} 37 | export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF} 38 | export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF} 39 | 40 | # TODO: Users should not require ARROW_HOME and pkg-config to find Arrow C++. 41 | # Related: ARROW-9171 42 | # unset ARROW_HOME 43 | # apt purge -y pkg-config 44 | # Install the sdist for PYARROW_VERSION if given, else the first match in reverse name order; the glob is left outside the quotes so the shell expands it 45 | if [ -n "${PYARROW_VERSION:-}" ]; then 46 | sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz" 47 | else 48 | sdist=$(ls "${arrow_dir}"/python/dist/pyarrow-*.tar.gz | sort -r | head -n1) 49 | fi 50 | ${PYTHON:-python} -m pip install "${sdist}" 51 | 52 | pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow 53 | -------------------------------------------------------------------------------- /ci/scripts/python_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. 
The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | 22 | arrow_dir=${1} 23 | 24 | export ARROW_SOURCE_DIR=${arrow_dir} 25 | export ARROW_TEST_DATA=${arrow_dir}/testing/data 26 | export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data 27 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} 28 | 29 | # Enable some checks inside Python itself 30 | export PYTHONDEVMODE=1 31 | 32 | pytest -r s ${PYTEST_ARGS} --pyargs pyarrow 33 | -------------------------------------------------------------------------------- /ci/scripts/python_wheel_macos_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | set -ex 21 | 22 | source_dir=${1} 23 | 24 | : ${ARROW_S3:=ON} 25 | 26 | export PYARROW_TEST_CYTHON=OFF 27 | export PYARROW_TEST_DATASET=ON 28 | export PYARROW_TEST_GANDIVA=OFF 29 | export PYARROW_TEST_HDFS=ON 30 | export PYARROW_TEST_ORC=ON 31 | export PYARROW_TEST_PANDAS=ON 32 | export PYARROW_TEST_PARQUET=ON 33 | export PYARROW_TEST_PLASMA=ON 34 | export PYARROW_TEST_S3=${ARROW_S3} 35 | export PYARROW_TEST_TENSORFLOW=ON 36 | export PYARROW_TEST_FLIGHT=ON 37 | 38 | export ARROW_TEST_DATA=${source_dir}/testing/data 39 | export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data 40 | 41 | # Install the built wheels 42 | pip install ${source_dir}/python/dist/*.whl 43 | 44 | # Test that the modules are importable 45 | python -c " 46 | import pyarrow 47 | import pyarrow._hdfs 48 | import pyarrow.csv 49 | import pyarrow.dataset 50 | import pyarrow.flight 51 | import pyarrow.fs 52 | import pyarrow.json 53 | import pyarrow.orc 54 | import pyarrow.parquet 55 | import pyarrow.plasma 56 | " 57 | 58 | if [ "${PYARROW_TEST_S3}" == "ON" ]; then 59 | python -c "import pyarrow._s3fs" 60 | fi 61 | 62 | # Install testing dependencies 63 | pip install -r ${source_dir}/python/requirements-wheel-test.txt 64 | 65 | # Execute unittest 66 | pytest -r s --pyargs pyarrow 67 | -------------------------------------------------------------------------------- /ci/scripts/python_wheel_windows_test.bat: -------------------------------------------------------------------------------- 1 | @rem Licensed to the Apache Software Foundation (ASF) under one 2 | @rem or more contributor license agreements. See the NOTICE file 3 | @rem distributed with this work for additional information 4 | @rem regarding copyright ownership. The ASF licenses this file 5 | @rem to you under the Apache License, Version 2.0 (the 6 | @rem "License"); you may not use this file except in compliance 7 | @rem with the License. 
You may obtain a copy of the License at 8 | @rem 9 | @rem http://www.apache.org/licenses/LICENSE-2.0 10 | @rem 11 | @rem Unless required by applicable law or agreed to in writing, 12 | @rem software distributed under the License is distributed on an 13 | @rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | @rem KIND, either express or implied. See the License for the 15 | @rem specific language governing permissions and limitations 16 | @rem under the License. 17 | 18 | @echo on 19 | 20 | set PYARROW_TEST_CYTHON=OFF 21 | set PYARROW_TEST_DATASET=ON 22 | set PYARROW_TEST_GANDIVA=OFF 23 | set PYARROW_TEST_HDFS=ON 24 | set PYARROW_TEST_ORC=OFF 25 | set PYARROW_TEST_PANDAS=ON 26 | set PYARROW_TEST_PARQUET=ON 27 | set PYARROW_TEST_PLASMA=OFF 28 | set PYARROW_TEST_S3=OFF 29 | set PYARROW_TEST_TENSORFLOW=ON 30 | set PYARROW_TEST_FLIGHT=ON 31 | 32 | set ARROW_TEST_DATA=C:\arrow\testing\data 33 | set PARQUET_TEST_DATA=C:\arrow\submodules\parquet-testing\data 34 | 35 | @REM Install the built wheels 36 | python -m pip install numpy 37 | python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 38 | 39 | @REM Test that the modules are importable 40 | python -c "import pyarrow" 41 | python -c "import pyarrow._hdfs" 42 | python -c "import pyarrow._s3fs" 43 | python -c "import pyarrow.csv" 44 | python -c "import pyarrow.dataset" 45 | python -c "import pyarrow.flight" 46 | python -c "import pyarrow.fs" 47 | python -c "import pyarrow.json" 48 | python -c "import pyarrow.parquet" 49 | 50 | @REM Install testing dependencies 51 | pip install -r C:\arrow\python\requirements-wheel-test.txt 52 | 53 | @REM Execute unittest 54 | pytest -r s --pyargs pyarrow 55 | -------------------------------------------------------------------------------- /ci/scripts/r_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more 
contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | set -ex 20 | 21 | : ${R_BIN:=R} 22 | source_dir=${1}/r 23 | with_docs=${2:-false} 24 | 25 | pushd ${source_dir} 26 | 27 | ${R_BIN} CMD INSTALL . 28 | 29 | if [ "${with_docs}" == "true" ]; then 30 | ${R_BIN} -e "pkgdown::build_site(install = FALSE)" 31 | fi 32 | 33 | popd -------------------------------------------------------------------------------- /ci/scripts/r_deps.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | set -ex 20 | 21 | : ${R_BIN:=R} 22 | 23 | source_dir=${1}/r 24 | 25 | pushd ${source_dir} 26 | 27 | # Install R package dependencies 28 | ${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys'))" 29 | ${R_BIN} -e "remotes::install_deps(dependencies = TRUE)" 30 | 31 | popd 32 | -------------------------------------------------------------------------------- /ci/scripts/r_pkgdown_check.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | # Use this script to prevent errors in the pkgdown site being rendered due to missing YAML entries 21 | 22 | # all .Rd files in the repo 23 | all_rd_files=`find ./r/man -maxdepth 1 -name "*.Rd" | sed -e 's/.\/r\/man\///g' | sed -e 's/\.Rd$//' | sort` 24 | 25 | # .Rd files to exclude from search (i.e. are internal); -F matches the \keyword{internal} tag literally 26 | exclusions=`grep -F '\keyword{internal}' -rl ./r/man --include='*.Rd' | sed -e 's/.\/r\/man\///g' | sed -e 's/\.Rd$//' | sort` 27 | 28 | # .Rd files to check against pkgdown.yml 29 | rd_files=`echo ${exclusions[@]} ${all_rd_files[@]} | tr ' ' '\n' | sort | uniq -u` 30 | 31 | # pkgdown sections 32 | pkgdown_sections=`awk '/^[^ ]/{ f=/reference:/; next } f{ if (sub(/:$/,"")) pkg=$2; else print pkg, $2 }' ./r/_pkgdown.yml | grep -v "title:" | sort` 33 | 34 | # get things that appear in man files that don't appear in pkgdown sections 35 | pkgdown_missing=`echo ${pkgdown_sections[@]} ${pkgdown_sections[@]} ${rd_files[@]} | tr ' ' '\n' | sort | uniq -u` 36 | 37 | # if any sections are missing raise an error 38 | if ([ ${#pkgdown_missing} -ge 1 ]); then 39 | echo "Error! $pkgdown_missing missing from ./r/_pkgdown.yml" 40 | exit 1 41 | fi 42 | -------------------------------------------------------------------------------- /ci/scripts/r_sanitize.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | 19 | set -ex 20 | 21 | : ${R_BIN:=RDsan} 22 | 23 | source_dir=${1}/r 24 | 25 | ${R_BIN} CMD INSTALL ${source_dir} 26 | pushd ${source_dir}/tests 27 | 28 | export TEST_R_WITH_ARROW=TRUE 29 | export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp" 30 | ${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } 31 | 32 | cat testthat.out 33 | if grep -q "runtime error" testthat.out; then 34 | exit 1 35 | fi 36 | popd 37 | -------------------------------------------------------------------------------- /ci/scripts/release_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | set -eux 21 | 22 | arrow_dir=${1} 23 | 24 | pushd ${arrow_dir} 25 | 26 | dev/release/run-test.rb 27 | 28 | popd 29 | -------------------------------------------------------------------------------- /ci/scripts/ruby_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | 22 | source_dir=${1}/ruby 23 | build_dir=${2}/ruby 24 | 25 | export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} 26 | export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig 27 | export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 28 | 29 | rake -f ${source_dir}/Rakefile BUILD_DIR=${build_dir} USE_BUNDLER=yes 30 | -------------------------------------------------------------------------------- /ci/scripts/rust_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. 
The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | 22 | arrow_dir=${1} 23 | source_dir=${arrow_dir}/rust 24 | # This file is used to build the rust binaries needed for the 25 | # archery integration tests. Testing of the rust implementation 26 | # in normal CI is handled by github workflows 27 | 28 | # Disable full debug symbol generation to speed up CI build / reduce memory required 29 | export RUSTFLAGS="-C debuginfo=1" 30 | # Test data paths are resolved under the directory passed as the first argument 31 | export ARROW_TEST_DATA=${arrow_dir}/testing/data 32 | export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data 33 | 34 | # show activated toolchain 35 | rustup show 36 | 37 | pushd ${source_dir} 38 | 39 | # build only the integration testing binaries 40 | cargo build -p arrow-integration-testing 41 | 42 | # Remove incremental build artifacts to save space 43 | rm -rf target/debug/deps/ target/debug/build/ 44 | 45 | popd 46 | -------------------------------------------------------------------------------- /ci/scripts/rust_clippy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. 
See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. 
The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | cargo clippy --all-targets --workspace -- -D warnings 22 | -------------------------------------------------------------------------------- /ci/scripts/rust_fmt.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | set -ex 21 | cargo fmt --all -- --check 22 | -------------------------------------------------------------------------------- /ci/scripts/rust_toml_fmt.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -ex 21 | find . -mindepth 2 -name 'Cargo.toml' -exec cargo tomlfmt -k -p {} \; -------------------------------------------------------------------------------- /ci/scripts/util_checkout.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # this script is github actions specific to check out the submodules and tags 21 | 22 | # TODO(kszucs): remove it once the "submodules: recursive" feature is released 23 | auth_header="$(git config --local --get http.https://github.com/.extraheader)" 24 | git submodule sync --recursive 25 | git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1 26 | 27 | # fetch all the tags 28 | git fetch --depth=1 origin +refs/tags/*:refs/tags/* 29 | -------------------------------------------------------------------------------- /ci/scripts/util_cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. 
See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # This script is Github Actions-specific to free up disk space, 21 | # to avoid disk full errors on some builds 22 | # RUNNER_OS is quoted and defaulted to empty so the test stays well-formed when the variable is unset 23 | if [ "${RUNNER_OS:-}" = "Linux" ]; then 24 | df -h 25 | 26 | # remove swap 27 | sudo swapoff -a 28 | sudo rm -f /swapfile 29 | 30 | # clean apt cache 31 | sudo apt clean 32 | 33 | # remove haskell, consumes 8.6 GB 34 | sudo rm -rf /opt/ghc 35 | 36 | # 1 GB 37 | sudo rm -rf /home/linuxbrew/.linuxbrew 38 | 39 | # 1+ GB 40 | sudo rm -rf /opt/hostedtoolcache/CodeQL 41 | 42 | # 1+ GB 43 | sudo rm -rf /usr/share/swift 44 | 45 | # 12 GB, but takes a lot of time to delete 46 | #sudo rm -rf /usr/local/lib/android 47 | 48 | # remove cached docker images, around 13 GB 49 | docker rmi $(docker image ls -aq) 50 | 51 | # NOTE: /usr/share/dotnet is 25 GB 52 | fi 53 | 54 | df -h 55 | -------------------------------------------------------------------------------- /ci/scripts/util_download_apache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 
18 | 19 | if [ "$#" -ne 2 ]; then 20 | echo "Usage: $0 <apache tarball path> <target directory>" 21 | exit 1 22 | fi 23 | 24 | tarball_path=$1 25 | target_dir=$2 26 | 27 | APACHE_MIRRORS=( 28 | "http://www.apache.org/dyn/closer.cgi?action=download&filename=" 29 | "https://downloads.apache.org" 30 | "https://apache.claz.org" 31 | "https://apache.cs.utah.edu" 32 | "https://apache.mirrors.lucidnetworks.net" 33 | "https://apache.osuosl.org" 34 | "https://ftp.wayne.edu/apache" 35 | "https://mirror.olnevhost.net/pub/apache" 36 | "https://mirrors.gigenet.com/apache" 37 | "https://mirrors.koehn.com/apache" 38 | "https://mirrors.ocf.berkeley.edu/apache" 39 | "https://mirrors.sonic.net/apache" 40 | "https://us.mirrors.quenda.co/apache" 41 | ) 42 | 43 | mkdir -p "${target_dir}" 44 | # Try each mirror in order and stop at the first successful extraction; the array expansion is quoted so URLs containing "?" are not globbed or word-split 45 | for mirror in "${APACHE_MIRRORS[@]}" 46 | do 47 | curl -SL "${mirror}/${tarball_path}" | tar -xzf - -C "${target_dir}" 48 | if [ $? == 0 ]; then 49 | exit 0 50 | fi 51 | done 52 | 53 | exit 1 54 | -------------------------------------------------------------------------------- /ci/vcpkg/arm64-linux-static-debug.cmake: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | set(VCPKG_TARGET_ARCHITECTURE arm64) 19 | set(VCPKG_CRT_LINKAGE dynamic) 20 | set(VCPKG_LIBRARY_LINKAGE static) 21 | set(VCPKG_CMAKE_SYSTEM_NAME Linux) 22 | set(VCPKG_BUILD_TYPE debug) 23 | 24 | if(NOT CMAKE_HOST_SYSTEM_PROCESSOR) 25 | execute_process(COMMAND "uname" "-m" 26 | OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR 27 | OUTPUT_STRIP_TRAILING_WHITESPACE) 28 | endif() 29 | -------------------------------------------------------------------------------- /ci/vcpkg/arm64-linux-static-release.cmake: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | set(VCPKG_TARGET_ARCHITECTURE arm64) 19 | set(VCPKG_CRT_LINKAGE dynamic) 20 | set(VCPKG_LIBRARY_LINKAGE static) 21 | set(VCPKG_CMAKE_SYSTEM_NAME Linux) 22 | set(VCPKG_BUILD_TYPE release) 23 | 24 | if(NOT CMAKE_HOST_SYSTEM_PROCESSOR) 25 | execute_process(COMMAND "uname" "-m" 26 | OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR 27 | OUTPUT_STRIP_TRAILING_WHITESPACE) 28 | endif() 29 | -------------------------------------------------------------------------------- /ci/vcpkg/x64-linux-static-debug.cmake: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | set(VCPKG_TARGET_ARCHITECTURE x64) 19 | set(VCPKG_CRT_LINKAGE dynamic) 20 | set(VCPKG_LIBRARY_LINKAGE static) 21 | 22 | set(VCPKG_CMAKE_SYSTEM_NAME Linux) 23 | 24 | set(VCPKG_BUILD_TYPE debug) 25 | -------------------------------------------------------------------------------- /ci/vcpkg/x64-linux-static-release.cmake: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | set(VCPKG_TARGET_ARCHITECTURE x64) 19 | set(VCPKG_CRT_LINKAGE dynamic) 20 | set(VCPKG_LIBRARY_LINKAGE static) 21 | 22 | set(VCPKG_CMAKE_SYSTEM_NAME Linux) 23 | 24 | set(VCPKG_BUILD_TYPE release) 25 | -------------------------------------------------------------------------------- /ci/vcpkg/x64-osx-static-debug.cmake: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | set(VCPKG_TARGET_ARCHITECTURE x64) 19 | set(VCPKG_CRT_LINKAGE dynamic) 20 | set(VCPKG_LIBRARY_LINKAGE static) 21 | 22 | set(VCPKG_CMAKE_SYSTEM_NAME Darwin) 23 | set(VCPKG_OSX_ARCHITECTURES x86_64) 24 | 25 | set(VCPKG_BUILD_TYPE debug) 26 | -------------------------------------------------------------------------------- /ci/vcpkg/x64-osx-static-release.cmake: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | set(VCPKG_TARGET_ARCHITECTURE x64) 19 | set(VCPKG_CRT_LINKAGE dynamic) 20 | set(VCPKG_LIBRARY_LINKAGE static) 21 | 22 | set(VCPKG_CMAKE_SYSTEM_NAME Darwin) 23 | set(VCPKG_OSX_ARCHITECTURES x86_64) 24 | 25 | set(VCPKG_BUILD_TYPE release) 26 | -------------------------------------------------------------------------------- /ci/vcpkg/x64-windows-static-md-debug.cmake: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | set(VCPKG_TARGET_ARCHITECTURE x64) 19 | set(VCPKG_CRT_LINKAGE dynamic) 20 | set(VCPKG_LIBRARY_LINKAGE static) 21 | 22 | set(VCPKG_BUILD_TYPE debug) 23 | -------------------------------------------------------------------------------- /ci/vcpkg/x64-windows-static-md-release.cmake: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | set(VCPKG_TARGET_ARCHITECTURE x64) 19 | set(VCPKG_CRT_LINKAGE dynamic) 20 | set(VCPKG_LIBRARY_LINKAGE static) 21 | 22 | set(VCPKG_BUILD_TYPE release) 23 | -------------------------------------------------------------------------------- /dev/build-ballista-docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -e 21 | 22 | RELEASE_FLAG=${RELEASE_FLAG:=release} 23 | 24 | ./dev/build-ballista-executables.sh 25 | 26 | docker-compose build 27 | 28 | . ./dev/build-set-env.sh 29 | docker build -t "apache/arrow-ballista-standalone:$BALLISTA_VERSION" -f dev/docker/ballista-standalone.Dockerfile . 30 | 31 | docker tag ballista-executor "apache/arrow-ballista-executor:$BALLISTA_VERSION" 32 | docker tag ballista-scheduler "apache/arrow-ballista-scheduler:$BALLISTA_VERSION" 33 | docker tag ballista-benchmarks "apache/arrow-ballista-benchmarks:$BALLISTA_VERSION" 34 | 35 | docker build -t "apache/arrow-ballista-cli:$BALLISTA_VERSION" -f dev/docker/ballista-cli.Dockerfile . 
36 | -------------------------------------------------------------------------------- /dev/build-ballista-executables.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -e 21 | 22 | RELEASE_FLAG=${RELEASE_FLAG:=release} 23 | 24 | # TODO: it would be very nice if we could make CI work the exact same way so the build logic isn't duplicated 25 | 26 | # build a docker container in which to run the build - this is to make life easier for Windows & Mac users 27 | docker build -t ballista-builder --build-arg EXT_UID="$(id -u)" -f dev/docker/ballista-builder.Dockerfile . 28 | 29 | # run cargo & yarn builds inside the builder container; the expansions are quoted so a checkout path containing spaces is not word-split 30 | docker run -v "$(pwd)":/home/builder/workspace --env RELEASE_FLAG="$RELEASE_FLAG" ballista-builder 31 | -------------------------------------------------------------------------------- /dev/build-set-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. 
See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | cd ballista/core/ 21 | export BALLISTA_VERSION=$(cargo pkgid | cut '-d@' -f2) 22 | cd - 23 | -------------------------------------------------------------------------------- /dev/docker/ballista-benchmarks.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | FROM ubuntu:22.04 19 | 20 | ARG RELEASE_FLAG=release 21 | 22 | ENV RELEASE_FLAG=${RELEASE_FLAG} 23 | ENV RUST_LOG=info 24 | ENV RUST_BACKTRACE=full 25 | 26 | COPY target/$RELEASE_FLAG/ballista-scheduler /root/ballista-scheduler 27 | COPY target/$RELEASE_FLAG/ballista-executor /root/ballista-executor 28 | COPY target/$RELEASE_FLAG/tpch /root/tpch 29 | 30 | COPY benchmarks/run.sh /root/run.sh 31 | COPY benchmarks/queries/ /root/benchmarks/queries 32 | 33 | WORKDIR /root 34 | 35 | CMD ["/root/run.sh"] -------------------------------------------------------------------------------- /dev/docker/ballista-builder.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | FROM rust:1-buster 19 | 20 | ARG EXT_UID 21 | 22 | ENV RUST_LOG=info 23 | ENV RUST_BACKTRACE=full 24 | ENV DEBIAN_FRONTEND=noninteractive 25 | 26 | RUN apt-get update && \ 27 | apt-get -y install libssl-dev openssl zlib1g zlib1g-dev libpq-dev cmake protobuf-compiler netcat curl unzip \ 28 | nodejs npm && \ 29 | npm install -g yarn 30 | 31 | # create the build user with the UID passed in through the EXT_UID build arg 32 | RUN adduser -q -u $EXT_UID builder --home /home/builder && \ 33 | mkdir -p /home/builder/workspace 34 | USER builder 35 | 36 | ENV NODE_VER=18.9.0 37 | ENV HOME=/home/builder 38 | ENV PATH=$HOME/.cargo/bin:$PATH 39 | 40 | # prepare rust 41 | RUN rustup update && \ 42 | rustup component add rustfmt && \ 43 | cargo install cargo-chef --version 0.1.34 44 | 45 | WORKDIR /home/builder/workspace 46 | 47 | COPY dev/docker/builder-entrypoint.sh /home/builder 48 | ENTRYPOINT ["/home/builder/builder-entrypoint.sh"] 49 | -------------------------------------------------------------------------------- /dev/docker/ballista-cli.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | FROM ubuntu:22.04 19 | 20 | ARG RELEASE_FLAG=release 21 | 22 | ENV RELEASE_FLAG=${RELEASE_FLAG} 23 | ENV RUST_LOG=info 24 | ENV RUST_BACKTRACE=full 25 | 26 | COPY target/$RELEASE_FLAG/ballista-cli /root/ballista-cli 27 | 28 | COPY dev/docker/cli-entrypoint.sh /root/cli-entrypoint.sh 29 | ENTRYPOINT ["/root/cli-entrypoint.sh"] 30 | -------------------------------------------------------------------------------- /dev/docker/ballista-executor.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | FROM ubuntu:22.04 19 | 20 | ARG RELEASE_FLAG=release 21 | 22 | ENV RELEASE_FLAG=${RELEASE_FLAG} 23 | ENV RUST_LOG=info 24 | ENV RUST_BACKTRACE=full 25 | 26 | RUN apt-get update && apt-get install -y netcat 27 | 28 | COPY target/$RELEASE_FLAG/ballista-executor /root/ballista-executor 29 | 30 | # Expose Ballista Executor gRPC port 31 | EXPOSE 50051 32 | 33 | COPY dev/docker/executor-entrypoint.sh /root/executor-entrypoint.sh 34 | ENTRYPOINT ["/root/executor-entrypoint.sh"] 35 | -------------------------------------------------------------------------------- /dev/docker/ballista-scheduler.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | FROM ubuntu:22.04 19 | 20 | ARG RELEASE_FLAG=release 21 | 22 | ENV RELEASE_FLAG=${RELEASE_FLAG} 23 | ENV RUST_LOG=info 24 | ENV RUST_BACKTRACE=full 25 | ENV DEBIAN_FRONTEND=noninteractive 26 | 27 | RUN apt-get update && apt-get install -y nginx netcat 28 | 29 | COPY target/$RELEASE_FLAG/ballista-scheduler /root/ballista-scheduler 30 | 31 | COPY ballista/scheduler/ui/build /var/www/html 32 | COPY dev/docker/nginx.conf /etc/nginx/sites-enabled/default 33 | 34 | # Expose Ballista Scheduler web UI port 35 | EXPOSE 80 36 | 37 | # Expose Ballista Scheduler gRPC port 38 | EXPOSE 50050 39 | 40 | COPY dev/docker/scheduler-entrypoint.sh /root/scheduler-entrypoint.sh 41 | ENTRYPOINT ["/root/scheduler-entrypoint.sh"] 42 | -------------------------------------------------------------------------------- /dev/docker/ballista-standalone.Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | FROM ubuntu:22.04 19 | 20 | LABEL org.opencontainers.image.source="https://github.com/apache/arrow-ballista" 21 | LABEL org.opencontainers.image.description="Apache Arrow Ballista Distributed SQL Query Engine" 22 | LABEL org.opencontainers.image.licenses="Apache-2.0" 23 | 24 | ARG RELEASE_FLAG=release 25 | 26 | ENV RELEASE_FLAG=${RELEASE_FLAG} 27 | ENV RUST_LOG=info 28 | ENV RUST_BACKTRACE=full 29 | ENV DEBIAN_FRONTEND=noninteractive 30 | 31 | RUN apt-get -qq update && apt-get install -qq -y nginx netcat wget 32 | 33 | COPY target/$RELEASE_FLAG/ballista-scheduler /root/ballista-scheduler 34 | COPY target/$RELEASE_FLAG/ballista-executor /root/ballista-executor 35 | 36 | RUN chmod a+x /root/ballista-scheduler && \ 37 | chmod a+x /root/ballista-executor 38 | 39 | # populate some sample data for ListingSchemaProvider 40 | RUN mkdir -p /data && \ 41 | wget -q https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2022-01.parquet -P /data/ 42 | ENV DATAFUSION_CATALOG_LOCATION=/data 43 | ENV DATAFUSION_CATALOG_TYPE=csv 44 | 45 | COPY ballista/scheduler/ui/build /var/www/html 46 | COPY dev/docker/nginx.conf /etc/nginx/sites-enabled/default 47 | 48 | # Expose Ballista Scheduler web UI port 49 | EXPOSE 80 50 | 51 | # Expose Ballista Scheduler gRPC port 52 | EXPOSE 50050 53 | 54 | # Expose Ballista Executor gRPC port 55 | EXPOSE 50051 56 | 57 | COPY dev/docker/standalone-entrypoint.sh /root/standalone-entrypoint.sh 58 | ENTRYPOINT ["/root/standalone-entrypoint.sh"] 59 | -------------------------------------------------------------------------------- /dev/docker/builder-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. 
The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -e 21 | set -x 22 | 23 | printenv 24 | RELEASE_FLAG=${RELEASE_FLAG:=release} 25 | cargo build --features flight-sql --profile $RELEASE_FLAG "$@" 26 | 27 | cd ballista/scheduler/ui 28 | yarn install 29 | yarn build 30 | -------------------------------------------------------------------------------- /dev/docker/cli-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | set -e 21 | 22 | /root/ballista-cli "$@" 23 | -------------------------------------------------------------------------------- /dev/docker/executor-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -e 21 | 22 | /root/ballista-executor "$@" 23 | -------------------------------------------------------------------------------- /dev/docker/nginx.conf: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | server { 19 | listen 80 default_server; 20 | listen [::]:80 default_server; 21 | 22 | root /var/www/html; 23 | 24 | index index.html index.htm index.nginx-debian.html; 25 | 26 | server_name _; 27 | 28 | location / { 29 | # First attempt to serve request as file, then 30 | # as directory, then fall back to displaying a 404. 31 | try_files $uri $uri/ =404; 32 | } 33 | 34 | # pass REST api calls through to Ballista scheduler process 35 | location /api/ { 36 | proxy_redirect http://localhost:50050/ /api/; 37 | proxy_pass_header Server; 38 | proxy_set_header X-Real-IP $remote_addr; 39 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 40 | proxy_set_header X-Scheme $scheme; 41 | proxy_set_header Host $http_host; 42 | proxy_set_header X-NginX-Proxy true; 43 | proxy_connect_timeout 5; 44 | proxy_read_timeout 240; 45 | proxy_intercept_errors on; 46 | 47 | # no longer sure if this part is needed 48 | proxy_next_upstream error http_403 non_idempotent; 49 | proxy_next_upstream error http_502 non_idempotent; 50 | 51 | proxy_pass http://localhost:50050; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /dev/docker/scheduler-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. 
The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -e 21 | 22 | echo "Starting nginx to serve Ballista Scheduler web UI on port 80" 23 | nohup nginx -g "daemon off;" & 24 | /root/ballista-scheduler "$@" 25 | -------------------------------------------------------------------------------- /dev/docker/standalone-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | set -e 21 | 22 | echo "Starting nginx web UI..." 
23 | nohup nginx -g "daemon off;" & 24 | 25 | echo "Starting for scheduler..." 26 | /root/ballista-scheduler & 27 | while ! nc -z 127.0.0.1 50050; do 28 | sleep 1 29 | done 30 | 31 | echo "Starting executor" 32 | /root/ballista-executor 33 | -------------------------------------------------------------------------------- /dev/integration-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # End-to-end benchmark run: generate data with benchmarks/tpch-gen.sh, build the 21 | # Docker images, start the docker-compose services and run the benchmark client. 22 | set -e 23 | 24 | echo "Generating benchmark data ..." 25 | pushd benchmarks 26 | ./tpch-gen.sh 27 | popd 28 | 29 | echo "Building Docker images ..." 30 | ./dev/build-ballista-docker.sh 31 | 32 | # Stop docker-compose on every exit path, including a failed benchmark run under 33 | # `set -e`; the EXIT trap does not call `exit`, so the failing status is preserved. 34 | trap 'echo "Stopping docker-compose ..."; docker-compose down' EXIT 35 | 36 | echo "Starting docker-compose in background ..." 37 | docker-compose up -d 38 | 39 | # give the scheduler a chance to start up 40 | echo "Sleeping (wait for scheduler to start)..." 41 | sleep 10 42 | 43 | echo "Running benchmarks ..." 
44 | docker-compose run ballista-client /root/run.sh 45 | -------------------------------------------------------------------------------- /dev/python_lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | # This script runs the Python lints (flake8 and black) locally the same way the 21 | # DataFusion CI does 22 | 23 | set -e 24 | source venv/bin/activate 25 | flake8 --exclude venv --ignore=E501,W503 26 | black --line-length 79 --diff --check . -------------------------------------------------------------------------------- /dev/release/check-rat-report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | ############################################################################## 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. 
The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | ############################################################################## 20 | import fnmatch 21 | import re 22 | import sys 23 | import xml.etree.ElementTree as ET 24 | 25 | if len(sys.argv) != 3: 26 | sys.stderr.write( 27 | "Usage: %s exclude_globs.lst rat_report.xml\n" % sys.argv[0] 28 | ) 29 | sys.exit(1) 30 | 31 | exclude_globs_filename = sys.argv[1] 32 | xml_filename = sys.argv[2] 33 | 34 | globs = [line.strip() for line in open(exclude_globs_filename, "r")] 35 | 36 | tree = ET.parse(xml_filename) 37 | root = tree.getroot() 38 | resources = root.findall("resource") 39 | 40 | all_ok = True 41 | for r in resources: 42 | approvals = r.findall("license-approval") 43 | if not approvals or approvals[0].attrib["name"] == "true": 44 | continue 45 | clean_name = re.sub("^[^/]+/", "", r.attrib["name"]) 46 | excluded = False 47 | for g in globs: 48 | if fnmatch.fnmatch(clean_name, g): 49 | excluded = True 50 | break 51 | if not excluded: 52 | sys.stdout.write( 53 | "NOT APPROVED: %s (%s): %s\n" 54 | % (clean_name, r.attrib["name"], approvals[0].attrib["name"]) 55 | ) 56 | all_ok = False 57 | 58 | if not all_ok: 59 | sys.exit(1) 60 | 61 | print("OK") 62 | sys.exit(0) 63 | -------------------------------------------------------------------------------- /dev/release/crate-deps.dot: 
-------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | digraph G { 19 | 20 | ballista_core 21 | ballista_scheduler 22 | ballista_executor 23 | ballista 24 | ballista_cli 25 | 26 | ballista_scheduler -> ballista_core 27 | 28 | ballista_executor -> ballista_core 29 | 30 | ballista -> ballista_core 31 | ballista -> ballista_scheduler 32 | ballista -> ballista_executor 33 | 34 | ballista_cli -> ballista 35 | 36 | } 37 | -------------------------------------------------------------------------------- /dev/release/rat_exclude_files.txt: -------------------------------------------------------------------------------- 1 | *.npmrc 2 | *.gitignore 3 | *.dockerignore 4 | .gitmodules 5 | *_generated.js 6 | *_generated.ts 7 | *.csv 8 | *.json 9 | *.snap 10 | .github/ISSUE_TEMPLATE/*.md 11 | .github/pull_request_template.md 12 | ci/etc/rprofile 13 | ci/etc/*.patch 14 | ci/vcpkg/*.patch 15 | CHANGELOG.md 16 | ballista/CHANGELOG.md 17 | python/CHANGELOG.md 18 | dev/requirements*.txt 19 | dev/release/rat_exclude_files.txt 20 | helm/ballista/Chart.lock 21 | pax_global_header 22 | MANIFEST.in 23 | 
__init__.pxd 24 | __init__.py 25 | requirements.txt 26 | *.html 27 | *.sgml 28 | *.css 29 | *.png 30 | *.ico 31 | *.svg 32 | *.devhelp2 33 | *.scss 34 | .gitattributes 35 | benchmarks/queries/q*.sql 36 | ballista/scheduler/testdata/* 37 | **/yarn.lock 38 | requirements*.txt 39 | **/testdata/* 40 | benchmarks/queries/* 41 | benchmarks/data/* 42 | ci/* 43 | **/*.svg 44 | **/*.csv 45 | **/*.json 46 | **/*.sql 47 | venv/* 48 | testing/* 49 | target/* 50 | **/target/* 51 | Cargo.lock 52 | **/Cargo.lock 53 | .history 54 | parquet-testing/* 55 | *rat.txt 56 | ballista/core/src/serde/generated/ballista.rs -------------------------------------------------------------------------------- /dev/release/run-rat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | RAT_VERSION=0.13 22 | 23 | # download apache rat 24 | if [ ! 
-f apache-rat-${RAT_VERSION}.jar ]; then 25 | curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar > apache-rat-${RAT_VERSION}.jar 26 | fi 27 | 28 | RAT="java -jar apache-rat-${RAT_VERSION}.jar -x " 29 | 30 | RELEASE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) 31 | 32 | # generate the rat report 33 | $RAT $1 > rat.txt 34 | python $RELEASE_DIR/check-rat-report.py $RELEASE_DIR/rat_exclude_files.txt rat.txt > filtered_rat.txt 35 | cat filtered_rat.txt 36 | UNAPPROVED=`cat filtered_rat.txt | grep "NOT APPROVED" | wc -l` 37 | 38 | if [ "0" -eq "${UNAPPROVED}" ]; then 39 | echo "No unapproved licenses" 40 | else 41 | echo "${UNAPPROVED} unapproved licences. Check rat report: rat.txt" 42 | exit 1 43 | fi 44 | -------------------------------------------------------------------------------- /dev/release/update_change_log-ballista.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | # 20 | 21 | # Usage: 22 | # CHANGELOG_GITHUB_TOKEN= ./update_change_log-ballista.sh main 0.7.0 0.6.0 23 | 24 | RELEASE_BRANCH=$1 25 | RELEASE_TAG=$2 26 | BASE_TAG=$3 27 | 28 | SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 29 | ${SOURCE_DIR}/update_change_log.sh \ 30 | ballista \ 31 | "${BASE_TAG}" \ 32 | --exclude-tags-regex "python-.+" \ 33 | --future-release "${RELEASE_TAG}" \ 34 | --release-branch "${RELEASE_BRANCH}" 35 | -------------------------------------------------------------------------------- /dev/rust_lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | set -e 20 | if ! 
command -v cargo-tomlfmt &> /dev/null; then 21 | echo "Installing cargo-tomlfmt using cargo" 22 | cargo install cargo-tomlfmt 23 | fi 24 | 25 | ci/scripts/rust_fmt.sh 26 | ci/scripts/rust_clippy.sh 27 | ci/scripts/rust_toml_fmt.sh -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | build 19 | source/python/generated 20 | venv/ 21 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # 19 | # Minimal makefile for Sphinx documentation 20 | # 21 | 22 | # You can set these variables from the command line, and also 23 | # from the environment for the first two. 24 | SPHINXOPTS ?= 25 | SPHINXBUILD ?= sphinx-build 26 | SOURCEDIR = source 27 | BUILDDIR = build 28 | 29 | # Put it first so that "make" without argument is like "make help". 30 | help: 31 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 32 | 33 | .PHONY: help Makefile 34 | 35 | # Catch-all target: route all unknown targets to Sphinx using the new 36 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 37 | %: Makefile 38 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 39 | -------------------------------------------------------------------------------- /docs/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | 20 | rm -rf build 21 | make html 22 | -------------------------------------------------------------------------------- /docs/developer/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Ballista Developer Documentation 21 | 22 | - Read the [Architecture Overview](architecture.md) to get an understanding of the scheduler and executor 23 | processes and how distributed query execution works. 24 | - Watch the [Ballista: Distributed Compute with Rust and Apache Arrow](https://www.youtube.com/watch?v=ZZHQaOap9pQ) 25 | talk from the New York Open Statistical Programming Meetup (Feb 2021) 26 | -------------------------------------------------------------------------------- /docs/developer/images/query-execution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datafusion-ballista-python/30a47ae960ff9766fb300fe9908b1aaf8d479aac/docs/developer/images/query-execution.png -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @rem Licensed to the Apache Software Foundation (ASF) under one 2 | @rem or more contributor license agreements. See the NOTICE file 3 | @rem distributed with this work for additional information 4 | @rem regarding copyright ownership. 
The ASF licenses this file 5 | @rem to you under the Apache License, Version 2.0 (the 6 | @rem "License"); you may not use this file except in compliance 7 | @rem with the License. You may obtain a copy of the License at 8 | @rem 9 | @rem http://www.apache.org/licenses/LICENSE-2.0 10 | @rem 11 | @rem Unless required by applicable law or agreed to in writing, 12 | @rem software distributed under the License is distributed on an 13 | @rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | @rem KIND, either express or implied. See the License for the 15 | @rem specific language governing permissions and limitations 16 | @rem under the License. 17 | 18 | @ECHO OFF 19 | 20 | pushd %~dp0 21 | 22 | REM Command file for Sphinx documentation 23 | 24 | if "%SPHINXBUILD%" == "" ( 25 | set SPHINXBUILD=sphinx-build 26 | ) 27 | set SOURCEDIR=source 28 | set BUILDDIR=build 29 | 30 | if "%1" == "" goto help 31 | 32 | %SPHINXBUILD% >NUL 2>NUL 33 | if errorlevel 9009 ( 34 | echo. 35 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 36 | echo.installed, then set the SPHINXBUILD environment variable to point 37 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 38 | echo.may add the Sphinx directory to PATH. 39 | echo. 40 | echo.If you don't have Sphinx installed, grab it from 41 | echo.http://sphinx-doc.org/ 42 | exit /b 1 43 | ) 44 | 45 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 46 | goto end 47 | 48 | :help 49 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 50 | 51 | :end 52 | popd 53 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | sphinx==2.4.4 19 | pydata-sphinx-theme 20 | myst-parser<1 21 | maturin<0.12 22 | -------------------------------------------------------------------------------- /docs/source/_static/images/ballista-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datafusion-ballista-python/30a47ae960ff9766fb300fe9908b1aaf8d479aac/docs/source/_static/images/ballista-logo.png -------------------------------------------------------------------------------- /docs/source/_templates/docs-sidebar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 10 | 11 | 20 | -------------------------------------------------------------------------------- /docs/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "pydata_sphinx_theme/layout.html" %} 2 | 3 | {# Silence the navbar #} 4 | {% block docs_navbar %} 5 | {% endblock %} 6 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. 
Licensed to the Apache Software Foundation (ASF) under one 2 | .. or more contributor license agreements. See the NOTICE file 3 | .. distributed with this work for additional information 4 | .. regarding copyright ownership. The ASF licenses this file 5 | .. to you under the Apache License, Version 2.0 (the 6 | .. "License"); you may not use this file except in compliance 7 | .. with the License. You may obtain a copy of the License at 8 | 9 | .. http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | .. Unless required by applicable law or agreed to in writing, 12 | .. software distributed under the License is distributed on an 13 | .. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | .. KIND, either express or implied. See the License for the 15 | .. specific language governing permissions and limitations 16 | .. under the License. 17 | 18 | ===================== 19 | Apache Arrow Ballista 20 | ===================== 21 | 22 | Table of contents 23 | ================= 24 | 25 | 26 | .. _toc.guide: 27 | 28 | .. toctree:: 29 | :maxdepth: 1 30 | :caption: User Guide 31 | 32 | Introduction 33 | 34 | .. toctree:: 35 | :maxdepth: 1 36 | :caption: Cluster Deployment 37 | 38 | Deployment 39 | Scheduler 40 | 41 | .. toctree:: 42 | :maxdepth: 1 43 | :caption: Clients 44 | 45 | Python 46 | Rust 47 | Flight SQL JDBC 48 | SQL CLI 49 | 50 | .. toctree:: 51 | :maxdepth: 1 52 | :caption: Reference 53 | 54 | user-guide/configs 55 | user-guide/tuning-guide 56 | user-guide/faq 57 | 58 | .. _toc.source: 59 | 60 | .. toctree:: 61 | :maxdepth: 1 62 | :caption: Source Code 63 | 64 | Ballista 65 | 66 | .. _toc.community: 67 | 68 | ..
toctree:: 69 | :maxdepth: 1 70 | :caption: Community 71 | 72 | community/communication 73 | Issue tracker 74 | Code of conduct 75 | -------------------------------------------------------------------------------- /docs/source/user-guide/deployment/cargo-install.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Deploying a standalone Ballista cluster using cargo install 21 | 22 | A simple way to start a local cluster for testing purposes is to use cargo to install 23 | the scheduler and executor crates. 24 | 25 | ```bash 26 | cargo install --locked ballista-scheduler 27 | cargo install --locked ballista-executor 28 | ``` 29 | 30 | With these crates installed, it is now possible to start a scheduler process. 31 | 32 | ```bash 33 | RUST_LOG=info ballista-scheduler 34 | ``` 35 | 36 | The scheduler will bind to port 50050 by default. 37 | 38 | Next, start an executor process in a new terminal session. 39 | 40 | ```bash 41 | RUST_LOG=info ballista-executor 42 | ``` 43 | 44 | The executor will bind to port 50051 by default. Additional executors can be started by 45 | manually specifying a bind port. For example: 46 | 47 | ```bash 48 | RUST_LOG=info ballista-executor --bind-port 50052 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/source/user-guide/deployment/index.rst: -------------------------------------------------------------------------------- 1 | .. Licensed to the Apache Software Foundation (ASF) under one 2 | .. or more contributor license agreements. See the NOTICE file 3 | .. distributed with this work for additional information 4 | .. regarding copyright ownership. The ASF licenses this file 5 | .. to you under the Apache License, Version 2.0 (the 6 | .. "License"); you may not use this file except in compliance 7 | .. with the License. You may obtain a copy of the License at 8 | 9 | .. http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | ..
Unless required by applicable law or agreed to in writing, 12 | .. software distributed under the License is distributed on an 13 | .. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | .. KIND, either express or implied. See the License for the 15 | .. specific language governing permissions and limitations 16 | .. under the License. 17 | 18 | Start a Ballista Cluster 19 | ======================== 20 | 21 | .. toctree:: 22 | :maxdepth: 2 23 | 24 | Cargo Install 25 | Docker 26 | Docker Compose 27 | Kubernetes 28 | -------------------------------------------------------------------------------- /docs/source/user-guide/faq.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Frequently Asked Questions 21 | 22 | ## What is the relationship between Apache Arrow, DataFusion, and Ballista? 23 | 24 | Apache Arrow is a library which provides a standardized memory representation for columnar data. It also provides 25 | "kernels" for performing common operations on this data. 26 | 27 | DataFusion is a library for executing queries in-process using the Apache Arrow memory 28 | model and computational kernels. It is designed to run within a single process, using threads 29 | for parallel query execution. 30 | 31 | Ballista is a distributed compute platform built on DataFusion. 
32 | -------------------------------------------------------------------------------- /docs/source/user-guide/images/ballista-web-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datafusion-ballista-python/30a47ae960ff9766fb300fe9908b1aaf8d479aac/docs/source/user-guide/images/ballista-web-ui.png -------------------------------------------------------------------------------- /docs/source/user-guide/images/example-query-plan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datafusion-ballista-python/30a47ae960ff9766fb300fe9908b1aaf8d479aac/docs/source/user-guide/images/example-query-plan.png -------------------------------------------------------------------------------- /docs/source/user-guide/metrics.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Ballista Scheduler Metrics 21 | 22 | ## Prometheus 23 | 24 | Built with default features, the ballista scheduler will automatically collect and expose a standard set of prometheus metrics. 25 | The metrics currently collected automatically include: 26 | 27 | - _job_exec_time_seconds_ - Histogram of successful job execution time in seconds 28 | - _planning_time_ms_ - Histogram of job planning time in milliseconds 29 | - _failed_ - Counter of failed jobs 30 | - _job_failed_total_ - Counter of failed jobs 31 | - _job_cancelled_total_ - Counter of cancelled jobs 32 | - _job_completed_total_ - Counter of completed jobs 33 | - _job_submitted_total_ - Counter of submitted jobs 34 | - _pending_task_queue_size_ - Number of pending tasks 35 | 36 | **NOTE** Currently the histogram buckets for the above metrics are set to reasonable defaults. If the defaults are not 37 | appropriate for a given use case, the only workaround is to implement a custom `SchedulerMetricsCollector`.
In the future 38 | the buckets should be made configurable. 39 | 40 | The metrics are then exported through the scheduler REST API at `GET /api/metrics`. It should be sufficient to ingest metrics 41 | into an existing metrics system by pointing your chosen prometheus exporter at that endpoint. 42 | -------------------------------------------------------------------------------- /docs/source/user-guide/scheduler.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Ballista Scheduler 21 | 22 | ## Web User Interface 23 | 24 | The scheduler provides a web user interface that allows queries to be monitored. 25 | 26 | ![Ballista Scheduler Web UI](./images/ballista-web-ui.png) 27 | 28 | ## REST API 29 | 30 | The scheduler also provides a REST API that allows jobs to be monitored. 31 | 32 | | API | Method | Description | 33 | | --------------------- | ------ | ----------------------------------------------------------- | 34 | | /api/jobs | GET | Get a list of jobs that have been submitted to the cluster. | 35 | | /api/job/{job_id} | GET | Get a summary of a submitted job. | 36 | | /api/job/{job_id}/dot | GET | Produce a query plan in DOT (graphviz) format.
| 37 | | /api/job/{job_id} | PATCH | Cancel a currently running job | 38 | | /api/metrics | GET | Return current scheduler metric set | 39 | -------------------------------------------------------------------------------- /docs/sqlbench-h-workstation-10-distributed-perquery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datafusion-ballista-python/30a47ae960ff9766fb300fe9908b1aaf8d479aac/docs/sqlbench-h-workstation-10-distributed-perquery.png -------------------------------------------------------------------------------- /examples/dataframe-parquet.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | from ballista import BallistaContext 19 | from ballista import functions as f 20 | 21 | 22 | ctx = BallistaContext( 23 | "localhost", 50050, shuffle_partitions=16, batch_size=8192 24 | ) 25 | df = ctx.read_parquet("yellow_tripdata_2021-01.parquet").aggregate( 26 | [f.col("passenger_count")], [f.count_star()] 27 | ) 28 | df.show() 29 | -------------------------------------------------------------------------------- /examples/run-executor.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | from ballista import Executor 19 | 20 | 21 | # start an executor from this Python process 22 | exec = Executor( 23 | scheduler_host="localhost", 24 | scheduler_port=50050, 25 | bind_host="127.0.0.1", 26 | bind_port=50051, 27 | grpc_port=50052, 28 | concurrent_tasks=1, 29 | ) 30 | -------------------------------------------------------------------------------- /examples/run-scheduler.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | from ballista import Scheduler 19 | 20 | 21 | # start a scheduler from this Python process 22 | scheduler = Scheduler( 23 | bind_host="127.0.0.1", 24 | bind_port=50050, 25 | external_host="127.0.0.1", 26 | ) 27 | -------------------------------------------------------------------------------- /examples/sql-parquet.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | from ballista import BallistaContext 19 | 20 | 21 | ctx = BallistaContext( 22 | "localhost", 50050, shuffle_partitions=16, batch_size=8192 23 | ) 24 | ctx.register_parquet("taxi", "yellow_tripdata_2021-01.parquet") 25 | df = ctx.sql( 26 | "select passenger_count, count(*) from taxi where passenger_count is not null group by passenger_count order by passenger_count" 27 | ) 28 | df.show() 29 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [build-system] 19 | requires = ["maturin>=0.11,<0.12"] 20 | build-backend = "maturin" 21 | 22 | [project] 23 | name = "ballista" 24 | description = "Build and run queries against data" 25 | readme = "README.md" 26 | license = {file = "LICENSE.txt"} 27 | requires-python = ">=3.6" 28 | keywords = ["ballista", "dataframe", "rust", "query-engine"] 29 | classifiers = [ 30 | "Development Status :: 2 - Pre-Alpha", 31 | "Intended Audience :: Developers", 32 | "License :: OSI Approved :: Apache Software License", 33 | "License :: OSI Approved", 34 | "Operating System :: MacOS", 35 | "Operating System :: Microsoft :: Windows", 36 | "Operating System :: POSIX :: Linux", 37 | "Programming Language :: Python :: 3", 38 | "Programming Language :: Python :: 3.6", 39 | "Programming Language :: Python :: 3.7", 40 | "Programming Language :: Python :: 3.8", 41 | "Programming Language :: Python :: 3.9", 42 | "Programming Language :: Python :: 3.10", 43 | "Programming Language :: Python", 44 | "Programming Language :: Rust", 45 | ] 46 | dependencies = [ 47 | "pyarrow>=1", 48 | ] 49 | 50 | [project.urls] 51 | documentation = "https://arrow.apache.org/apache/arrow-ballista/python" 52 | repository = "https://github.com/apache/arrow-ballista" 53 | 54 | [tool.isort] 55 | profile = "black" 56 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License.
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | black 19 | flake8 20 | isort 21 | maturin 22 | mypy 23 | numpy 24 | pandas 25 | pyarrow 26 | pytest 27 | toml 28 | -------------------------------------------------------------------------------- /src/datatype.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | /// Copied from https://github.com/apache/arrow-datafusion-python/pull/103 19 | use datafusion::arrow::datatypes::DataType; 20 | use pyo3::pyclass; 21 | 22 | #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] 23 | #[pyclass(name = "PyDataType", module = "datafusion", subclass)] 24 | pub struct PyDataType { 25 | pub(crate) data_type: DataType, 26 | } 27 | 28 | impl From for DataType { 29 | fn from(data_type: PyDataType) -> DataType { 30 | data_type.data_type 31 | } 32 | } 33 | 34 | impl From for PyDataType { 35 | fn from(data_type: DataType) -> PyDataType { 36 | PyDataType { data_type } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use crate::errors::DataFusionError; 19 | use datafusion::logical_expr::Volatility; 20 | use pyo3::prelude::*; 21 | use std::future::Future; 22 | use tokio::runtime::Runtime; 23 | 24 | /// Utility to collect rust futures with GIL released 25 | pub fn wait_for_future(py: Python, f: F) -> F::Output 26 | where 27 | F: Send, 28 | F::Output: Send, 29 | { 30 | let rt = Runtime::new().unwrap(); 31 | py.allow_threads(|| rt.block_on(f)) 32 | } 33 | 34 | pub(crate) fn parse_volatility(value: &str) -> Result { 35 | Ok(match value { 36 | "immutable" => Volatility::Immutable, 37 | "stable" => Volatility::Stable, 38 | "volatile" => Volatility::Volatile, 39 | value => { 40 | return Err(DataFusionError::Common(format!( 41 | "Unsupportad volatility type: `{}`, supported \ 42 | values are: immutable, stable and volatile.", 43 | value 44 | ))) 45 | } 46 | }) 47 | } 48 | --------------------------------------------------------------------------------